contentctl 4.4.7__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. contentctl/__init__.py +1 -1
  2. contentctl/actions/build.py +102 -57
  3. contentctl/actions/deploy_acs.py +29 -24
  4. contentctl/actions/detection_testing/DetectionTestingManager.py +66 -42
  5. contentctl/actions/detection_testing/GitService.py +134 -76
  6. contentctl/actions/detection_testing/generate_detection_coverage_badge.py +48 -30
  7. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +192 -147
  8. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructureContainer.py +45 -32
  9. contentctl/actions/detection_testing/progress_bar.py +9 -6
  10. contentctl/actions/detection_testing/views/DetectionTestingView.py +16 -19
  11. contentctl/actions/detection_testing/views/DetectionTestingViewCLI.py +1 -5
  12. contentctl/actions/detection_testing/views/DetectionTestingViewFile.py +2 -2
  13. contentctl/actions/detection_testing/views/DetectionTestingViewWeb.py +1 -4
  14. contentctl/actions/doc_gen.py +9 -5
  15. contentctl/actions/initialize.py +45 -33
  16. contentctl/actions/inspect.py +118 -61
  17. contentctl/actions/new_content.py +155 -108
  18. contentctl/actions/release_notes.py +276 -146
  19. contentctl/actions/reporting.py +23 -19
  20. contentctl/actions/test.py +33 -28
  21. contentctl/actions/validate.py +55 -34
  22. contentctl/api.py +54 -45
  23. contentctl/contentctl.py +124 -90
  24. contentctl/enrichments/attack_enrichment.py +112 -72
  25. contentctl/enrichments/cve_enrichment.py +34 -28
  26. contentctl/enrichments/splunk_app_enrichment.py +38 -36
  27. contentctl/helper/link_validator.py +101 -78
  28. contentctl/helper/splunk_app.py +69 -41
  29. contentctl/helper/utils.py +58 -53
  30. contentctl/input/director.py +68 -36
  31. contentctl/input/new_content_questions.py +27 -35
  32. contentctl/input/yml_reader.py +28 -18
  33. contentctl/objects/abstract_security_content_objects/detection_abstract.py +303 -259
  34. contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +115 -52
  35. contentctl/objects/alert_action.py +10 -9
  36. contentctl/objects/annotated_types.py +1 -1
  37. contentctl/objects/atomic.py +65 -54
  38. contentctl/objects/base_test.py +5 -3
  39. contentctl/objects/base_test_result.py +19 -11
  40. contentctl/objects/baseline.py +62 -30
  41. contentctl/objects/baseline_tags.py +30 -24
  42. contentctl/objects/config.py +790 -597
  43. contentctl/objects/constants.py +33 -56
  44. contentctl/objects/correlation_search.py +150 -136
  45. contentctl/objects/dashboard.py +55 -41
  46. contentctl/objects/data_source.py +16 -17
  47. contentctl/objects/deployment.py +43 -44
  48. contentctl/objects/deployment_email.py +3 -2
  49. contentctl/objects/deployment_notable.py +4 -2
  50. contentctl/objects/deployment_phantom.py +7 -6
  51. contentctl/objects/deployment_rba.py +3 -2
  52. contentctl/objects/deployment_scheduling.py +3 -2
  53. contentctl/objects/deployment_slack.py +3 -2
  54. contentctl/objects/detection.py +5 -2
  55. contentctl/objects/detection_metadata.py +1 -0
  56. contentctl/objects/detection_stanza.py +7 -2
  57. contentctl/objects/detection_tags.py +58 -103
  58. contentctl/objects/drilldown.py +66 -34
  59. contentctl/objects/enums.py +81 -100
  60. contentctl/objects/errors.py +16 -24
  61. contentctl/objects/integration_test.py +3 -3
  62. contentctl/objects/integration_test_result.py +1 -0
  63. contentctl/objects/investigation.py +59 -36
  64. contentctl/objects/investigation_tags.py +30 -19
  65. contentctl/objects/lookup.py +304 -101
  66. contentctl/objects/macro.py +55 -39
  67. contentctl/objects/manual_test.py +3 -3
  68. contentctl/objects/manual_test_result.py +1 -0
  69. contentctl/objects/mitre_attack_enrichment.py +17 -16
  70. contentctl/objects/notable_action.py +2 -1
  71. contentctl/objects/notable_event.py +1 -3
  72. contentctl/objects/playbook.py +37 -35
  73. contentctl/objects/playbook_tags.py +23 -13
  74. contentctl/objects/rba.py +96 -0
  75. contentctl/objects/risk_analysis_action.py +15 -11
  76. contentctl/objects/risk_event.py +110 -160
  77. contentctl/objects/risk_object.py +1 -0
  78. contentctl/objects/savedsearches_conf.py +9 -7
  79. contentctl/objects/security_content_object.py +5 -2
  80. contentctl/objects/story.py +54 -49
  81. contentctl/objects/story_tags.py +56 -45
  82. contentctl/objects/test_attack_data.py +2 -1
  83. contentctl/objects/test_group.py +5 -2
  84. contentctl/objects/threat_object.py +1 -0
  85. contentctl/objects/throttling.py +27 -18
  86. contentctl/objects/unit_test.py +3 -4
  87. contentctl/objects/unit_test_baseline.py +5 -5
  88. contentctl/objects/unit_test_result.py +6 -6
  89. contentctl/output/api_json_output.py +233 -220
  90. contentctl/output/attack_nav_output.py +21 -21
  91. contentctl/output/attack_nav_writer.py +29 -37
  92. contentctl/output/conf_output.py +235 -172
  93. contentctl/output/conf_writer.py +201 -125
  94. contentctl/output/data_source_writer.py +38 -26
  95. contentctl/output/doc_md_output.py +53 -27
  96. contentctl/output/jinja_writer.py +19 -15
  97. contentctl/output/json_writer.py +21 -11
  98. contentctl/output/svg_output.py +56 -38
  99. contentctl/output/templates/analyticstories_detections.j2 +2 -2
  100. contentctl/output/templates/analyticstories_stories.j2 +1 -1
  101. contentctl/output/templates/collections.j2 +1 -1
  102. contentctl/output/templates/doc_detections.j2 +0 -5
  103. contentctl/output/templates/es_investigations_investigations.j2 +1 -1
  104. contentctl/output/templates/es_investigations_stories.j2 +1 -1
  105. contentctl/output/templates/savedsearches_baselines.j2 +2 -2
  106. contentctl/output/templates/savedsearches_detections.j2 +10 -11
  107. contentctl/output/templates/savedsearches_investigations.j2 +2 -2
  108. contentctl/output/templates/transforms.j2 +6 -8
  109. contentctl/output/yml_writer.py +29 -20
  110. contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml +16 -34
  111. contentctl/templates/stories/cobalt_strike.yml +1 -0
  112. {contentctl-4.4.7.dist-info → contentctl-5.0.0.dist-info}/METADATA +5 -4
  113. contentctl-5.0.0.dist-info/RECORD +168 -0
  114. {contentctl-4.4.7.dist-info → contentctl-5.0.0.dist-info}/WHEEL +1 -1
  115. contentctl/actions/initialize_old.py +0 -245
  116. contentctl/objects/event_source.py +0 -11
  117. contentctl/objects/observable.py +0 -37
  118. contentctl/output/detection_writer.py +0 -28
  119. contentctl/output/new_content_yml_output.py +0 -56
  120. contentctl/output/yml_output.py +0 -66
  121. contentctl-4.4.7.dist-info/RECORD +0 -173
  122. {contentctl-4.4.7.dist-info → contentctl-5.0.0.dist-info}/LICENSE.md +0 -0
  123. {contentctl-4.4.7.dist-info → contentctl-5.0.0.dist-info}/entry_points.txt +0 -0
@@ -1,34 +1,45 @@
1
1
  from __future__ import annotations
2
2
  from typing import List
3
- from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer
3
+ from pydantic import (
4
+ BaseModel,
5
+ Field,
6
+ field_validator,
7
+ ValidationInfo,
8
+ model_serializer,
9
+ ConfigDict,
10
+ )
4
11
  from contentctl.objects.story import Story
5
- from contentctl.objects.enums import SecurityContentInvestigationProductName, SecurityDomain
12
+ from contentctl.objects.enums import (
13
+ SecurityContentInvestigationProductName,
14
+ SecurityDomain,
15
+ )
16
+
6
17
 
7
18
  class InvestigationTags(BaseModel):
8
- analytic_story: List[Story] = Field([],min_length=1)
9
- product: List[SecurityContentInvestigationProductName] = Field(...,min_length=1)
10
- required_fields: List[str] = Field(min_length=1)
19
+ model_config = ConfigDict(extra="forbid")
20
+ analytic_story: List[Story] = Field([], min_length=1)
21
+ product: List[SecurityContentInvestigationProductName] = Field(..., min_length=1)
11
22
  security_domain: SecurityDomain = Field(...)
12
23
 
13
-
14
- @field_validator('analytic_story',mode="before")
24
+ @field_validator("analytic_story", mode="before")
15
25
  @classmethod
16
- def mapStoryNamesToStoryObjects(cls, v:list[str], info:ValidationInfo)->list[Story]:
17
- return Story.mapNamesToSecurityContentObjects(v, info.context.get("output_dto",None))
18
-
26
+ def mapStoryNamesToStoryObjects(
27
+ cls, v: list[str], info: ValidationInfo
28
+ ) -> list[Story]:
29
+ return Story.mapNamesToSecurityContentObjects(
30
+ v, info.context.get("output_dto", None)
31
+ )
19
32
 
20
33
  @model_serializer
21
34
  def serialize_model(self):
22
- #All fields custom to this model
23
- model= {
35
+ # All fields custom to this model
36
+ model = {
24
37
  "analytic_story": [story.name for story in self.analytic_story],
25
38
  "product": self.product,
26
- "required_fields": self.required_fields,
27
39
  "security_domain": self.security_domain,
28
40
  }
29
-
30
- #Combine fields from this model with fields from parent
31
-
32
-
33
- #return the model
34
- return model
41
+
42
+ # Combine fields from this model with fields from parent
43
+
44
+ # return the model
45
+ return model
@@ -1,153 +1,356 @@
1
1
  from __future__ import annotations
2
- from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt
3
- from typing import TYPE_CHECKING, Optional, Any, Union
4
- import re
2
+
3
+ import abc
5
4
  import csv
6
- import uuid
7
- import datetime
5
+ import pathlib
6
+ import re
7
+ from enum import StrEnum, auto
8
+ from functools import cached_property
9
+ from typing import TYPE_CHECKING, Annotated, Any, Literal, Optional, Self
10
+
11
+ from pydantic import (
12
+ Field,
13
+ FilePath,
14
+ NonNegativeInt,
15
+ TypeAdapter,
16
+ ValidationInfo,
17
+ computed_field,
18
+ field_validator,
19
+ model_serializer,
20
+ model_validator,
21
+ )
22
+
8
23
  if TYPE_CHECKING:
9
24
  from contentctl.input.director import DirectorOutputDto
10
25
  from contentctl.objects.config import validate
26
+
11
27
  from contentctl.objects.security_content_object import SecurityContentObject
12
28
 
13
29
  # This section is used to ignore lookups that are NOT shipped with the ESCU app but are used in the detections. Adding exclusions here ensures that contentctl builds will not fail.
14
30
  LOOKUPS_TO_IGNORE = set(["outputlookup"])
15
- LOOKUPS_TO_IGNORE.add("ut_shannon_lookup") #In the URL toolbox app which is recommended for ESCU
16
- LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Asset and Identity Framework
17
- LOOKUPS_TO_IGNORE.add("cim_corporate_web_domain_lookup") #Shipped with the Asset and Identity Framework
18
- LOOKUPS_TO_IGNORE.add("alexa_lookup_by_str") #Shipped with the Asset and Identity Framework
19
- LOOKUPS_TO_IGNORE.add("interesting_ports_lookup") #Shipped with the Asset and Identity Framework
20
- LOOKUPS_TO_IGNORE.add("admon_groups_def") #Shipped with the SA-admon addon
31
+ LOOKUPS_TO_IGNORE.add(
32
+ "ut_shannon_lookup"
33
+ ) # In the URL toolbox app which is recommended for ESCU
34
+ LOOKUPS_TO_IGNORE.add(
35
+ "identity_lookup_expanded"
36
+ ) # Shipped with the Asset and Identity Framework
37
+ LOOKUPS_TO_IGNORE.add(
38
+ "cim_corporate_web_domain_lookup"
39
+ ) # Shipped with the Asset and Identity Framework
40
+ LOOKUPS_TO_IGNORE.add(
41
+ "cim_corporate_email_domain_lookup"
42
+ ) # Shipped with the Enterprise Security
43
+ LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") # Shipped with the Enterprise Security
44
+
45
+ LOOKUPS_TO_IGNORE.add(
46
+ "alexa_lookup_by_str"
47
+ ) # Shipped with the Asset and Identity Framework
48
+ LOOKUPS_TO_IGNORE.add(
49
+ "interesting_ports_lookup"
50
+ ) # Shipped with the Asset and Identity Framework
51
+ LOOKUPS_TO_IGNORE.add(
52
+ "asset_lookup_by_str"
53
+ ) # Shipped with the Asset and Identity Framework
54
+ LOOKUPS_TO_IGNORE.add("admon_groups_def") # Shipped with the SA-admon addon
55
+ LOOKUPS_TO_IGNORE.add(
56
+ "identity_lookup_expanded"
57
+ ) # Shipped with the Enterprise Security
58
+
59
+ # Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
60
+ LOOKUPS_TO_IGNORE.add("=")
61
+ LOOKUPS_TO_IGNORE.add("other_lookups")
21
62
 
22
- #Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
23
- LOOKUPS_TO_IGNORE.add("=")
24
- LOOKUPS_TO_IGNORE.add("other_lookups")
63
+
64
+ class Lookup_Type(StrEnum):
65
+ csv = auto()
66
+ kvstore = auto()
67
+ mlmodel = auto()
25
68
 
26
69
 
27
70
  # TODO (#220): Split Lookup into 2 classes
28
- class Lookup(SecurityContentObject):
29
-
30
- collection: Optional[str] = None
31
- fields_list: Optional[str] = None
32
- filename: Optional[FilePath] = None
71
+ class Lookup(SecurityContentObject, abc.ABC):
33
72
  default_match: Optional[bool] = None
34
- match_type: Optional[str] = None
35
- min_matches: Optional[int] = None
36
- case_sensitive_match: Optional[bool] = None
37
- # TODO: Add id field to all lookup ymls
38
- id: uuid.UUID = Field(default_factory=uuid.uuid4)
39
- date: datetime.date = Field(datetime.date.today())
40
- author: str = Field("NO AUTHOR DEFINED",max_length=255)
41
- version: NonNegativeInt = 1
42
-
73
+ # Per the documentation for transforms.conf, EXACT should not be specified in this list,
74
+ # so we include only WILDCARD and CIDR
75
+ match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(
76
+ default=[]
77
+ )
78
+ min_matches: None | NonNegativeInt = Field(default=None)
79
+ max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(
80
+ default=None
81
+ )
82
+ case_sensitive_match: None | bool = Field(default=None)
43
83
 
44
84
  @model_serializer
45
85
  def serialize_model(self):
46
- #Call parent serializer
86
+ # Call parent serializer
47
87
  super_fields = super().serialize_model()
48
88
 
49
- #All fields custom to this model
50
- model= {
51
- "filename": self.filename.name if self.filename is not None else None,
89
+ # All fields custom to this model
90
+ model = {
52
91
  "default_match": "true" if self.default_match is True else "false",
53
- "match_type": self.match_type,
92
+ "match_type": self.match_type_to_conf_format,
54
93
  "min_matches": self.min_matches,
55
- "case_sensitive_match": "true" if self.case_sensitive_match is True else "false",
56
- "collection": self.collection,
57
- "fields_list": self.fields_list
94
+ "max_matches": self.max_matches,
95
+ "case_sensitive_match": "true"
96
+ if self.case_sensitive_match is True
97
+ else "false",
58
98
  }
59
-
60
- #return the model
99
+
100
+ # return the model
61
101
  model.update(super_fields)
62
102
  return model
63
103
 
64
104
  @model_validator(mode="before")
65
- def fix_lookup_path(cls, data:Any, info: ValidationInfo)->Any:
105
+ def fix_lookup_path(cls, data: Any, info: ValidationInfo) -> Any:
66
106
  if data.get("filename"):
67
- config:validate = info.context.get("config",None)
107
+ config: validate = info.context.get("config", None)
68
108
  if config is not None:
69
109
  data["filename"] = config.path / "lookups/" / data["filename"]
70
110
  else:
71
- raise ValueError("config required for constructing lookup filename, but it was not")
111
+ raise ValueError(
112
+ "config required for constructing lookup filename, but it was not"
113
+ )
72
114
  return data
73
115
 
116
+ @computed_field
117
+ @cached_property
118
+ def match_type_to_conf_format(self) -> str:
119
+ return ", ".join(self.match_type)
120
+
121
+ @staticmethod
122
+ def get_lookups(
123
+ text_field: str,
124
+ director: DirectorOutputDto,
125
+ ignore_lookups: set[str] = LOOKUPS_TO_IGNORE,
126
+ ) -> list[Lookup]:
127
+ # Comprehensively match all kinds of lookups, including inputlookup and outputlookup
128
+ inputLookupsToGet = set(
129
+ re.findall(
130
+ r"[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)",
131
+ text_field,
132
+ re.IGNORECASE,
133
+ )
134
+ )
135
+ outputLookupsToGet = set(
136
+ re.findall(
137
+ r"[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)",
138
+ text_field,
139
+ re.IGNORECASE,
140
+ )
141
+ )
142
+ lookupsToGet = set(
143
+ re.findall(
144
+ r"[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)",
145
+ text_field,
146
+ re.IGNORECASE,
147
+ )
148
+ )
149
+
150
+ input_lookups = Lookup.mapNamesToSecurityContentObjects(
151
+ list(inputLookupsToGet - LOOKUPS_TO_IGNORE), director
152
+ )
153
+ output_lookups = Lookup.mapNamesToSecurityContentObjects(
154
+ list(outputLookupsToGet - LOOKUPS_TO_IGNORE), director
155
+ )
156
+ lookups = Lookup.mapNamesToSecurityContentObjects(
157
+ list(lookupsToGet - LOOKUPS_TO_IGNORE), director
158
+ )
159
+
160
+ all_lookups = set(input_lookups + output_lookups + lookups)
74
161
 
75
- def model_post_init(self, ctx:dict[str,Any]):
76
- if not self.filename:
77
- return
78
- import pathlib
79
- filenamePath = pathlib.Path(self.filename)
80
-
81
- if filenamePath.suffix not in [".csv", ".mlmodel"]:
82
- raise ValueError(f"All Lookup files must be CSV files and end in .csv. The following file does not: '{filenamePath}'")
83
-
84
-
162
+ return list(all_lookups)
85
163
 
86
- if filenamePath.suffix == ".mlmodel":
87
- # Do not need any additional checks for an mlmodel file
88
- return
89
164
 
165
+ class FileBackedLookup(Lookup, abc.ABC):
166
+ # For purposes of the discriminated union, the child classes which
167
+ # inherit from this class must declare the typing of lookup_type
168
+ # themselves, hence it is not defined in the Lookup class
169
+
170
+ @model_validator(mode="after")
171
+ def ensure_lookup_file_exists(self) -> Self:
172
+ if not self.filename.exists():
173
+ raise ValueError(f"Expected lookup filename {self.filename} does not exist")
174
+ return self
175
+
176
+ @computed_field
177
+ @cached_property
178
+ @abc.abstractmethod
179
+ def filename(self) -> FilePath:
180
+ """
181
+ This function computes the backing file for the lookup. It is abstract because different types of lookups
182
+ (CSV or MlModel) backing files have different name formats.
183
+ """
184
+ pass
185
+
186
+ @computed_field
187
+ @cached_property
188
+ @abc.abstractmethod
189
+ def app_filename(self) -> FilePath:
190
+ """
191
+ This function computes the filenames to write into the app itself. This is abstract because
192
+ CSV and MLmodel requirements are different.
193
+ """
194
+ pass
195
+
196
+
197
+ class CSVLookup(FileBackedLookup):
198
+ lookup_type: Literal[Lookup_Type.csv]
199
+
200
+ @model_serializer
201
+ def serialize_model(self):
202
+ # Call parent serializer
203
+ super_fields = super().serialize_model()
204
+
205
+ # All fields custom to this model
206
+ model = {"filename": self.app_filename.name}
207
+
208
+ # return the model
209
+ model.update(super_fields)
210
+ return model
211
+
212
+ @computed_field
213
+ @cached_property
214
+ def filename(self) -> FilePath:
215
+ """
216
+ This function computes the backing file for the lookup. The names of CSV files must EXACTLY match the
217
+ names of their lookup definitions except with the CSV file extension rather than the YML file extension.
218
+ """
219
+ if self.file_path is None:
220
+ raise ValueError(
221
+ f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None"
222
+ ) # type: ignore
223
+
224
+ csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" # type: ignore
225
+
226
+ return csv_file
227
+
228
+ @computed_field
229
+ @cached_property
230
+ def app_filename(self) -> FilePath:
231
+ """
232
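+ This function computes the filename to write into the app itself. It is abstract in FileBackedLookup because
233
+ CSV and MLmodel requirements are different; for CSV, the name is the backing file's stem, then _YYYYMMDD from the lookup's date, then the lookup_type extension.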
234
+ """
235
+ return pathlib.Path(
236
+ f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}"
237
+ )
238
+
239
+ @model_validator(mode="after")
240
+ def ensure_correct_csv_structure(self) -> Self:
90
241
  # https://docs.python.org/3/library/csv.html#csv.DictReader
91
242
  # Column Names (fieldnames) are determined by the number of columns in the first row.
92
243
  # If a row has MORE fields than fieldnames, they will be dumped in a list under the key 'restkey' - this should throw an Exception
93
- # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
94
- csv_errors:list[str] = []
95
- with open(filenamePath, "r") as csv_fp:
244
+ # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
245
+ csv_errors: list[str] = []
246
+ with open(self.filename, "r") as csv_fp:
96
247
  RESTKEY = "extra_fields_in_a_row"
97
- csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
248
+ csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
98
249
  if csv_dict.fieldnames is None:
99
- raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t"
100
- "Unable to read fieldnames from CSV. Is the CSV empty?\n"
101
- " Please try opening the file with a CSV Editor to ensure that it is correct.")
250
+ raise ValueError(
251
+ f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t"
252
+ "Unable to read fieldnames from CSV. Is the CSV empty?\n"
253
+ " Please try opening the file with a CSV Editor to ensure that it is correct."
254
+ )
102
255
  # Remember that row 1 has the headers and we do not iterate over it in the loop below
103
256
  # CSVs are typically indexed starting at row 1 for the header.
104
257
  for row_index, data_row in enumerate(csv_dict):
105
- row_index+=2
106
- if len(data_row.get(RESTKEY,[])) > 0:
107
- csv_errors.append(f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns,"
108
- f" but instead had [{len(csv_dict.fieldnames) + len(data_row.get(RESTKEY,[]))}].")
109
-
258
+ row_index += 2
259
+ if len(data_row.get(RESTKEY, [])) > 0:
260
+ csv_errors.append(
261
+ f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns,"
262
+ f" but instead had [{len(csv_dict.fieldnames) + len(data_row.get(RESTKEY, []))}]."
263
+ )
264
+
110
265
  for column_index, column_name in enumerate(data_row):
111
266
  if data_row[column_name] is None:
112
- csv_errors.append(f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns, "
113
- f"but instead had [{column_index}].")
267
+ csv_errors.append(
268
+ f"row [{row_index}] should have [{len(csv_dict.fieldnames)}] columns, "
269
+ f"but instead had [{column_index}]."
270
+ )
114
271
  if len(csv_errors) > 0:
115
- err_string = '\n\t'.join(csv_errors)
116
- raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t{err_string}\n"
117
- f" Please try opening the file with a CSV Editor to ensure that it is correct.")
118
-
119
- return
120
-
121
-
122
- @field_validator('match_type')
272
+ err_string = "\n\t".join(csv_errors)
273
+ raise ValueError(
274
+ f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n"
275
+ f" Please try opening the file with a CSV Editor to ensure that it is correct."
276
+ )
277
+
278
+ return self
279
+
280
+
281
+ class KVStoreLookup(Lookup):
282
+ lookup_type: Literal[Lookup_Type.kvstore]
283
+ fields: list[str] = Field(
284
+ description="The names of the fields/headings for the KVStore.", min_length=1
285
+ )
286
+
287
+ @field_validator("fields", mode="after")
123
288
  @classmethod
124
- def match_type_valid(cls, v: Union[str,None], info: ValidationInfo):
125
- if not v:
126
- #Match type can be None and that's okay
127
- return v
289
+ def ensure_key(cls, values: list[str]):
290
+ if values[0] != "_key":
291
+ raise ValueError(f"fields MUST begin with '_key', not '{values[0]}'")
292
+ return values
293
+
294
+ @computed_field
295
+ @cached_property
296
+ def collection(self) -> str:
297
+ return self.name
298
+
299
+ @computed_field
300
+ @cached_property
301
+ def fields_to_fields_list_conf_format(self) -> str:
302
+ return ", ".join(self.fields)
303
+
304
+ @model_serializer
305
+ def serialize_model(self):
306
+ # Call parent serializer
307
+ super_fields = super().serialize_model()
308
+
309
+ # All fields custom to this model
310
+ model = {
311
+ "collection": self.collection,
312
+ "fields_list": self.fields_to_fields_list_conf_format,
313
+ }
314
+
315
+ # return the model
316
+ model.update(super_fields)
317
+ return model
128
318
 
129
- if not (v.startswith("WILDCARD(") or v.endswith(")")) :
130
- raise ValueError(f"All match_types must take the format 'WILDCARD(field_name)'. The following file does not: '{v}'")
131
- return v
132
319
 
320
+ class MlModel(FileBackedLookup):
321
+ lookup_type: Literal[Lookup_Type.mlmodel]
133
322
 
134
- #Ensure that exactly one of location or filename are defined
135
- @model_validator(mode='after')
136
- def ensure_mutually_exclusive_fields(self)->Lookup:
137
- if self.filename is not None and self.collection is not None:
138
- raise ValueError("filename and collection cannot be defined in the lookup file. Exactly one must be defined.")
139
- elif self.filename is None and self.collection is None:
140
- raise ValueError("Neither filename nor collection were defined in the lookup file. Exactly one must "
141
- "be defined.")
323
+ @computed_field
324
+ @cached_property
325
+ def filename(self) -> FilePath:
326
+ """
327
+ This function computes the backing file for the lookup. The names of mlmodel files must EXACTLY match the
328
+ names of their lookup definitions except with:
329
+ - __mlspl_ prefix
330
+ - .mlmodel file extension rather than the YML file extension.
331
+ """
332
+ if self.file_path is None:
333
+ raise ValueError(
334
+ f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None"
335
+ ) # type: ignore
142
336
 
337
+ if not self.file_path.stem.startswith("__mlspl_"):
338
+ raise ValueError(
339
+ f"The file_path for ML Model {self.name} MUST start with '__mlspl_', but it does not."
340
+ )
143
341
 
144
- return self
145
-
146
-
147
- @staticmethod
148
- def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
149
- lookups_to_get = set(re.findall(r'[^output]lookup (?:update=true)?(?:append=t)?\s*([^\s]*)', text_field))
150
- lookups_to_ignore = set([lookup for lookup in lookups_to_get if any(to_ignore in lookups_to_get for to_ignore in ignore_lookups)])
151
- lookups_to_get -= lookups_to_ignore
152
- return Lookup.mapNamesToSecurityContentObjects(list(lookups_to_get), director)
153
-
342
+ return self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}"
343
+
344
+ @computed_field
345
+ @cached_property
346
+ def app_filename(self) -> FilePath:
347
+ """
348
+ This function computes the filename to write into the app itself. It is abstract in FileBackedLookup because
349
+ CSV and MLmodel requirements are different; for MLmodel, the name is the backing file's stem with the lookup_type extension.
350
+ """
351
+ return pathlib.Path(f"{self.filename.stem}.{self.lookup_type}")
352
+
353
+
354
+ LookupAdapter = TypeAdapter(
355
+ Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")]
356
+ )
@@ -1,4 +1,4 @@
1
- # Used so that we can have a staticmethod that takes the class
1
+ # Used so that we can have a staticmethod that takes the class
2
2
  # type Macro as an argument
3
3
  from __future__ import annotations
4
4
  from typing import TYPE_CHECKING, List
@@ -6,18 +6,21 @@ import re
6
6
  from pydantic import Field, model_serializer, NonNegativeInt
7
7
  import uuid
8
8
  import datetime
9
+
9
10
  if TYPE_CHECKING:
10
11
  from contentctl.input.director import DirectorOutputDto
11
12
  from contentctl.objects.security_content_object import SecurityContentObject
12
13
 
13
- #The following macros are included in commonly-installed apps.
14
- #As such, we will ignore if they are missing from our app.
15
- #Included in
16
- MACROS_TO_IGNORE = set(["drop_dm_object_name"]) # Part of CIM/Splunk_SA_CIM
17
- MACROS_TO_IGNORE.add("get_asset") #SA-IdentityManagement, part of Enterprise Security
18
- MACROS_TO_IGNORE.add("get_risk_severity") #SA-ThreatIntelligence, part of Enterprise Security
19
- MACROS_TO_IGNORE.add("cim_corporate_web_domain_search") #Part of CIM/Splunk_SA_CIM
20
- #MACROS_TO_IGNORE.add("prohibited_processes")
14
+ # The following macros are included in commonly-installed apps.
15
+ # As such, we will ignore if they are missing from our app.
16
+ # Included in
17
+ MACROS_TO_IGNORE = set(["drop_dm_object_name"]) # Part of CIM/Splunk_SA_CIM
18
+ MACROS_TO_IGNORE.add("get_asset") # SA-IdentityManagement, part of Enterprise Security
19
+ MACROS_TO_IGNORE.add(
20
+ "get_risk_severity"
21
+ ) # SA-ThreatIntelligence, part of Enterprise Security
22
+ MACROS_TO_IGNORE.add("cim_corporate_web_domain_search") # Part of CIM/Splunk_SA_CIM
23
+ # MACROS_TO_IGNORE.add("prohibited_processes")
21
24
 
22
25
 
23
26
  class Macro(SecurityContentObject):
@@ -26,49 +29,62 @@ class Macro(SecurityContentObject):
26
29
  # TODO: Add id field to all macro ymls
27
30
  id: uuid.UUID = Field(default_factory=uuid.uuid4)
28
31
  date: datetime.date = Field(datetime.date.today())
29
- author: str = Field("NO AUTHOR DEFINED",max_length=255)
32
+ author: str = Field("NO AUTHOR DEFINED", max_length=255)
30
33
  version: NonNegativeInt = 1
31
-
32
-
33
34
 
34
35
  @model_serializer
35
36
  def serialize_model(self):
36
- #Call serializer for parent
37
+ # Call serializer for parent
37
38
  super_fields = super().serialize_model()
38
39
 
39
- #All fields custom to this model
40
- model= {
40
+ # All fields custom to this model
41
+ model = {
41
42
  "definition": self.definition,
42
43
  "description": self.description,
43
44
  }
44
-
45
- #return the model
45
+
46
+ # return the model
46
47
  model.update(super_fields)
47
-
48
+
48
49
  return model
49
-
50
- @staticmethod
51
50
 
52
- def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[str]=MACROS_TO_IGNORE)->list[Macro]:
53
- #Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
54
- #If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
55
- #then there is a small edge case where the regex below does not work properly. If that is
56
- #the case, we edit the search slightly to insert a space
51
+ @staticmethod
52
+ def get_macros(
53
+ text_field: str,
54
+ director: DirectorOutputDto,
55
+ ignore_macros: set[str] = MACROS_TO_IGNORE,
56
+ ) -> list[Macro]:
57
+ # Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
58
+ # If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
59
+ # then there is a small edge case where the regex below does not work properly. If that is
60
+ # the case, we edit the search slightly to insert a space
57
61
  if re.findall(r"\`\`\`\`", text_field):
58
- raise ValueError("Search contained four or more '`' characters in a row which is invalid SPL"
59
- "This may have occurred when a macro was commented out.\n"
60
- "Please ammend your search to remove the substring '````'")
62
+ raise ValueError(
63
+ "Search contained four or more '`' characters in a row which is invalid SPL"
64
+ "This may have occurred when a macro was commented out.\n"
65
+ "Please ammend your search to remove the substring '````'"
66
+ )
67
+
68
+ # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below
69
+ text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
70
+
71
+ # Find all the macros, which start and end with a '`' character
72
+ macros_to_get = re.findall(r"`([^\s]+)`", text_field)
73
+ # If macros take arguments, stop at the first argument. We just want the name of the macro
74
+ macros_to_get = set(
75
+ [
76
+ macro[: macro.find("(")] if macro.find("(") != -1 else macro
77
+ for macro in macros_to_get
78
+ ]
79
+ )
61
80
 
62
- # replace all the macros with a space
63
- text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
64
-
65
-
66
- macros_to_get = re.findall(r'`([^\s]+)`', text_field)
67
- #If macros take arguments, stop at the first argument. We just want the name of the macro
68
- macros_to_get = set([macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros_to_get])
69
-
70
- macros_to_ignore = set([macro for macro in macros_to_get if any(to_ignore in macro for to_ignore in ignore_macros)])
71
- #remove the ones that we will ignore
81
+ macros_to_ignore = set(
82
+ [
83
+ macro
84
+ for macro in macros_to_get
85
+ if any(to_ignore in macro for to_ignore in ignore_macros)
86
+ ]
87
+ )
88
+ # remove the ones that we will ignore
72
89
  macros_to_get -= macros_to_ignore
73
90
  return Macro.mapNamesToSecurityContentObjects(list(macros_to_get), director)
74
-