contentctl 4.2.5__py3-none-any.whl → 4.3.0__py3-none-any.whl

This diff shows the changes between two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
@@ -1,144 +0,0 @@
-import re
-from sigma.conversion.state import ConversionState
-from sigma.rule import SigmaRule
-from sigma.conversion.base import TextQueryBackend
-from sigma.conversion.deferred import DeferredTextQueryExpression
-from sigma.conditions import ConditionFieldEqualsValueExpression, ConditionOR, ConditionAND, ConditionNOT, ConditionItem
-from sigma.types import SigmaCompareExpression
-from sigma.exceptions import SigmaFeatureNotSupportedByBackendError
-from sigma.pipelines.splunk.splunk import splunk_sysmon_process_creation_cim_mapping, splunk_windows_registry_cim_mapping, splunk_windows_file_event_cim_mapping
-
-from contentctl.objects.ssa_detection import SSADetection
-
-from typing import ClassVar, Dict, List, Optional, Pattern, Tuple
-
-
-class SplunkBABackend(TextQueryBackend):
-    """Splunk SPL backend."""
-    precedence: ClassVar[Tuple[ConditionItem, ConditionItem, ConditionItem]] = (ConditionNOT, ConditionOR, ConditionAND)
-    group_expression : ClassVar[str] = "({expr})"
-    parenthesize : bool = True
-
-    or_token : ClassVar[str] = "OR"
-    and_token : ClassVar[str] = "AND"
-    not_token : ClassVar[str] = "NOT"
-    eq_token : ClassVar[str] = "="
-
-    field_quote: ClassVar[str] = '"'
-    field_quote_pattern: ClassVar[Pattern] = re.compile("^[\w.]+$")
-
-    str_quote : ClassVar[str] = '"'
-    escape_char : ClassVar[str] = "\\"
-    wildcard_multi : ClassVar[str] = "%"
-    wildcard_single : ClassVar[str] = "%"
-    add_escaped : ClassVar[str] = "\\"
-
-    re_expression : ClassVar[str] = "match({field}, /(?i){regex}/)=true"
-    re_escape_char : ClassVar[str] = ""
-    re_escape : ClassVar[Tuple[str]] = ('"',)
-
-    cidr_expression : ClassVar[str] = "{value}"
-
-    compare_op_expression : ClassVar[str] = "{field}{operator}{value}"
-    compare_operators : ClassVar[Dict[SigmaCompareExpression.CompareOperators, str]] = {
-        SigmaCompareExpression.CompareOperators.LT : "<",
-        SigmaCompareExpression.CompareOperators.LTE : "<=",
-        SigmaCompareExpression.CompareOperators.GT : ">",
-        SigmaCompareExpression.CompareOperators.GTE : ">=",
-    }
-
-    field_null_expression : ClassVar[str] = "{field} IS NOT NULL"
-
-    convert_or_as_in : ClassVar[bool] = True
-    convert_and_as_in : ClassVar[bool] = False
-    in_expressions_allow_wildcards : ClassVar[bool] = False
-    field_in_list_expression : ClassVar[str] = "{field} {op} ({list})"
-    or_in_operator : ClassVar[Optional[str]] = "IN"
-    list_separator : ClassVar[str] = ", "
-
-    unbound_value_str_expression : ClassVar[str] = '{value}'
-    unbound_value_num_expression : ClassVar[str] = '{value}'
-    unbound_value_re_expression : ClassVar[str] = '{value}'
-
-    deferred_start : ClassVar[str] = " "
-    deferred_separator : ClassVar[str] = " OR "
-    deferred_only_query : ClassVar[str] = "*"
-
-    wildcard_match_expression : ClassVar[Optional[str]] = "{field} LIKE {value}"
-
-
-    def __init__(self, processing_pipeline: Optional["sigma.processing.pipeline.ProcessingPipeline"] = None, collect_errors: bool = False, min_time : str = "-30d", max_time : str = "now", detection : SSADetection = None, field_mapping: dict = None, **kwargs):
-        super().__init__(processing_pipeline, collect_errors, **kwargs)
-        self.min_time = min_time or "-30d"
-        self.max_time = max_time or "now"
-        self.detection = detection
-        self.field_mapping = field_mapping
-
-    def finalize_query_data_model(self, rule: SigmaRule, query: str, index: int, state: ConversionState) -> str:
-
-        try:
-            fields = state.processing_state["fields"]
-        except KeyError:
-            raise SigmaFeatureNotSupportedByBackendError("No fields specified by processing pipeline")
-
-        # fields_input_parsing = ''
-        # for count, value in enumerate(fields):
-        #     fields_input_parsing = fields_input_parsing + value + '=ucast(map_get(input_event, "' + value + '"), "string", null)'
-        #     if not count == len(fields) - 1:
-        #         fields_input_parsing = fields_input_parsing + ', '
-
-        detection_str = """
-$main = from source
-| eval timestamp = time
-| eval metadata_uid = metadata.uid
-""".replace("\n", " ")
-
-        parsed_fields = []
-
-        for field in self.field_mapping["mapping"].keys():
-            mapped_field = self.field_mapping["mapping"][field]
-            parent = 'parent'
-            i = 1
-            values = mapped_field.split('.')
-            for val in values:
-                if parent == "parent":
-                    parent = val
-                    continue
-                else:
-                    new_val = parent + '_' + val
-                    if new_val in parsed_fields:
-                        parent = new_val
-                        i = i + 1
-                        continue
-
-
-                    new_val_equals = new_val + "="
-                    new_val_IN = new_val + " IN"
-                    if new_val_equals in query or new_val_IN in query:
-                        parser_str = '| eval ' + new_val + ' = ' + 'lower(' + parent + '.' + val + ') '
-                    else:
-                        parser_str = '| eval ' + new_val + ' = ' + parent + '.' + val + ' '
-                    detection_str = detection_str + parser_str
-                    parsed_fields.append(new_val)
-                    parent = new_val
-                    i = i + 1
-
-
-        ### Convert sigma values into lower case
-        lower_query = ""
-        in_quotes = False
-        for char in query:
-            if char == '"':
-                in_quotes = not in_quotes
-            if in_quotes:
-                lower_query += char.lower()
-            else:
-                lower_query += char
-
-        detection_str = detection_str + "| where " + lower_query
-
-        detection_str = detection_str.replace("\\\\\\\\", "\\\\")
-        return detection_str
-
-    def finalize_output_data_model(self, queries: List[str]) -> List[str]:
-        return queries
@@ -1,436 +0,0 @@
-import os
-import sys
-import copy
-import pathlib
-
-from dataclasses import dataclass
-from jinja2 import Environment, FileSystemLoader
-
-from sigma.processing.conditions import LogsourceCondition
-from sigma.processing.transformations import AddConditionTransformation, FieldMappingTransformation, DetectionItemFailureTransformation, RuleFailureTransformation, SetStateTransformation
-from sigma.processing.conditions import LogsourceCondition, IncludeFieldCondition, ExcludeFieldCondition, RuleProcessingItemAppliedCondition
-from sigma.collection import SigmaCollection
-from sigma.backends.splunk import SplunkBackend
-from sigma.processing.pipeline import ProcessingItem, ProcessingPipeline
-
-from contentctl.input.yml_reader import YmlReader
-from contentctl.objects.detection import Detection
-from contentctl.objects.data_source import DataSource
-from contentctl.objects.unit_test import UnitTest
-from contentctl.objects.enums import *
-from contentctl.helper.utils import Utils
-from contentctl.input.backend_splunk_ba import SplunkBABackend
-
-
-@dataclass(frozen=True)
-class SigmaConverterInputDto:
-    data_model: SigmaConverterTarget
-    detection_path: str
-    detection_folder : str
-    input_path: str
-    log_source: str
-
-
-@dataclass(frozen=True)
-class SigmaConverterOutputDto:
-    detections: list
-
-
-class SigmaConverter():
-    output_dto : SigmaConverterOutputDto
-
-    def __init__(self, output_dto: SigmaConverterOutputDto) -> None:
-        self.output_dto = output_dto
-
-
-    def execute(self, input_dto: SigmaConverterInputDto) -> None:
-
-        detection_files = []
-        errors = []
-
-        if input_dto.detection_path:
-            detection_files.append(input_dto.detection_path)
-        elif input_dto.detection_folder:
-            detection_files = Utils.get_all_yml_files_from_directory(input_dto.detection_folder)
-        else:
-            print("ERROR: --detection_path or --detection_folder needed.")
-            sys.exit(1)
-
-        for detection_file in detection_files:
-            try:
-                detection = self.read_detection(str(detection_file))
-                print("Converting detection: " + detection.name)
-                data_source = self.load_data_source(input_dto.input_path, detection.data_source[0])
-                if not data_source:
-                    print("ERROR: Didn't find data source with name: " + detection.data_source[0] + " for detection " + detection.name)
-                    sys.exit(1)
-
-                file_name = detection.name.replace(' ', '_').replace('-','_').replace('.','_').replace('/','_').lower()
-
-
-                if input_dto.data_model == SigmaConverterTarget.RAW:
-                    if input_dto.log_source and input_dto.log_source != detection.data_source[0][0]:
-                        try:
-                            field_mapping = self.find_mapping(data_source.convert_to_log_source, 'data_source', input_dto.log_source)
-                        except Exception as e:
-                            print(e)
-                            print("ERROR: Couldn't find data source mapping for log source " + input_dto.log_source + " for detection: " + detection.name)
-                            sys.exit(1)
-
-                        detection = self.convert_detection_fields(detection, field_mapping)
-
-                        logsource_condition = self.get_logsource_condition(data_source)
-                        processing_item = self.get_field_transformation_processing_item(
-                            field_mapping['mapping'],
-                            logsource_condition
-                        )
-                        sigma_processing_pipeline = self.get_pipeline_from_processing_items([processing_item])
-                        splunk_backend = SplunkBackend(processing_pipeline=sigma_processing_pipeline)
-                        data_source = self.load_data_source(input_dto.input_path, input_dto.log_source)
-
-                    else:
-                        splunk_backend = SplunkBackend()
-
-                    sigma_rule = self.get_sigma_rule(detection, data_source)
-                    search = splunk_backend.convert(sigma_rule)[0]
-                    search = self.add_source_macro(search, data_source.type)
-                    search = self.add_stats_count(search, data_source.raw_fields)
-                    search = self.add_timeformat_conversion(search)
-                    search = self.add_filter_macro(search, file_name)
-
-                    detection.file_path = file_name + '.yml'
-
-                elif input_dto.data_model == SigmaConverterTarget.CIM:
-                    logsource_condition = self.get_logsource_condition(data_source)
-                    try:
-                        field_mapping = self.find_mapping(data_source.field_mappings, 'data_model', 'cim')
-                    except Exception as e:
-                        print(e)
-                        print("ERROR: Couldn't find data source mapping to cim for log source " + detection.data_source[0] + " and detection " + detection.name)
-                        sys.exit(1)
-
-                    detection = self.convert_detection_fields(detection, field_mapping)
-                    sigma_rule = self.get_sigma_rule(detection, data_source)
-
-                    sigma_transformation_processing_item = self.get_field_transformation_processing_item(
-                        field_mapping['mapping'],
-                        logsource_condition
-                    )
-
-                    sigma_state_fields_processing_item = self.get_state_fields_processing_item(
-                        field_mapping['mapping'].values(),
-                        logsource_condition
-                    )
-                    sigma_state_data_model_processing_item = self.get_state_data_model_processing_item(
-                        field_mapping['data_set'],
-                        logsource_condition
-                    )
-                    sigma_processing_pipeline = self.get_pipeline_from_processing_items([
-                        sigma_transformation_processing_item,
-                        sigma_state_fields_processing_item,
-                        sigma_state_data_model_processing_item
-                    ])
-                    splunk_backend = SplunkBackend(processing_pipeline=sigma_processing_pipeline)
-                    search = splunk_backend.convert(sigma_rule, "data_model")[0]
-                    search = self.add_filter_macro(search, file_name)
-
-                    detection.file_path = file_name + '.yml'
-
-                elif input_dto.data_model == SigmaConverterTarget.OCSF:
-
-                    processing_items = list()
-                    logsource_condition = self.get_logsource_condition(data_source)
-                    if input_dto.log_source and input_dto.log_source != detection.data_source[0]:
-                        data_source_new = self.load_data_source(input_dto.input_path, input_dto.log_source)
-
-                        try:
-                            field_mapping = self.get_mapping_converted_data_source(
-                                data_source,
-                                "data_source",
-                                input_dto.log_source,
-                                data_source_new,
-                                "data_model",
-                                "ocsf"
-                            )
-                        except Exception as e:
-                            print(e)
-                            print("ERROR: Couldn't find data source mapping for log source " + input_dto.log_source + " and detection " + detection.name)
-                            sys.exit(1)
-
-                        cim_to_ocsf_mapping = self.get_cim_to_ocsf_mapping(data_source_new)
-
-                    # elif input_dto.cim_to_ocsf:
-                    #     field_mapping = self.get_cim_to_ocsf_mapping(data_source)
-                    #     cim_to_ocsf_mapping = field_mapping
-
-                    else:
-                        field_mapping = self.find_mapping(data_source.field_mappings, 'data_model', 'ocsf')
-                        cim_to_ocsf_mapping = self.get_cim_to_ocsf_mapping(data_source)
-
-                    field_mapping_underline = copy.deepcopy(field_mapping)
-                    for field in field_mapping_underline["mapping"].keys():
-                        field_mapping_underline["mapping"][field] = field_mapping_underline["mapping"][field].replace(".", "_")
-
-                    self.add_required_fields(cim_to_ocsf_mapping, detection)
-                    self.add_mappings(cim_to_ocsf_mapping, detection)
-
-                    self.update_observables(detection)
-
-                    processing_items.append(
-                        self.get_field_transformation_processing_item(
-                            field_mapping_underline['mapping'],
-                            logsource_condition
-                        )
-                    )
-                    processing_items.append(
-                        self.get_state_fields_processing_item(
-                            field_mapping_underline['mapping'].values(),
-                            logsource_condition
-                        )
-                    )
-
-                    detection = self.convert_detection_fields(detection)
-                    sigma_rule = self.get_sigma_rule(detection, data_source)
-                    sigma_processing_pipeline = self.get_pipeline_from_processing_items(processing_items)
-
-                    splunk_backend = SplunkBABackend(processing_pipeline=sigma_processing_pipeline, detection=detection, field_mapping=field_mapping)
-
-                    search = splunk_backend.convert(sigma_rule, "data_model")[0]
-
-                    search = search + ' --finding_report--'
-                    detection.file_path = 'ssa___' + file_name + '.yml'
-
-                detection.search = search
-
-                self.output_dto.detections.append(detection)
-
-            except Exception as e:
-                errors.append(f"ERROR: Converting detection file '{detection_file}': {str(e)}")
-
-        if len(errors) > 0:
-            errors_string = '\n\t'.join(errors)
-            raise Exception(f"The following errors were encountered during conversion:\n\t{errors_string}")
-
-    def read_detection(self, detection_path : str) -> Detection:
-        yml_dict = YmlReader.load_file(detection_path)
-
-        #SSA Detections are ALLOWED to have names longer than 67 characters,
-        #unlike Splunk App Detections. Because we still want to use the
-        #Detection Object (and its validations), we will arbitrarily
-        #truncate the name of a detection if it is too long so that
-        #it passes validation, then updated the name after the object
-        #is constructed. Because we do not have Pydantic configured
-        #to validate each new field assignment, this will not throw
-        #an error
-        name = yml_dict.get("name","")
-        yml_dict["name"] = name[:67]
-        detection = Detection.parse_obj(yml_dict)
-        # Remove any Integration Tests. IntegrationTests are only relevant
-        # for ESCU Content and NOT for BA Content. Instead of filtering OUT
-        # IntegrationTest, we will ONLY include UnitTest. This supports the introduction
-        # of additional ESCU Test Types in the future.
-        detection.tests = list(filter(lambda t: isinstance(t, UnitTest), detection.tests))
-
-        detection.name = name
-
-
-        return detection
-
-
-    def load_data_source(self, input_path: str, data_source_name: str) -> DataSource:
-        data_sources = list()
-        files = Utils.get_all_yml_files_from_directory(os.path.join(input_path, 'data_sources'))
-        for file in files:
-            data_sources.append(DataSource.parse_obj(YmlReader.load_file(str(file))))
-
-        data_source = None
-
-        for obj in data_sources:
-            if obj.name == data_source_name:
-                return obj
-
-        return None
-
-
-    def get_sigma_rule(self, detection: Detection, data_source: DataSource) -> SigmaCollection:
-        return SigmaCollection.from_dicts([{
-            "title": detection.name,
-            "status": "experimental",
-            "logsource": {
-                "category": data_source.category,
-                "product": data_source.product
-            },
-            "detection": detection.search
-        }])
-
-
-    # def convert_detection_fields(self, detection: Detection, mappings: dict) -> Detection:
-    #     for selection in detection.search.keys():
-    #         if selection != "condition":
-    #             new_selection = copy.deepcopy(detection.search[selection])
-    #             for field in detection.search[selection].keys():
-    #                 for mapping in mappings["mapping"].keys():
-    #                     if mapping == field:
-    #                         new_selection[mappings["mapping"][mapping]] = detection.search[selection][field]
-    #                         new_selection.pop(field)
-    #             detection.search[selection] = new_selection
-
-    #     return detection
-
-    def convert_detection_fields(self, detection: Detection) -> Detection:
-        for selection in detection.search.keys():
-            if selection != "condition":
-                new_selection = copy.deepcopy(detection.search[selection])
-                for field in detection.search[selection].keys():
-                    new_field_name = field.replace(".", "_")
-                    new_selection[new_field_name] = detection.search[selection][field]
-                    new_selection.pop(field)
-                detection.search[selection] = new_selection
-
-        return detection
-
-
-    def get_logsource_condition(self, data_source: DataSource) -> LogsourceCondition:
-        return LogsourceCondition(
-            category=data_source.category,
-            product=data_source.product,
-        )
-
-
-    def get_field_transformation_processing_item(self, data_source_mapping: dict, logsource_condition: LogsourceCondition) -> ProcessingItem:
-        return ProcessingItem(
-            identifier="field_mapping_transformation",
-            transformation=FieldMappingTransformation(data_source_mapping),
-            rule_conditions=[
-                logsource_condition
-            ]
-        )
-
-
-    def get_state_fields_processing_item(self, fields: list, logsource_condition: LogsourceCondition) -> ProcessingItem:
-        return ProcessingItem(
-            identifier="fields",
-            transformation=SetStateTransformation("fields", fields),
-            rule_conditions=[
-                logsource_condition
-            ]
-        )
-
-
-    def get_state_data_model_processing_item(self, data_model: str, logsource_condition: LogsourceCondition) -> ProcessingItem:
-        return ProcessingItem(
-            identifier="data_model",
-            transformation=SetStateTransformation("data_model_set", data_model),
-            rule_conditions=[
-                logsource_condition
-            ]
-        )
-
-
-    def get_pipeline_from_processing_items(self, processing_items: list) -> ProcessingPipeline:
-        return ProcessingPipeline(
-            name="Splunk Sigma",
-            priority=10,
-            items=processing_items
-        )
-
-    def add_source_macro(self, search: str, data_source_type: str) -> str:
-        return "`" + data_source_type + "` " + search
-
-    def add_stats_count(self, search: str, fields: list) -> str:
-        search = search + " | fillnull | stats count min(_time) as firstTime max(_time) as lastTime by "
-        for key in fields:
-            search = search + key + " "
-        return search
-
-    def add_timeformat_conversion(self, search: str) -> str:
-        return search + '| convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(firstTime) | convert timeformat="%Y-%m-%dT%H:%M:%S" ctime(lastTime) '
-
-    def add_filter_macro(self, search: str, file_name: str) -> str:
-        return search + '| `' + file_name + '_filter`'
-
-    def find(self, name: str, path: str) -> str:
-        for root, dirs, files in os.walk(path):
-            if name in files:
-                return os.path.join(root, name)
-        return None
-
-    def find_mapping(self, field_mappings: list, object: str, data_model: str) -> dict:
-        for mapping in field_mappings:
-            if mapping[object] == data_model:
-                return mapping
-
-        raise AttributeError("ERROR: Couldn't find mapping.")
-
-
-    def add_required_fields(self, field_mapping: dict, detection: Detection) -> None:
-        required_fields = list()
-        # required_fields = ["process.user.name", "device.hostname"]
-        for mapping in field_mapping["mapping"].keys():
-            required_fields.append(field_mapping["mapping"][mapping])
-
-        detection.tags.required_fields = required_fields
-
-
-    def add_mappings(self, field_mapping: dict, detection: Detection) -> None:
-        mappings = list()
-        for mapping in field_mapping["mapping"].keys():
-            mappings.append({
-                "ocsf": field_mapping["mapping"][mapping],
-                "cim": mapping
-            })
-        detection.tags.mappings = mappings
-
-    def update_observables(self, detection : Detection) -> None:
-        mapping_field_to_type = {
-            "process.user.name": "User Name",
-            "actor.user.name": "User Name",
-            "device.hostname": "Hostname",
-            "process.file.name": "File Name",
-            "actor.process.file.name": "File Name",
-            "actor.process.file.path": "File Name",
-            "actor.process.cmd_line": "Process",
-            "actor.user.uid": "Other",
-            "process.cmd_line": "Other",
-            "process.file.path": "File",
-            "process.file.name": "File",
-            "process.uid": "Other",
-            "process.pid": "Other",
-            "actor.process.pid": "Other"
-        }
-
-        observables = list()
-
-        for field in detection.tags.required_fields:
-            observables.append({
-                "name": field,
-                "type": mapping_field_to_type[field]
-            })
-
-        detection.tags.observable = observables
-
-
-    def get_cim_to_ocsf_mapping(self, data_source : DataSource) -> dict:
-        cim_to_ocsf_mapping = dict()
-        cim_to_ocsf_mapping["mapping"] = dict()
-        cim_mapping = self.find_mapping(data_source.field_mappings, "data_model", "cim")
-        ocsf_mapping = self.find_mapping(data_source.field_mappings, "data_model", "ocsf")
-
-        for key in cim_mapping["mapping"].keys():
-            cim_field = cim_mapping["mapping"][key].split(".")[1]
-            cim_to_ocsf_mapping["mapping"][cim_field] = ocsf_mapping["mapping"][key]
-
-        return cim_to_ocsf_mapping
-
-
-    def get_mapping_converted_data_source(self, det_ds: DataSource, det_ds_obj: str, det_ds_dm: str, con_ds: DataSource, con_ds_obj: str, con_ds_dm: str) -> dict:
-        mapping = dict()
-        mapping["mapping"] = dict()
-        det_ds_mapping = self.find_mapping(det_ds.convert_to_log_source, det_ds_obj, det_ds_dm)
-        con_ds_mapping = self.find_mapping(con_ds.field_mappings, con_ds_obj, con_ds_dm)
-
-        for key in det_ds_mapping["mapping"].keys():
-            mapped_field = con_ds_mapping["mapping"][det_ds_mapping["mapping"][key]]
-            mapping["mapping"][key] = mapped_field
-
-        return mapping