contentctl 4.4.7__py3-none-any.whl → 5.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in that public registry.
Files changed (69)
  1. contentctl/actions/build.py +39 -27
  2. contentctl/actions/detection_testing/DetectionTestingManager.py +0 -1
  3. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +32 -26
  4. contentctl/actions/detection_testing/progress_bar.py +6 -6
  5. contentctl/actions/detection_testing/views/DetectionTestingView.py +4 -4
  6. contentctl/actions/new_content.py +98 -81
  7. contentctl/actions/test.py +4 -5
  8. contentctl/actions/validate.py +2 -1
  9. contentctl/contentctl.py +114 -79
  10. contentctl/helper/utils.py +0 -14
  11. contentctl/input/director.py +5 -5
  12. contentctl/input/new_content_questions.py +2 -2
  13. contentctl/input/yml_reader.py +11 -6
  14. contentctl/objects/abstract_security_content_objects/detection_abstract.py +228 -120
  15. contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +5 -7
  16. contentctl/objects/alert_action.py +2 -1
  17. contentctl/objects/atomic.py +1 -0
  18. contentctl/objects/base_test.py +4 -3
  19. contentctl/objects/base_test_result.py +3 -3
  20. contentctl/objects/baseline.py +26 -6
  21. contentctl/objects/baseline_tags.py +2 -3
  22. contentctl/objects/config.py +26 -45
  23. contentctl/objects/constants.py +4 -1
  24. contentctl/objects/correlation_search.py +89 -95
  25. contentctl/objects/data_source.py +5 -6
  26. contentctl/objects/deployment.py +2 -10
  27. contentctl/objects/deployment_email.py +2 -1
  28. contentctl/objects/deployment_notable.py +2 -1
  29. contentctl/objects/deployment_phantom.py +2 -1
  30. contentctl/objects/deployment_rba.py +2 -1
  31. contentctl/objects/deployment_scheduling.py +2 -1
  32. contentctl/objects/deployment_slack.py +2 -1
  33. contentctl/objects/detection_tags.py +7 -42
  34. contentctl/objects/drilldown.py +1 -0
  35. contentctl/objects/enums.py +21 -58
  36. contentctl/objects/investigation.py +6 -5
  37. contentctl/objects/investigation_tags.py +2 -3
  38. contentctl/objects/lookup.py +145 -63
  39. contentctl/objects/macro.py +2 -3
  40. contentctl/objects/mitre_attack_enrichment.py +2 -2
  41. contentctl/objects/observable.py +3 -1
  42. contentctl/objects/playbook_tags.py +5 -1
  43. contentctl/objects/rba.py +90 -0
  44. contentctl/objects/risk_event.py +87 -144
  45. contentctl/objects/story_tags.py +1 -2
  46. contentctl/objects/test_attack_data.py +2 -1
  47. contentctl/objects/unit_test_baseline.py +2 -1
  48. contentctl/output/api_json_output.py +233 -220
  49. contentctl/output/conf_output.py +51 -44
  50. contentctl/output/conf_writer.py +201 -125
  51. contentctl/output/data_source_writer.py +0 -1
  52. contentctl/output/json_writer.py +2 -4
  53. contentctl/output/svg_output.py +1 -1
  54. contentctl/output/templates/analyticstories_detections.j2 +1 -1
  55. contentctl/output/templates/collections.j2 +1 -1
  56. contentctl/output/templates/doc_detections.j2 +0 -5
  57. contentctl/output/templates/savedsearches_detections.j2 +8 -3
  58. contentctl/output/templates/transforms.j2 +4 -4
  59. contentctl/output/yml_writer.py +15 -0
  60. contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml +16 -34
  61. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/METADATA +5 -4
  62. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/RECORD +65 -68
  63. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/WHEEL +1 -1
  64. contentctl/objects/event_source.py +0 -11
  65. contentctl/output/detection_writer.py +0 -28
  66. contentctl/output/new_content_yml_output.py +0 -56
  67. contentctl/output/yml_output.py +0 -66
  68. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/LICENSE.md +0 -0
  69. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/entry_points.txt +0 -0
contentctl/objects/detection_tags.py
@@ -4,8 +4,6 @@ from typing import TYPE_CHECKING, List, Optional, Union
 from pydantic import (
     BaseModel,
     Field,
-    NonNegativeInt,
-    PositiveInt,
     computed_field,
     UUID4,
     HttpUrl,
@@ -27,7 +25,6 @@ from contentctl.objects.enums import (
     Cis18Value,
     AssetType,
     SecurityDomain,
-    RiskSeverity,
     KillChainPhase,
     NistCategory,
     SecurityContentProductName
@@ -35,57 +32,29 @@ from contentctl.objects.enums import (
 from contentctl.objects.atomic import AtomicEnrichment, AtomicTest
 from contentctl.objects.annotated_types import MITRE_ATTACK_ID_TYPE, CVE_TYPE
 
-# TODO (#266): disable the use_enum_values configuration
+
 class DetectionTags(BaseModel):
     # detection spec
-    model_config = ConfigDict(use_enum_values=True, validate_default=False)
+
+    model_config = ConfigDict(validate_default=False, extra='forbid')
     analytic_story: list[Story] = Field(...)
     asset_type: AssetType = Field(...)
-
-    confidence: NonNegativeInt = Field(..., le=100)
-    impact: NonNegativeInt = Field(..., le=100)
-
-    @computed_field
-    @property
-    def risk_score(self) -> int:
-        return round((self.confidence * self.impact)/100)
-
-    @computed_field
-    @property
-    def severity(self)->RiskSeverity:
-        if 0 <= self.risk_score <= 20:
-            return RiskSeverity.INFORMATIONAL
-        elif 20 < self.risk_score <= 40:
-            return RiskSeverity.LOW
-        elif 40 < self.risk_score <= 60:
-            return RiskSeverity.MEDIUM
-        elif 60 < self.risk_score <= 80:
-            return RiskSeverity.HIGH
-        elif 80 < self.risk_score <= 100:
-            return RiskSeverity.CRITICAL
-        else:
-            raise Exception(f"Error getting severity - risk_score must be between 0-100, but was actually {self.risk_score}")
-
+    group: list[str] = []
 
     mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = []
     nist: list[NistCategory] = []
 
+    # TODO (cmcginley): observable should be removed as well, yes?
     # TODO (#249): Add pydantic validator to ensure observables are unique within a detection
     observable: List[Observable] = []
-    message: str = Field(...)
     product: list[SecurityContentProductName] = Field(..., min_length=1)
-    required_fields: list[str] = Field(min_length=1)
     throttling: Optional[Throttling] = None
     security_domain: SecurityDomain = Field(...)
     cve: List[CVE_TYPE] = []
     atomic_guid: List[AtomicTest] = []
-
 
     # enrichment
     mitre_attack_enrichments: List[MitreAttackEnrichment] = Field([], validate_default=True)
-    confidence_id: Optional[PositiveInt] = Field(None, ge=1, le=3)
-    impact_id: Optional[PositiveInt] = Field(None, ge=1, le=5)
-    evidence_str: Optional[str] = None
 
     @computed_field
     @property
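Note: confidence, impact, and the derived risk_score/severity computed fields are dropped from DetectionTags here; this appears to be superseded by the new risk model in contentctl/objects/rba.py (+90 lines in the file list). For reference, a worked example of the arithmetic being removed (the input values are illustrative only):

    # confidence=70, impact=80
    risk_score = round((70 * 80) / 100)   # -> 56
    # 40 < 56 <= 60, so the old severity property returned RiskSeverity.MEDIUM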
@@ -114,7 +83,7 @@ class DetectionTags(BaseModel):
 
     # TODO (#268): Validate manual_test has length > 0 if not None
     manual_test: Optional[str] = None
-
+
     # The following validator is temporarily disabled pending further discussions
     # @validator('message')
     # def validate_message(cls,v,values):
@@ -152,15 +121,11 @@ class DetectionTags(BaseModel):
         # Since this field has no parent, there is no need to call super() serialization function
         return {
             "analytic_story": [story.name for story in self.analytic_story],
-            "asset_type": self.asset_type.value,
+            "asset_type": self.asset_type,
             "cis20": self.cis20,
             "kill_chain_phases": self.kill_chain_phases,
             "nist": self.nist,
-            "observable": self.observable,
-            "message": self.message,
-            "risk_score": self.risk_score,
             "security_domain": self.security_domain,
-            "risk_severity": self.severity,
             "mitre_attack_id": self.mitre_attack_id,
             "mitre_attack_enrichments": self.mitre_attack_enrichments
         }
contentctl/objects/drilldown.py
@@ -23,6 +23,7 @@ class Drilldown(BaseModel):
         "but it is NOT the default value and must be supplied explicitly.",
         min_length= 1)
 
+    # TODO (cmcginley): @ljstella the drilldowns will need to be updated
     @classmethod
     def constructDrilldownsFromDetection(cls, detection: Detection) -> list[Drilldown]:
         victim_observables = [o for o in detection.tags.observable if o.role[0] == "Victim"]
contentctl/objects/enums.py
@@ -1,15 +1,15 @@
 from __future__ import annotations
 from typing import List
-import enum
+from enum import StrEnum, IntEnum
 
 
-class AnalyticsType(str, enum.Enum):
+class AnalyticsType(StrEnum):
     TTP = "TTP"
     Anomaly = "Anomaly"
     Hunting = "Hunting"
     Correlation = "Correlation"
 
-class DeploymentType(str, enum.Enum):
+class DeploymentType(StrEnum):
     TTP = "TTP"
     Anomaly = "Anomaly"
     Hunting = "Hunting"
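Note: the module-wide switch from class Foo(str, enum.Enum) to StrEnum requires Python 3.11+ and changes how members render as strings, which is likely why use_enum_values=True is dropped from model configs elsewhere in this diff. A minimal sketch (toy enums, not part of the package) of the difference:

    from enum import Enum, StrEnum  # StrEnum is available from Python 3.11

    class OldStyle(str, Enum):
        TTP = "TTP"

    class NewStyle(StrEnum):
        TTP = "TTP"

    print(f"{OldStyle.TTP}")           # 'OldStyle.TTP' on Python 3.11+ (was 'TTP' on 3.10)
    print(f"{NewStyle.TTP}")           # 'TTP'
    print(str(NewStyle.TTP) == "TTP")  # True: a StrEnum member formats as its value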
@@ -18,7 +18,7 @@ class DeploymentType(str, enum.Enum):
     Embedded = "Embedded"
 
 
-class DataModel(str,enum.Enum):
+class DataModel(StrEnum):
     ENDPOINT = "Endpoint"
     NETWORK_TRAFFIC = "Network_Traffic"
     AUTHENTICATION = "Authentication"
@@ -40,11 +40,11 @@ class DataModel(str,enum.Enum):
     SPLUNK_AUDIT = "Splunk_Audit"
 
 
-class PlaybookType(str, enum.Enum):
+class PlaybookType(StrEnum):
     INVESTIGATION = "Investigation"
     RESPONSE = "Response"
 
-class SecurityContentType(enum.Enum):
+class SecurityContentType(IntEnum):
     detections = 1
     baselines = 2
     stories = 3
@@ -68,20 +68,15 @@ class SecurityContentType(enum.Enum):
 #    json_objects = "json_objects"
 
 
-class SecurityContentProduct(enum.Enum):
-    SPLUNK_APP = 1
-    API = 3
-    CUSTOM = 4
 
-
-class SecurityContentProductName(str, enum.Enum):
+class SecurityContentProductName(StrEnum):
     SPLUNK_ENTERPRISE = "Splunk Enterprise"
     SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security"
     SPLUNK_CLOUD = "Splunk Cloud"
     SPLUNK_SECURITY_ANALYTICS_FOR_AWS = "Splunk Security Analytics for AWS"
     SPLUNK_BEHAVIORAL_ANALYTICS = "Splunk Behavioral Analytics"
 
-class SecurityContentInvestigationProductName(str, enum.Enum):
+class SecurityContentInvestigationProductName(StrEnum):
     SPLUNK_ENTERPRISE = "Splunk Enterprise"
     SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security"
     SPLUNK_CLOUD = "Splunk Cloud"
@@ -90,33 +85,20 @@ class SecurityContentInvestigationProductName(str, enum.Enum):
     SPLUNK_PHANTOM = "Splunk Phantom"
 
 
-class DetectionStatus(enum.Enum):
-    production = "production"
-    deprecated = "deprecated"
-    experimental = "experimental"
-    validation = "validation"
-
-
-class DetectionStatusSSA(enum.Enum):
+class DetectionStatus(StrEnum):
     production = "production"
     deprecated = "deprecated"
     experimental = "experimental"
     validation = "validation"
 
 
-class LogLevel(enum.Enum):
+class LogLevel(StrEnum):
     NONE = "NONE"
     ERROR = "ERROR"
     INFO = "INFO"
 
 
-class AlertActions(enum.Enum):
-    notable = "notable"
-    rba = "rba"
-    email = "email"
-
-
-class StoryCategory(str, enum.Enum):
+class StoryCategory(StrEnum):
     ABUSE = "Abuse"
     ADVERSARY_TACTICS = "Adversary Tactics"
     BEST_PRACTICES = "Best Practices"
@@ -139,37 +121,18 @@ class StoryCategory(str, enum.Enum):
     UNAUTHORIZED_SOFTWARE = "Unauthorized Software"
 
 
-class PostTestBehavior(str, enum.Enum):
+class PostTestBehavior(StrEnum):
     always_pause = "always_pause"
     pause_on_failure = "pause_on_failure"
     never_pause = "never_pause"
 
 
-class DetectionTestingMode(str, enum.Enum):
+class DetectionTestingMode(StrEnum):
     selected = "selected"
     all = "all"
     changes = "changes"
 
 
-class DetectionTestingTargetInfrastructure(str, enum.Enum):
-    container = "container"
-    server = "server"
-
-
-class InstanceState(str, enum.Enum):
-    starting = "starting"
-    running = "running"
-    error = "error"
-    stopping = "stopping"
-    stopped = "stopped"
-
-
-class SigmaConverterTarget(enum.Enum):
-    CIM = 1
-    RAW = 2
-    OCSF = 3
-    ALL = 4
-
 # It's unclear why we use a mix of constants and enums. The following list was taken from:
 # contentctl/contentctl/helper/constants.py.
 # We convect it to an enum here
@@ -183,7 +146,7 @@ class SigmaConverterTarget(enum.Enum):
 # "Command And Control": 6,
 # "Actions on Objectives": 7
 # }
-class KillChainPhase(str, enum.Enum):
+class KillChainPhase(StrEnum):
     UNKNOWN ="Unknown"
     RECONNAISSANCE = "Reconnaissance"
     WEAPONIZATION = "Weaponization"
@@ -194,7 +157,7 @@ class KillChainPhase(str, enum.Enum):
     ACTIONS_ON_OBJECTIVES = "Actions on Objectives"
 
 
-class DataSource(str,enum.Enum):
+class DataSource(StrEnum):
     OSQUERY_ES_PROCESS_EVENTS = "OSQuery ES Process Events"
     POWERSHELL_4104 = "Powershell 4104"
     SYSMON_EVENT_ID_1 = "Sysmon EventID 1"
@@ -234,7 +197,7 @@ class DataSource(str,enum.Enum):
     WINDOWS_SECURITY_5145 = "Windows Security 5145"
     WINDOWS_SYSTEM_7045 = "Windows System 7045"
 
-class ProvidingTechnology(str, enum.Enum):
+class ProvidingTechnology(StrEnum):
     AMAZON_SECURITY_LAKE = "Amazon Security Lake"
     AMAZON_WEB_SERVICES_CLOUDTRAIL = "Amazon Web Services - Cloudtrail"
     AZURE_AD = "Azure AD"
@@ -302,7 +265,7 @@ class ProvidingTechnology(str, enum.Enum):
         return sorted(list(matched_technologies))
 
 
-class Cis18Value(str,enum.Enum):
+class Cis18Value(StrEnum):
     CIS_0 = "CIS 0"
     CIS_1 = "CIS 1"
     CIS_2 = "CIS 2"
@@ -323,7 +286,7 @@ class Cis18Value(str,enum.Enum):
     CIS_17 = "CIS 17"
     CIS_18 = "CIS 18"
 
-class SecurityDomain(str, enum.Enum):
+class SecurityDomain(StrEnum):
     ENDPOINT = "endpoint"
     NETWORK = "network"
     THREAT = "threat"
@@ -331,7 +294,7 @@ class SecurityDomain(str, enum.Enum):
     ACCESS = "access"
     AUDIT = "audit"
 
-class AssetType(str, enum.Enum):
+class AssetType(StrEnum):
     AWS_ACCOUNT = "AWS Account"
     AWS_EKS_KUBERNETES_CLUSTER = "AWS EKS Kubernetes cluster"
     AWS_FEDERATED_ACCOUNT = "AWS Federated Account"
@@ -382,7 +345,7 @@ class AssetType(str, enum.Enum):
     WEB_APPLICATION = "Web Application"
     WINDOWS = "Windows"
 
-class NistCategory(str, enum.Enum):
+class NistCategory(StrEnum):
     ID_AM = "ID.AM"
     ID_BE = "ID.BE"
     ID_GV = "ID.GV"
@@ -406,7 +369,7 @@ class NistCategory(str, enum.Enum):
     RC_IM = "RC.IM"
     RC_CO = "RC.CO"
 
-class RiskSeverity(str,enum.Enum):
+class RiskSeverity(StrEnum):
     # Levels taken from the following documentation link
     # https://docs.splunk.com/Documentation/ES/7.3.2/User/RiskScoring
     # 20 - info (0-20 for us)
contentctl/objects/investigation.py
@@ -12,17 +12,13 @@ from contentctl.objects.constants import (
 )
 from contentctl.objects.config import CustomApp
 
-# TODO (#266): disable the use_enum_values configuration
 class Investigation(SecurityContentObject):
-    model_config = ConfigDict(use_enum_values=True,validate_default=False)
+    model_config = ConfigDict(validate_default=False)
     type: str = Field(...,pattern="^Investigation$")
-    datamodel: list[DataModel] = Field(...)
     name:str = Field(...,max_length=CONTENTCTL_MAX_SEARCH_NAME_LENGTH)
     search: str = Field(...)
     how_to_implement: str = Field(...)
     known_false_positives: str = Field(...)
-
-
     tags: InvestigationTags
 
     # enrichment
@@ -38,6 +34,11 @@ class Investigation(SecurityContentObject):
 
         return inputs
 
+    @computed_field
+    @property
+    def datamodel(self) -> List[DataModel]:
+        return [dm for dm in DataModel if dm in self.search]
+
     @computed_field
     @property
     def lowercase_name(self)->str:
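Note: datamodel changes from a required YAML field to a value computed from the search text; because DataModel is now a StrEnum, "dm in self.search" is a plain substring test against each enum value. A simplified, self-contained sketch of the same idea (enum trimmed to two members, search string made up):

    from enum import StrEnum

    class DataModel(StrEnum):
        ENDPOINT = "Endpoint"
        NETWORK_TRAFFIC = "Network_Traffic"

    search = "| tstats count from datamodel=Endpoint.Processes where ..."
    matched = [dm for dm in DataModel if dm in search]
    print(matched)  # [<DataModel.ENDPOINT: 'Endpoint'>]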
contentctl/objects/investigation_tags.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
 from typing import List
-from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer
+from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer,ConfigDict
 from contentctl.objects.story import Story
 from contentctl.objects.enums import SecurityContentInvestigationProductName, SecurityDomain
 
 class InvestigationTags(BaseModel):
+    model_config = ConfigDict(extra="forbid")
     analytic_story: List[Story] = Field([],min_length=1)
     product: List[SecurityContentInvestigationProductName] = Field(...,min_length=1)
-    required_fields: List[str] = Field(min_length=1)
     security_domain: SecurityDomain = Field(...)
 
 
@@ -23,7 +23,6 @@ class InvestigationTags(BaseModel):
         model= {
             "analytic_story": [story.name for story in self.analytic_story],
             "product": self.product,
-            "required_fields": self.required_fields,
             "security_domain": self.security_domain,
         }
 
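Note: with required_fields removed from the model and from the serializer, and extra="forbid" now set on InvestigationTags, YAML that still carries required_fields should fail validation rather than being silently ignored. A minimal pydantic v2 sketch of that behavior (simplified stand-in model, not the real InvestigationTags):

    from pydantic import BaseModel, ConfigDict, ValidationError

    class Tags(BaseModel):
        model_config = ConfigDict(extra="forbid")
        security_domain: str

    try:
        Tags(security_domain="endpoint", required_fields=["user"])
    except ValidationError as err:
        print(err)  # reports "Extra inputs are not permitted" for required_fields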
contentctl/objects/lookup.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
-from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt
-from typing import TYPE_CHECKING, Optional, Any, Union
+
+from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt, computed_field, TypeAdapter
+from enum import StrEnum, auto
+from typing import TYPE_CHECKING, Optional, Any, Union, Literal, Annotated, Self
 import re
 import csv
-import uuid
-import datetime
+import abc
+from functools import cached_property
+import pathlib
 if TYPE_CHECKING:
     from contentctl.input.director import DirectorOutputDto
     from contentctl.objects.config import validate
@@ -15,32 +18,41 @@ LOOKUPS_TO_IGNORE = set(["outputlookup"])
 LOOKUPS_TO_IGNORE.add("ut_shannon_lookup") #In the URL toolbox app which is recommended for ESCU
 LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("cim_corporate_web_domain_lookup") #Shipped with the Asset and Identity Framework
+LOOKUPS_TO_IGNORE.add("cim_corporate_email_domain_lookup") #Shipped with the Enterprise Security
+LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") #Shipped with the Enterprise Security
+
 LOOKUPS_TO_IGNORE.add("alexa_lookup_by_str") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("interesting_ports_lookup") #Shipped with the Asset and Identity Framework
+LOOKUPS_TO_IGNORE.add("asset_lookup_by_str") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("admon_groups_def") #Shipped with the SA-admon addon
+LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Enterprise Security
 
 #Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
 LOOKUPS_TO_IGNORE.add("=")
 LOOKUPS_TO_IGNORE.add("other_lookups")
 
 
+class Lookup_Type(StrEnum):
+    csv = auto()
+    kvstore = auto()
+    mlmodel = auto()
+
+
+
 # TODO (#220): Split Lookup into 2 classes
-class Lookup(SecurityContentObject):
-
-    collection: Optional[str] = None
-    fields_list: Optional[str] = None
-    filename: Optional[FilePath] = None
+class Lookup(SecurityContentObject, abc.ABC):
     default_match: Optional[bool] = None
-    match_type: Optional[str] = None
-    min_matches: Optional[int] = None
-    case_sensitive_match: Optional[bool] = None
-    # TODO: Add id field to all lookup ymls
-    id: uuid.UUID = Field(default_factory=uuid.uuid4)
-    date: datetime.date = Field(datetime.date.today())
-    author: str = Field("NO AUTHOR DEFINED",max_length=255)
-    version: NonNegativeInt = 1
+    # Per the documentation for transforms.conf, EXACT should not be specified in this list,
+    # so we include only WILDCARD and CIDR
+    match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(default=[])
+    min_matches: None | NonNegativeInt = Field(default=None)
+    max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(default=None)
+    case_sensitive_match: None | bool = Field(default=None)
+
+
 
 
+
     @model_serializer
     def serialize_model(self):
         #Call parent serializer
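Note: match_type changes from an optional free-form string to a list whose entries must look like WILDCARD(field) or CIDR(field); match_type_to_conf_format (defined in the following hunks) joins them into the value written to transforms.conf. A quick illustration, using re.search as an approximation of pydantic's pattern check (the constant name and field names are illustrative only):

    import re

    MATCH_TYPE_PATTERN = r"(^WILDCARD|CIDR)\(.+\)$"
    match_type = ["WILDCARD(dest)", "CIDR(src_ip)"]

    print(all(re.search(MATCH_TYPE_PATTERN, entry) for entry in match_type))  # True
    print(re.search(MATCH_TYPE_PATTERN, "EXACT(user)"))                       # None -> rejected
    print(", ".join(match_type))  # 'WILDCARD(dest), CIDR(src_ip)' -> the transforms.conf match_type value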
@@ -48,13 +60,12 @@ class Lookup(SecurityContentObject):
 
         #All fields custom to this model
         model= {
-            "filename": self.filename.name if self.filename is not None else None,
+
             "default_match": "true" if self.default_match is True else "false",
-            "match_type": self.match_type,
+            "match_type": self.match_type_to_conf_format,
             "min_matches": self.min_matches,
+            "max_matches": self.max_matches,
             "case_sensitive_match": "true" if self.case_sensitive_match is True else "false",
-            "collection": self.collection,
-            "fields_list": self.fields_list
         }
 
         #return the model
@@ -72,31 +83,91 @@ class Lookup(SecurityContentObject):
         return data
 
 
-    def model_post_init(self, ctx:dict[str,Any]):
-        if not self.filename:
-            return
-        import pathlib
-        filenamePath = pathlib.Path(self.filename)
-
-        if filenamePath.suffix not in [".csv", ".mlmodel"]:
-            raise ValueError(f"All Lookup files must be CSV files and end in .csv. The following file does not: '{filenamePath}'")
+    @computed_field
+    @cached_property
+    def match_type_to_conf_format(self)->str:
+        return ', '.join(self.match_type)
+
 
+    @staticmethod
+    def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
+        # Comprehensively match all kinds of lookups, including inputlookup and outputlookup
+        inputLookupsToGet = set(re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', text_field, re.IGNORECASE))
+        outputLookupsToGet = set(re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)',text_field,re.IGNORECASE))
+        lookupsToGet = set(re.findall(r'[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)', text_field, re.IGNORECASE))
+
 
+        input_lookups = Lookup.mapNamesToSecurityContentObjects(list(inputLookupsToGet-LOOKUPS_TO_IGNORE), director)
+        output_lookups = Lookup.mapNamesToSecurityContentObjects(list(outputLookupsToGet-LOOKUPS_TO_IGNORE), director)
+        lookups = Lookup.mapNamesToSecurityContentObjects(list(lookupsToGet-LOOKUPS_TO_IGNORE), director)
+
+        all_lookups = set(input_lookups + output_lookups + lookups)
 
-        if filenamePath.suffix == ".mlmodel":
-            # Do not need any additional checks for an mlmodel file
-            return
+        return list(all_lookups)
 
+
+
+
+class FileBackedLookup(Lookup, abc.ABC):
+    # For purposes of the disciminated union, the child classes which
+    # inherit from this class must declare the typing of lookup_type
+    # themselves, hence it is not defined in the Lookup class
+
+    @model_validator(mode="after")
+    def ensure_lookup_file_exists(self)->Self:
+        if not self.filename.exists():
+            raise ValueError(f"Expected lookup filename {self.filename} does not exist")
+        return self
+
+    @computed_field
+    @cached_property
+    def filename(self)->FilePath:
+        if self.file_path is None:
+            raise ValueError(f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None") #type: ignore
+
+        csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" #type: ignore
+        return csv_file
+
+    @computed_field
+    @cached_property
+    def app_filename(self)->FilePath:
+        '''
+        We may consider two options:
+        1. Always apply the datetime stamp to the end of the file. This makes the code easier
+        2. Only apply the datetime stamp if it is version > 1. This makes the code a small fraction
+           more complicated, but preserves longstanding CSV that have not been modified in a long time
+        '''
+        return pathlib.Path(f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}") #type: ignore
+
+class CSVLookup(FileBackedLookup):
+    lookup_type:Literal[Lookup_Type.csv]
+
+    @model_serializer
+    def serialize_model(self):
+        #Call parent serializer
+        super_fields = super().serialize_model()
+
+        #All fields custom to this model
+        model= {
+            "filename": self.app_filename.name
+        }
+
+        #return the model
+        model.update(super_fields)
+        return model
+
+    @model_validator(mode="after")
+    def ensure_correct_csv_structure(self)->Self:
         # https://docs.python.org/3/library/csv.html#csv.DictReader
         # Column Names (fieldnames) determine by the number of columns in the first row.
         # If a row has MORE fields than fieldnames, they will be dumped in a list under the key 'restkey' - this should throw an Exception
         # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
         csv_errors:list[str] = []
-        with open(filenamePath, "r") as csv_fp:
+        with open(self.filename, "r") as csv_fp:
             RESTKEY = "extra_fields_in_a_row"
             csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
             if csv_dict.fieldnames is None:
-                raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t"
+                raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t"
                                  "Unable to read fieldnames from CSV. Is the CSV empty?\n"
                                  "  Please try opening the file with a CSV Editor to ensure that it is correct.")
             # Remember that row 1 has the headers and we do not iterate over it in the loop below
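Note: get_lookups is rewritten to use three separate regexes (lookup, inputlookup, outputlookup) that tolerate each command's optional key=value arguments, replacing the looser single-regex version removed in the next hunk. A rough sanity check of the new patterns against a made-up SPL string (the search text and lookup names are illustrative; outputs should be as commented):

    import re

    spl = ("| lookup local=true privileged_users user OUTPUT is_privileged "
           "| inputlookup append=t tracking_lookup "
           "| outputlookup override_if_empty=false tracking_lookup")

    plain = re.findall(r'[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)', spl, re.IGNORECASE)
    inputs = re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', spl, re.IGNORECASE)
    outputs = re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)', spl, re.IGNORECASE)

    print(plain)    # expected: ['privileged_users']
    print(inputs)   # expected: ['tracking_lookup']
    print(outputs)  # expected: ['tracking_lookup']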
@@ -113,41 +184,52 @@ class Lookup(SecurityContentObject):
                                      f"but instead had [{column_index}].")
         if len(csv_errors) > 0:
             err_string = '\n\t'.join(csv_errors)
-            raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t{err_string}\n"
+            raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n"
                              f"  Please try opening the file with a CSV Editor to ensure that it is correct.")
 
-        return
-
-
-    @field_validator('match_type')
+        return self
+
+
+
+class KVStoreLookup(Lookup):
+    lookup_type: Literal[Lookup_Type.kvstore]
+    fields: list[str] = Field(description="The names of the fields/headings for the KVStore.", min_length=1)
+
+    @field_validator("fields", mode='after')
     @classmethod
-    def match_type_valid(cls, v: Union[str,None], info: ValidationInfo):
-        if not v:
-            #Match type can be None and that's okay
-            return v
+    def ensure_key(cls, values: list[str]):
+        if values[0] != "_key":
+            raise ValueError(f"fields MUST begin with '_key', not '{values[0]}'")
+        return values
 
-        if not (v.startswith("WILDCARD(") or v.endswith(")")) :
-            raise ValueError(f"All match_types must take the format 'WILDCARD(field_name)'. The following file does not: '{v}'")
-        return v
+    @computed_field
+    @cached_property
+    def collection(self)->str:
+        return self.name
 
+    @computed_field
+    @cached_property
+    def fields_to_fields_list_conf_format(self)->str:
+        return ', '.join(self.fields)
 
-    #Ensure that exactly one of location or filename are defined
-    @model_validator(mode='after')
-    def ensure_mutually_exclusive_fields(self)->Lookup:
-        if self.filename is not None and self.collection is not None:
-            raise ValueError("filename and collection cannot be defined in the lookup file. Exactly one must be defined.")
-        elif self.filename is None and self.collection is None:
-            raise ValueError("Neither filename nor collection were defined in the lookup file. Exactly one must "
-                             "be defined.")
+    @model_serializer
+    def serialize_model(self):
+        #Call parent serializer
+        super_fields = super().serialize_model()
 
+        #All fields custom to this model
+        model= {
+            "collection": self.collection,
+            "fields_list": self.fields_to_fields_list_conf_format
+        }
+
+        #return the model
+        model.update(super_fields)
+        return model
 
-        return self
+class MlModel(FileBackedLookup):
+    lookup_type: Literal[Lookup_Type.mlmodel]
 
-
-    @staticmethod
-    def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
-        lookups_to_get = set(re.findall(r'[^output]lookup (?:update=true)?(?:append=t)?\s*([^\s]*)', text_field))
-        lookups_to_ignore = set([lookup for lookup in lookups_to_get if any(to_ignore in lookups_to_get for to_ignore in ignore_lookups)])
-        lookups_to_get -= lookups_to_ignore
-        return Lookup.mapNamesToSecurityContentObjects(list(lookups_to_get), director)
-
+
+LookupAdapter = TypeAdapter(Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")])
+
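Note: LookupAdapter is a pydantic TypeAdapter over a discriminated union, so the lookup_type value in each YAML selects the concrete class (CSVLookup, KVStoreLookup, or MlModel). A simplified, self-contained sketch of the same pattern (toy models only; the real classes also require the SecurityContentObject fields):

    from typing import Annotated, Literal, Union
    from pydantic import BaseModel, Field, TypeAdapter

    class CSVLike(BaseModel):
        lookup_type: Literal["csv"]
        filename: str

    class KVStoreLike(BaseModel):
        lookup_type: Literal["kvstore"]
        fields: list[str]

    Adapter = TypeAdapter(Annotated[Union[CSVLike, KVStoreLike], Field(discriminator="lookup_type")])

    obj = Adapter.validate_python({"lookup_type": "kvstore", "fields": ["_key", "user"]})
    print(type(obj).__name__)  # KVStoreLike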
contentctl/objects/macro.py
@@ -48,7 +48,6 @@ class Macro(SecurityContentObject):
         return model
 
     @staticmethod
-
     def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[str]=MACROS_TO_IGNORE)->list[Macro]:
         #Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
         #If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
@@ -59,10 +58,10 @@ class Macro(SecurityContentObject):
                             "This may have occurred when a macro was commented out.\n"
                             "Please ammend your search to remove the substring '````'")
 
-        # replace all the macros with a space
+        # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below
         text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
 
-
+        # Find all the macros, which start and end with a '`' character
        macros_to_get = re.findall(r'`([^\s]+)`', text_field)
         #If macros take arguments, stop at the first argument. We just want the name of the macro
         macros_to_get = set([macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros_to_get])
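Note: the reworded comments describe the parsing order: triple-backtick comments are blanked out first so their contents cannot be mistaken for macros, then backtick-delimited macro names are collected and any argument list is trimmed. A small standalone walk-through using the same expressions (the search text is made up):

    import re

    text = "| tstats count ```comment with `ignored_macro` inside``` | `security_content_ctime(firstTime)` | `drop_dm_object_name(Processes)`"

    # 1. Replace comments with a space so anything inside them is ignored
    text = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text)

    # 2. Collect backtick-delimited macros, then strip any argument list
    macros = re.findall(r'`([^\s]+)`', text)
    macros = set(macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros)
    print(macros)  # expected (order may vary): {'security_content_ctime', 'drop_dm_object_name'}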