contentctl 4.4.7__py3-none-any.whl → 5.0.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in that public registry.
Files changed (69)
  1. contentctl/actions/build.py +39 -27
  2. contentctl/actions/detection_testing/DetectionTestingManager.py +0 -1
  3. contentctl/actions/detection_testing/infrastructures/DetectionTestingInfrastructure.py +32 -26
  4. contentctl/actions/detection_testing/progress_bar.py +6 -6
  5. contentctl/actions/detection_testing/views/DetectionTestingView.py +4 -4
  6. contentctl/actions/new_content.py +98 -81
  7. contentctl/actions/test.py +4 -5
  8. contentctl/actions/validate.py +2 -1
  9. contentctl/contentctl.py +114 -79
  10. contentctl/helper/utils.py +0 -14
  11. contentctl/input/director.py +5 -5
  12. contentctl/input/new_content_questions.py +2 -2
  13. contentctl/input/yml_reader.py +11 -6
  14. contentctl/objects/abstract_security_content_objects/detection_abstract.py +228 -120
  15. contentctl/objects/abstract_security_content_objects/security_content_object_abstract.py +5 -7
  16. contentctl/objects/alert_action.py +2 -1
  17. contentctl/objects/atomic.py +1 -0
  18. contentctl/objects/base_test.py +4 -3
  19. contentctl/objects/base_test_result.py +3 -3
  20. contentctl/objects/baseline.py +26 -6
  21. contentctl/objects/baseline_tags.py +2 -3
  22. contentctl/objects/config.py +26 -45
  23. contentctl/objects/constants.py +4 -1
  24. contentctl/objects/correlation_search.py +89 -95
  25. contentctl/objects/data_source.py +5 -6
  26. contentctl/objects/deployment.py +2 -10
  27. contentctl/objects/deployment_email.py +2 -1
  28. contentctl/objects/deployment_notable.py +2 -1
  29. contentctl/objects/deployment_phantom.py +2 -1
  30. contentctl/objects/deployment_rba.py +2 -1
  31. contentctl/objects/deployment_scheduling.py +2 -1
  32. contentctl/objects/deployment_slack.py +2 -1
  33. contentctl/objects/detection_tags.py +7 -42
  34. contentctl/objects/drilldown.py +1 -0
  35. contentctl/objects/enums.py +21 -58
  36. contentctl/objects/investigation.py +6 -5
  37. contentctl/objects/investigation_tags.py +2 -3
  38. contentctl/objects/lookup.py +145 -63
  39. contentctl/objects/macro.py +2 -3
  40. contentctl/objects/mitre_attack_enrichment.py +2 -2
  41. contentctl/objects/observable.py +3 -1
  42. contentctl/objects/playbook_tags.py +5 -1
  43. contentctl/objects/rba.py +90 -0
  44. contentctl/objects/risk_event.py +87 -144
  45. contentctl/objects/story_tags.py +1 -2
  46. contentctl/objects/test_attack_data.py +2 -1
  47. contentctl/objects/unit_test_baseline.py +2 -1
  48. contentctl/output/api_json_output.py +233 -220
  49. contentctl/output/conf_output.py +51 -44
  50. contentctl/output/conf_writer.py +201 -125
  51. contentctl/output/data_source_writer.py +0 -1
  52. contentctl/output/json_writer.py +2 -4
  53. contentctl/output/svg_output.py +1 -1
  54. contentctl/output/templates/analyticstories_detections.j2 +1 -1
  55. contentctl/output/templates/collections.j2 +1 -1
  56. contentctl/output/templates/doc_detections.j2 +0 -5
  57. contentctl/output/templates/savedsearches_detections.j2 +8 -3
  58. contentctl/output/templates/transforms.j2 +4 -4
  59. contentctl/output/yml_writer.py +15 -0
  60. contentctl/templates/detections/endpoint/anomalous_usage_of_7zip.yml +16 -34
  61. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/METADATA +5 -4
  62. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/RECORD +65 -68
  63. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/WHEEL +1 -1
  64. contentctl/objects/event_source.py +0 -11
  65. contentctl/output/detection_writer.py +0 -28
  66. contentctl/output/new_content_yml_output.py +0 -56
  67. contentctl/output/yml_output.py +0 -66
  68. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/LICENSE.md +0 -0
  69. {contentctl-4.4.7.dist-info → contentctl-5.0.0a0.dist-info}/entry_points.txt +0 -0
contentctl/objects/detection_tags.py
@@ -4,8 +4,6 @@ from typing import TYPE_CHECKING, List, Optional, Union
 from pydantic import (
     BaseModel,
     Field,
-    NonNegativeInt,
-    PositiveInt,
     computed_field,
     UUID4,
     HttpUrl,
@@ -27,7 +25,6 @@ from contentctl.objects.enums import (
     Cis18Value,
     AssetType,
     SecurityDomain,
-    RiskSeverity,
     KillChainPhase,
     NistCategory,
     SecurityContentProductName
@@ -35,57 +32,29 @@ from contentctl.objects.enums import (
 from contentctl.objects.atomic import AtomicEnrichment, AtomicTest
 from contentctl.objects.annotated_types import MITRE_ATTACK_ID_TYPE, CVE_TYPE
 
-# TODO (#266): disable the use_enum_values configuration
+
 class DetectionTags(BaseModel):
     # detection spec
-    model_config = ConfigDict(use_enum_values=True, validate_default=False)
+
+    model_config = ConfigDict(validate_default=False, extra='forbid')
     analytic_story: list[Story] = Field(...)
     asset_type: AssetType = Field(...)
-
-    confidence: NonNegativeInt = Field(..., le=100)
-    impact: NonNegativeInt = Field(..., le=100)
-
-    @computed_field
-    @property
-    def risk_score(self) -> int:
-        return round((self.confidence * self.impact)/100)
-
-    @computed_field
-    @property
-    def severity(self)->RiskSeverity:
-        if 0 <= self.risk_score <= 20:
-            return RiskSeverity.INFORMATIONAL
-        elif 20 < self.risk_score <= 40:
-            return RiskSeverity.LOW
-        elif 40 < self.risk_score <= 60:
-            return RiskSeverity.MEDIUM
-        elif 60 < self.risk_score <= 80:
-            return RiskSeverity.HIGH
-        elif 80 < self.risk_score <= 100:
-            return RiskSeverity.CRITICAL
-        else:
-            raise Exception(f"Error getting severity - risk_score must be between 0-100, but was actually {self.risk_score}")
-
+    group: list[str] = []
 
     mitre_attack_id: List[MITRE_ATTACK_ID_TYPE] = []
     nist: list[NistCategory] = []
 
+    # TODO (cmcginley): observable should be removed as well, yes?
     # TODO (#249): Add pydantic validator to ensure observables are unique within a detection
     observable: List[Observable] = []
-    message: str = Field(...)
     product: list[SecurityContentProductName] = Field(..., min_length=1)
-    required_fields: list[str] = Field(min_length=1)
     throttling: Optional[Throttling] = None
     security_domain: SecurityDomain = Field(...)
     cve: List[CVE_TYPE] = []
     atomic_guid: List[AtomicTest] = []
-
 
     # enrichment
     mitre_attack_enrichments: List[MitreAttackEnrichment] = Field([], validate_default=True)
-    confidence_id: Optional[PositiveInt] = Field(None, ge=1, le=3)
-    impact_id: Optional[PositiveInt] = Field(None, ge=1, le=5)
-    evidence_str: Optional[str] = None
 
     @computed_field
     @property
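Note: confidence, impact, and the derived risk_score/severity computed fields are dropped from DetectionTags here; this appears to be superseded by the new risk model in contentctl/objects/rba.py (+90 lines in the file list). For reference, a worked example of the arithmetic being removed (the input values are illustrative only):

    # confidence=70, impact=80
    risk_score = round((70 * 80) / 100)   # -> 56
    # 40 < 56 <= 60, so the old severity property returned RiskSeverity.MEDIUM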
@@ -114,7 +83,7 @@ class DetectionTags(BaseModel):
 
     # TODO (#268): Validate manual_test has length > 0 if not None
     manual_test: Optional[str] = None
-
+
     # The following validator is temporarily disabled pending further discussions
     # @validator('message')
     # def validate_message(cls,v,values):
@@ -152,15 +121,11 @@ class DetectionTags(BaseModel):
         # Since this field has no parent, there is no need to call super() serialization function
         return {
             "analytic_story": [story.name for story in self.analytic_story],
-            "asset_type": self.asset_type.value,
+            "asset_type": self.asset_type,
             "cis20": self.cis20,
             "kill_chain_phases": self.kill_chain_phases,
             "nist": self.nist,
-            "observable": self.observable,
-            "message": self.message,
-            "risk_score": self.risk_score,
             "security_domain": self.security_domain,
-            "risk_severity": self.severity,
             "mitre_attack_id": self.mitre_attack_id,
             "mitre_attack_enrichments": self.mitre_attack_enrichments
         }
contentctl/objects/drilldown.py
@@ -23,6 +23,7 @@ class Drilldown(BaseModel):
         "but it is NOT the default value and must be supplied explicitly.",
         min_length= 1)
 
+    # TODO (cmcginley): @ljstella the drilldowns will need to be updated
     @classmethod
     def constructDrilldownsFromDetection(cls, detection: Detection) -> list[Drilldown]:
         victim_observables = [o for o in detection.tags.observable if o.role[0] == "Victim"]
contentctl/objects/enums.py
@@ -1,15 +1,15 @@
 from __future__ import annotations
 from typing import List
-import enum
+from enum import StrEnum, IntEnum
 
 
-class AnalyticsType(str, enum.Enum):
+class AnalyticsType(StrEnum):
     TTP = "TTP"
     Anomaly = "Anomaly"
     Hunting = "Hunting"
     Correlation = "Correlation"
 
-class DeploymentType(str, enum.Enum):
+class DeploymentType(StrEnum):
     TTP = "TTP"
     Anomaly = "Anomaly"
     Hunting = "Hunting"
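Note: the module-wide switch from class Foo(str, enum.Enum) to StrEnum requires Python 3.11+ and changes how members render as strings, which is likely why use_enum_values=True is dropped from model configs elsewhere in this diff. A minimal sketch (toy enums, not part of the package) of the difference:

    from enum import Enum, StrEnum  # StrEnum is available from Python 3.11

    class OldStyle(str, Enum):
        TTP = "TTP"

    class NewStyle(StrEnum):
        TTP = "TTP"

    print(f"{OldStyle.TTP}")           # 'OldStyle.TTP' on Python 3.11+ (was 'TTP' on 3.10)
    print(f"{NewStyle.TTP}")           # 'TTP'
    print(str(NewStyle.TTP) == "TTP")  # True: a StrEnum member formats as its value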
@@ -18,7 +18,7 @@ class DeploymentType(str, enum.Enum):
     Embedded = "Embedded"
 
 
-class DataModel(str,enum.Enum):
+class DataModel(StrEnum):
     ENDPOINT = "Endpoint"
     NETWORK_TRAFFIC = "Network_Traffic"
     AUTHENTICATION = "Authentication"
@@ -40,11 +40,11 @@ class DataModel(str,enum.Enum):
     SPLUNK_AUDIT = "Splunk_Audit"
 
 
-class PlaybookType(str, enum.Enum):
+class PlaybookType(StrEnum):
     INVESTIGATION = "Investigation"
     RESPONSE = "Response"
 
-class SecurityContentType(enum.Enum):
+class SecurityContentType(IntEnum):
     detections = 1
     baselines = 2
     stories = 3
@@ -68,20 +68,15 @@ class SecurityContentType(enum.Enum):
 #    json_objects = "json_objects"
 
 
-class SecurityContentProduct(enum.Enum):
-    SPLUNK_APP = 1
-    API = 3
-    CUSTOM = 4
 
-
-class SecurityContentProductName(str, enum.Enum):
+class SecurityContentProductName(StrEnum):
     SPLUNK_ENTERPRISE = "Splunk Enterprise"
     SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security"
     SPLUNK_CLOUD = "Splunk Cloud"
     SPLUNK_SECURITY_ANALYTICS_FOR_AWS = "Splunk Security Analytics for AWS"
     SPLUNK_BEHAVIORAL_ANALYTICS = "Splunk Behavioral Analytics"
 
-class SecurityContentInvestigationProductName(str, enum.Enum):
+class SecurityContentInvestigationProductName(StrEnum):
     SPLUNK_ENTERPRISE = "Splunk Enterprise"
     SPLUNK_ENTERPRISE_SECURITY = "Splunk Enterprise Security"
     SPLUNK_CLOUD = "Splunk Cloud"
@@ -90,33 +85,20 @@ class SecurityContentInvestigationProductName(str, enum.Enum):
     SPLUNK_PHANTOM = "Splunk Phantom"
 
 
-class DetectionStatus(enum.Enum):
-    production = "production"
-    deprecated = "deprecated"
-    experimental = "experimental"
-    validation = "validation"
-
-
-class DetectionStatusSSA(enum.Enum):
+class DetectionStatus(StrEnum):
     production = "production"
     deprecated = "deprecated"
     experimental = "experimental"
     validation = "validation"
 
 
-class LogLevel(enum.Enum):
+class LogLevel(StrEnum):
     NONE = "NONE"
     ERROR = "ERROR"
     INFO = "INFO"
 
 
-class AlertActions(enum.Enum):
-    notable = "notable"
-    rba = "rba"
-    email = "email"
-
-
-class StoryCategory(str, enum.Enum):
+class StoryCategory(StrEnum):
     ABUSE = "Abuse"
     ADVERSARY_TACTICS = "Adversary Tactics"
     BEST_PRACTICES = "Best Practices"
@@ -139,37 +121,18 @@ class StoryCategory(str, enum.Enum):
     UNAUTHORIZED_SOFTWARE = "Unauthorized Software"
 
 
-class PostTestBehavior(str, enum.Enum):
+class PostTestBehavior(StrEnum):
     always_pause = "always_pause"
     pause_on_failure = "pause_on_failure"
     never_pause = "never_pause"
 
 
-class DetectionTestingMode(str, enum.Enum):
+class DetectionTestingMode(StrEnum):
     selected = "selected"
     all = "all"
     changes = "changes"
 
 
-class DetectionTestingTargetInfrastructure(str, enum.Enum):
-    container = "container"
-    server = "server"
-
-
-class InstanceState(str, enum.Enum):
-    starting = "starting"
-    running = "running"
-    error = "error"
-    stopping = "stopping"
-    stopped = "stopped"
-
-
-class SigmaConverterTarget(enum.Enum):
-    CIM = 1
-    RAW = 2
-    OCSF = 3
-    ALL = 4
-
 # It's unclear why we use a mix of constants and enums. The following list was taken from:
 # contentctl/contentctl/helper/constants.py.
 # We convect it to an enum here
@@ -183,7 +146,7 @@ class SigmaConverterTarget(enum.Enum):
 # "Command And Control": 6,
 # "Actions on Objectives": 7
 # }
-class KillChainPhase(str, enum.Enum):
+class KillChainPhase(StrEnum):
     UNKNOWN ="Unknown"
     RECONNAISSANCE = "Reconnaissance"
     WEAPONIZATION = "Weaponization"
@@ -194,7 +157,7 @@ class KillChainPhase(str, enum.Enum):
     ACTIONS_ON_OBJECTIVES = "Actions on Objectives"
 
 
-class DataSource(str,enum.Enum):
+class DataSource(StrEnum):
     OSQUERY_ES_PROCESS_EVENTS = "OSQuery ES Process Events"
     POWERSHELL_4104 = "Powershell 4104"
     SYSMON_EVENT_ID_1 = "Sysmon EventID 1"
@@ -234,7 +197,7 @@ class DataSource(str,enum.Enum):
     WINDOWS_SECURITY_5145 = "Windows Security 5145"
     WINDOWS_SYSTEM_7045 = "Windows System 7045"
 
-class ProvidingTechnology(str, enum.Enum):
+class ProvidingTechnology(StrEnum):
     AMAZON_SECURITY_LAKE = "Amazon Security Lake"
     AMAZON_WEB_SERVICES_CLOUDTRAIL = "Amazon Web Services - Cloudtrail"
     AZURE_AD = "Azure AD"
@@ -302,7 +265,7 @@ class ProvidingTechnology(str, enum.Enum):
         return sorted(list(matched_technologies))
 
 
-class Cis18Value(str,enum.Enum):
+class Cis18Value(StrEnum):
     CIS_0 = "CIS 0"
     CIS_1 = "CIS 1"
     CIS_2 = "CIS 2"
@@ -323,7 +286,7 @@ class Cis18Value(str,enum.Enum):
     CIS_17 = "CIS 17"
     CIS_18 = "CIS 18"
 
-class SecurityDomain(str, enum.Enum):
+class SecurityDomain(StrEnum):
     ENDPOINT = "endpoint"
     NETWORK = "network"
     THREAT = "threat"
@@ -331,7 +294,7 @@ class SecurityDomain(str, enum.Enum):
     ACCESS = "access"
     AUDIT = "audit"
 
-class AssetType(str, enum.Enum):
+class AssetType(StrEnum):
     AWS_ACCOUNT = "AWS Account"
     AWS_EKS_KUBERNETES_CLUSTER = "AWS EKS Kubernetes cluster"
     AWS_FEDERATED_ACCOUNT = "AWS Federated Account"
@@ -382,7 +345,7 @@ class AssetType(str, enum.Enum):
     WEB_APPLICATION = "Web Application"
     WINDOWS = "Windows"
 
-class NistCategory(str, enum.Enum):
+class NistCategory(StrEnum):
     ID_AM = "ID.AM"
     ID_BE = "ID.BE"
     ID_GV = "ID.GV"
@@ -406,7 +369,7 @@ class NistCategory(str, enum.Enum):
     RC_IM = "RC.IM"
     RC_CO = "RC.CO"
 
-class RiskSeverity(str,enum.Enum):
+class RiskSeverity(StrEnum):
     # Levels taken from the following documentation link
     # https://docs.splunk.com/Documentation/ES/7.3.2/User/RiskScoring
     # 20 - info (0-20 for us)
contentctl/objects/investigation.py
@@ -12,17 +12,13 @@ from contentctl.objects.constants import (
 )
 from contentctl.objects.config import CustomApp
 
-# TODO (#266): disable the use_enum_values configuration
 class Investigation(SecurityContentObject):
-    model_config = ConfigDict(use_enum_values=True,validate_default=False)
+    model_config = ConfigDict(validate_default=False)
     type: str = Field(...,pattern="^Investigation$")
-    datamodel: list[DataModel] = Field(...)
     name:str = Field(...,max_length=CONTENTCTL_MAX_SEARCH_NAME_LENGTH)
     search: str = Field(...)
     how_to_implement: str = Field(...)
     known_false_positives: str = Field(...)
-
-
     tags: InvestigationTags
 
     # enrichment
@@ -38,6 +34,11 @@ class Investigation(SecurityContentObject):
 
         return inputs
 
+    @computed_field
+    @property
+    def datamodel(self) -> List[DataModel]:
+        return [dm for dm in DataModel if dm in self.search]
+
     @computed_field
     @property
     def lowercase_name(self)->str:
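Note: datamodel changes from a required YAML field to a value computed from the search text; because DataModel is now a StrEnum, "dm in self.search" is a plain substring test against each enum value. A simplified, self-contained sketch of the same idea (enum trimmed to two members, search string made up):

    from enum import StrEnum

    class DataModel(StrEnum):
        ENDPOINT = "Endpoint"
        NETWORK_TRAFFIC = "Network_Traffic"

    search = "| tstats count from datamodel=Endpoint.Processes where ..."
    matched = [dm for dm in DataModel if dm in search]
    print(matched)  # [<DataModel.ENDPOINT: 'Endpoint'>]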
contentctl/objects/investigation_tags.py
@@ -1,13 +1,13 @@
 from __future__ import annotations
 from typing import List
-from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer
+from pydantic import BaseModel, Field, field_validator, ValidationInfo, model_serializer,ConfigDict
 from contentctl.objects.story import Story
 from contentctl.objects.enums import SecurityContentInvestigationProductName, SecurityDomain
 
 class InvestigationTags(BaseModel):
+    model_config = ConfigDict(extra="forbid")
     analytic_story: List[Story] = Field([],min_length=1)
     product: List[SecurityContentInvestigationProductName] = Field(...,min_length=1)
-    required_fields: List[str] = Field(min_length=1)
     security_domain: SecurityDomain = Field(...)
 
 
@@ -23,7 +23,6 @@ class InvestigationTags(BaseModel):
         model= {
             "analytic_story": [story.name for story in self.analytic_story],
             "product": self.product,
-            "required_fields": self.required_fields,
             "security_domain": self.security_domain,
         }
 
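Note: with required_fields removed from the model and from the serializer, and extra="forbid" now set on InvestigationTags, YAML that still carries required_fields should fail validation rather than being silently ignored. A minimal pydantic v2 sketch of that behavior (simplified stand-in model, not the real InvestigationTags):

    from pydantic import BaseModel, ConfigDict, ValidationError

    class Tags(BaseModel):
        model_config = ConfigDict(extra="forbid")
        security_domain: str

    try:
        Tags(security_domain="endpoint", required_fields=["user"])
    except ValidationError as err:
        print(err)  # reports "Extra inputs are not permitted" for required_fields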
contentctl/objects/lookup.py
@@ -1,10 +1,13 @@
 from __future__ import annotations
-from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt
-from typing import TYPE_CHECKING, Optional, Any, Union
+
+from pydantic import field_validator, ValidationInfo, model_validator, FilePath, model_serializer, Field, NonNegativeInt, computed_field, TypeAdapter
+from enum import StrEnum, auto
+from typing import TYPE_CHECKING, Optional, Any, Union, Literal, Annotated, Self
 import re
 import csv
-import uuid
-import datetime
+import abc
+from functools import cached_property
+import pathlib
 if TYPE_CHECKING:
     from contentctl.input.director import DirectorOutputDto
     from contentctl.objects.config import validate
@@ -15,32 +18,41 @@ LOOKUPS_TO_IGNORE = set(["outputlookup"])
 LOOKUPS_TO_IGNORE.add("ut_shannon_lookup") #In the URL toolbox app which is recommended for ESCU
 LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("cim_corporate_web_domain_lookup") #Shipped with the Asset and Identity Framework
+LOOKUPS_TO_IGNORE.add("cim_corporate_email_domain_lookup") #Shipped with the Enterprise Security
+LOOKUPS_TO_IGNORE.add("cim_cloud_domain_lookup") #Shipped with the Enterprise Security
+
 LOOKUPS_TO_IGNORE.add("alexa_lookup_by_str") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("interesting_ports_lookup") #Shipped with the Asset and Identity Framework
+LOOKUPS_TO_IGNORE.add("asset_lookup_by_str") #Shipped with the Asset and Identity Framework
 LOOKUPS_TO_IGNORE.add("admon_groups_def") #Shipped with the SA-admon addon
+LOOKUPS_TO_IGNORE.add("identity_lookup_expanded") #Shipped with the Enterprise Security
 
 #Special case for the Detection "Exploit Public Facing Application via Apache Commons Text"
 LOOKUPS_TO_IGNORE.add("=")
 LOOKUPS_TO_IGNORE.add("other_lookups")
 
 
+class Lookup_Type(StrEnum):
+    csv = auto()
+    kvstore = auto()
+    mlmodel = auto()
+
+
+
 # TODO (#220): Split Lookup into 2 classes
-class Lookup(SecurityContentObject):
-
-    collection: Optional[str] = None
-    fields_list: Optional[str] = None
-    filename: Optional[FilePath] = None
+class Lookup(SecurityContentObject, abc.ABC):
     default_match: Optional[bool] = None
-    match_type: Optional[str] = None
-    min_matches: Optional[int] = None
-    case_sensitive_match: Optional[bool] = None
-    # TODO: Add id field to all lookup ymls
-    id: uuid.UUID = Field(default_factory=uuid.uuid4)
-    date: datetime.date = Field(datetime.date.today())
-    author: str = Field("NO AUTHOR DEFINED",max_length=255)
-    version: NonNegativeInt = 1
+    # Per the documentation for transforms.conf, EXACT should not be specified in this list,
+    # so we include only WILDCARD and CIDR
+    match_type: list[Annotated[str, Field(pattern=r"(^WILDCARD|CIDR)\(.+\)$")]] = Field(default=[])
+    min_matches: None | NonNegativeInt = Field(default=None)
+    max_matches: None | Annotated[NonNegativeInt, Field(ge=1, le=1000)] = Field(default=None)
+    case_sensitive_match: None | bool = Field(default=None)
+
+
 
 
+
     @model_serializer
     def serialize_model(self):
         #Call parent serializer
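Note: match_type changes from an optional free-form string to a list whose entries must look like WILDCARD(field) or CIDR(field); match_type_to_conf_format (defined in the following hunks) joins them into the value written to transforms.conf. A quick illustration, using re.search as an approximation of pydantic's pattern check (the constant name and field names are illustrative only):

    import re

    MATCH_TYPE_PATTERN = r"(^WILDCARD|CIDR)\(.+\)$"
    match_type = ["WILDCARD(dest)", "CIDR(src_ip)"]

    print(all(re.search(MATCH_TYPE_PATTERN, entry) for entry in match_type))  # True
    print(re.search(MATCH_TYPE_PATTERN, "EXACT(user)"))                       # None -> rejected
    print(", ".join(match_type))  # 'WILDCARD(dest), CIDR(src_ip)' -> the transforms.conf match_type value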
@@ -48,13 +60,12 @@ class Lookup(SecurityContentObject):
 
         #All fields custom to this model
         model= {
-            "filename": self.filename.name if self.filename is not None else None,
+
             "default_match": "true" if self.default_match is True else "false",
-            "match_type": self.match_type,
+            "match_type": self.match_type_to_conf_format,
             "min_matches": self.min_matches,
+            "max_matches": self.max_matches,
             "case_sensitive_match": "true" if self.case_sensitive_match is True else "false",
-            "collection": self.collection,
-            "fields_list": self.fields_list
         }
 
         #return the model
@@ -72,31 +83,91 @@ class Lookup(SecurityContentObject):
         return data
 
 
-    def model_post_init(self, ctx:dict[str,Any]):
-        if not self.filename:
-            return
-        import pathlib
-        filenamePath = pathlib.Path(self.filename)
-
-        if filenamePath.suffix not in [".csv", ".mlmodel"]:
-            raise ValueError(f"All Lookup files must be CSV files and end in .csv. The following file does not: '{filenamePath}'")
+    @computed_field
+    @cached_property
+    def match_type_to_conf_format(self)->str:
+        return ', '.join(self.match_type)
+
 
+    @staticmethod
+    def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
+        # Comprehensively match all kinds of lookups, including inputlookup and outputlookup
+        inputLookupsToGet = set(re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', text_field, re.IGNORECASE))
+        outputLookupsToGet = set(re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)',text_field,re.IGNORECASE))
+        lookupsToGet = set(re.findall(r'[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)', text_field, re.IGNORECASE))
+
 
+        input_lookups = Lookup.mapNamesToSecurityContentObjects(list(inputLookupsToGet-LOOKUPS_TO_IGNORE), director)
+        output_lookups = Lookup.mapNamesToSecurityContentObjects(list(outputLookupsToGet-LOOKUPS_TO_IGNORE), director)
+        lookups = Lookup.mapNamesToSecurityContentObjects(list(lookupsToGet-LOOKUPS_TO_IGNORE), director)
+
+        all_lookups = set(input_lookups + output_lookups + lookups)
 
-        if filenamePath.suffix == ".mlmodel":
-            # Do not need any additional checks for an mlmodel file
-            return
+        return list(all_lookups)
 
+
+
+
+class FileBackedLookup(Lookup, abc.ABC):
+    # For purposes of the disciminated union, the child classes which
+    # inherit from this class must declare the typing of lookup_type
+    # themselves, hence it is not defined in the Lookup class
+
+    @model_validator(mode="after")
+    def ensure_lookup_file_exists(self)->Self:
+        if not self.filename.exists():
+            raise ValueError(f"Expected lookup filename {self.filename} does not exist")
+        return self
+
+    @computed_field
+    @cached_property
+    def filename(self)->FilePath:
+        if self.file_path is None:
+            raise ValueError(f"Cannot get the filename of the lookup {self.lookup_type} because the YML file_path attribute is None") #type: ignore
+
+        csv_file = self.file_path.parent / f"{self.file_path.stem}.{self.lookup_type}" #type: ignore
+        return csv_file
+
+    @computed_field
+    @cached_property
+    def app_filename(self)->FilePath:
+        '''
+        We may consider two options:
+        1. Always apply the datetime stamp to the end of the file. This makes the code easier
+        2. Only apply the datetime stamp if it is version > 1. This makes the code a small fraction
+           more complicated, but preserves longstanding CSV that have not been modified in a long time
+        '''
+        return pathlib.Path(f"{self.filename.stem}_{self.date.year}{self.date.month:02}{self.date.day:02}.{self.lookup_type}") #type: ignore
+
+class CSVLookup(FileBackedLookup):
+    lookup_type:Literal[Lookup_Type.csv]
+
+    @model_serializer
+    def serialize_model(self):
+        #Call parent serializer
+        super_fields = super().serialize_model()
+
+        #All fields custom to this model
+        model= {
+            "filename": self.app_filename.name
+        }
+
+        #return the model
+        model.update(super_fields)
+        return model
+
+    @model_validator(mode="after")
+    def ensure_correct_csv_structure(self)->Self:
         # https://docs.python.org/3/library/csv.html#csv.DictReader
         # Column Names (fieldnames) determine by the number of columns in the first row.
         # If a row has MORE fields than fieldnames, they will be dumped in a list under the key 'restkey' - this should throw an Exception
         # If a row has LESS fields than fieldnames, then the field should contain None by default. This should also throw an exception.
         csv_errors:list[str] = []
-        with open(filenamePath, "r") as csv_fp:
+        with open(self.filename, "r") as csv_fp:
             RESTKEY = "extra_fields_in_a_row"
             csv_dict = csv.DictReader(csv_fp, restkey=RESTKEY)
             if csv_dict.fieldnames is None:
-                raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t"
+                raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t"
                                  "Unable to read fieldnames from CSV. Is the CSV empty?\n"
                                  "  Please try opening the file with a CSV Editor to ensure that it is correct.")
             # Remember that row 1 has the headers and we do not iterate over it in the loop below
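Note: get_lookups is rewritten to use three separate regexes (lookup, inputlookup, outputlookup) that tolerate each command's optional key=value arguments, replacing the looser single-regex version removed in the next hunk. A rough sanity check of the new patterns against a made-up SPL string (the search text and lookup names are illustrative; outputs should be as commented):

    import re

    spl = ("| lookup local=true privileged_users user OUTPUT is_privileged "
           "| inputlookup append=t tracking_lookup "
           "| outputlookup override_if_empty=false tracking_lookup")

    plain = re.findall(r'[^\w](?:(?<!output)(?<!input))lookup(?:\s*(?:(?:local|update)\s*=\s*(?:true|t|false|f))){0,2}\s+([\w]+)', spl, re.IGNORECASE)
    inputs = re.findall(r'[^\w]inputlookup(?:\s*(?:(?:append|strict|start|max)\s*=\s*(?:true|t|false|f))){0,4}\s+([\w]+)', spl, re.IGNORECASE)
    outputs = re.findall(r'[^\w]outputlookup(?:\s*(?:(?:append|create_empty|override_if_empty|max|key_field|allow_updates|createinapp|create_context|output_format)\s*=\s*[^\s]*))*\s+([\w]+)', spl, re.IGNORECASE)

    print(plain)    # expected: ['privileged_users']
    print(inputs)   # expected: ['tracking_lookup']
    print(outputs)  # expected: ['tracking_lookup']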
@@ -113,41 +184,52 @@ class Lookup(SecurityContentObject):
                                      f"but instead had [{column_index}].")
         if len(csv_errors) > 0:
             err_string = '\n\t'.join(csv_errors)
-            raise ValueError(f"Error validating the CSV referenced by the lookup: {filenamePath}:\n\t{err_string}\n"
+            raise ValueError(f"Error validating the CSV referenced by the lookup: {self.filename}:\n\t{err_string}\n"
                              f"  Please try opening the file with a CSV Editor to ensure that it is correct.")
 
-        return
-
-
-    @field_validator('match_type')
+        return self
+
+
+
+class KVStoreLookup(Lookup):
+    lookup_type: Literal[Lookup_Type.kvstore]
+    fields: list[str] = Field(description="The names of the fields/headings for the KVStore.", min_length=1)
+
+    @field_validator("fields", mode='after')
     @classmethod
-    def match_type_valid(cls, v: Union[str,None], info: ValidationInfo):
-        if not v:
-            #Match type can be None and that's okay
-            return v
+    def ensure_key(cls, values: list[str]):
+        if values[0] != "_key":
+            raise ValueError(f"fields MUST begin with '_key', not '{values[0]}'")
+        return values
 
-        if not (v.startswith("WILDCARD(") or v.endswith(")")) :
-            raise ValueError(f"All match_types must take the format 'WILDCARD(field_name)'. The following file does not: '{v}'")
-        return v
+    @computed_field
+    @cached_property
+    def collection(self)->str:
+        return self.name
 
+    @computed_field
+    @cached_property
+    def fields_to_fields_list_conf_format(self)->str:
+        return ', '.join(self.fields)
 
-    #Ensure that exactly one of location or filename are defined
-    @model_validator(mode='after')
-    def ensure_mutually_exclusive_fields(self)->Lookup:
-        if self.filename is not None and self.collection is not None:
-            raise ValueError("filename and collection cannot be defined in the lookup file. Exactly one must be defined.")
-        elif self.filename is None and self.collection is None:
-            raise ValueError("Neither filename nor collection were defined in the lookup file. Exactly one must "
-                             "be defined.")
+    @model_serializer
+    def serialize_model(self):
+        #Call parent serializer
+        super_fields = super().serialize_model()
 
+        #All fields custom to this model
+        model= {
+            "collection": self.collection,
+            "fields_list": self.fields_to_fields_list_conf_format
+        }
+
+        #return the model
+        model.update(super_fields)
+        return model
 
-        return self
+class MlModel(FileBackedLookup):
+    lookup_type: Literal[Lookup_Type.mlmodel]
 
-
-    @staticmethod
-    def get_lookups(text_field: str, director:DirectorOutputDto, ignore_lookups:set[str]=LOOKUPS_TO_IGNORE)->list[Lookup]:
-        lookups_to_get = set(re.findall(r'[^output]lookup (?:update=true)?(?:append=t)?\s*([^\s]*)', text_field))
-        lookups_to_ignore = set([lookup for lookup in lookups_to_get if any(to_ignore in lookups_to_get for to_ignore in ignore_lookups)])
-        lookups_to_get -= lookups_to_ignore
-        return Lookup.mapNamesToSecurityContentObjects(list(lookups_to_get), director)
-
+
+LookupAdapter = TypeAdapter(Annotated[CSVLookup | KVStoreLookup | MlModel, Field(discriminator="lookup_type")])
+
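Note: LookupAdapter is a pydantic TypeAdapter over a discriminated union, so the lookup_type value in each YAML selects the concrete class (CSVLookup, KVStoreLookup, or MlModel). A simplified, self-contained sketch of the same pattern (toy models only; the real classes also require the SecurityContentObject fields):

    from typing import Annotated, Literal, Union
    from pydantic import BaseModel, Field, TypeAdapter

    class CSVLike(BaseModel):
        lookup_type: Literal["csv"]
        filename: str

    class KVStoreLike(BaseModel):
        lookup_type: Literal["kvstore"]
        fields: list[str]

    Adapter = TypeAdapter(Annotated[Union[CSVLike, KVStoreLike], Field(discriminator="lookup_type")])

    obj = Adapter.validate_python({"lookup_type": "kvstore", "fields": ["_key", "user"]})
    print(type(obj).__name__)  # KVStoreLike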
contentctl/objects/macro.py
@@ -48,7 +48,6 @@ class Macro(SecurityContentObject):
         return model
 
     @staticmethod
-
     def get_macros(text_field:str, director:DirectorOutputDto , ignore_macros:set[str]=MACROS_TO_IGNORE)->list[Macro]:
         #Remove any comments, allowing there to be macros (which have a single backtick) inside those comments
         #If a comment ENDS in a macro, for example ```this is a comment with a macro `macro_here````
@@ -59,10 +58,10 @@ class Macro(SecurityContentObject):
                             "This may have occurred when a macro was commented out.\n"
                             "Please ammend your search to remove the substring '````'")
 
-        # replace all the macros with a space
+        # Replace all the comments with a space. This prevents a comment from looking like a macro to the parser below
         text_field = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text_field)
 
-
+        # Find all the macros, which start and end with a '`' character
        macros_to_get = re.findall(r'`([^\s]+)`', text_field)
         #If macros take arguments, stop at the first argument. We just want the name of the macro
         macros_to_get = set([macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros_to_get])
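Note: the reworded comments describe the parsing order: triple-backtick comments are blanked out first so their contents cannot be mistaken for macros, then backtick-delimited macro names are collected and any argument list is trimmed. A small standalone walk-through using the same expressions (the search text is made up):

    import re

    text = "| tstats count ```comment with `ignored_macro` inside``` | `security_content_ctime(firstTime)` | `drop_dm_object_name(Processes)`"

    # 1. Replace comments with a space so anything inside them is ignored
    text = re.sub(r"\`\`\`[\s\S]*?\`\`\`", " ", text)

    # 2. Collect backtick-delimited macros, then strip any argument list
    macros = re.findall(r'`([^\s]+)`', text)
    macros = set(macro[:macro.find('(')] if macro.find('(') != -1 else macro for macro in macros)
    print(macros)  # expected (order may vary): {'security_content_ctime', 'drop_dm_object_name'}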