pySigma 1.3.2__tar.gz → 1.4.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. {pysigma-1.3.2 → pysigma-1.4.0}/PKG-INFO +3 -2
  2. {pysigma-1.3.2 → pysigma-1.4.0}/pyproject.toml +5 -4
  3. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/conditions.py +9 -1
  4. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/data/mitre_attack.py +1 -1
  5. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/data/mitre_d3fend.py +1 -1
  6. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/filters.py +40 -10
  7. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/modifiers.py +9 -0
  8. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/pipeline.py +24 -3
  9. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/__init__.py +13 -0
  10. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/base.py +23 -0
  11. pysigma-1.4.0/sigma/processing/transformations/external.py +390 -0
  12. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/base.py +1 -1
  13. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/detection.py +16 -5
  14. {pysigma-1.3.2 → pysigma-1.4.0}/LICENSE +0 -0
  15. {pysigma-1.3.2 → pysigma-1.4.0}/README.md +0 -0
  16. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/backends/test/__init__.py +0 -0
  17. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/backends/test/backend.py +0 -0
  18. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/collection.py +0 -0
  19. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/conversion/__init__.py +0 -0
  20. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/conversion/base.py +0 -0
  21. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/conversion/deferred.py +0 -0
  22. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/conversion/state.py +0 -0
  23. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/correlations.py +0 -0
  24. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/exceptions.py +0 -0
  25. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/pipelines/base.py +0 -0
  26. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/pipelines/common.py +0 -0
  27. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/pipelines/test/__init__.py +0 -0
  28. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/pipelines/test/pipeline.py +0 -0
  29. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/plugins.py +0 -0
  30. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/__init__.py +0 -0
  31. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/condition_expressions.py +0 -0
  32. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/__init__.py +0 -0
  33. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/base.py +0 -0
  34. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/fields.py +0 -0
  35. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/rule.py +0 -0
  36. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/state.py +0 -0
  37. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/conditions/values.py +0 -0
  38. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/finalization.py +0 -0
  39. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/postprocessing.py +0 -0
  40. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/resolver.py +0 -0
  41. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/templates.py +0 -0
  42. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/tracking.py +0 -0
  43. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/condition.py +0 -0
  44. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/detection_item.py +0 -0
  45. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/failure.py +0 -0
  46. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/fields.py +0 -0
  47. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/meta.py +0 -0
  48. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/placeholder.py +0 -0
  49. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/rule.py +0 -0
  50. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/state.py +0 -0
  51. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/processing/transformations/values.py +0 -0
  52. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/py.typed +0 -0
  53. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/__init__.py +0 -0
  54. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/attributes.py +0 -0
  55. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/logsource.py +0 -0
  56. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/rule/rule.py +0 -0
  57. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/types.py +0 -0
  58. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validation.py +0 -0
  59. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/base.py +0 -0
  60. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/__init__.py +0 -0
  61. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/condition.py +0 -0
  62. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/logsources.py +0 -0
  63. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/metadata.py +0 -0
  64. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/modifiers.py +0 -0
  65. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/tags.py +0 -0
  66. {pysigma-1.3.2 → pysigma-1.4.0}/sigma/validators/core/values.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pySigma
3
- Version: 1.3.2
3
+ Version: 1.4.0
4
4
  Summary: Sigma rule processing and conversion tools
5
5
  License-Expression: LGPL-2.1-only
6
6
  License-File: LICENSE
@@ -17,8 +17,9 @@ Classifier: Programming Language :: Python :: 3.13
17
17
  Classifier: Programming Language :: Python :: 3.14
18
18
  Classifier: Topic :: Security
19
19
  Requires-Dist: diskcache (>=5.6.3,<6.0.0)
20
- Requires-Dist: diskcache-stubs (>=5.6.3.6.20240818,<6.0.0.0.0)
20
+ Requires-Dist: diskcache-stubs (>=5.6.3)
21
21
  Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
22
+ Requires-Dist: jq (>=1.6,<2.0)
22
23
  Requires-Dist: packaging (>=26.0,<27.0)
23
24
  Requires-Dist: pyparsing (>=3.2.5,<4.0.0)
24
25
  Requires-Dist: pyyaml (>=6.0.3,<7.0.0)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pySigma"
3
- version = "1.3.2"
3
+ version = "1.4.0"
4
4
  license = "LGPL-2.1-only"
5
5
  description = "Sigma rule processing and conversion tools"
6
6
  authors = [
@@ -37,20 +37,21 @@ jinja2 = "^3.1.6"
37
37
  types-pyyaml = "^6.0.12.20250915"
38
38
  typing-extensions = "^4.15.0"
39
39
  diskcache = "^5.6.3"
40
- diskcache-stubs = "^5.6.3.6.20240818"
40
+ diskcache-stubs = ">=5.6.3"
41
+ jq = "^1.6"
41
42
 
42
43
  [tool.poetry.group.dev.dependencies]
43
44
  black = "^26.3.1"
44
- mypy = "^1.18"
45
45
  pip = "^26.0.1"
46
46
  pre-commit = "^4.4"
47
47
  pylint = "^4.0"
48
- pytest = "^9.0"
48
+ pytest = "^9.1"
49
49
  pytest-cov = "^7.0"
50
50
  pytest-mypy = "^1.0"
51
51
  Sphinx = "^8"
52
52
  defusedxml = "^0.7"
53
53
  types-requests = "^2.32.4.20250913"
54
+ mypy = "^2.1"
54
55
 
55
56
  [tool.black]
56
57
  line-length = 100
@@ -223,10 +223,18 @@ class ConditionSelector(ConditionItem):
223
223
  else:
224
224
  r = re.compile(self.pattern.replace("*", ".*"))
225
225
 
226
+ # When a filter is applied to a rule its detection identifiers are renamed to
227
+ # start with a `_filt_<random>_` prefix, and its condition patterns receive the
228
+ # same prefix. We therefore allow `_`-prefixed identifiers to be matched when
229
+ # the pattern itself starts with `_` (i.e. it is a filter-internal pattern).
230
+ # For patterns that do NOT start with `_` (i.e. rule-level patterns such as
231
+ # "1 of selection_*") the original restriction is kept so that filter identifiers
232
+ # are never accidentally pulled into the rule's own selectors.
226
233
  return [
227
234
  ConditionIdentifier([identifier])
228
235
  for identifier in detections.detections.keys()
229
- if r.match(identifier) and not identifier.startswith("_")
236
+ if r.match(identifier)
237
+ and (self.pattern.startswith("_") or not identifier.startswith("_"))
230
238
  ]
231
239
 
232
240
  def postprocess(
@@ -65,7 +65,7 @@ def _load_mitre_attack_data() -> dict[str, Any]:
65
65
  - mitre_attack_mitigations: dict[str, str] mapping mitigation IDs to names
66
66
  """
67
67
  cache = _get_cache()
68
- cache_key = f"mitre_attack_data_{_custom_url or 'default'}"
68
+ cache_key = "mitre_attack_data"
69
69
 
70
70
  # Try to get from cache first
71
71
  cached_data = cache.get(cache_key)
@@ -51,7 +51,7 @@ def _load_mitre_d3fend_data() -> dict[str, Any]:
51
51
  - mitre_d3fend_artifacts: dict[str, str] mapping artifact IDs to names
52
52
  """
53
53
  cache = _get_cache()
54
- cache_key = f"mitre_d3fend_data_{_custom_url or 'default'}"
54
+ cache_key = "mitre_d3fend_data"
55
55
 
56
56
  # Try to get from cache first
57
57
  cached_data = cache.get(cache_key)
@@ -206,23 +206,53 @@ class SigmaFilter(SigmaRuleBase):
206
206
 
207
207
  return True
208
208
 
209
+ # Keywords that must not be prefixed when rewriting filter conditions
210
+ _CONDITION_KEYWORDS: frozenset[str] = frozenset({"not", "and", "or", "all", "any", "of", "1"})
211
+
209
212
  def apply_on_rule(
210
213
  self: Self, rule: SigmaRule | SigmaCorrelationRule
211
214
  ) -> SigmaRule | SigmaCorrelationRule:
212
215
  if not self._should_apply_on_rule(rule) or isinstance(rule, SigmaCorrelationRule):
213
216
  return rule
214
217
 
215
- filter_condition = self.filter.condition[0]
216
- for original_cond_name, condition in self.filter.detections.items():
217
- cond_name = "_filt_" + ("".join(random.choices(string.ascii_lowercase, k=10)))
218
+ # Generate one random prefix shared by all filter identifiers in this application.
219
+ # Using a single prefix (rather than a fresh random name per identifier) preserves
220
+ # the structure of the original identifier names so that wildcard patterns in the
221
+ # filter condition (e.g. "1 of selection_*") continue to work after renaming.
222
+ prefix = "_filt_" + "".join(random.choices(string.ascii_lowercase, k=10))
218
223
 
219
- # Replace each instance of the original condition name with the new condition name to avoid conflicts
220
- filter_condition = re.sub(
221
- rf"(\s|\(|^){original_cond_name}(\s|$|\))",
222
- r"\1" + cond_name + r"\2",
223
- filter_condition,
224
- )
225
- rule.detection.detections[cond_name] = condition
224
+ # Rename every filter detection identifier with the shared prefix.
225
+ for original_cond_name, condition in self.filter.detections.items():
226
+ rule.detection.detections[prefix + "_" + original_cond_name] = condition
227
+
228
+ # Rewrite the filter condition string so that every identifier/pattern token is
229
+ # prefixed. This handles:
230
+ # - exact names: "selection" -> "PREFIX_selection"
231
+ # - suffix wildcards: "selection_*" -> "PREFIX_selection_*"
232
+ # - prefix wildcards: "*_allow" -> "PREFIX_*_allow"
233
+ # - the "them" keyword: "1 of them" -> "1 of PREFIX_*"
234
+ # Sigma keywords (not, and, or, all, any, of, 1) are left unchanged.
235
+ #
236
+ # The regex matches a single Sigma condition token: an optional leading `*`
237
+ # (wildcard prefix) or a letter, followed by alphanumerics, `*`, `_`, or `-`.
238
+ # Wildcards are only valid at the start or end of a Sigma identifier pattern
239
+ # but this regex accepts any occurrence; the Sigma condition parser is
240
+ # responsible for rejecting syntactically invalid patterns at parse time.
241
+ def _replace_token(m: re.Match[str]) -> str:
242
+ token = m.group(0)
243
+ if token.lower() in self._CONDITION_KEYWORDS:
244
+ return token
245
+ if token == "them":
246
+ # "them" means all detections; replace with a pattern that matches all
247
+ # filter identifiers carrying the current prefix.
248
+ return prefix + "_*"
249
+ return prefix + "_" + token
250
+
251
+ filter_condition = re.sub(
252
+ r"[a-zA-Z*][a-zA-Z0-9*_-]*",
253
+ _replace_token,
254
+ self.filter.condition[0],
255
+ )
226
256
 
227
257
  for i, condition_str in enumerate(rule.detection.condition):
228
258
  rule.detection.condition[i] = f"({condition_str}) and " + f"({filter_condition})"
@@ -387,6 +387,14 @@ class SigmaAllModifier(SigmaListModifier[Any, Any]):
387
387
  return val
388
388
 
389
389
 
390
+ class SigmaNegateModifier(SigmaListModifier[Any, Any]):
391
+ """Negate the match - turns the detection item into a NOT match."""
392
+
393
+ def modify(self, val: Any) -> Any:
394
+ self.detection_item.negated = True
395
+ return val
396
+
397
+
390
398
  class SigmaCompareModifier(SigmaValueModifier[SigmaNumber, SigmaCompareExpression]):
391
399
  """Base class for numeric comparison operator modifiers."""
392
400
 
@@ -526,6 +534,7 @@ class SigmaTimestampYearModifier(SigmaTimestampModifier):
526
534
  # Mapping from modifier identifier strings to modifier classes
527
535
  modifier_mapping: dict[str, Type[SigmaModifier[Any, Any]]] = {
528
536
  "all": SigmaAllModifier,
537
+ "neq": SigmaNegateModifier,
529
538
  "base64": SigmaBase64Modifier,
530
539
  "base64offset": SigmaBase64OffsetModifier,
531
540
  "cased": SigmaCaseSensitiveModifier,
@@ -25,6 +25,7 @@ from sigma.processing.finalization import Finalizer, NestedFinalizer, finalizers
25
25
  from sigma.processing.templates import TemplateBase
26
26
  from sigma.processing.tracking import FieldMappingTracking
27
27
  from sigma.processing.transformations import transformations
28
+ from sigma.processing.transformations.external import ExternalSourceBaseTransformation
28
29
  from sigma.rule import SigmaDetectionItem, SigmaRule
29
30
  from sigma.correlations import SigmaCorrelationRule
30
31
  from sigma.processing.transformations.base import PreprocessingTransformation, Transformation
@@ -74,6 +75,7 @@ class ProcessingItemBase:
74
75
  transformations: dict[str, Type[Transformation]],
75
76
  allow_template_vars: bool = False,
76
77
  vars_allowed_paths: tuple[str, ...] | None = None,
78
+ allow_external_sources: bool = False,
77
79
  ) -> dict[str, Any]:
78
80
  """Return class instantiation parameters for attributes contained in base class for further
79
81
  usage in similar methods of classes inherited from this class."""
@@ -103,6 +105,7 @@ class ProcessingItemBase:
103
105
  transformations,
104
106
  allow_template_vars=allow_template_vars,
105
107
  vars_allowed_paths=vars_allowed_paths,
108
+ allow_external_sources=allow_external_sources,
106
109
  ),
107
110
  }
108
111
 
@@ -307,6 +310,7 @@ class ProcessingItemBase:
307
310
  transformations: dict[str, Type[Transformation]],
308
311
  allow_template_vars: bool = False,
309
312
  vars_allowed_paths: tuple[str, ...] | None = None,
313
+ allow_external_sources: bool = False,
310
314
  ) -> Transformation:
311
315
  try:
312
316
  transformation_class_name = d["type"]
@@ -341,11 +345,14 @@ class ProcessingItemBase:
341
345
  "id",
342
346
  "allow_template_vars",
343
347
  "vars_allowed_paths",
348
+ "allow_external_sources",
344
349
  }
345
350
  }
346
351
  if issubclass(transformation_class, TemplateBase):
347
352
  params["allow_template_vars"] = allow_template_vars
348
353
  params["vars_allowed_paths"] = vars_allowed_paths
354
+ if issubclass(transformation_class, ExternalSourceBaseTransformation):
355
+ params["allow_external_sources"] = allow_external_sources
349
356
  try:
350
357
  return transformation_class(**params)
351
358
  except (SigmaConfigurationError, TypeError) as e:
@@ -420,9 +427,17 @@ class ProcessingItem(ProcessingItemBase):
420
427
  field_name_condition_expression: ConditionExpression | None = None
421
428
 
422
429
  @classmethod
423
- def from_dict(cls, d: dict[str, Any]) -> "ProcessingItem":
430
+ def from_dict(
431
+ cls,
432
+ d: dict[str, Any],
433
+ allow_external_sources: bool = False,
434
+ ) -> "ProcessingItem":
424
435
  """Instantiate processing item from parsed definition and variables."""
425
- kwargs = super()._base_args_from_dict(d, transformations)
436
+ kwargs = super()._base_args_from_dict(
437
+ d,
438
+ transformations,
439
+ allow_external_sources=allow_external_sources,
440
+ )
426
441
 
427
442
  detection_item_conds = cls._parse_conditions(
428
443
  cast(Mapping[str, Type[ProcessingCondition]], detection_item_conditions),
@@ -762,6 +777,7 @@ class ProcessingPipeline:
762
777
  d: dict[str, Any],
763
778
  allow_template_vars: bool = False,
764
779
  vars_allowed_paths: tuple[str, ...] | None = None,
780
+ allow_external_sources: bool = False,
765
781
  ) -> "ProcessingPipeline":
766
782
  """Instantiate processing pipeline from a parsed processing item description."""
767
783
 
@@ -788,7 +804,9 @@ class ProcessingPipeline:
788
804
  processing_items = list()
789
805
  for i, item in enumerate(items):
790
806
  try:
791
- processing_item = ProcessingItem.from_dict(item)
807
+ processing_item = ProcessingItem.from_dict(
808
+ item, allow_external_sources=allow_external_sources
809
+ )
792
810
  processing_items.append(processing_item)
793
811
  except SigmaConfigurationError as e:
794
812
  raise SigmaConfigurationError(f"Error in processing rule {i + 1}: {str(e)}") from e
@@ -812,6 +830,7 @@ class ProcessingPipeline:
812
830
  for fd in fds:
813
831
  fd.pop("allow_template_vars", None) # Strip untrusted YAML value
814
832
  fd.pop("vars_allowed_paths", None) # Strip untrusted YAML value
833
+ fd.pop("allow_external_sources", None) # Strip untrusted YAML value
815
834
  try:
816
835
  finalizer_type = fd.pop("type")
817
836
  except KeyError:
@@ -860,6 +879,7 @@ class ProcessingPipeline:
860
879
  allow_template_vars: bool = False,
861
880
  vars_allowed_paths: tuple[str, ...] | None = None,
862
881
  source_path: str | None = None,
882
+ allow_external_sources: bool = False,
863
883
  ) -> "ProcessingPipeline":
864
884
  """Convert YAML input string into processing pipeline.
865
885
 
@@ -879,6 +899,7 @@ class ProcessingPipeline:
879
899
  parsed_pipeline,
880
900
  allow_template_vars=allow_template_vars,
881
901
  vars_allowed_paths=vars_allowed_paths,
902
+ allow_external_sources=allow_external_sources,
882
903
  )
883
904
 
884
905
  def apply(self, rule: SigmaRule | SigmaCorrelationRule) -> SigmaRule | SigmaCorrelationRule:
@@ -23,6 +23,12 @@ from sigma.processing.transformations.placeholder import (
23
23
  ValueListPlaceholderTransformation,
24
24
  WildcardPlaceholderTransformation,
25
25
  )
26
+ from sigma.processing.transformations.external import (
27
+ ExternalSourceBaseTransformation,
28
+ FilePlaceholderTransformation,
29
+ HTTPPlaceholderTransformation,
30
+ CommandPlaceholderTransformation,
31
+ )
26
32
  from sigma.processing.transformations.rule import (
27
33
  ChangeLogsourceTransformation,
28
34
  SetCustomAttributeTransformation,
@@ -49,6 +55,9 @@ transformations: dict[str, Type[Transformation]] = {
49
55
  "wildcard_placeholders": WildcardPlaceholderTransformation,
50
56
  "value_placeholders": ValueListPlaceholderTransformation,
51
57
  "query_expression_placeholders": QueryExpressionPlaceholderTransformation,
58
+ "file_placeholders": FilePlaceholderTransformation,
59
+ "http_placeholders": HTTPPlaceholderTransformation,
60
+ "command_placeholders": CommandPlaceholderTransformation,
52
61
  "add_condition": AddConditionTransformation,
53
62
  "change_logsource": ChangeLogsourceTransformation,
54
63
  "add_field": AddFieldTransformation,
@@ -81,6 +90,10 @@ __all__ = [
81
90
  "WildcardPlaceholderTransformation",
82
91
  "ValueListPlaceholderTransformation",
83
92
  "QueryExpressionPlaceholderTransformation",
93
+ "ExternalSourceBaseTransformation",
94
+ "FilePlaceholderTransformation",
95
+ "HTTPPlaceholderTransformation",
96
+ "CommandPlaceholderTransformation",
84
97
  "AddConditionTransformation",
85
98
  "ChangeLogsourceTransformation",
86
99
  "AddFieldTransformation",
@@ -183,6 +183,29 @@ class FieldMappingTransformationBase(DetectionItemTransformation):
183
183
  else:
184
184
  return [field]
185
185
 
186
+ def apply_detection(self, detection: SigmaDetection) -> None:
187
+ for i, detection_item in enumerate(detection.detection_items):
188
+ if isinstance(detection_item, SigmaDetection): # recurse into nested detection items
189
+ self.apply_detection(detection_item)
190
+ else:
191
+ # Save a reference to the value list *before* the transformation so we can
192
+ # detect whether the transformation replaced it with a new list object.
193
+ # Field-only renames keep the same list instance; value-modifying operations
194
+ # (e.g. keyword-to-field mapping that adds wildcards, field-reference remapping)
195
+ # always assign a new list to detection_item.value.
196
+ value_before = detection_item.value
197
+ if (
198
+ self.processing_item is None
199
+ or self.processing_item.match_detection_item(detection_item)
200
+ ) and (r := self.apply_detection_item(detection_item)) is not None:
201
+ if isinstance(r, SigmaDetectionItem) and r.value is not value_before:
202
+ # The value list was replaced, so original_value is no longer in sync
203
+ # with the current values. Disable conversion to prevent to_plain()
204
+ # from producing stale output.
205
+ r.disable_conversion_to_plain()
206
+ detection.detection_items[i] = r
207
+ self.processing_item_applied(r)
208
+
186
209
  def apply(
187
210
  self,
188
211
  rule: SigmaRule | SigmaCorrelationRule,
@@ -0,0 +1,390 @@
1
+ """External data source placeholder transformations.
2
+
3
+ These transformations replace Sigma placeholders with values fetched from external sources
4
+ such as local files, HTTP endpoints, or command output.
5
+
6
+ Security note: Because these transformations access external sources, they are **disabled by
7
+ default** and must be explicitly enabled by passing ``allow_external_sources=True`` when
8
+ loading a :class:`~sigma.processing.pipeline.ProcessingPipeline` or by setting the
9
+ environment variable ``PYSIGMA_ALLOW_EXTERNAL_SOURCES=1``.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import csv
15
+ import io
16
+ import json
17
+ import os
18
+ import re
19
+ import subprocess
20
+ from abc import abstractmethod
21
+ from dataclasses import dataclass, field
22
+ from typing import Any, Iterable
23
+
24
+ import yaml
25
+
26
+ from sigma.exceptions import SigmaConfigurationError, SigmaSecurityError, SigmaValueError
27
+ from sigma.processing.transformations.placeholder import BasePlaceholderTransformation
28
+ from sigma.types import Placeholder, SigmaString
29
+
30
+ PYSIGMA_ALLOW_EXTERNAL_SOURCES_ENV = "PYSIGMA_ALLOW_EXTERNAL_SOURCES"
31
+
32
+ # Default cap on the amount of data accepted from an external source (10 MiB).
33
+ DEFAULT_MAX_RESPONSE_BYTES = 10 * 1024 * 1024
34
+
35
+ # Data formats understood by the external source parsers.
36
+ SUPPORTED_FORMATS = ("plaintext", "csv", "json", "yaml")
37
+
38
+
39
+ @dataclass
40
+ class ExternalSourceBaseTransformation(BasePlaceholderTransformation):
41
+ """Base class for placeholder transformations that fetch replacement values from
42
+ an external source (file, HTTP, command).
43
+
44
+ **Supported formats** (controlled by the *format* parameter):
45
+
46
+ * ``"plaintext"`` — one value per line; an optional *filter* regex must
47
+ match for a line to be included.
48
+ * ``"csv"`` — CSV data; *csv_column* selects the column (column header
49
+ name **or** 0-based integer index); *csv_has_header* (default ``True``)
50
+ controls whether the first row is treated as a header; an optional
51
+ *filter* regex is applied to each extracted cell value.
52
+ * ``"json"`` — JSON data; *jq_expression* selects the value(s).
53
+ * ``"yaml"`` — YAML data; *jq_expression* selects the value(s).
54
+
55
+ **Security**: external-source transformations are disabled by default.
56
+ Enable them by passing ``allow_external_sources=True`` when loading the
57
+ pipeline or by setting the environment variable
58
+ ``PYSIGMA_ALLOW_EXTERNAL_SOURCES=1``.
59
+ """
60
+
61
+ format: str = "plaintext"
62
+ filter: str | None = None
63
+ csv_column: str | int | None = None
64
+ csv_has_header: bool = True
65
+ jq_expression: str | None = None
66
+ allow_external_sources: bool = False
67
+
68
+ _values_cache: list[str] | None = field(init=False, default=None, repr=False, compare=False)
69
+
70
+ def __post_init__(self) -> None:
71
+ if self.format not in SUPPORTED_FORMATS:
72
+ raise SigmaConfigurationError(
73
+ f"Unknown external source format '{self.format}'. "
74
+ f"Supported formats: {', '.join(SUPPORTED_FORMATS)}."
75
+ )
76
+ super().__post_init__()
77
+
78
+ def _external_sources_allowed(self) -> bool:
79
+ """Return *True* if external data sources are permitted."""
80
+ if self.allow_external_sources:
81
+ return True
82
+ return os.environ.get(PYSIGMA_ALLOW_EXTERNAL_SOURCES_ENV, "").lower() in (
83
+ "1",
84
+ "true",
85
+ )
86
+
87
+ @abstractmethod
88
+ def _fetch_data(self) -> str:
89
+ """Fetch raw text data from the external source.
90
+
91
+ Subclasses **must** override this method.
92
+ """
93
+
94
+ def _get_values(self) -> list[str]:
95
+ """Return the list of replacement values, fetching and caching them if necessary.
96
+
97
+ Raises :class:`~sigma.exceptions.SigmaSecurityError` when external
98
+ data sources are not enabled.
99
+ """
100
+ if self._values_cache is not None:
101
+ return self._values_cache
102
+
103
+ if not self._external_sources_allowed():
104
+ raise SigmaSecurityError(
105
+ "External data source transformations are disabled by default for security "
106
+ "reasons. Enable them with allow_external_sources=True when loading the "
107
+ "pipeline or by setting the environment variable "
108
+ f"{PYSIGMA_ALLOW_EXTERNAL_SOURCES_ENV}=1."
109
+ )
110
+
111
+ data = self._fetch_data()
112
+ self._values_cache = self._parse_data(data)
113
+ return self._values_cache
114
+
115
+ def _parse_data(self, data: str) -> list[str]:
116
+ """Dispatch to the appropriate format parser."""
117
+ if self.format == "plaintext":
118
+ return self._parse_plaintext(data)
119
+ elif self.format == "csv":
120
+ return self._parse_csv(data)
121
+ elif self.format == "json":
122
+ return self._parse_json(data)
123
+ elif self.format == "yaml":
124
+ return self._parse_yaml_data(data)
125
+ else:
126
+ raise SigmaConfigurationError(
127
+ f"Unknown external source format '{self.format}'. "
128
+ f"Supported formats: {', '.join(SUPPORTED_FORMATS)}."
129
+ )
130
+
131
+ def _parse_plaintext(self, data: str) -> list[str]:
132
+ values = [line for line in (line.strip() for line in data.splitlines()) if line]
133
+ if self.filter:
134
+ pattern = re.compile(self.filter)
135
+ values = [v for v in values if pattern.search(v)]
136
+ return values
137
+
138
+ def _parse_csv(self, data: str) -> list[str]:
139
+ if self.csv_column is None:
140
+ raise SigmaConfigurationError("'csv_column' must be specified when format is 'csv'")
141
+
142
+ values: list[str] = []
143
+
144
+ if isinstance(self.csv_column, str):
145
+ if not self.csv_has_header:
146
+ raise SigmaConfigurationError(
147
+ "CSV column referenced by name requires a header row (csv_has_header=True)"
148
+ )
149
+ reader_dict = csv.DictReader(io.StringIO(data))
150
+ for row in reader_dict:
151
+ if self.csv_column not in row:
152
+ raise SigmaConfigurationError(
153
+ f"CSV column '{self.csv_column}' not found in data"
154
+ )
155
+ val = row[self.csv_column]
156
+ if val is not None:
157
+ values.append(val)
158
+ else:
159
+ col_idx = self.csv_column
160
+ rows = iter(csv.reader(io.StringIO(data)))
161
+ if self.csv_has_header:
162
+ next(rows, None) # discard header row for consistency with name-based mode
163
+ for csv_row in rows:
164
+ if col_idx < len(csv_row):
165
+ values.append(csv_row[col_idx])
166
+
167
+ if self.filter:
168
+ pattern = re.compile(self.filter)
169
+ values = [v for v in values if pattern.search(v)]
170
+ return values
171
+
172
+ def _parse_json(self, data: str) -> list[str]:
173
+ import jq # type: ignore[import-not-found]
174
+
175
+ if self.jq_expression is None:
176
+ raise SigmaConfigurationError("'jq_expression' must be specified when format is 'json'")
177
+ try:
178
+ parsed = json.loads(data)
179
+ except json.JSONDecodeError as e:
180
+ raise SigmaValueError(f"Failed to parse JSON data: {e}") from e
181
+ try:
182
+ result = jq.all(self.jq_expression, parsed)
183
+ except ValueError as e:
184
+ raise SigmaConfigurationError(f"Invalid jq expression: {e}") from e
185
+ return self._jq_results_to_values(result)
186
+
187
+ def _parse_yaml_data(self, data: str) -> list[str]:
188
+ import jq
189
+
190
+ if self.jq_expression is None:
191
+ raise SigmaConfigurationError("'jq_expression' must be specified when format is 'yaml'")
192
+ try:
193
+ parsed = yaml.safe_load(data)
194
+ except yaml.YAMLError as e:
195
+ raise SigmaValueError(f"Failed to parse YAML data: {e}") from e
196
+ try:
197
+ result = jq.all(self.jq_expression, parsed)
198
+ except ValueError as e:
199
+ raise SigmaConfigurationError(f"Invalid jq expression: {e}") from e
200
+ return self._jq_results_to_values(result)
201
+
202
+ @staticmethod
203
+ def _jq_results_to_values(result: Iterable[Any]) -> list[str]:
204
+ """Convert jq output into scalar placeholder values.
205
+
206
+ Each placeholder replacement becomes an individual field match value,
207
+ so a jq result must be a scalar. An expression that yields an array or
208
+ object is rejected with guidance to project to scalars (e.g. use
209
+ ``.items[]`` instead of ``.items``). ``None`` results are skipped.
210
+ """
211
+ values: list[str] = []
212
+ for v in result:
213
+ if v is None:
214
+ continue
215
+ if isinstance(v, (dict, list)):
216
+ raise SigmaConfigurationError(
217
+ "jq_expression must select scalar values for placeholder "
218
+ f"replacement, but a {type(v).__name__} was returned; project "
219
+ "to scalars (e.g. '.items[]' instead of '.items')"
220
+ )
221
+ values.append(str(v))
222
+ return values
223
+
224
+ def placeholder_replacements(self, p: Placeholder) -> Iterable[SigmaString]:
225
+ return [SigmaString(v) for v in self._get_values()]
226
+
227
+
228
+ @dataclass
229
+ class FilePlaceholderTransformation(ExternalSourceBaseTransformation):
230
+ """Replace placeholders with values read from a local file.
231
+
232
+ Parameters:
233
+ * **path** — path to the file (required)
234
+ * **format** — data format: ``"plaintext"`` (default), ``"csv"``, ``"json"``, ``"yaml"``
235
+ * **filter** — optional regex that each value must match (plaintext/csv)
236
+ * **csv_column** — column name (str) or 0-based index (int) for CSV format
237
+ * **csv_has_header** — whether the first CSV row is a header (default ``True``)
238
+ * **jq_expression** — path expression for JSON/YAML formats (e.g. ``.items[]``)
239
+ * **include** / **exclude** — placeholder name lists (from
240
+ :class:`~sigma.processing.transformations.placeholder.BasePlaceholderTransformation`)
241
+ """
242
+
243
+ path: str = ""
244
+
245
+ def __post_init__(self) -> None:
246
+ if not self.path:
247
+ raise SigmaConfigurationError(
248
+ "FilePlaceholderTransformation requires a non-empty 'path'"
249
+ )
250
+ super().__post_init__()
251
+
252
+ def _fetch_data(self) -> str:
253
+ try:
254
+ with open(self.path, encoding="utf-8") as fh:
255
+ return fh.read()
256
+ except OSError as e:
257
+ raise SigmaValueError(
258
+ f"FilePlaceholderTransformation: failed to read '{self.path}': {e}"
259
+ ) from e
260
+
261
+
262
@dataclass
class HTTPPlaceholderTransformation(ExternalSourceBaseTransformation):
    """Replace placeholders with values fetched from an HTTP(S) endpoint.

    Parameters:
        * **url** — URL to fetch (required)
        * **method** — HTTP method (default: ``"GET"``)
        * **timeout** — request timeout in seconds (default: 10)
        * **headers** — optional dict of custom HTTP request headers
        * **params** — optional dict of URL query parameters
        * **form_data** — optional dict to send as a form-encoded request body
          (``application/x-www-form-urlencoded``)
        * **json_body** — optional dict to send as a JSON request body
          (``application/json``)
        * **max_body_size** — maximum response body size in bytes; the fetch is
          aborted with an error once this many bytes have been read (default 10 MiB)
        * **format** — data format: ``"plaintext"`` (default), ``"csv"``, ``"json"``, ``"yaml"``
        * **filter** — optional regex that each value must match (plaintext/csv)
        * **csv_column** — column name (str) or 0-based index (int) for CSV format
        * **csv_has_header** — whether the first CSV row is a header (default ``True``)
        * **jq_expression** — path expression for JSON/YAML formats
        * **include** / **exclude** — placeholder name lists
    """

    url: str = ""
    method: str = "GET"
    timeout: int = 10
    headers: dict[str, str] | None = None
    params: dict[str, str] | None = None
    form_data: dict[str, Any] | None = None
    json_body: dict[str, Any] | None = None
    max_body_size: int = DEFAULT_MAX_RESPONSE_BYTES

    def __post_init__(self) -> None:
        """Reject an empty ``url``, then run the parent initialiser.

        Raises:
            SigmaConfigurationError: if ``url`` is empty.
        """
        if not self.url:
            raise SigmaConfigurationError(
                "HTTPPlaceholderTransformation requires a non-empty 'url'"
            )
        super().__post_init__()

    def _fetch_data(self) -> str:
        """Fetch the configured URL and return the response body as text.

        The body is streamed in 8 KiB chunks so the download can be aborted as
        soon as more than ``max_body_size`` bytes have been received.

        Returns:
            The response body decoded with the charset declared by the server,
            or UTF-8 when none (or an unknown one) is declared. Undecodable
            bytes are replaced rather than raising.

        Raises:
            SigmaValueError: if the request fails, the server answers with an
                error status, or the body exceeds ``max_body_size``.
        """
        # Function-local import: requests is only loaded when an HTTP fetch is
        # actually performed (presumably to keep it an optional dependency).
        import requests

        try:
            with requests.request(
                method=self.method,
                url=self.url,
                timeout=self.timeout,
                headers=self.headers,
                params=self.params,
                data=self.form_data,
                json=self.json_body,
                stream=True,
            ) as response:
                response.raise_for_status()
                content = bytearray()
                for chunk in response.iter_content(chunk_size=8192):
                    content.extend(chunk)
                    if len(content) > self.max_body_size:
                        raise SigmaValueError(
                            f"HTTPPlaceholderTransformation: response from '{self.url}' "
                            f"exceeds max_body_size ({self.max_body_size} bytes)"
                        )
                # Do NOT fall back to response.apparent_encoding here: it reads
                # response.content, which raises RuntimeError once a streamed
                # body has been consumed through iter_content().
                encoding = response.encoding or "utf-8"
        except requests.RequestException as e:
            raise SigmaValueError(
                f"HTTPPlaceholderTransformation: failed to fetch '{self.url}': {e}"
            ) from e

        try:
            return content.decode(encoding, errors="replace")
        except LookupError:
            # The server declared a charset Python does not know; decode as
            # UTF-8 instead of failing with an unrelated exception type.
            return content.decode("utf-8", errors="replace")
331
+
332
+
333
@dataclass
class CommandPlaceholderTransformation(ExternalSourceBaseTransformation):
    """Replace placeholders with the stdout output of a shell command.

    Parameters:
        * **cmd** — command string (passed to ``/bin/sh -c``) or a list of
          arguments (required)
        * **timeout** — maximum execution time in seconds (default: 30)
        * **max_stdout** — maximum accepted stdout size in bytes; output larger
          than this is rejected with an error (default 10 MiB)
        * **format** — data format: ``"plaintext"`` (default), ``"csv"``, ``"json"``, ``"yaml"``
        * **filter** — optional regex that each value must match (plaintext/csv)
        * **csv_column** — column name (str) or 0-based index (int) for CSV format
        * **csv_has_header** — whether the first CSV row is a header (default ``True``)
        * **jq_expression** — path expression for JSON/YAML formats
        * **include** / **exclude** — placeholder name lists
    """

    cmd: str | list[str] = ""
    timeout: int = 30
    max_stdout: int = DEFAULT_MAX_RESPONSE_BYTES

    def __post_init__(self) -> None:
        """Reject an empty ``cmd``, then run the parent initialiser.

        Raises:
            SigmaConfigurationError: if ``cmd`` is an empty string or list.
        """
        if not self.cmd:
            raise SigmaConfigurationError(
                "CommandPlaceholderTransformation requires a non-empty 'cmd'"
            )
        super().__post_init__()

    def _fetch_data(self) -> str:
        """Run the configured command and return its standard output as text.

        Returns:
            The captured stdout of the command.

        Raises:
            SigmaValueError: if the command times out, cannot be started,
                produces output that cannot be decoded, exits with a non-zero
                status, or writes more than ``max_stdout`` bytes to stdout.
        """
        try:
            result = subprocess.run(
                self.cmd,
                # NOTE: a string command is executed through the shell, so the
                # pipeline configuration must come from a trusted source. Pass
                # a list of arguments to bypass shell interpretation.
                shell=isinstance(self.cmd, str),
                capture_output=True,
                text=True,
                timeout=self.timeout,
            )
        except subprocess.TimeoutExpired as e:
            raise SigmaValueError(
                f"CommandPlaceholderTransformation: command timed out: {e}"
            ) from e
        except OSError as e:
            raise SigmaValueError(
                f"CommandPlaceholderTransformation: failed to execute command: {e}"
            ) from e
        except UnicodeDecodeError as e:
            # text=True decodes the captured streams strictly; output that is
            # not valid in the default encoding would otherwise escape as a
            # raw UnicodeDecodeError instead of the usual error type.
            raise SigmaValueError(
                f"CommandPlaceholderTransformation: failed to decode command output: {e}"
            ) from e

        if result.returncode != 0:
            raise SigmaValueError(
                f"CommandPlaceholderTransformation: command exited with code "
                f"{result.returncode}: {result.stderr.strip()}"
            )
        # The limit is checked after the output has been captured completely:
        # it bounds what is handed on for parsing, not the memory used while
        # the command runs.
        if len(result.stdout.encode("utf-8", errors="replace")) > self.max_stdout:
            raise SigmaValueError(
                f"CommandPlaceholderTransformation: command output exceeds "
                f"max_stdout ({self.max_stdout} bytes)"
            )
        return result.stdout
@@ -25,7 +25,7 @@ class SigmaYAMLLoader(yaml.CSafeLoader):
25
25
  def construct_mapping(self, node: yaml.MappingNode, deep: bool = False) -> dict[Any, Any]:
26
26
  keys = set()
27
27
  for k, v in node.value:
28
- key = self.construct_object(k, deep=deep) # type: ignore
28
+ key = self.construct_object(k, deep=deep)
29
29
  if key in keys:
30
30
  raise yaml.error.YAMLError("Duplicate key '{k}'")
31
31
  else:
@@ -12,6 +12,7 @@ from sigma.conditions import (
12
12
  ConditionAND,
13
13
  ConditionFieldEqualsValueExpression,
14
14
  ConditionItem,
15
+ ConditionNOT,
15
16
  ConditionOR,
16
17
  ConditionValueExpression,
17
18
  ParentChainMixin,
@@ -67,6 +68,7 @@ class SigmaDetectionItem(ProcessingItemTrackingMixin, ParentChainMixin):
67
68
  modifiers: list[type[SigmaModifier[Any, Any]]]
68
69
  value: list[SigmaType]
69
70
  value_linking: type[ConditionAND | ConditionOR] = ConditionOR
71
+ negated: bool = False
70
72
  source: SigmaRuleLocation | None = dataclasses.field(default=None, compare=False)
71
73
  original_value: list[SigmaType] | None = dataclasses.field(
72
74
  init=False, repr=False, hash=False, compare=False
@@ -241,16 +243,16 @@ class SigmaDetectionItem(ProcessingItemTrackingMixin, ParentChainMixin):
241
243
  "Null value must be bound to a field", source=self.source
242
244
  )
243
245
  else:
244
- return ConditionFieldEqualsValueExpression(self.field, SigmaNull()).postprocess(
246
+ cond = ConditionFieldEqualsValueExpression(self.field, SigmaNull()).postprocess(
245
247
  detections, self, self.source
246
248
  )
247
- if len(self.value) == 1: # single value: return key/value or value-only expression
249
+ elif len(self.value) == 1: # single value: return key/value or value-only expression
248
250
  if self.field is None:
249
- return ConditionValueExpression(self.value[0]).postprocess(
251
+ cond = ConditionValueExpression(self.value[0]).postprocess(
250
252
  detections, self, self.source
251
253
  )
252
254
  else:
253
- return ConditionFieldEqualsValueExpression(self.field, self.value[0]).postprocess(
255
+ cond = ConditionFieldEqualsValueExpression(self.field, self.value[0]).postprocess(
254
256
  detections, self, self.source
255
257
  )
256
258
  else: # more than one value, return logically linked values or an "in" expression
@@ -261,7 +263,16 @@ class SigmaDetectionItem(ProcessingItemTrackingMixin, ParentChainMixin):
261
263
  [ConditionFieldEqualsValueExpression(self.field, v) for v in self.value]
262
264
  )
263
265
  cond.postprocess(detections, parent, self.source)
264
- return cond
266
+
267
+ if self.negated:
268
+ if cond is None:
269
+ return None
270
+ not_cond = ConditionNOT([cond])
271
+ not_cond.parent = parent
272
+ not_cond.source = self.source
273
+ cond.parent = not_cond
274
+ return not_cond
275
+ return cond
265
276
 
266
277
  def is_keyword(self: Self) -> bool:
267
278
  """Returns True if detection item is a keyword detection without field reference."""
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes