malwareDetector 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- malware_detector/__init__.py +37 -0
- malware_detector/cli.py +146 -0
- malware_detector/config.py +135 -0
- malware_detector/detector.py +128 -0
- malware_detector/logging.py +85 -0
- malware_detector/py.typed +0 -0
- malwaredetector-0.2.0.dist-info/METADATA +251 -0
- malwaredetector-0.2.0.dist-info/RECORD +10 -0
- malwaredetector-0.2.0.dist-info/WHEEL +4 -0
- malwaredetector-0.2.0.dist-info/licenses/LICENCE.txt +21 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""Malware Detector Framework.
|
|
2
|
+
|
|
3
|
+
A base framework for building malware detectors with:
|
|
4
|
+
- Pydantic v2 configuration system
|
|
5
|
+
- Typer-based extensible CLI
|
|
6
|
+
- Structured logging with structlog
|
|
7
|
+
- ABC-based detector with customizable pipeline
|
|
8
|
+
|
|
9
|
+
Source code: https://github.com/louiskyee/malwareDetector.git
|
|
10
|
+
Documentation: https://github.com/louiskyee/malwareDetector/wiki
|
|
11
|
+
PyPI: https://pypi.org/project/malware-detector/
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from importlib.metadata import version
|
|
15
|
+
|
|
16
|
+
from .cli import create_cli
|
|
17
|
+
from .config import BaseDetectorConfig, FolderConfig, PathConfig
|
|
18
|
+
from .detector import BaseDetector
|
|
19
|
+
from .logging import configure_logging, get_logger
|
|
20
|
+
|
|
21
|
+
def _resolve_version() -> str:
    """Return the version of the installed distribution.

    The wheel is published under the distribution name ``malwareDetector``
    while the import package is ``malware_detector``. Those names do not
    normalize to the same value, so a lookup by package name alone raises
    ``PackageNotFoundError`` and makes the package unimportable. The real
    distribution name is therefore tried first and the package name is kept
    as a second candidate for builds that rename the distribution.

    Returns:
        The version string from the installed metadata, or
        ``"0.0.0+unknown"`` when no metadata is found (for example when
        running from a source checkout that was never installed).
    """
    from importlib.metadata import PackageNotFoundError

    for dist_name in ("malwareDetector", __package__ or __name__):
        try:
            return version(dist_name)
        except PackageNotFoundError:
            continue
    return "0.0.0+unknown"


__version__ = _resolve_version()
|
|
22
|
+
|
|
23
|
+
# Names re-exported at package level; each entry is tagged with the
# submodule (or attribute) it comes from.
__all__ = [
    "__version__",  # package version string
    "BaseDetectorConfig",  # .config
    "PathConfig",  # .config
    "FolderConfig",  # .config
    "BaseDetector",  # .detector
    "create_cli",  # .cli
    "configure_logging",  # .logging
    "get_logger",  # .logging
]
|
malware_detector/cli.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
"""CLI factory using Typer for extensible command-line interfaces.
|
|
2
|
+
|
|
3
|
+
This module provides a factory function to create CLI applications for
|
|
4
|
+
any BaseDetector subclass. The generated CLI includes common commands
|
|
5
|
+
like `run` and `init`, and can be extended with custom commands.
|
|
6
|
+
|
|
7
|
+
Typical usage:
|
|
8
|
+
from malware_detector import create_cli
|
|
9
|
+
from my_detector import MyDetector
|
|
10
|
+
|
|
11
|
+
app = create_cli(MyDetector)
|
|
12
|
+
|
|
13
|
+
# Add custom commands
|
|
14
|
+
@app.command()
|
|
15
|
+
def evaluate():
|
|
16
|
+
...
|
|
17
|
+
|
|
18
|
+
if __name__ == "__main__":
|
|
19
|
+
app()
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Annotated, Literal
|
|
24
|
+
|
|
25
|
+
import typer
|
|
26
|
+
|
|
27
|
+
from .config import BaseDetectorConfig
|
|
28
|
+
from .detector import BaseDetector
|
|
29
|
+
from .logging import configure_logging
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Characters that must be escaped inside a TOML basic (double-quoted) string.
_TOML_ESCAPES = str.maketrans(
    {
        "\\": "\\\\",
        '"': '\\"',
        "\b": "\\b",
        "\t": "\\t",
        "\n": "\\n",
        "\f": "\\f",
        "\r": "\\r",
    }
)


def _toml_basic_string(value: object) -> str:
    """Render ``str(value)`` as a quoted TOML basic string with escapes."""
    return '"' + str(value).translate(_TOML_ESCAPES) + '"'


def config_to_toml(config: BaseDetectorConfig) -> str:
    """Convert configuration to TOML format string.

    Only ``classify`` and the ``path`` / ``folder`` sections are serialized.
    Every section value is written as a quoted string.

    Args:
        config: Configuration instance to serialize.

    Returns:
        TOML-formatted configuration string (no trailing newline).
    """
    # Top-level keys must come before the first table header: in TOML any
    # key that follows "[folder]" belongs to the "folder" table.
    lines: list[str] = [f"classify = {str(config.classify).lower()}"]

    for table_name, section in (("path", config.path), ("folder", config.folder)):
        lines.append("")
        lines.append(f"[{table_name}]")
        # Escape each value so backslashes (Windows paths) and quotes still
        # produce a valid TOML document.
        lines.extend(
            f"{name} = {_toml_basic_string(value)}"
            for name, value in section.model_dump().items()
        )

    return "\n".join(lines)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def create_cli(
    detector_class: type[BaseDetector],
    config_class: type[BaseDetectorConfig] | None = None,
) -> typer.Typer:
    """Create a CLI application for a detector class.

    This factory function generates a Typer CLI with standard commands
    for running the detector pipeline (``run``) and writing a default
    configuration file (``init``), plus global logging options.

    Args:
        detector_class: The detector class to create CLI for.
        config_class: Optional configuration class. If None, uses
            detector_class.config_class.

    Returns:
        Configured Typer application.

    Example:
        app = create_cli(MyDetector)

        @app.command()
        def custom_command():
            '''Add custom commands to the CLI.'''
            ...

        if __name__ == "__main__":
            app()
    """
    app = typer.Typer(
        name=detector_class.__name__,
        help=detector_class.__doc__ or f"CLI for {detector_class.__name__}",
    )
    resolved_config_class = (
        config_class if config_class is not None else detector_class.config_class
    )

    @app.callback()
    def main(
        log_level: Annotated[
            str,
            typer.Option("--log-level", "-l", help="Log level"),
        ] = "INFO",
        log_format: Annotated[
            Literal["console", "json"],
            typer.Option("--log-format", help="Log format: console or json"),
        ] = "console",
    ) -> None:
        """Configure global options for the CLI."""
        configure_logging(level=log_level, format=log_format)

    @app.command()
    def run(
        config_path: Annotated[
            Path | None,
            typer.Option("--config", "-c", help="Config file path"),
        ] = None,
        stages: Annotated[
            str | None,
            typer.Option("--stages", "-s", help="Comma-separated stages to run"),
        ] = None,
    ) -> None:
        """Run the detector pipeline."""
        # Function-scope import keeps this block self-contained; tomllib is
        # part of the standard library on the Python versions this package
        # supports (>= 3.12).
        import tomllib

        if config_path is None:
            config = resolved_config_class()
        else:
            # An explicitly requested config file that is missing is an
            # error; silently running with defaults would hide the mistake.
            if not config_path.is_file():
                raise typer.BadParameter(
                    f"Config file not found: {config_path}",
                    param_hint="--config",
                )
            text = config_path.read_text(encoding="utf-8")
            # `init` writes TOML while BaseDetectorConfig.save() writes JSON.
            # A JSON config is an object and so starts with "{", which can
            # never begin a TOML document, so the first character is enough
            # to tell the two formats apart.
            if text.lstrip().startswith("{"):
                config = resolved_config_class.model_validate_json(text)
            else:
                config = resolved_config_class.model_validate(tomllib.loads(text))

        stage_list: list[str] | None = None
        if stages:
            # Tolerate spaces and stray commas: "extract, vectorize,".
            stage_list = [part.strip() for part in stages.split(",") if part.strip()]
            if not stage_list:
                raise typer.BadParameter(
                    "No stage names given",
                    param_hint="--stages",
                )

        detector = detector_class(config)
        detector.setup()
        detector.run(stage_list)

    @app.command()
    def init(
        output: Annotated[
            Path,
            typer.Option("--output", "-o", help="Output config file path"),
        ] = Path("config.toml"),
    ) -> None:
        """Generate default configuration file."""
        config = resolved_config_class()
        # TOML files are UTF-8 by specification; `run` reads with the same
        # encoding.
        output.write_text(config_to_toml(config), encoding="utf-8")
        typer.echo(f"Config written to {output}")

    return app
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Configuration system using Pydantic v2 and pydantic-settings.
|
|
2
|
+
|
|
3
|
+
This module provides a hierarchical configuration system that supports:
|
|
4
|
+
- Default values
|
|
5
|
+
- TOML configuration files
|
|
6
|
+
- Environment variables
|
|
7
|
+
- Programmatic overrides
|
|
8
|
+
|
|
9
|
+
Configuration priority (later overrides earlier):
|
|
10
|
+
1. Default values (defined in models)
|
|
11
|
+
2. Config file (config.toml)
|
|
12
|
+
3. Environment variables (MALWARE_DETECTOR_*)
|
|
13
|
+
4. Direct assignment
|
|
14
|
+
|
|
15
|
+
Typical usage:
|
|
16
|
+
from malware_detector.config import BaseDetectorConfig
|
|
17
|
+
|
|
18
|
+
# Load with defaults
|
|
19
|
+
config = BaseDetectorConfig()
|
|
20
|
+
|
|
21
|
+
# Load from file
|
|
22
|
+
config = BaseDetectorConfig.load(Path("config.toml"))
|
|
23
|
+
|
|
24
|
+
# Subclass for custom configs
|
|
25
|
+
class MyConfig(BaseDetectorConfig):
|
|
26
|
+
batch_size: int = 32
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import Self
|
|
31
|
+
|
|
32
|
+
from pydantic import BaseModel, ConfigDict
|
|
33
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PathConfig(BaseModel):
    """File locations used by a detector.

    Keys beyond the declared fields are accepted (``extra="allow"``).

    Attributes:
        input: Path to input dataset directory or file.
        output: Path to output prediction file.
        config: Path to configuration file.
    """

    model_config = ConfigDict(extra="allow")

    # Defaults are relative paths.
    input: Path = Path("./Dataset/program")
    output: Path = Path("./Predict/predict.json")
    config: Path = Path("./config.toml")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class FolderConfig(BaseModel):
    """Directories in which a detector stores its data.

    Keys beyond the declared fields are accepted (``extra="allow"``).

    Attributes:
        dataset: Directory for raw dataset files.
        feature: Directory for extracted features.
        vectorize: Directory for vectorized data.
        model: Directory for trained models.
        predict: Directory for prediction outputs.
    """

    model_config = ConfigDict(extra="allow")

    dataset: Path = Path("./Dataset/")
    feature: Path = Path("./Feature/")
    vectorize: Path = Path("./Vectorize/")
    model: Path = Path("./Model/")
    predict: Path = Path("./Predict/")

    @property
    def all_folders(self) -> list[Path]:
        """Collect the values of every declared field annotated as ``Path``.

        Returns:
            The field values, in field-declaration order. Fields whose
            annotation is not exactly ``Path`` are skipped.
        """
        folders: list[Path] = []
        for field_name, info in type(self).model_fields.items():
            if info.annotation is not Path:
                continue
            folders.append(getattr(self, field_name))
        return folders
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class BaseDetectorConfig(BaseSettings):
    """Base configuration for malware detectors.

    This class can be subclassed to add detector-specific configuration
    options. All fields support loading from environment variables with
    the MALWARE_DETECTOR_ prefix; nested fields use ``__`` as delimiter
    (e.g. ``MALWARE_DETECTOR_PATH__INPUT``).

    Attributes:
        path: Path configuration for input/output files.
        folder: Folder configuration for data directories.
        classify: Whether to perform family classification (vs detection).

    Example:
        class MyDetectorConfig(BaseDetectorConfig):
            model_config = SettingsConfigDict(
                env_prefix="MY_DETECTOR_",
                toml_file="my_config.toml",
            )
            batch_size: int = 32
    """

    model_config = SettingsConfigDict(
        env_prefix="MALWARE_DETECTOR_",
        env_nested_delimiter="__",
        extra="allow",
    )

    path: PathConfig = PathConfig()
    folder: FolderConfig = FolderConfig()
    classify: bool = False

    def save(self, path: Path | None = None) -> None:
        """Save configuration to a JSON file.

        Args:
            path: Output file path. Defaults to self.path.config.
        """
        save_path = path if path is not None else self.path.config
        save_path.write_text(self.model_dump_json(indent=2))

    @classmethod
    def load(cls, path: Path) -> Self:
        """Load configuration from a JSON or TOML file.

        The format is detected from the content rather than the file
        suffix, so files written by ``save()`` (JSON, even when named
        ``config.toml``) and hand-written TOML files both load.

        Args:
            path: Path to the configuration file.

        Returns:
            Loaded configuration instance.
        """
        # Function-scope import keeps this block self-contained; tomllib is
        # in the standard library on the supported Python versions (>= 3.12).
        import tomllib

        text = path.read_text()
        # A JSON config is an object and starts with "{", which can never
        # begin a TOML document.
        if text.lstrip().startswith("{"):
            return cls.model_validate_json(text)
        return cls.model_validate(tomllib.loads(text))
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Base detector class for malware detection pipelines.
|
|
2
|
+
|
|
3
|
+
This module provides the base class for all malware detectors.
|
|
4
|
+
Subclasses should implement stage methods matching the names in
|
|
5
|
+
default_stages.
|
|
6
|
+
|
|
7
|
+
Typical usage:
|
|
8
|
+
from malware_detector import BaseDetector, BaseDetectorConfig
|
|
9
|
+
|
|
10
|
+
class MyDetector(BaseDetector):
|
|
11
|
+
def stage_extract(self) -> Any:
|
|
12
|
+
# Extract features from dataset
|
|
13
|
+
...
|
|
14
|
+
|
|
15
|
+
def stage_vectorize(self) -> Any:
|
|
16
|
+
# Convert features to vectors
|
|
17
|
+
...
|
|
18
|
+
|
|
19
|
+
def stage_train(self) -> Any:
|
|
20
|
+
# Train the model
|
|
21
|
+
...
|
|
22
|
+
|
|
23
|
+
def stage_predict(self) -> Any:
|
|
24
|
+
# Run predictions
|
|
25
|
+
...
|
|
26
|
+
|
|
27
|
+
detector = MyDetector()
|
|
28
|
+
detector.setup()
|
|
29
|
+
results = detector.run()
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
import structlog
|
|
35
|
+
|
|
36
|
+
from .config import BaseDetectorConfig
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BaseDetector:
    """Base class for malware detectors.

    This class defines the interface and common functionality for all
    malware detectors. Subclasses should implement stage methods matching
    the names in default_stages (e.g., stage_extract for "extract").

    For standard detectors, implement: stage_extract, stage_vectorize,
    stage_train, and stage_predict.

    For custom pipelines, override default_stages and implement matching
    stage methods.

    Attributes:
        default_stages: List of stage names to run by default.
        config_class: Configuration class to use for this detector.
        config: The loaded configuration instance.
        log: Structured logger bound to this detector.

    Example:
        class MyDetector(BaseDetector):
            config_class = MyConfig

            def stage_extract(self) -> Path:
                self.log.info("extracting_features")
                # Implementation...
                return self.config.folder.feature

            def stage_vectorize(self) -> Any:
                ...

            def stage_train(self) -> Any:
                ...

            def stage_predict(self) -> Any:
                ...
    """

    default_stages: list[str] = ["extract", "vectorize", "train", "predict"]
    config_class: type[BaseDetectorConfig] = BaseDetectorConfig

    def __init__(self, config: BaseDetectorConfig | None = None) -> None:
        """Initialize the detector with configuration.

        Args:
            config: Configuration instance. If None, creates default config
                using config_class.
        """
        self.config = config if config is not None else self.config_class()
        self.log = structlog.get_logger().bind(
            detector=self.__class__.__name__,
        )

    def setup(self) -> None:
        """Create all required directories from folder configuration."""
        for folder in self.config.folder.all_folders:
            folder.mkdir(parents=True, exist_ok=True)
            self.log.debug("folder_created", path=str(folder))

    def run(self, stages: list[str] | None = None) -> dict[str, Any]:
        """Execute the detection pipeline.

        All requested stages are resolved to their ``stage_<name>`` methods
        before any of them runs, so a misspelled stage name fails
        immediately instead of after earlier stages have already executed.

        Args:
            stages: List of stage names to run. If None, runs all
                default_stages. An empty list runs nothing.

        Returns:
            Dictionary mapping stage names to their return values.

        Raises:
            NotImplementedError: If a requested stage method doesn't exist.
        """
        # Only None selects the defaults; an explicit empty list is honored.
        selected = list(self.default_stages if stages is None else stages)
        results: dict[str, Any] = {}

        self.log.info("pipeline_started", stages=selected)

        planned = []
        for stage in selected:
            method = getattr(self, f"stage_{stage}", None)
            if not callable(method):
                self.log.error("stage_not_found", stage=stage)
                raise NotImplementedError(f"Stage '{stage}' not implemented")
            planned.append((stage, method))

        for stage, method in planned:
            self.log.info("stage_started", stage=stage)
            results[stage] = method()
            self.log.info("stage_completed", stage=stage)

        self.log.info("pipeline_completed", stages=selected)
        return results
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Structured logging configuration using structlog.
|
|
2
|
+
|
|
3
|
+
This module provides a unified logging interface with support for both
|
|
4
|
+
console (human-readable) and JSON (machine-readable) output formats.
|
|
5
|
+
|
|
6
|
+
Typical usage:
|
|
7
|
+
from malware_detector.logging import configure_logging, get_logger
|
|
8
|
+
|
|
9
|
+
configure_logging(level="INFO", format="console")
|
|
10
|
+
log = get_logger(__name__)
|
|
11
|
+
log.info("event_name", key="value")
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import logging
|
|
15
|
+
import sys
|
|
16
|
+
from typing import Literal
|
|
17
|
+
|
|
18
|
+
import structlog
|
|
19
|
+
from structlog.types import Processor
|
|
20
|
+
|
|
21
|
+
# Supported level names, each resolved to the logging module constant of
# the same name.
_LOG_LEVELS: dict[str, int] = {
    level_name: getattr(logging, level_name)
    for level_name in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL")
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def configure_logging(
    level: str = "INFO",
    format: Literal["console", "json"] = "console",
) -> None:
    """Set up structlog globally for the application.

    Intended to be called once at startup, before any logger is used.

    Args:
        level: Log level name (DEBUG, INFO, WARNING, ERROR, CRITICAL),
            matched case-insensitively. Unknown names fall back to INFO.
        format: "json" renders each event with the JSON renderer; any
            other value uses the console renderer, colored only when
            stdout is a TTY.
    """
    threshold = _LOG_LEVELS.get(level.upper(), logging.INFO)

    # Processors common to both formats; the renderer is appended last.
    pipeline: list[Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
    ]
    if format == "json":
        pipeline.append(structlog.processors.JSONRenderer())
    else:
        pipeline.append(structlog.dev.ConsoleRenderer(colors=sys.stdout.isatty()))

    structlog.configure(
        processors=pipeline,
        wrapper_class=structlog.make_filtering_bound_logger(threshold),
        context_class=dict,
        logger_factory=structlog.PrintLoggerFactory(),
        cache_logger_on_first_use=True,
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_logger(name: str | None = None) -> structlog.typing.FilteringBoundLogger:
    """Return a structlog logger, optionally tagged with a module name.

    Args:
        name: When non-empty, bound to the logger under the ``module`` key.

    Returns:
        The logger from ``structlog.get_logger()``, with ``module=name``
        bound if a name was given.
    """
    base: structlog.typing.FilteringBoundLogger = structlog.get_logger()
    if not name:
        return base
    return base.bind(module=name)
|
|
File without changes
|
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: malwareDetector
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Base framework for building malware detectors
|
|
5
|
+
Project-URL: Homepage, https://github.com/louiskyee/malwareDetector
|
|
6
|
+
Project-URL: Documentation, https://github.com/louiskyee/malwareDetector/wiki
|
|
7
|
+
Project-URL: Repository, https://github.com/louiskyee/malwareDetector.git
|
|
8
|
+
Author-email: PO-LIN LAI <bolin8017@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENCE.txt
|
|
11
|
+
Keywords: detector,framework,malware,security
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Requires-Dist: pydantic-settings<3.0,>=2.0
|
|
20
|
+
Requires-Dist: pydantic<3.0,>=2.0
|
|
21
|
+
Requires-Dist: structlog<25.0,>=24.0
|
|
22
|
+
Requires-Dist: typer<1.0,>=0.9
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy<2.0,>=1.8; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-cov<5.0,>=4.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: pytest<9.0,>=8.0; extra == 'dev'
|
|
27
|
+
Requires-Dist: ruff<1.0,>=0.1; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# malware-detector
|
|
31
|
+
|
|
32
|
+
A base framework for building malware detectors with modern Python.
|
|
33
|
+
|
|
34
|
+
* Source code: https://github.com/louiskyee/malwareDetector.git
|
|
35
|
+
* Wiki: https://github.com/louiskyee/malwareDetector/wiki
|
|
36
|
+
* PyPI: https://pypi.org/project/malware-detector/
|
|
37
|
+
|
|
38
|
+
## Features
|
|
39
|
+
|
|
40
|
+
- **Pydantic v2 Configuration** - Type-safe config with env vars and file support
|
|
41
|
+
- **Typer CLI** - Extensible command-line interface via factory function
|
|
42
|
+
- **Structured Logging** - Console and JSON output formats with structlog
|
|
43
|
+
- **Customizable Pipeline** - Define your own stages or use defaults
|
|
44
|
+
- **Type Hints** - Full typing support with py.typed marker
|
|
45
|
+
|
|
46
|
+
## Requirements
|
|
47
|
+
|
|
48
|
+
| Tool | Version |
|
|
49
|
+
|------|---------|
|
|
50
|
+
| Python | >= 3.12 |
|
|
51
|
+
|
|
52
|
+
## Installation
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
pip install malware-detector
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Or with [uv](https://github.com/astral-sh/uv):
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
uv add malware-detector
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Quick Start
|
|
65
|
+
|
|
66
|
+
### Basic Usage
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from malware_detector import BaseDetector, BaseDetectorConfig
|
|
70
|
+
|
|
71
|
+
class MyDetector(BaseDetector):
|
|
72
|
+
"""My custom malware detector."""
|
|
73
|
+
|
|
74
|
+
def stage_extract(self):
|
|
75
|
+
self.log.info("extracting_features", input=str(self.config.path.input))
|
|
76
|
+
# Extract features from dataset
|
|
77
|
+
return self.config.folder.feature
|
|
78
|
+
|
|
79
|
+
def stage_vectorize(self):
|
|
80
|
+
# Convert features to vectors
|
|
81
|
+
return self.config.folder.vectorize
|
|
82
|
+
|
|
83
|
+
def stage_train(self):
|
|
84
|
+
# Train the model
|
|
85
|
+
return self.config.folder.model
|
|
86
|
+
|
|
87
|
+
def stage_predict(self):
|
|
88
|
+
# Run predictions
|
|
89
|
+
return self.config.path.output
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
# Run the detector
|
|
93
|
+
detector = MyDetector()
|
|
94
|
+
detector.setup() # Creates directories
|
|
95
|
+
results = detector.run() # Runs all stages
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Run Specific Stages
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
# Run only extract and vectorize
|
|
102
|
+
results = detector.run(stages=["extract", "vectorize"])
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Custom Pipeline
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
class ClusteringDetector(BaseDetector):
|
|
109
|
+
"""Detector with custom pipeline stages."""
|
|
110
|
+
|
|
111
|
+
default_stages = ["preprocess", "embed", "cluster", "export"]
|
|
112
|
+
|
|
113
|
+
def stage_preprocess(self):
|
|
114
|
+
...
|
|
115
|
+
|
|
116
|
+
def stage_embed(self):
|
|
117
|
+
...
|
|
118
|
+
|
|
119
|
+
def stage_cluster(self):
|
|
120
|
+
...
|
|
121
|
+
|
|
122
|
+
def stage_export(self):
|
|
123
|
+
...
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Configuration
|
|
127
|
+
|
|
128
|
+
### Custom Config
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from pydantic_settings import SettingsConfigDict
|
|
132
|
+
from malware_detector import BaseDetectorConfig
|
|
133
|
+
|
|
134
|
+
class MyConfig(BaseDetectorConfig):
|
|
135
|
+
"""Custom configuration with additional fields."""
|
|
136
|
+
|
|
137
|
+
model_config = SettingsConfigDict(
|
|
138
|
+
env_prefix="MY_DETECTOR_",
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
batch_size: int = 32
|
|
142
|
+
model_name: str = "default"
|
|
143
|
+
use_gpu: bool = True
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class MyDetector(BaseDetector):
|
|
147
|
+
config_class = MyConfig
|
|
148
|
+
|
|
149
|
+
def stage_train(self):
|
|
150
|
+
self.log.info("training", batch_size=self.config.batch_size)
|
|
151
|
+
...
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Environment Variables
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
export MALWARE_DETECTOR_CLASSIFY=true
|
|
158
|
+
export MALWARE_DETECTOR_PATH__INPUT="./my_dataset"
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Config File
|
|
162
|
+
|
|
163
|
+
Save as `config.toml`:
|
|
164
|
+
|
|
165
|
+
```toml
|
|
166
|
+
[path]
|
|
167
|
+
input = "./Dataset/program"
|
|
168
|
+
output = "./Predict/predict.json"
|
|
169
|
+
|
|
170
|
+
[folder]
|
|
171
|
+
dataset = "./Dataset/"
|
|
172
|
+
feature = "./Feature/"
|
|
173
|
+
|
|
174
|
+
classify = false
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## CLI Integration
|
|
178
|
+
|
|
179
|
+
### Create CLI for Your Detector
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from malware_detector import create_cli
|
|
183
|
+
from my_detector import MyDetector
|
|
184
|
+
|
|
185
|
+
app = create_cli(MyDetector)
|
|
186
|
+
|
|
187
|
+
# Add custom commands
|
|
188
|
+
@app.command()
|
|
189
|
+
def evaluate():
|
|
190
|
+
"""Evaluate the trained model."""
|
|
191
|
+
...
|
|
192
|
+
|
|
193
|
+
if __name__ == "__main__":
|
|
194
|
+
app()
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
### CLI Usage
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
# Generate default config
|
|
201
|
+
python -m my_detector init --output config.toml
|
|
202
|
+
|
|
203
|
+
# Run full pipeline
|
|
204
|
+
python -m my_detector run --config config.toml
|
|
205
|
+
|
|
206
|
+
# Run specific stages
|
|
207
|
+
python -m my_detector run --stages extract,vectorize
|
|
208
|
+
|
|
209
|
+
# JSON logging for production
|
|
210
|
+
python -m my_detector run --log-format json --log-level DEBUG
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## Logging
|
|
214
|
+
|
|
215
|
+
```python
|
|
216
|
+
from malware_detector import configure_logging, get_logger
|
|
217
|
+
|
|
218
|
+
# Configure at startup
|
|
219
|
+
configure_logging(level="INFO", format="console")
|
|
220
|
+
|
|
221
|
+
# Get a logger
|
|
222
|
+
log = get_logger(__name__)
|
|
223
|
+
log.info("event_name", key="value", count=42)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Output formats:
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Console (development)
|
|
230
|
+
2024-01-19T10:30:00 [info] event_name key=value count=42
|
|
231
|
+
|
|
232
|
+
# JSON (production)
|
|
233
|
+
{"event": "event_name", "key": "value", "count": 42, "timestamp": "..."}
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## Migration from v0.1.x
|
|
237
|
+
|
|
238
|
+
| v0.1.x | v0.2.0 |
|
|
239
|
+
|--------|--------|
|
|
240
|
+
| `from malwareDetector.detector import detector` | `from malware_detector import BaseDetector` |
|
|
241
|
+
| `class MyDetector(detector)` | `class MyDetector(BaseDetector)` |
|
|
242
|
+
| `def extractFeature(self)` | `def stage_extract(self)` |
|
|
243
|
+
| `def vectorize(self)` | `def stage_vectorize(self)` |
|
|
244
|
+
| `def model(self, training)` | `def stage_train(self)` |
|
|
245
|
+
| `def predict(self)` | `def stage_predict(self)` |
|
|
246
|
+
| `config.json()` | `config.model_dump_json()` |
|
|
247
|
+
| `Config.parse_raw(data)` | `Config.model_validate_json(data)` |
|
|
248
|
+
|
|
249
|
+
## License
|
|
250
|
+
|
|
251
|
+
MIT
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
malware_detector/__init__.py,sha256=TONSYdjbFUHghYdF0fvO__uDPfVJCgLykf1eH6Qs_M8,928
|
|
2
|
+
malware_detector/cli.py,sha256=5u4OnxflAQhzNO6Mhtq0Czehjl4GqP7WM5Ya2YgclX0,4114
|
|
3
|
+
malware_detector/config.py,sha256=NSCZo7HHfk-n7i2FUapTCTPwNySy0NnWyrzRmTa0rkc,3902
|
|
4
|
+
malware_detector/detector.py,sha256=veFgO8cvUAJBAOIo3WXNov1BoA5sEdxk3VrAPMIxo1o,3992
|
|
5
|
+
malware_detector/logging.py,sha256=gIhlYou81gadWwfzSCcppGRbI0bMwPJJ0QlJR8cQ_yk,2455
|
|
6
|
+
malware_detector/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
malwaredetector-0.2.0.dist-info/METADATA,sha256=NBChOTvhLVW2UzhYK8bOORvZU4-a22CUHOjjZWuS-s4,5904
|
|
8
|
+
malwaredetector-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
9
|
+
malwaredetector-0.2.0.dist-info/licenses/LICENCE.txt,sha256=mtPx5Z9lQ9ASzCLcwbit290IQIfmRQqstebUtzSzAvc,1068
|
|
10
|
+
malwaredetector-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 (PO-LIN LAI)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|