malwareDetector 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of malwareDetector might be problematic. Click here for more details.
- malwareDetector/__init__.py +10 -0
- malwareDetector/config.py +220 -0
- malwareDetector/const.py +20 -0
- malwareDetector/detector.py +28 -0
- malwareDetector/utils.py +10 -0
- malwareDetector-0.1.0.dist-info/LICENCE.txt +21 -0
- malwareDetector-0.1.0.dist-info/METADATA +55 -0
- malwareDetector-0.1.0.dist-info/RECORD +10 -0
- malwareDetector-0.1.0.dist-info/WHEEL +5 -0
- malwareDetector-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""Load all user defined config and env vars."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import argparse
|
|
6
|
+
from pydantic import BaseModel
|
|
7
|
+
from .const import *
|
|
8
|
+
|
|
9
|
+
class PathConfig(BaseModel):
    """Input and output locations used by the detector."""

    # Where the input is read from; the CLI describes it as the "input folder with jsons".
    input: str = DEFAULT_INPUT_PATH
    # Where the result is written; the CLI describes it as the "embeddings path".
    output: str = DEFAULT_OUT_PATH
|
|
12
|
+
|
|
13
|
+
class ModelConfig(BaseModel):
    """Model selection and training hyperparameters."""

    # Name of the model to use (the CLI lists KNN, LR, MLP, RF, SVM).
    modelName: str = DEFAULT_MODEL_NAME

    # Embedding / training settings, mirrored one-to-one by the CLI options.
    dimensions: int = DEFAULT_DIMENSIONS
    workers: int = DEFAULT_WORKERS
    epochs: int = DEFAULT_EPOCHS
    min_count: int = DEFAULT_MIN_COUNT
    wl_iterations: int = DEFAULT_WL_ITERATIONS
    learning_rate: float = DEFAULT_LEARNING_RATE
    down_sampling: float = DEFAULT_DOWN_SAMPLING

    # NOTE(review): meaning of ``judge`` is not documented anywhere in this
    # file (its CLI help text is just "Rename?") -- confirm with the author.
    judge: int = DEFAULT_JUDGE
|
|
23
|
+
|
|
24
|
+
class FolderConfig(BaseModel):
    """Working directories used by the detector.

    Iterating over an instance yields the configured directory paths in the
    order dataset, feature, vectorize, model, predict.
    """

    dataset: str = DATASET_DIR
    feature: str = FEATURE_DIR
    vectorize: str = VECTORIZE_DIR
    model: str = MODEL_DIR
    predict: str = PREDICT_DIR

    def __iter__(self):
        """Return an iterator over this instance's folder paths.

        The paths are read from the instance fields rather than from the
        module-level constants, so folders customised in the config are
        honoured. With default settings the result is unchanged.
        """
        folder_list = [
            self.dataset,
            self.feature,
            self.vectorize,
            self.model,
            self.predict,
        ]
        return FolderClassIter(folder_list)
|
|
34
|
+
|
|
35
|
+
class FolderClassIter(object):
    """Single-pass iterator over a list of folder paths."""

    def __init__(self, folder_list):
        """Remember the list, its length at construction time and the cursor."""
        self._folder = folder_list
        self._folder_size = len(folder_list)
        self._current_index = 0

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def __next__(self):
        """Return the next folder, or raise StopIteration once exhausted."""
        position = self._current_index
        if position >= self._folder_size:
            raise StopIteration
        member = self._folder[position]
        self._current_index = position + 1
        return member
|
|
50
|
+
|
|
51
|
+
class Config(BaseModel):
    """Top-level configuration of the detector.

    Groups the path, folder and model sections together with the
    ``classify`` switch.
    """

    # pylint: disable=too-few-public-methods
    path: PathConfig = PathConfig()
    folder: FolderConfig = FolderConfig()
    model: ModelConfig = ModelConfig()
    # Whether to apply the family classifier (see the ``--classify`` option).
    classify: bool = DEFAULT_CLASSIFY
|
|
59
|
+
|
|
60
|
+
def parameter_parser(config: Config) -> Config:
    """Overlay the command-line parameters onto ``config``.

    Builds the Graph2Vec argument parser, parses the process arguments and
    copies each parsed value onto the matching field of ``config``, which is
    mutated in place. Options that are not given fall back to the
    ``DEFAULT_*`` constants, so the help texts report those constants via
    ``%(default)s`` instead of hard-coded numbers.

    Args:
        config: Configuration object to update.

    Returns:
        The same ``config`` instance, updated.
    """
    parser = argparse.ArgumentParser(description="Run Graph2Vec.")

    parser.add_argument(
        "--input-path", "-i",
        dest="input_path",
        nargs="?",
        default=DEFAULT_INPUT_PATH,
        help="Input folder with jsons.",
    )
    parser.add_argument(
        "--output-path", "-o",
        dest="output_path",
        nargs="?",
        default=DEFAULT_OUT_PATH,
        help="Embeddings path.",
    )
    parser.add_argument(
        "--dimensions",
        dest="dimensions",
        type=int,
        default=DEFAULT_DIMENSIONS,
        help="Number of dimensions. Default is %(default)s.",
    )
    parser.add_argument(
        "--workers",
        dest="workers",
        type=int,
        default=DEFAULT_WORKERS,
        help="Number of workers. Default is %(default)s.",
    )
    parser.add_argument(
        "--epochs",
        dest="epochs",
        type=int,
        default=DEFAULT_EPOCHS,
        help="Number of epochs. Default is %(default)s.",
    )
    parser.add_argument(
        "--min-count",
        dest="min_count",
        type=int,
        default=DEFAULT_MIN_COUNT,
        help="Minimal structural feature count. Default is %(default)s.",
    )
    parser.add_argument(
        "--wl-iterations",
        dest="wl_iterations",
        type=int,
        default=DEFAULT_WL_ITERATIONS,
        help="Number of Weisfeiler-Lehman iterations. Default is %(default)s.",
    )
    parser.add_argument(
        "--learning-rate",
        dest="learning_rate",
        type=float,
        default=DEFAULT_LEARNING_RATE,
        help="Initial learning rate. Default is %(default)s.",
    )
    parser.add_argument(
        "--down-sampling",
        dest="down_sampling",
        type=float,
        default=DEFAULT_DOWN_SAMPLING,
        help="Down sampling rate of features. Default is %(default)s.",
    )
    parser.add_argument(
        "--judge",
        dest="judge",
        type=int,
        default=DEFAULT_JUDGE,
        help="Rename?",
    )
    parser.add_argument(
        "--model", "-m",
        dest="model",
        nargs="?",
        default=DEFAULT_MODEL_NAME,
        help="Select the model(KNN,LR,MLP,RF,SVM).",
    )
    # A plain ``store_true`` flag defaults to False and would always override
    # DEFAULT_CLASSIFY. BooleanOptionalAction keeps ``--classify`` / ``-c``
    # working, honours the configured default and adds ``--no-classify``.
    parser.add_argument(
        "--classify", "-c",
        dest="classify",
        action=argparse.BooleanOptionalAction,
        default=DEFAULT_CLASSIFY,
        help="apply the family classifier",
    )

    args = parser.parse_args()

    # (target section, field name, parsed value)
    overrides = (
        (config.path, "input", args.input_path),
        (config.path, "output", args.output_path),
        (config.model, "dimensions", args.dimensions),
        (config.model, "workers", args.workers),
        (config.model, "epochs", args.epochs),
        (config.model, "min_count", args.min_count),
        (config.model, "wl_iterations", args.wl_iterations),
        (config.model, "learning_rate", args.learning_rate),
        (config.model, "down_sampling", args.down_sampling),
        (config.model, "judge", args.judge),
        (config.model, "modelName", args.model),
        (config, "classify", args.classify),
    )
    for section, field_name, value in overrides:
        # An ``nargs="?"`` option given without a value parses as None;
        # keep the existing setting instead of storing None.
        if value is not None:
            setattr(section, field_name, value)

    return config
|
|
179
|
+
|
|
180
|
+
def write_config_to_file(config: Config):
    """Write ``config`` as JSON to ``CONFIG_FILE_NAME`` in the working directory."""
    with open(CONFIG_FILE_NAME, "w", encoding="utf8") as handle:
        handle.write(config.json())
|
|
183
|
+
|
|
184
|
+
def detect_config_file(argparse: bool=False) -> None:
    """Make sure a config file exists in the current working directory.

    Args:
        argparse: When true, always (re)write the file from the command-line
            parameters. Otherwise write a default config only if no file
            named ``CONFIG_FILE_NAME`` is listed in the working directory.
    """
    if argparse:
        logging.info("Creating local config file by argparse.")
        new_config = parameter_parser(Config())
    elif CONFIG_FILE_NAME in os.listdir():
        # Nothing to create: an existing file is left untouched.
        logging.info(f"{CONFIG_FILE_NAME} detected!")
        return
    else:
        logging.info(
            "config file not found. creating local config file by default config."
        )
        new_config = Config()

    write_config_to_file(new_config)
    logging.info(f"{CONFIG_FILE_NAME} created!")
|
|
199
|
+
|
|
200
|
+
def read_config(count=1) -> Config:
    """Load the user's configuration from ``CONFIG_FILE_NAME``.

    Attempts are numbered starting at ``count`` and stop after attempt 3.
    After a failed attempt the error is logged and a default config file is
    created if none exists. When every attempt fails, a default ``Config``
    is returned.
    """
    attempt = count
    while attempt <= 3:
        if attempt != 1:
            logging.info(f"Trying to read config time:{attempt}")
        try:
            with open(CONFIG_FILE_NAME, encoding="utf8") as handle:
                return Config.parse_raw(handle.read())
        except Exception as err:
            logging.warning(err)
            # Recreate a default file if it is missing, then retry.
            detect_config_file(argparse=False)
        attempt += 1

    logging.warning("Failed to read config, returning default config")
    return Config()
|
|
216
|
+
|
|
217
|
+
# Import-time bootstrap: write the config file from the command-line
# parameters, then load it into the module-level CONFIG object.
# NOTE(review): this parses the importing process's arguments on import.
detect_config_file(argparse=True)
CONFIG = read_config()

logging.info("config.py got executed")
|
malwareDetector/const.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# File the configuration is persisted to.
CONFIG_FILE_NAME = "config.json"

# Default values for the path, model and classify settings.
DEFAULT_INPUT_PATH = "./Dataset/0481b823f05900ab9e4ac43d38bee84a912fa41f5796ce6ca820e259bc99cc63"
DEFAULT_OUT_PATH = "./Feature/nci1.csv"
DEFAULT_MODEL_NAME = "SVM"
DEFAULT_DIMENSIONS = 128
DEFAULT_WORKERS = 4
DEFAULT_EPOCHS = 50
DEFAULT_MIN_COUNT = 1
DEFAULT_WL_ITERATIONS = 2
DEFAULT_LEARNING_RATE = 0.025
DEFAULT_DOWN_SAMPLING = 0.0001
DEFAULT_JUDGE = 0
DEFAULT_CLASSIFY = True

# Default working directories.
DATASET_DIR = "./Dataset/"
FEATURE_DIR = "./Feature/"
VECTORIZE_DIR = "./Vectorize/"
MODEL_DIR = "./Model/"
PREDICT_DIR = "./Predict/"
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import numpy as np
|
|
3
|
+
from .utils import platform_info
|
|
4
|
+
from .config import CONFIG
|
|
5
|
+
|
|
6
|
+
class detector(object):
    """Base class defining the malware detector interface.

    Derived classes must implement ``extractFeature``, ``vectorize``,
    ``model`` and ``predict``; the versions here only raise
    ``NotImplementedError``.
    """

    def __init__(self) -> None:
        # Configuration object loaded by the config module.
        self.config = CONFIG

    def extractFeature(self) -> list:
        """Extract features; must be overridden by the derived class."""
        raise NotImplementedError("extractFeature must be implemented in the derived class.")

    def vectorize(self) -> np.ndarray:
        """Vectorize the features; must be overridden by the derived class."""
        raise NotImplementedError("vectorize must be implemented in the derived class.")

    def model(self):
        """Build or train the model; must be overridden by the derived class."""
        raise NotImplementedError("model must be implemented in the derived class.")

    def predict(self):
        """Run the prediction; must be overridden by the derived class."""
        raise NotImplementedError("predict must be implemented in the derived class.")

    def mkdir(self, folder_list) -> None:
        """Create every folder in ``folder_list`` that does not exist yet.

        Paths that already exist are skipped. Parent directories are not
        created.
        """
        for folder in folder_list:
            try:
                os.mkdir(folder)
            except FileExistsError:
                # Already there. Attempting the creation directly avoids the
                # race between an existence check and the mkdir call.
                continue
|
|
26
|
+
|
|
27
|
+
if __name__ == "__main__":
    # platform_info() only returns the report string, so print it;
    # otherwise running this module shows nothing.
    print(platform_info())
|
malwareDetector/utils.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
from . import __version__
|
|
2
|
+
import sys, os, platform
|
|
3
|
+
|
|
4
|
+
def platform_info():
    """Return a multi-line description of the runtime environment.

    The report has five lines: package name and version, Python version
    (with embedded newlines removed), ``os.name``, platform system and
    release, and architecture plus processor.
    """
    python_version = sys.version.replace("\n", "")
    report_lines = [
        # The banner names this package (it previously said "tgcf").
        f"Running malwareDetector {__version__}",
        f"Python {python_version}",
        f"OS {os.name}",
        f"Platform {platform.system()} {platform.release()}",
        f"{platform.architecture()} {platform.processor()}",
    ]
    return "\n".join(report_lines)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2023 (PO-LIN LAI)
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: malwareDetector
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Malware detector specification for NTUST isLab
|
|
5
|
+
Author: PO-LIN LAI
|
|
6
|
+
Author-email: bolin8017@gmail.com
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENCE.txt
|
|
11
|
+
Requires-Dist: numpy
|
|
12
|
+
Requires-Dist: pydantic
|
|
13
|
+
|
|
14
|
+
malwareDetector
|
|
15
|
+
===================
|
|
16
|
+
|
|
17
|
+
[GitHub](https://github.com/louiskyee/malwareDetector.git)
|
|
18
|
+
|
|
19
|
+
Description
|
|
20
|
+
-----------
|
|
21
|
+
|
|
22
|
+
This is a malware detector specification for NTUST isLab.
|
|
23
|
+
|
|
24
|
+
Installation
|
|
25
|
+
------------
|
|
26
|
+
|
|
27
|
+
Use the package manager [pip](https://pip.pypa.io/en/stable/) to install `malwareDetector`.
|
|
28
|
+
|
|
29
|
+
Usage
|
|
30
|
+
-----
|
|
31
|
+
|
|
32
|
+
### import
|
|
33
|
+
* import class `detector` from `malwareDetector.detector`
|
|
34
|
+
```python
|
|
35
|
+
from malwareDetector.detector import detector
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Examples:
|
|
39
|
+
```python
|
|
40
|
+
from malwareDetector.detector import detector
|
|
41
|
+
import numpy as np
|
|
42
|
+
|
|
43
|
+
class subDetector(detector):
|
|
44
|
+
def extractFeature(self) -> list:
|
|
45
|
+
return 'This is the implementation of the extractFeature function from the derived class.'
|
|
46
|
+
|
|
47
|
+
def vectorize(self) -> np.array:
|
|
48
|
+
return 'This is the implementation of the vectorize function from the derived class.'
|
|
49
|
+
|
|
50
|
+
def model(self):
|
|
51
|
+
return 'This is the implementation of the model function from the derived class.'
|
|
52
|
+
|
|
53
|
+
def predict(self):
|
|
54
|
+
return 'This is the implementation of the predict function from the derived class.'
|
|
55
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
malwareDetector/__init__.py,sha256=KlbADAUEmORAxlQoZ9JeppK6dZplWNgJfz_noesWwH0,175
|
|
2
|
+
malwareDetector/config.py,sha256=ZqfO4T07_S5x6oZwWhr_QI4zuA7Y7BdshkCGCbg6nlc,8299
|
|
3
|
+
malwareDetector/const.py,sha256=DmVapJFZsmrK65UFjJdNaPnjIhf7cuqrawXzxmeoO6E,533
|
|
4
|
+
malwareDetector/detector.py,sha256=GNuDVXbdNizEYRYsBIDSfE2C4AlL6A7nNy1JuasK_oA,881
|
|
5
|
+
malwareDetector/utils.py,sha256=RhE8BuDKT7lRT69jzvRlzDHF-YfnIGN1P4tieXCVwoI,307
|
|
6
|
+
malwareDetector-0.1.0.dist-info/LICENCE.txt,sha256=2XPCaZqZ-jgHh7e1DKa87JUeuOB6DC0jaZonmjDeILM,1088
|
|
7
|
+
malwareDetector-0.1.0.dist-info/METADATA,sha256=T5ud1YcPfN6UHPa8rmD9vB5DYkCUhpix30ejhb7i3I4,1438
|
|
8
|
+
malwareDetector-0.1.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
|
|
9
|
+
malwareDetector-0.1.0.dist-info/top_level.txt,sha256=wRXSanQD5XDXRYp3lPh1SjltOo6rpC5jktmR69tqIQo,16
|
|
10
|
+
malwareDetector-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
malwareDetector
|