malwareDetector 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of malwareDetector might be problematic. Click here for more details.

@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 (PO-LIN LAI)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.1
2
+ Name: malwareDetector
3
+ Version: 0.1.0
4
+ Summary: Malware detector specification for NTUST isLab
5
+ Author: PO-LIN LAI
6
+ Author-email: bolin8017@gmail.com
7
+ License: MIT
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENCE.txt
11
+
12
+ malwareDetector
13
+ ===================
14
+
15
+ [GitHub](https://github.com/louiskyee/malwareDetector.git)
16
+
17
+ Description
18
+ -----------
19
+
20
+ This is a malware detector specification for NTUST isLab.
21
+
22
+ Installation
23
+ ------------
24
+
25
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to install `malwareDetector`.
26
+
27
+ Usage
28
+ -----
29
+
30
+ ### import
31
+ * import class `detector` from `malwareDetector.detector`
32
+ ```python=
33
+ from malwareDetector.detector import detector
34
+ ```
35
+
36
+ ### Examples:
37
+ ```python=
38
+ from malwareDetector.detector import detector
39
+ import numpy as np
40
+
41
+ class subDetector(detector):
42
+ def extractFeature(self) -> list:
43
+ return 'This is the implementation of the extractFeature function from the derived class.'
44
+
45
+ def vectorize(self) -> np.array:
46
+ return 'This is the implementation of the vectorize function from the derived class.'
47
+
48
+ def model(self):
49
+ return 'This is the implementation of the model function from the derived class.'
50
+
51
+ def predict(self):
52
+ return 'This is the implementation of the predict function from the derived class.'
53
+ ```
@@ -0,0 +1,42 @@
1
+ malwareDetector
2
+ ===================
3
+
4
+ [GitHub](https://github.com/louiskyee/malwareDetector.git)
5
+
6
+ Description
7
+ -----------
8
+
9
+ This is a malware detector specification for NTUST isLab.
10
+
11
+ Installation
12
+ ------------
13
+
14
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to install `malwareDetector`: `pip install malwareDetector`.
15
+
16
+ Usage
17
+ -----
18
+
19
+ ### import
20
+ * import class `detector` from `malwareDetector.detector`
21
+ ```python
22
+ from malwareDetector.detector import detector
23
+ ```
24
+
25
+ ### Examples:
26
+ ```python
27
+ from malwareDetector.detector import detector
28
+ import numpy as np
29
+
30
+ class subDetector(detector):
31
+ def extractFeature(self) -> list:
32
+ return 'This is the implementation of the extractFeature function from the derived class.'
33
+
34
+ def vectorize(self) -> np.array:
35
+ return 'This is the implementation of the vectorize function from the derived class.'
36
+
37
+ def model(self):
38
+ return 'This is the implementation of the model function from the derived class.'
39
+
40
+ def predict(self):
41
+ return 'This is the implementation of the predict function from the derived class.'
42
+ ```
@@ -0,0 +1,10 @@
1
# -*- coding: utf-8 -*-

"""detector

This is a malware detector spec for NTUST isLab.
"""

from importlib.metadata import PackageNotFoundError, version

try:
    # Look the version up in the installed distribution's metadata.
    # ``__package__`` can be empty when this file is not imported as a
    # package, so fall back to the module name for the lookup.
    __version__ = version(__package__ or __name__)
except PackageNotFoundError:
    # Distribution metadata only exists after installation. Importing from a
    # plain source checkout (e.g. a directory added to sys.path) must not
    # fail just because the version cannot be resolved.
    __version__ = "0.0.0+unknown"
@@ -0,0 +1,220 @@
1
+ """Load all user defined config and env vars."""
2
+
3
+ import logging
4
+ import os
5
+ import argparse
6
+ from pydantic import BaseModel
7
+ from .const import *
8
+
9
class PathConfig(BaseModel):
    # Input/output locations. Both start from the package defaults and are
    # overwritten by --input-path / --output-path in parameter_parser().
    input: str = DEFAULT_INPUT_PATH
    output: str = DEFAULT_OUT_PATH
12
+
13
class ModelConfig(BaseModel):
    # Model hyper-parameters. Each field starts from its DEFAULT_* constant
    # and is overwritten by the matching command-line option in
    # parameter_parser(); the notes below repeat that option's help text.

    # --model / -m: "Select the model(KNN,LR,MLP,RF,SVM)."
    modelName: str = DEFAULT_MODEL_NAME
    # --dimensions: number of dimensions.
    dimensions: int = DEFAULT_DIMENSIONS
    # --workers: number of workers.
    workers: int = DEFAULT_WORKERS
    # --epochs: number of epochs.
    epochs: int = DEFAULT_EPOCHS
    # --min-count: minimal structural feature count.
    min_count: int = DEFAULT_MIN_COUNT
    # --wl-iterations: number of Weisfeiler-Lehman iterations.
    wl_iterations: int = DEFAULT_WL_ITERATIONS
    # --learning-rate: initial learning rate.
    learning_rate: float = DEFAULT_LEARNING_RATE
    # --down-sampling: down sampling rate of features.
    down_sampling: float = DEFAULT_DOWN_SAMPLING
    # --judge: meaning not documented in this file (help text is "Rename?").
    judge: int = DEFAULT_JUDGE
23
+
24
class FolderConfig(BaseModel):
    # Directory paths used by the package; each defaults to the matching
    # *_DIR constant.
    dataset: str = DATASET_DIR
    feature: str = FEATURE_DIR
    vectorize: str = VECTORIZE_DIR
    model: str = MODEL_DIR
    predict: str = PREDICT_DIR

    def __iter__(self):
        """Iterate over this instance's folder paths in declaration order.

        The paths are read from the instance fields, so values supplied at
        construction time or loaded from the config file are the ones that
        get yielded. With the default field values the result is identical
        to iterating the *_DIR constants.

        NOTE(review): this overrides the ``__iter__`` that pydantic's
        ``BaseModel`` provides (which yields ``(field_name, value)`` pairs),
        so ``dict(folder_config)`` will not work on this model.

        Returns:
            FolderClassIter: a single-pass iterator over the five paths.
        """
        folder_list = [
            self.dataset,
            self.feature,
            self.vectorize,
            self.model,
            self.predict,
        ]
        return FolderClassIter(folder_list)
34
+
35
class FolderClassIter(object):
    """Single-pass iterator over the entries of a folder list."""

    def __init__(self, folder_list):
        """Remember the list, its length at creation time, and a cursor."""
        self._folder = folder_list
        self._folder_size = len(folder_list)
        self._current_index = 0

    def __iter__(self):
        """Return the iterator itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next entry, or raise StopIteration when exhausted."""
        position = self._current_index
        # Guard clause: nothing left once the cursor reaches the stored size.
        if not position < self._folder_size:
            raise StopIteration
        entry = self._folder[position]
        self._current_index = position + 1
        return entry
50
+
51
class Config(BaseModel):
    """The blueprint for detector's whole config."""

    # The original pragma was cut off after "too-few-public-", which is not a
    # valid pylint message name; spell out the full message.
    # pylint: disable=too-few-public-methods

    # Sub-sections of the configuration, each with its own defaults.
    path: PathConfig = PathConfig()
    folder: FolderConfig = FolderConfig()
    model: ModelConfig = ModelConfig()
    # Overwritten by --classify / -c in parameter_parser().
    classify: bool = DEFAULT_CLASSIFY
59
+
60
def parameter_parser(config: Config) -> Config:
    """
    Parse the process command line and copy the options onto ``config``.

    Every option defaults to its ``DEFAULT_*`` constant, so the matching
    fields of ``config`` are overwritten even when an option is absent from
    the command line.

    Arguments this parser does not define are logged and ignored instead of
    terminating the process: this function is executed while the package is
    being imported, so ``sys.argv`` may belong to a host program (for example
    a test runner invoked with its own options).

    Args:
        config: Configuration object to update in place.

    Returns:
        The same ``config`` object, after the update.
    """
    parser = argparse.ArgumentParser(description="Run Graph2Vec.")

    # nargs="?" allows the flag to appear without a value; ``const`` makes
    # that case resolve to the default instead of storing None in the config.
    parser.add_argument("--input-path", "-i",
                        dest="input_path",
                        nargs="?",
                        const=DEFAULT_INPUT_PATH,
                        default=DEFAULT_INPUT_PATH,
                        help="Input folder with jsons."
                        )

    parser.add_argument("--output-path", "-o",
                        dest="output_path",
                        nargs="?",
                        const=DEFAULT_OUT_PATH,
                        default=DEFAULT_OUT_PATH,
                        help="Embeddings path."
                        )

    # Help texts use %(default)s so they always show the real default rather
    # than a hard-coded number that can drift from the constants.
    parser.add_argument("--dimensions",
                        dest="dimensions",
                        type=int,
                        default=DEFAULT_DIMENSIONS,
                        help="Number of dimensions. Default is %(default)s."
                        )

    parser.add_argument("--workers",
                        dest="workers",
                        type=int,
                        default=DEFAULT_WORKERS,
                        help="Number of workers. Default is %(default)s."
                        )

    parser.add_argument("--epochs",
                        dest="epochs",
                        type=int,
                        default=DEFAULT_EPOCHS,
                        help="Number of epochs. Default is %(default)s."
                        )

    parser.add_argument("--min-count",
                        dest="min_count",
                        type=int,
                        default=DEFAULT_MIN_COUNT,
                        help="Minimal structural feature count. Default is %(default)s."
                        )

    parser.add_argument("--wl-iterations",
                        dest="wl_iterations",
                        type=int,
                        default=DEFAULT_WL_ITERATIONS,
                        help="Number of Weisfeiler-Lehman iterations. Default is %(default)s."
                        )

    parser.add_argument("--learning-rate",
                        dest="learning_rate",
                        type=float,
                        default=DEFAULT_LEARNING_RATE,
                        help="Initial learning rate. Default is %(default)s."
                        )

    parser.add_argument("--down-sampling",
                        dest="down_sampling",
                        type=float,
                        default=DEFAULT_DOWN_SAMPLING,
                        help="Down sampling rate of features. Default is %(default)s."
                        )

    parser.add_argument("--judge",
                        dest="judge",
                        type=int,
                        default=DEFAULT_JUDGE,
                        help="Rename?"
                        )

    parser.add_argument('--model', '-m',
                        dest="model",
                        nargs='?',
                        const=DEFAULT_MODEL_NAME,
                        default=DEFAULT_MODEL_NAME,
                        help='Select the model(KNN,LR,MLP,RF,SVM).'
                        )

    # NOTE(review): store_true defaults to False, so ``config.classify`` ends
    # up False unless -c is given, regardless of DEFAULT_CLASSIFY. Left as-is
    # because callers may rely on the flag being opt-in -- confirm the intent.
    parser.add_argument('--classify', '-c',
                        dest="classify",
                        action='store_true',
                        help='apply the family classifier'
                        )

    args, unknown = parser.parse_known_args()
    if unknown:
        logging.warning(
            "Ignoring unrecognized command-line arguments: %s", unknown
        )

    # Every dest above has a default, so the attributes always exist and can
    # be copied unconditionally.
    config.path.input = args.input_path
    config.path.output = args.output_path

    config.model.dimensions = args.dimensions
    config.model.workers = args.workers
    config.model.epochs = args.epochs
    config.model.min_count = args.min_count
    config.model.wl_iterations = args.wl_iterations
    config.model.learning_rate = args.learning_rate
    config.model.down_sampling = args.down_sampling
    config.model.judge = args.judge
    config.model.modelName = args.model

    config.classify = args.classify

    return config
179
+
180
def write_config_to_file(config: Config):
    """Serialise ``config`` to JSON and store it in ``CONFIG_FILE_NAME``.

    The file is opened in write mode, so any existing content is replaced.
    """
    with open(CONFIG_FILE_NAME, mode="w", encoding="utf8") as handle:
        serialized = config.json()
        handle.write(serialized)
183
+
184
def detect_config_file(argparse: bool=False) -> None:
    """Make sure a config file exists in the current working directory.

    Args:
        argparse: When true, always (re)write the config file from the
            command-line options. When false, keep an existing file and only
            create one from the default config if none is found. Note that
            inside this function the name shadows the ``argparse`` module.
    """
    # Case 1: the caller asked for a file built from the command line.
    if argparse:
        logging.info(
            "Creating local config file by argparse."
        )
        parsed_config = parameter_parser(Config())
        write_config_to_file(parsed_config)
        logging.info(f"{CONFIG_FILE_NAME} created!")
        return

    # Case 2: a config file is already present; leave it untouched.
    if CONFIG_FILE_NAME in os.listdir():
        logging.info(f"{CONFIG_FILE_NAME} detected!")
        return

    # Case 3: nothing there yet, so write the defaults.
    logging.info(
        "config file not found. creating local config file by default config."
    )
    write_config_to_file(Config())
    logging.info(f"{CONFIG_FILE_NAME} created!")
199
+
200
def read_config(count=1) -> Config:
    """Load the configuration defined by user.

    Attempts are numbered starting at ``count``; attempts numbered above 3
    are not made. After a failed attempt the error is logged and
    ``detect_config_file(argparse=False)`` is given the chance to create a
    missing file before the next try. If no attempt succeeds, a default
    ``Config`` is returned.
    """
    attempt = count
    while not attempt > 3:
        if attempt != 1:
            logging.info(f"Trying to read config time:{attempt}")
        try:
            with open(CONFIG_FILE_NAME, encoding="utf8") as file:
                return Config.parse_raw(file.read())
        except Exception as err:
            # Deliberately broad: any read or parse problem leads to a retry.
            logging.warning(err)
            detect_config_file(argparse=False)
        attempt = attempt + 1

    logging.warning("Failed to read config, returning default config")
    return Config()
216
+
217
# Import-time side effects: the config file is (re)written from the current
# command line, then read back into the module-level CONFIG object.
detect_config_file(argparse=True)
CONFIG = read_config()

logging.info("config.py got executed")
@@ -0,0 +1,20 @@
1
# Name of the JSON file the configuration is written to and read from.
CONFIG_FILE_NAME = "config.json"

# Defaults for the input/output paths.
DEFAULT_INPUT_PATH = "./Dataset/0481b823f05900ab9e4ac43d38bee84a912fa41f5796ce6ca820e259bc99cc63"
DEFAULT_OUT_PATH = "./Feature/nci1.csv"

# Defaults for the model settings.
DEFAULT_MODEL_NAME = "SVM"
DEFAULT_DIMENSIONS = 128
DEFAULT_WORKERS = 4
DEFAULT_EPOCHS = 50
DEFAULT_MIN_COUNT = 1
DEFAULT_WL_ITERATIONS = 2
DEFAULT_LEARNING_RATE = 0.025
DEFAULT_DOWN_SAMPLING = 0.0001
DEFAULT_JUDGE = 0
DEFAULT_CLASSIFY = True

# Default directory paths (relative, with trailing slash).
DATASET_DIR = "./Dataset/"
FEATURE_DIR = "./Feature/"
VECTORIZE_DIR = "./Vectorize/"
MODEL_DIR = "./Model/"
PREDICT_DIR = "./Predict/"
@@ -0,0 +1,28 @@
1
+ import os
2
+ import numpy as np
3
+ from .utils import platform_info
4
+ from .config import CONFIG
5
+
6
class detector(object):
    """Base class that defines the detector interface.

    Subclasses are expected to override ``extractFeature``, ``vectorize``,
    ``model`` and ``predict``; the versions here only raise
    ``NotImplementedError``.
    """

    def __init__(self) -> None:
        # Shared configuration object loaded when the config module is imported.
        self.config = CONFIG

    def extractFeature(self) -> list:
        """Must be overridden. Raises NotImplementedError in the base class."""
        raise NotImplementedError("extractFeature must be implemented in the derived class.")

    def vectorize(self) -> np.ndarray:
        """Must be overridden. Raises NotImplementedError in the base class."""
        raise NotImplementedError("vectorize must be implemented in the derived class.")

    def model(self):
        """Must be overridden. Raises NotImplementedError in the base class."""
        raise NotImplementedError("model must be implemented in the derived class.")

    def predict(self):
        """Must be overridden. Raises NotImplementedError in the base class."""
        raise NotImplementedError("predict must be implemented in the derived class.")

    def mkdir(self, folder_list) -> None:
        """Create every folder in ``folder_list`` that does not exist yet.

        Missing parent directories are created as well. Paths that already
        exist are left untouched.

        Args:
            folder_list: Iterable of directory paths.
        """
        for folder in folder_list:
            if not os.path.exists(folder):
                # exist_ok covers the case where the directory appears
                # between the check above and this call.
                os.makedirs(folder, exist_ok=True)
26
+
27
if __name__ == "__main__":
    # platform_info() only builds and returns the report string; print it so
    # that running the module actually shows the report.
    print(platform_info())
@@ -0,0 +1,10 @@
1
+ from . import __version__
2
+ import sys, os, platform
3
+
4
def platform_info():
    """Return a multi-line description of the package and runtime.

    The report lists the package version, the Python version (with line
    breaks removed), ``os.name``, the platform system and release, and the
    architecture tuple together with the processor name.

    Returns:
        str: The report, one item per line.
    """
    python_version = sys.version.replace("\n", "")
    report_lines = [
        # The banner previously named "tgcf", which is a different project.
        f"Running malwareDetector {__version__}",
        f"Python {python_version}",
        f"OS {os.name}",
        f"Platform {platform.system()} {platform.release()}",
        f"{platform.architecture()} {platform.processor()}",
    ]
    return "\n".join(report_lines)
@@ -0,0 +1,53 @@
1
+ Metadata-Version: 2.1
2
+ Name: malwareDetector
3
+ Version: 0.1.0
4
+ Summary: Malware detector specification for NTUST isLab
5
+ Author: PO-LIN LAI
6
+ Author-email: bolin8017@gmail.com
7
+ License: MIT
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENCE.txt
11
+
12
+ malwareDetector
13
+ ===================
14
+
15
+ [GitHub](https://github.com/louiskyee/malwareDetector.git)
16
+
17
+ Description
18
+ -----------
19
+
20
+ This is a malware detector specification for NTUST isLab.
21
+
22
+ Installation
23
+ ------------
24
+
25
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to install `malwareDetector`.
26
+
27
+ Usage
28
+ -----
29
+
30
+ ### import
31
+ * import class `detector` from `malwareDetector.detector`
32
+ ```python=
33
+ from malwareDetector.detector import detector
34
+ ```
35
+
36
+ ### Examples:
37
+ ```python=
38
+ from malwareDetector.detector import detector
39
+ import numpy as np
40
+
41
+ class subDetector(detector):
42
+ def extractFeature(self) -> list:
43
+ return 'This is the implementation of the extractFeature function from the derived class.'
44
+
45
+ def vectorize(self) -> np.array:
46
+ return 'This is the implementation of the vectorize function from the derived class.'
47
+
48
+ def model(self):
49
+ return 'This is the implementation of the model function from the derived class.'
50
+
51
+ def predict(self):
52
+ return 'This is the implementation of the predict function from the derived class.'
53
+ ```
@@ -0,0 +1,14 @@
1
+ LICENCE.txt
2
+ README.md
3
+ setup.py
4
+ malwareDetector/__init__.py
5
+ malwareDetector/config.py
6
+ malwareDetector/const.py
7
+ malwareDetector/detector.py
8
+ malwareDetector/utils.py
9
+ malwareDetector.egg-info/PKG-INFO
10
+ malwareDetector.egg-info/SOURCES.txt
11
+ malwareDetector.egg-info/dependency_links.txt
12
+ malwareDetector.egg-info/requires.txt
13
+ malwareDetector.egg-info/top_level.txt
14
+ tests/test_detector.py
@@ -0,0 +1,2 @@
1
+ numpy
2
+ pydantic
@@ -0,0 +1 @@
1
+ malwareDetector
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,26 @@
1
+ from setuptools import find_packages, setup
2
+
3
# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

# Runtime dependencies.
required_packages = [
    'numpy',
    'pydantic',
]

setup(
    name='malwareDetector',
    packages=find_packages(include=['malwareDetector']),
    version='0.1.0',
    description='Malware detector specification for NTUST isLab',
    long_description=long_description,
    long_description_content_type="text/markdown",
    author='PO-LIN LAI',
    author_email='bolin8017@gmail.com',
    license='MIT',
    install_requires=required_packages,
    # The deprecated setup_requires=['pytest-runner'] / tests_require /
    # test_suite trio is replaced by an optional "test" extra, so building or
    # installing the package no longer pulls in test tooling. Install it with
    # `pip install malwareDetector[test]` and run `pytest`.
    extras_require={'test': ['pytest']},
    python_requires='>=3.10',
)
@@ -0,0 +1,19 @@
1
+ import pytest, os, sys
2
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), "..")))
3
+ from malwareDetector.detector import detector
4
+
5
def test_detector_extractFeature():
    """The base class must not provide an extractFeature implementation."""
    with pytest.raises(NotImplementedError):
        # Previously called predict(), so extractFeature was never exercised.
        detector().extractFeature()
8
+
9
def test_detector_vectorize():
    """The base class must not provide a vectorize implementation."""
    with pytest.raises(NotImplementedError):
        # Previously called predict(), so vectorize was never exercised.
        detector().vectorize()
12
+
13
def test_detector_model():
    """The base class must not provide a model implementation."""
    with pytest.raises(NotImplementedError):
        # Previously called predict(), so model was never exercised.
        detector().model()
16
+
17
def test_detector_predict():
    """The base class must not provide a predict implementation."""
    base = detector()
    with pytest.raises(NotImplementedError):
        base.predict()