assemblyline-v4-service 4.5.1.dev76__py3-none-any.whl → 4.5.1.dev77__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of assemblyline-v4-service might be problematic. Click here for more details.
- assemblyline_v4_service/VERSION +1 -1
- assemblyline_v4_service/common/base.py +5 -0
- assemblyline_v4_service/common/ocr.py +42 -39
- {assemblyline_v4_service-4.5.1.dev76.dist-info → assemblyline_v4_service-4.5.1.dev77.dist-info}/METADATA +1 -1
- {assemblyline_v4_service-4.5.1.dev76.dist-info → assemblyline_v4_service-4.5.1.dev77.dist-info}/RECORD +9 -9
- test/test_common/test_ocr.py +4 -1
- {assemblyline_v4_service-4.5.1.dev76.dist-info → assemblyline_v4_service-4.5.1.dev77.dist-info}/LICENCE.md +0 -0
- {assemblyline_v4_service-4.5.1.dev76.dist-info → assemblyline_v4_service-4.5.1.dev77.dist-info}/WHEEL +0 -0
- {assemblyline_v4_service-4.5.1.dev76.dist-info → assemblyline_v4_service-4.5.1.dev77.dist-info}/top_level.txt +0 -0
assemblyline_v4_service/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
4.5.1.dev76
|
|
1
|
+
4.5.1.dev77
|
|
@@ -19,6 +19,7 @@ from assemblyline.odm.messages.task import Task as ServiceTask
|
|
|
19
19
|
from assemblyline_v4_service.common import helper
|
|
20
20
|
from assemblyline_v4_service.common.api import PrivilegedServiceAPI, ServiceAPI
|
|
21
21
|
from assemblyline_v4_service.common.ontology_helper import OntologyHelper
|
|
22
|
+
from assemblyline_v4_service.common.ocr import update_ocr_config
|
|
22
23
|
from assemblyline_v4_service.common.request import ServiceRequest
|
|
23
24
|
from assemblyline_v4_service.common.task import Task
|
|
24
25
|
|
|
@@ -84,6 +85,10 @@ class ServiceBase:
|
|
|
84
85
|
self.rules_hash: str = None
|
|
85
86
|
self.signatures_meta: dict = {}
|
|
86
87
|
|
|
88
|
+
# OCR-related
|
|
89
|
+
if self.config.get('ocr'):
|
|
90
|
+
update_ocr_config(self.config['ocr'])
|
|
91
|
+
|
|
87
92
|
@property
|
|
88
93
|
def api_interface(self):
|
|
89
94
|
return self.get_api_interface()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Dict, List, TextIO
|
|
3
|
+
from typing import Any, Dict, List, TextIO, Union
|
|
4
4
|
|
|
5
5
|
from assemblyline_v4_service.common.helper import get_service_manifest
|
|
6
6
|
from assemblyline_v4_service.common.utils import PASSWORD_WORDS
|
|
@@ -154,44 +154,47 @@ OCR_INDICATORS_TERMS: dict[str, list[str]] = {
|
|
|
154
154
|
# The minimum number of indicator hits to avoid FP detections
|
|
155
155
|
OCR_INDICATORS_THRESHOLD: Dict[str, int] = {"ransomware": 2, "macros": 2, "banned": 1, "password": 1}
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
ocr_config:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
terms
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
157
|
+
def update_ocr_config(ocr_config: Dict[str, Union[List[str], Dict[str, Any]]] = None):
|
|
158
|
+
global OCR_INDICATORS_TERMS
|
|
159
|
+
global OCR_INDICATORS_THRESHOLD
|
|
160
|
+
if not ocr_config:
|
|
161
|
+
try:
|
|
162
|
+
# Retrieve service-configured OCR settings on module load (primary used in testing)
|
|
163
|
+
ocr_config: Dict = get_service_manifest().get("config", {}).get("ocr", {})
|
|
164
|
+
except Exception:
|
|
165
|
+
# No configuration updates provided
|
|
166
|
+
return
|
|
167
|
+
|
|
168
|
+
indicators = set(list(OCR_INDICATORS_TERMS.keys()) + list(ocr_config.keys()))
|
|
169
|
+
# Iterate over the different indicators and include lines of detection in response
|
|
170
|
+
for indicator in indicators:
|
|
171
|
+
indicator_config = ocr_config.get(indicator)
|
|
172
|
+
terms = OCR_INDICATORS_TERMS.get(indicator, [])
|
|
173
|
+
hit_threshold = OCR_INDICATORS_THRESHOLD.get(indicator, 1)
|
|
174
|
+
# Backwards compatibility: Check how the OCR configuration is formatted
|
|
175
|
+
if not indicator_config:
|
|
176
|
+
# Empty block/no override provided by service
|
|
177
|
+
pass
|
|
178
|
+
elif isinstance(indicator_config, list):
|
|
179
|
+
# Legacy support (before configurable indicator thresholds)
|
|
180
|
+
terms = indicator_config
|
|
181
|
+
elif isinstance(indicator_config, dict):
|
|
182
|
+
# Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
|
|
183
|
+
if indicator_config.get("terms"):
|
|
184
|
+
# Overwrite terms list with service configuration
|
|
185
|
+
terms = indicator_config["terms"]
|
|
186
|
+
else:
|
|
187
|
+
included_terms = set(indicator_config.get("include", []))
|
|
188
|
+
excluded_terms = set(indicator_config.get("exclude", []))
|
|
189
|
+
# Compute the new terms list for indicator type
|
|
190
|
+
terms = list(set(terms).union(included_terms) - excluded_terms)
|
|
191
|
+
|
|
192
|
+
# Set the indicator hit threshold
|
|
193
|
+
hit_threshold = indicator_config.get("threshold", 1)
|
|
194
|
+
|
|
195
|
+
# Overwrite key-value in respective constants
|
|
196
|
+
OCR_INDICATORS_TERMS[indicator] = terms
|
|
197
|
+
OCR_INDICATORS_THRESHOLD[indicator] = hit_threshold
|
|
195
198
|
|
|
196
199
|
|
|
197
200
|
def ocr_detections(image_path: str, ocr_io: TextIO = None) -> Dict[str, List[str]]:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
assemblyline_v4_service/VERSION,sha256=
|
|
1
|
+
assemblyline_v4_service/VERSION,sha256=s6mmrhL6AATHlUQnS9l3hT6r003DPEuWyYxn0OsQo8c,12
|
|
2
2
|
assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
assemblyline_v4_service/healthz.py,sha256=sS1cFkDLw8hUPMpj7tbHXFv8ZmHcazrwZ0l6oQDwwkQ,1575
|
|
4
4
|
assemblyline_v4_service/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -6,9 +6,9 @@ assemblyline_v4_service/run_privileged_service.py,sha256=qd4DmHo5G_Tpv8tb0A96qNf
|
|
|
6
6
|
assemblyline_v4_service/run_service.py,sha256=NiFX52NfsbBQY6E3nrjoB3e2XxIlwFcQpYYY-rADIk4,5996
|
|
7
7
|
assemblyline_v4_service/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
assemblyline_v4_service/common/api.py,sha256=Xzp8j4HCCfjPvNSGKiZl5ttH2_Itg47cjlH0NXNtth0,6849
|
|
9
|
-
assemblyline_v4_service/common/base.py,sha256=
|
|
9
|
+
assemblyline_v4_service/common/base.py,sha256=1wxzg_enESdqxloiAWCL0fUNzcKg0kAVfNNkGAfHX0U,14218
|
|
10
10
|
assemblyline_v4_service/common/helper.py,sha256=xs9quuf-M1JOdKieBqOmWaOece0CtzXFhhe85xQYmuY,3289
|
|
11
|
-
assemblyline_v4_service/common/ocr.py,sha256=
|
|
11
|
+
assemblyline_v4_service/common/ocr.py,sha256=3fV0PyY3oui_ucAM9dkolP0VRYKACKJuGY4M64DudIE,8841
|
|
12
12
|
assemblyline_v4_service/common/ontology_helper.py,sha256=QpwerYoS5hXjWzpx3Pmwv6j2330PQVYqxYGamjcpW3I,7890
|
|
13
13
|
assemblyline_v4_service/common/request.py,sha256=XXBafAQCV43_OBLXOSHxYoDHmqwERBkNul8fb_X6Ves,11774
|
|
14
14
|
assemblyline_v4_service/common/result.py,sha256=9AqM6qCYiia_Bpyn_fBFhzNQMcqJbtFSiGjp57fXW2E,32713
|
|
@@ -32,14 +32,14 @@ test/test_common/__init__.py,sha256=RkOm3vnVp5L947mD1jTo4bdOgLTZJ24_NX-kqfMn5a8,
|
|
|
32
32
|
test/test_common/test_api.py,sha256=7wlo7wgB12T23zMLbwjJ3GIomLHqE_Qvs3xkibSsR1U,4902
|
|
33
33
|
test/test_common/test_base.py,sha256=fuJSSlPxIDHq6HU1xbvaMFitw2z1spOZNHD2SJ4UUic,13346
|
|
34
34
|
test/test_common/test_helper.py,sha256=sO6YAiBhKTqaxlpLhFYDuy2ZdbuF2cg07Ylzo83ZzQs,2575
|
|
35
|
-
test/test_common/test_ocr.py,sha256=
|
|
35
|
+
test/test_common/test_ocr.py,sha256=mt_PgElgwQKJmNrp2nRVx9NjfMedVk40I6IV317vATI,1753
|
|
36
36
|
test/test_common/test_ontology_helper.py,sha256=KhHEBg_ecJyQbDw79NMT4FzUyA4C1Aak3HEQCwBfM2s,7914
|
|
37
37
|
test/test_common/test_request.py,sha256=PPhHfrwpwMdNZ33P1Z_0h1Zaz9ao9VFiDr_MJrBS3Lg,11492
|
|
38
38
|
test/test_common/test_result.py,sha256=b96bCfyW0ukdTcCsl01jS_l5YhfzXFVYs_VPOwz7IEU,41982
|
|
39
39
|
test/test_common/test_task.py,sha256=jnfF68EgJIu30Pz_4jiJHkncfI-3XpGaut5r79KIXOA,18718
|
|
40
40
|
test/test_common/test_utils.py,sha256=TbnBxqpS_ZC5ptXR9XJX3xtbItD0mTbtiBxxdyP8J5k,5904
|
|
41
|
-
assemblyline_v4_service-4.5.1.dev76.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
|
|
42
|
-
assemblyline_v4_service-4.5.1.
|
|
43
|
-
assemblyline_v4_service-4.5.1.dev76.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
44
|
-
assemblyline_v4_service-4.5.1.dev76.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
|
|
45
|
-
assemblyline_v4_service-4.5.1.dev76.dist-info/RECORD,,
|
|
41
|
+
assemblyline_v4_service-4.5.1.dev77.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
|
|
42
|
+
assemblyline_v4_service-4.5.1.dev77.dist-info/METADATA,sha256=MGwF9WTfmHQfFbCpz0QiOFpdphkhqG7xuOAVdkuRxDY,9498
|
|
43
|
+
assemblyline_v4_service-4.5.1.dev77.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
44
|
+
assemblyline_v4_service-4.5.1.dev77.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
|
|
45
|
+
assemblyline_v4_service-4.5.1.dev77.dist-info/RECORD,,
|
test/test_common/test_ocr.py
CHANGED
|
@@ -3,10 +3,11 @@ from test.test_common import TESSERACT_LIST
|
|
|
3
3
|
|
|
4
4
|
import pytest
|
|
5
5
|
|
|
6
|
-
from assemblyline_v4_service.common.ocr import ocr_detections, detections
|
|
6
|
+
from assemblyline_v4_service.common.ocr import ocr_detections, detections, update_ocr_config
|
|
7
7
|
|
|
8
8
|
@pytest.mark.skipif(len(TESSERACT_LIST) < 1, reason="Requires tesseract-ocr apt package")
|
|
9
9
|
def test_ocr_detections():
|
|
10
|
+
update_ocr_config()
|
|
10
11
|
file_path = os.path.join(os.path.dirname(__file__), "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
|
|
11
12
|
assert ocr_detections(file_path) == {
|
|
12
13
|
'ransomware': [
|
|
@@ -27,6 +28,8 @@ def test_ocr_detections():
|
|
|
27
28
|
|
|
28
29
|
|
|
29
30
|
def test_detections():
|
|
31
|
+
update_ocr_config()
|
|
32
|
+
|
|
30
33
|
# No detection
|
|
31
34
|
assert detections("blah") == {}
|
|
32
35
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|