assemblyline-v4-service 4.5.0.11__py3-none-any.whl → 4.5.0.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 4.5.0.11
1
+ 4.5.0.13
@@ -19,6 +19,7 @@ from assemblyline.odm.messages.task import Task as ServiceTask
19
19
  from assemblyline_v4_service.common import helper
20
20
  from assemblyline_v4_service.common.api import PrivilegedServiceAPI, ServiceAPI
21
21
  from assemblyline_v4_service.common.ontology_helper import OntologyHelper
22
+ from assemblyline_v4_service.common.ocr import update_ocr_config
22
23
  from assemblyline_v4_service.common.request import ServiceRequest
23
24
  from assemblyline_v4_service.common.task import Task
24
25
 
@@ -84,6 +85,10 @@ class ServiceBase:
84
85
  self.rules_hash: str = None
85
86
  self.signatures_meta: dict = {}
86
87
 
88
+ # OCR-related
89
+ if self.config.get('ocr'):
90
+ update_ocr_config(self.config['ocr'])
91
+
87
92
  @property
88
93
  def api_interface(self):
89
94
  return self.get_api_interface()
@@ -1,11 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Dict, List, TextIO
3
+ from typing import Any, Dict, List, TextIO, Union
4
4
 
5
5
  from assemblyline_v4_service.common.helper import get_service_manifest
6
6
  from assemblyline_v4_service.common.utils import PASSWORD_WORDS
7
7
 
8
- # TODO: Would prefer this mapping to be dynamic from trusted sources (ie. import from library), but will copy-paste for now
8
+ # The terms related to each indicator category
9
9
  OCR_INDICATORS_TERMS: dict[str, list[str]] = {
10
10
  "ransomware": [
11
11
  # https://github.com/cuckoosandbox/community/blob/master/modules/signatures/windows/ransomware_message.py
@@ -154,6 +154,48 @@ OCR_INDICATORS_TERMS: dict[str, list[str]] = {
154
154
  # The minimum number of indicator hits to avoid FP detections
155
155
  OCR_INDICATORS_THRESHOLD: Dict[str, int] = {"ransomware": 2, "macros": 2, "banned": 1, "password": 1}
156
156
 
157
+ def update_ocr_config(ocr_config: Dict[str, Union[List[str], Dict[str, Any]]] = None):
158
+ global OCR_INDICATORS_TERMS
159
+ global OCR_INDICATORS_THRESHOLD
160
+ if not ocr_config:
161
+ try:
162
+ # Retrieve service-configured OCR settings on module load (primary used in testing)
163
+ ocr_config: Dict = get_service_manifest().get("config", {}).get("ocr", {})
164
+ except Exception:
165
+ # No configuration updates provided
166
+ return
167
+
168
+ indicators = set(list(OCR_INDICATORS_TERMS.keys()) + list(ocr_config.keys()))
169
+ # Iterate over the different indicators and include lines of detection in response
170
+ for indicator in indicators:
171
+ indicator_config = ocr_config.get(indicator)
172
+ terms = OCR_INDICATORS_TERMS.get(indicator, [])
173
+ hit_threshold = OCR_INDICATORS_THRESHOLD.get(indicator, 1)
174
+ # Backwards compatibility: Check how the OCR configuration is formatted
175
+ if not indicator_config:
176
+ # Empty block/no override provided by service
177
+ pass
178
+ elif isinstance(indicator_config, list):
179
+ # Legacy support (before configurable indicator thresholds)
180
+ terms = indicator_config
181
+ elif isinstance(indicator_config, dict):
182
+ # Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
183
+ if indicator_config.get("terms"):
184
+ # Overwrite terms list with service configuration
185
+ terms = indicator_config["terms"]
186
+ else:
187
+ included_terms = set(indicator_config.get("include", []))
188
+ excluded_terms = set(indicator_config.get("exclude", []))
189
+ # Compute the new terms list for indicator type
190
+ terms = list(set(terms).union(included_terms) - excluded_terms)
191
+
192
+ # Set the indicator hit threshold
193
+ hit_threshold = indicator_config.get("threshold", 1)
194
+
195
+ # Overwrite key-value in respective constants
196
+ OCR_INDICATORS_TERMS[indicator] = terms
197
+ OCR_INDICATORS_THRESHOLD[indicator] = hit_threshold
198
+
157
199
 
158
200
  def ocr_detections(image_path: str, ocr_io: TextIO = None) -> Dict[str, List[str]]:
159
201
  try:
@@ -185,57 +227,25 @@ def ocr_detections(image_path: str, ocr_io: TextIO = None) -> Dict[str, List[str
185
227
 
186
228
 
187
229
  def detections(ocr_output: str) -> Dict[str, List[str]]:
188
- try:
189
- # Retrieve service-configured OCR settings on module load
190
- ocr_config: Dict = get_service_manifest().get("config", {}).get("ocr", {})
191
- except Exception:
192
- # Service manifest not found
193
- ocr_config = {}
194
-
195
- indicators = set(list(OCR_INDICATORS_TERMS.keys()) + list(ocr_config.keys()))
196
230
  detection_output: Dict[str, List[str]] = {}
197
- # Iterate over the different indicators and include lines of detection in response
198
- for indicator in indicators:
199
- indicator_config = ocr_config.get(indicator)
200
- terms = OCR_INDICATORS_TERMS.get(indicator, [])
201
- hit_threshold = OCR_INDICATORS_THRESHOLD.get(indicator, 1)
202
- # Backwards compatibility: Check how the OCR configuration is formatted
203
- if not indicator_config:
204
- # Empty block/no override provided by service
205
- pass
206
- elif isinstance(indicator_config, list):
207
- # Legacy support (before configurable indicator thresholds)
208
- terms = indicator_config
209
- elif isinstance(indicator_config, dict):
210
- # Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
211
- if indicator_config.get("terms"):
212
- # Overwrite terms list with service configuration
213
- terms = indicator_config["terms"]
214
- else:
215
- included_terms = set(indicator_config.get("include", []))
216
- excluded_terms = set(indicator_config.get("exclude", []))
217
- # Compute the new terms list for indicator type
218
- terms = list(set(terms).union(included_terms) - excluded_terms)
219
-
220
- # Set the indicator hit threshold
221
- hit_threshold = indicator_config.get("threshold", 1)
222
-
231
+ for indicator, terms in OCR_INDICATORS_TERMS.items():
232
+ hit_threshold = OCR_INDICATORS_THRESHOLD[indicator]
223
233
  # Perform a pre-check to see if the terms even exist in the OCR text
224
234
  if not any([t.lower() in ocr_output.lower() for t in terms]):
225
235
  continue
226
236
 
227
237
  # Keep a track of the hits and the lines corresponding with term hits
228
- indicator_hits: int = 0
238
+ indicator_hits: set = set()
229
239
  list_of_strings: List[str] = []
230
240
  for line in ocr_output.split("\n"):
231
241
  for t in terms:
232
242
  term_count = line.lower().count(t.lower())
233
243
  if term_count:
234
- indicator_hits += term_count
244
+ indicator_hits.add(t)
235
245
  if line not in list_of_strings:
236
246
  list_of_strings.append(line)
237
247
 
238
- if list_of_strings and indicator_hits >= hit_threshold:
248
+ if list_of_strings and len(indicator_hits) >= hit_threshold:
239
249
  # If we were to find hits and those hits are above the required threshold, then add them to output
240
250
  detection_output[indicator] = list_of_strings
241
251
  return detection_output
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-v4-service
3
- Version: 4.5.0.11
3
+ Version: 4.5.0.13
4
4
  Summary: Assemblyline 4 - Service base
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-v4-service/
6
6
  Author: CCCS Assemblyline development team
@@ -1,4 +1,4 @@
1
- assemblyline_v4_service/VERSION,sha256=1_9OL3qv21zjqWhS4QZ0yZBiBfz5-VmY3lDxRoxFMV4,9
1
+ assemblyline_v4_service/VERSION,sha256=VuBzD9mQzFaZYAMCWM9b0vxAyjHIbKMlmPzW3unLZFw,9
2
2
  assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  assemblyline_v4_service/healthz.py,sha256=3QGBg0EZuXC6UN411HFwpLNEop9UvS9feFhvBUTP-k4,1576
4
4
  assemblyline_v4_service/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -6,9 +6,9 @@ assemblyline_v4_service/run_privileged_service.py,sha256=qd4DmHo5G_Tpv8tb0A96qNf
6
6
  assemblyline_v4_service/run_service.py,sha256=NiFX52NfsbBQY6E3nrjoB3e2XxIlwFcQpYYY-rADIk4,5996
7
7
  assemblyline_v4_service/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  assemblyline_v4_service/common/api.py,sha256=Xzp8j4HCCfjPvNSGKiZl5ttH2_Itg47cjlH0NXNtth0,6849
9
- assemblyline_v4_service/common/base.py,sha256=mKkkzbxVL_wVMy_VieU9mlHYLqZXndga_4dWWbnEnx8,14045
9
+ assemblyline_v4_service/common/base.py,sha256=1wxzg_enESdqxloiAWCL0fUNzcKg0kAVfNNkGAfHX0U,14218
10
10
  assemblyline_v4_service/common/helper.py,sha256=xs9quuf-M1JOdKieBqOmWaOece0CtzXFhhe85xQYmuY,3289
11
- assemblyline_v4_service/common/ocr.py,sha256=A8OnjpEor-S3OUC_jZzJ-Er3KKAsMdQLEXTtMS81Xbk,8397
11
+ assemblyline_v4_service/common/ocr.py,sha256=3fV0PyY3oui_ucAM9dkolP0VRYKACKJuGY4M64DudIE,8841
12
12
  assemblyline_v4_service/common/ontology_helper.py,sha256=QpwerYoS5hXjWzpx3Pmwv6j2330PQVYqxYGamjcpW3I,7890
13
13
  assemblyline_v4_service/common/request.py,sha256=NxtWxp8-ttC72i3Vnchc3fZTRKqPQAoMC4KAVwEj8-4,11714
14
14
  assemblyline_v4_service/common/result.py,sha256=9AqM6qCYiia_Bpyn_fBFhzNQMcqJbtFSiGjp57fXW2E,32713
@@ -28,18 +28,18 @@ test/conftest.py,sha256=W3SieQpZsZpGEmtLqY4aIlxREDSsHceyCrFcFsWUM0U,1851
28
28
  test/test_healthz.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
29
29
  test/test_run_privileged_service.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
30
30
  test/test_run_service.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
31
- test/test_common/__init__.py,sha256=v3__rzWUBW0Smc2rFwfFR9WehQHM_uBTIFCdC_We3tA,1319
31
+ test/test_common/__init__.py,sha256=RkOm3vnVp5L947mD1jTo4bdOgLTZJ24_NX-kqfMn5a8,1259
32
32
  test/test_common/test_api.py,sha256=7wlo7wgB12T23zMLbwjJ3GIomLHqE_Qvs3xkibSsR1U,4902
33
- test/test_common/test_base.py,sha256=d61an3lRaes_cx0AOPMNFMVWxTOjilrcGpuVP0dqTvM,13198
33
+ test/test_common/test_base.py,sha256=fuJSSlPxIDHq6HU1xbvaMFitw2z1spOZNHD2SJ4UUic,13346
34
34
  test/test_common/test_helper.py,sha256=sO6YAiBhKTqaxlpLhFYDuy2ZdbuF2cg07Ylzo83ZzQs,2575
35
- test/test_common/test_ocr.py,sha256=s5kL0vKjLmbyXuCg-9v6V6wQwwFRu0w2hYpjG0_BXy4,1874
36
- test/test_common/test_ontology_helper.py,sha256=TuvTeP9BTRqklOlsLu_yMdN9wdPWlVAENx2JqUe9a-A,7856
37
- test/test_common/test_request.py,sha256=K-gaHe20AUztfU3pLdWYv5bc6088GBBKP-eb833cis0,11729
38
- test/test_common/test_result.py,sha256=Wm0Cs5kZRzlZr0jL-l8OTsYAvkoN2eaB3NkeXzvyssI,42208
35
+ test/test_common/test_ocr.py,sha256=mt_PgElgwQKJmNrp2nRVx9NjfMedVk40I6IV317vATI,1753
36
+ test/test_common/test_ontology_helper.py,sha256=KhHEBg_ecJyQbDw79NMT4FzUyA4C1Aak3HEQCwBfM2s,7914
37
+ test/test_common/test_request.py,sha256=VzWw-NZKTRIXKwCACIT2SSHE70HMZ3IeBk1AJiT-cjU,11472
38
+ test/test_common/test_result.py,sha256=b96bCfyW0ukdTcCsl01jS_l5YhfzXFVYs_VPOwz7IEU,41982
39
39
  test/test_common/test_task.py,sha256=RWsGEN0L-xKoRAsGv2x4JZcSAySS9aBF4dwL9t7tepo,18668
40
40
  test/test_common/test_utils.py,sha256=TbnBxqpS_ZC5ptXR9XJX3xtbItD0mTbtiBxxdyP8J5k,5904
41
- assemblyline_v4_service-4.5.0.11.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
- assemblyline_v4_service-4.5.0.11.dist-info/METADATA,sha256=DLdPO96aAuV6e-qQibM_zwEqYA5WY5mbUqOJfaj6Tz8,9495
43
- assemblyline_v4_service-4.5.0.11.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
44
- assemblyline_v4_service-4.5.0.11.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
- assemblyline_v4_service-4.5.0.11.dist-info/RECORD,,
41
+ assemblyline_v4_service-4.5.0.13.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
+ assemblyline_v4_service-4.5.0.13.dist-info/METADATA,sha256=ww3G0mD7HZ9MsPJxRQGyMQm6-AaGDOfNVZrutQ63-fg,9495
43
+ assemblyline_v4_service-4.5.0.13.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
44
+ assemblyline_v4_service-4.5.0.13.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
+ assemblyline_v4_service-4.5.0.13.dist-info/RECORD,,
@@ -3,8 +3,7 @@ import subprocess
3
3
 
4
4
  from assemblyline.common.version import FRAMEWORK_VERSION, SYSTEM_VERSION
5
5
 
6
- SERVICE_CONFIG_NAME = "service_manifest.yml"
7
- TEMP_SERVICE_CONFIG_PATH = os.path.join("/tmp", SERVICE_CONFIG_NAME)
6
+ TEMP_SERVICE_CONFIG_PATH ="/tmp/service_manifest.yml"
8
7
 
9
8
  ret = subprocess.run("dpkg -l | grep ^ii | awk '{print $2}' | grep -i 'tesseract'", capture_output=True, shell=True)
10
9
  TESSERACT_LIST = list(filter(None, ret.stdout.decode().split('\n')))
@@ -3,6 +3,12 @@ import os
3
3
  import time
4
4
  from logging import Logger
5
5
 
6
+
7
+ from test.test_common import setup_module
8
+
9
+ # Ensure service manifest is instantiated before loading assemblyline_v4_service module
10
+ setup_module()
11
+
6
12
  import pytest
7
13
  import requests_mock
8
14
  from assemblyline_v4_service.common.base import *
@@ -1,16 +1,14 @@
1
1
  import os
2
- from test.test_common import TESSERACT_LIST, setup_module
2
+ from test.test_common import TESSERACT_LIST
3
3
 
4
4
  import pytest
5
- from assemblyline_v4_service.common.ocr import *
6
5
 
6
+ from assemblyline_v4_service.common.ocr import ocr_detections, detections, update_ocr_config
7
7
 
8
8
  @pytest.mark.skipif(len(TESSERACT_LIST) < 1, reason="Requires tesseract-ocr apt package")
9
9
  def test_ocr_detections():
10
- if os.getcwd().endswith("/test"):
11
- file_path = os.path.join(os.getcwd(), "test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
12
- else:
13
- file_path = os.path.join(os.getcwd(), "test/test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
10
+ update_ocr_config()
11
+ file_path = os.path.join(os.path.dirname(__file__), "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
14
12
  assert ocr_detections(file_path) == {
15
13
  'ransomware': [
16
14
  "YOUR FILES HAVE BEEN ENCRYPTED AND YOU WON'T BE ABLE TO "
@@ -30,7 +28,8 @@ def test_ocr_detections():
30
28
 
31
29
 
32
30
  def test_detections():
33
- setup_module()
31
+ update_ocr_config()
32
+
34
33
  # No detection
35
34
  assert detections("blah") == {}
36
35
 
@@ -3,6 +3,9 @@ import logging
3
3
  import os
4
4
  import tempfile
5
5
 
6
+ from test.test_common import setup_module
7
+ setup_module()
8
+
6
9
  import pytest
7
10
  from assemblyline_v4_service.common.ontology_helper import *
8
11
  from assemblyline_v4_service.common.result import ResultSection
@@ -1,7 +1,7 @@
1
1
  import os
2
2
  import tempfile
3
3
  from logging import Logger
4
- from test.test_common import TESSERACT_LIST
4
+ from test.test_common import TESSERACT_LIST, setup_module
5
5
 
6
6
  import pytest
7
7
  from assemblyline_v4_service.common.request import ServiceRequest
@@ -10,6 +10,8 @@ from assemblyline_v4_service.common.task import MaxExtractedExceeded, Task
10
10
 
11
11
  from assemblyline.odm.messages.task import Task as ServiceTask
12
12
 
13
+ # Ensure service manifest is instantiated before importing from OCR submodule
14
+ setup_module()
13
15
 
14
16
  @pytest.fixture
15
17
  def service_request():
@@ -105,10 +107,7 @@ def test_add_extracted(service_request):
105
107
 
106
108
  @pytest.mark.skipif(len(TESSERACT_LIST) < 1, reason="Requires tesseract-ocr apt package")
107
109
  def test_add_image(service_request):
108
- if os.getcwd().endswith("/test"):
109
- image_path = os.path.join(os.getcwd(), "test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
110
- else:
111
- image_path = os.path.join(os.getcwd(), "test/test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
110
+ image_path = os.path.join(os.path.dirname(__file__), "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
112
111
 
113
112
  # Basic
114
113
  assert service_request.add_image(image_path, "image_name", "description of image") == {
@@ -196,10 +195,7 @@ def test_add_image(service_request):
196
195
  service_request.task.supplementary.clear()
197
196
 
198
197
  # Classification, OCR heuristic, OCR_IO, image with password
199
- if os.getcwd().endswith("/test"):
200
- image_path = os.path.join(os.getcwd(), "test_common/4031ed8786439eee24b87f84901e38038a76b8c55e9d87dd5a7d88df2806c1cf")
201
- else:
202
- image_path = os.path.join(os.getcwd(), "test/test_common/4031ed8786439eee24b87f84901e38038a76b8c55e9d87dd5a7d88df2806c1cf")
198
+ image_path = os.path.join(os.path.dirname(__file__), "4031ed8786439eee24b87f84901e38038a76b8c55e9d87dd5a7d88df2806c1cf")
203
199
  _, path = tempfile.mkstemp()
204
200
  ocr_io = open(path, "w")
205
201
  data = service_request.add_image(image_path, "image_name", "description of image", "TLP:A", ocr_heuristic_id, ocr_io)
@@ -1,6 +1,10 @@
1
1
  import os
2
2
  import tempfile
3
- from test.test_common import TESSERACT_LIST
3
+ from test.test_common import TESSERACT_LIST, setup_module
4
+
5
+ # Ensure service manifest is instantiated before importing from OCR submodule
6
+ setup_module()
7
+
4
8
 
5
9
  import pytest
6
10
  from assemblyline_v4_service.common.request import ServiceRequest
@@ -9,7 +13,6 @@ from assemblyline_v4_service.common.task import Task
9
13
 
10
14
  from assemblyline.odm.messages.task import Task as ServiceTask
11
15
 
12
-
13
16
  @pytest.fixture
14
17
  def heuristic():
15
18
  return Heuristic(1)
@@ -592,10 +595,7 @@ def test_imagesectionbody_init(service_request):
592
595
  @pytest.mark.skipif(len(TESSERACT_LIST) < 1, reason="Requires tesseract-ocr apt package")
593
596
  def test_imagesectionbody_add_image(service_request):
594
597
  isb = ImageSectionBody(service_request)
595
- if os.getcwd().endswith("/test"):
596
- image_path = os.path.join(os.getcwd(), "test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
597
- else:
598
- image_path = os.path.join(os.getcwd(), "test/test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
598
+ image_path = os.path.join(os.path.dirname(__file__), "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
599
599
 
600
600
  # Basic
601
601
  assert isb.add_image(image_path, "image_name", "description of image") is None
@@ -1230,10 +1230,8 @@ def test_resultimagesection_init(service_request):
1230
1230
  def test_resultimagesection_add_image(service_request):
1231
1231
  ris = ResultImageSection(service_request, "title_text_as_str")
1232
1232
 
1233
- if os.getcwd().endswith("/test"):
1234
- image_path = os.path.join(os.getcwd(), "test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
1235
- else:
1236
- image_path = os.path.join(os.getcwd(), "test/test_common/b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
1233
+ image_path = os.path.join(os.path.dirname(__file__),
1234
+ "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
1237
1235
 
1238
1236
  # Basic
1239
1237
  assert ris.add_image(image_path, "image_name", "description of image") is None