assemblyline-v4-service 4.5.1.dev75__py3-none-any.whl → 4.5.1.dev77__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 4.5.1.dev75
1
+ 4.5.1.dev77
@@ -19,6 +19,7 @@ from assemblyline.odm.messages.task import Task as ServiceTask
19
19
  from assemblyline_v4_service.common import helper
20
20
  from assemblyline_v4_service.common.api import PrivilegedServiceAPI, ServiceAPI
21
21
  from assemblyline_v4_service.common.ontology_helper import OntologyHelper
22
+ from assemblyline_v4_service.common.ocr import update_ocr_config
22
23
  from assemblyline_v4_service.common.request import ServiceRequest
23
24
  from assemblyline_v4_service.common.task import Task
24
25
 
@@ -84,6 +85,10 @@ class ServiceBase:
84
85
  self.rules_hash: str = None
85
86
  self.signatures_meta: dict = {}
86
87
 
88
+ # OCR-related
89
+ if self.config.get('ocr'):
90
+ update_ocr_config(self.config['ocr'])
91
+
87
92
  @property
88
93
  def api_interface(self):
89
94
  return self.get_api_interface()
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Dict, List, TextIO
3
+ from typing import Any, Dict, List, TextIO, Union
4
4
 
5
5
  from assemblyline_v4_service.common.helper import get_service_manifest
6
6
  from assemblyline_v4_service.common.utils import PASSWORD_WORDS
@@ -154,44 +154,47 @@ OCR_INDICATORS_TERMS: dict[str, list[str]] = {
154
154
  # The minimum number of indicator hits to avoid FP detections
155
155
  OCR_INDICATORS_THRESHOLD: Dict[str, int] = {"ransomware": 2, "macros": 2, "banned": 1, "password": 1}
156
156
 
157
- # Pre-compute the OCR_INDICATOR_* constants on module load so we only load the manifest once rather than per OCR request
158
- try:
159
- # Retrieve service-configured OCR settings on module load
160
- ocr_config: Dict = get_service_manifest().get("config", {}).get("ocr", {})
161
- except Exception:
162
- # Service manifest not found
163
- ocr_config = {}
164
-
165
- indicators = set(list(OCR_INDICATORS_TERMS.keys()) + list(ocr_config.keys()))
166
- # Iterate over the different indicators and include lines of detection in response
167
- for indicator in indicators:
168
- indicator_config = ocr_config.get(indicator)
169
- terms = OCR_INDICATORS_TERMS.get(indicator, [])
170
- hit_threshold = OCR_INDICATORS_THRESHOLD.get(indicator, 1)
171
- # Backwards compatibility: Check how the OCR configuration is formatted
172
- if not indicator_config:
173
- # Empty block/no override provided by service
174
- pass
175
- elif isinstance(indicator_config, list):
176
- # Legacy support (before configurable indicator thresholds)
177
- terms = indicator_config
178
- elif isinstance(indicator_config, dict):
179
- # Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
180
- if indicator_config.get("terms"):
181
- # Overwrite terms list with service configuration
182
- terms = indicator_config["terms"]
183
- else:
184
- included_terms = set(indicator_config.get("include", []))
185
- excluded_terms = set(indicator_config.get("exclude", []))
186
- # Compute the new terms list for indicator type
187
- terms = list(set(terms).union(included_terms) - excluded_terms)
188
-
189
- # Set the indicator hit threshold
190
- hit_threshold = indicator_config.get("threshold", 1)
191
-
192
- # Overwrite key-value in respective constants
193
- OCR_INDICATORS_TERMS[indicator] = terms
194
- OCR_INDICATORS_THRESHOLD[indicator] = hit_threshold
157
+ def update_ocr_config(ocr_config: Dict[str, Union[List[str], Dict[str, Any]]] = None):
158
+ global OCR_INDICATORS_TERMS
159
+ global OCR_INDICATORS_THRESHOLD
160
+ if not ocr_config:
161
+ try:
162
+ # Retrieve service-configured OCR settings on module load (primary used in testing)
163
+ ocr_config: Dict = get_service_manifest().get("config", {}).get("ocr", {})
164
+ except Exception:
165
+ # No configuration updates provided
166
+ return
167
+
168
+ indicators = set(list(OCR_INDICATORS_TERMS.keys()) + list(ocr_config.keys()))
169
+ # Iterate over the different indicators and include lines of detection in response
170
+ for indicator in indicators:
171
+ indicator_config = ocr_config.get(indicator)
172
+ terms = OCR_INDICATORS_TERMS.get(indicator, [])
173
+ hit_threshold = OCR_INDICATORS_THRESHOLD.get(indicator, 1)
174
+ # Backwards compatibility: Check how the OCR configuration is formatted
175
+ if not indicator_config:
176
+ # Empty block/no override provided by service
177
+ pass
178
+ elif isinstance(indicator_config, list):
179
+ # Legacy support (before configurable indicator thresholds)
180
+ terms = indicator_config
181
+ elif isinstance(indicator_config, dict):
182
+ # Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
183
+ if indicator_config.get("terms"):
184
+ # Overwrite terms list with service configuration
185
+ terms = indicator_config["terms"]
186
+ else:
187
+ included_terms = set(indicator_config.get("include", []))
188
+ excluded_terms = set(indicator_config.get("exclude", []))
189
+ # Compute the new terms list for indicator type
190
+ terms = list(set(terms).union(included_terms) - excluded_terms)
191
+
192
+ # Set the indicator hit threshold
193
+ hit_threshold = indicator_config.get("threshold", 1)
194
+
195
+ # Overwrite key-value in respective constants
196
+ OCR_INDICATORS_TERMS[indicator] = terms
197
+ OCR_INDICATORS_THRESHOLD[indicator] = hit_threshold
195
198
 
196
199
 
197
200
  def ocr_detections(image_path: str, ocr_io: TextIO = None) -> Dict[str, List[str]]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-v4-service
3
- Version: 4.5.1.dev75
3
+ Version: 4.5.1.dev77
4
4
  Summary: Assemblyline 4 - Service base
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-v4-service/
6
6
  Author: CCCS Assemblyline development team
@@ -1,4 +1,4 @@
1
- assemblyline_v4_service/VERSION,sha256=JZHoM4NWVTRtlPRRggVpGFHenhb-c_AXFCT7sYtWPQQ,12
1
+ assemblyline_v4_service/VERSION,sha256=s6mmrhL6AATHlUQnS9l3hT6r003DPEuWyYxn0OsQo8c,12
2
2
  assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  assemblyline_v4_service/healthz.py,sha256=sS1cFkDLw8hUPMpj7tbHXFv8ZmHcazrwZ0l6oQDwwkQ,1575
4
4
  assemblyline_v4_service/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -6,9 +6,9 @@ assemblyline_v4_service/run_privileged_service.py,sha256=qd4DmHo5G_Tpv8tb0A96qNf
6
6
  assemblyline_v4_service/run_service.py,sha256=NiFX52NfsbBQY6E3nrjoB3e2XxIlwFcQpYYY-rADIk4,5996
7
7
  assemblyline_v4_service/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  assemblyline_v4_service/common/api.py,sha256=Xzp8j4HCCfjPvNSGKiZl5ttH2_Itg47cjlH0NXNtth0,6849
9
- assemblyline_v4_service/common/base.py,sha256=mKkkzbxVL_wVMy_VieU9mlHYLqZXndga_4dWWbnEnx8,14045
9
+ assemblyline_v4_service/common/base.py,sha256=1wxzg_enESdqxloiAWCL0fUNzcKg0kAVfNNkGAfHX0U,14218
10
10
  assemblyline_v4_service/common/helper.py,sha256=xs9quuf-M1JOdKieBqOmWaOece0CtzXFhhe85xQYmuY,3289
11
- assemblyline_v4_service/common/ocr.py,sha256=dzx5n9fzOHCVX3NnGKONpGAF3Cq16hw_y7M7g_nsJ7A,8588
11
+ assemblyline_v4_service/common/ocr.py,sha256=3fV0PyY3oui_ucAM9dkolP0VRYKACKJuGY4M64DudIE,8841
12
12
  assemblyline_v4_service/common/ontology_helper.py,sha256=QpwerYoS5hXjWzpx3Pmwv6j2330PQVYqxYGamjcpW3I,7890
13
13
  assemblyline_v4_service/common/request.py,sha256=XXBafAQCV43_OBLXOSHxYoDHmqwERBkNul8fb_X6Ves,11774
14
14
  assemblyline_v4_service/common/result.py,sha256=9AqM6qCYiia_Bpyn_fBFhzNQMcqJbtFSiGjp57fXW2E,32713
@@ -32,14 +32,14 @@ test/test_common/__init__.py,sha256=RkOm3vnVp5L947mD1jTo4bdOgLTZJ24_NX-kqfMn5a8,
32
32
  test/test_common/test_api.py,sha256=7wlo7wgB12T23zMLbwjJ3GIomLHqE_Qvs3xkibSsR1U,4902
33
33
  test/test_common/test_base.py,sha256=fuJSSlPxIDHq6HU1xbvaMFitw2z1spOZNHD2SJ4UUic,13346
34
34
  test/test_common/test_helper.py,sha256=sO6YAiBhKTqaxlpLhFYDuy2ZdbuF2cg07Ylzo83ZzQs,2575
35
- test/test_common/test_ocr.py,sha256=nel1GCkieDRW2F_6kYCbkIwB9Kwj_d2rJBgb8VZWXS8,1685
35
+ test/test_common/test_ocr.py,sha256=mt_PgElgwQKJmNrp2nRVx9NjfMedVk40I6IV317vATI,1753
36
36
  test/test_common/test_ontology_helper.py,sha256=KhHEBg_ecJyQbDw79NMT4FzUyA4C1Aak3HEQCwBfM2s,7914
37
37
  test/test_common/test_request.py,sha256=PPhHfrwpwMdNZ33P1Z_0h1Zaz9ao9VFiDr_MJrBS3Lg,11492
38
38
  test/test_common/test_result.py,sha256=b96bCfyW0ukdTcCsl01jS_l5YhfzXFVYs_VPOwz7IEU,41982
39
39
  test/test_common/test_task.py,sha256=jnfF68EgJIu30Pz_4jiJHkncfI-3XpGaut5r79KIXOA,18718
40
40
  test/test_common/test_utils.py,sha256=TbnBxqpS_ZC5ptXR9XJX3xtbItD0mTbtiBxxdyP8J5k,5904
41
- assemblyline_v4_service-4.5.1.dev75.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
- assemblyline_v4_service-4.5.1.dev75.dist-info/METADATA,sha256=G0uwI6VSb0U0defzTlpY1uKuw0qji1s4OGUTxvPuZig,9498
43
- assemblyline_v4_service-4.5.1.dev75.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
44
- assemblyline_v4_service-4.5.1.dev75.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
- assemblyline_v4_service-4.5.1.dev75.dist-info/RECORD,,
41
+ assemblyline_v4_service-4.5.1.dev77.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
+ assemblyline_v4_service-4.5.1.dev77.dist-info/METADATA,sha256=MGwF9WTfmHQfFbCpz0QiOFpdphkhqG7xuOAVdkuRxDY,9498
43
+ assemblyline_v4_service-4.5.1.dev77.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
44
+ assemblyline_v4_service-4.5.1.dev77.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
+ assemblyline_v4_service-4.5.1.dev77.dist-info/RECORD,,
@@ -3,10 +3,11 @@ from test.test_common import TESSERACT_LIST
3
3
 
4
4
  import pytest
5
5
 
6
- from assemblyline_v4_service.common.ocr import ocr_detections, detections
6
+ from assemblyline_v4_service.common.ocr import ocr_detections, detections, update_ocr_config
7
7
 
8
8
  @pytest.mark.skipif(len(TESSERACT_LIST) < 1, reason="Requires tesseract-ocr apt package")
9
9
  def test_ocr_detections():
10
+ update_ocr_config()
10
11
  file_path = os.path.join(os.path.dirname(__file__), "b32969aa664e3905c20f865cdd7b921f922678f5c3850c78e4c803fbc1757a8e")
11
12
  assert ocr_detections(file_path) == {
12
13
  'ransomware': [
@@ -27,6 +28,8 @@ def test_ocr_detections():
27
28
 
28
29
 
29
30
  def test_detections():
31
+ update_ocr_config()
32
+
30
33
  # No detection
31
34
  assert detections("blah") == {}
32
35