assemblyline-v4-service 4.5.1.dev10__py3-none-any.whl → 4.5.1.dev46__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 4.5.1.dev10
1
+ 4.5.1.dev46
@@ -207,8 +207,17 @@ def detections(ocr_output: str) -> Dict[str, List[str]]:
207
207
  # Legacy support (before configurable indicator thresholds)
208
208
  terms = indicator_config
209
209
  elif isinstance(indicator_config, dict):
210
- # Set indicator threshold before variable overwrite with terms list
211
- terms = indicator_config.get("terms", [])
210
+ # Either you're exclusively overwriting the terms list or you're selectively including/excluding terms
211
+ if indicator_config.get("terms"):
212
+ # Overwrite terms list with service configuration
213
+ terms = indicator_config["terms"]
214
+ else:
215
+ included_terms = set(indicator_config.get("include", []))
216
+ excluded_terms = set(indicator_config.get("exclude", []))
217
+ # Compute the new terms list for indicator type
218
+ terms = list(set(terms).union(included_terms) - excluded_terms)
219
+
220
+ # Set the indicator hit threshold
212
221
  hit_threshold = indicator_config.get("threshold", 1)
213
222
 
214
223
  # Perform a pre-check to see if the terms even exist in the OCR text
@@ -1,16 +1,124 @@
1
1
  import os
2
2
 
3
+ from assemblyline.datastore.collection import ESCollection
4
+ from assemblyline.odm.models.badlist import Badlist as BadlistModel
5
+ from assemblyline.odm.models.safelist import Safelist as SafelistModel
3
6
  from assemblyline.odm.models.signature import Signature as SignatureModel
4
7
  from assemblyline_core.badlist_client import BadlistClient
5
8
  from assemblyline_core.safelist_client import SafelistClient
6
9
  from assemblyline_core.signature_client import SignatureClient
7
10
 
8
- from typing import Any, Dict, List, Union
11
+ from typing import Any, Dict, List, Optional, Set, Union
9
12
 
10
13
  SIGNATURE_UPDATE_BATCH = int(os.environ.get('SIGNATURE_UPDATE_BATCH', '1000'))
11
14
 
12
15
 
13
- class SyncableSignature(SignatureClient):
16
class SyncableBadlistClient(BadlistClient):
    """Badlist client whose bulk add/update goes through ``hashlist_add_update_many``."""

    def __init__(self, datastore, config=None):
        super().__init__(datastore, config)
        # Source synchronisation is opt-in; it stays off until a caller flips this flag
        self.sync = False

    def add_update_many(self, data: List[Union[dict, BadlistModel]]) -> Dict[str, Any]:
        """Add or update many badlist items, honouring the ``sync`` flag.

        :param data: Badlist items, either as plain dictionaries or as model objects
        :return: Whatever ``hashlist_add_update_many`` reports for the operation
        """
        badlist_collection = self.datastore.badlist
        return hashlist_add_update_many(self, badlist_collection, data)
23
+
24
+
25
class SyncableSafelistClient(SafelistClient):
    """Safelist client whose bulk add/update goes through ``hashlist_add_update_many``."""

    def __init__(self, datastore, config=None):
        super().__init__(datastore, config)
        # Source synchronisation is opt-in; it stays off until a caller flips this flag
        self.sync = False

    def add_update_many(self, data: List[Union[dict, SafelistModel]]) -> Dict[str, Any]:
        """Add or update many safelist items, honouring the ``sync`` flag.

        :param data: Safelist items, either as plain dictionaries or as model objects
        :return: Whatever ``hashlist_add_update_many`` reports for the operation
        """
        safelist_collection = self.datastore.safelist
        return hashlist_add_update_many(self, safelist_collection, data)
32
+
33
+
34
def _parent_add_update_many(client, data):
    """Run the stock ``add_update_many`` that sits behind a syncable hashlist client."""
    # ``super(client.__class__, client)`` resolves back to the syncable override as soon as a syncable
    # client is subclassed, which recurses forever. Anchor the lookup on the syncable class instead.
    for syncable_class in (SyncableBadlistClient, SyncableSafelistClient):
        if isinstance(client, syncable_class):
            return super(syncable_class, client).add_update_many(data)
    # Unknown client type: keep the historical lookup
    return super(type(client), client).add_update_many(data)


def _retire_missing_hashlist_items(collection, source, current_ids):
    """Detach ``source`` from stored items that the source no longer provides."""
    # IDs of every item currently attributed to the source in the datastore
    existing_ids = {
        hit["id"]
        for hit in collection.stream_search(f"sources.name:{source}", fl="id", as_obj=False)
    }

    for missing_id in existing_ids - current_ids:
        missing_item = collection.get(missing_id)
        if missing_item is None:
            # NOTE(review): presumably ``get`` yields None when the document vanished between the
            # search and the fetch; there is nothing left to update in that case
            continue

        original_sources = missing_item.sources
        missing_item.sources = [
            s for s in original_sources
            if not (s.type == "external" and s.name == source)
        ]

        # If there are no more sources to back this item, then disable it but leave at least one source
        if not missing_item.sources:
            missing_item.enabled = False
            missing_item.sources = original_sources[:1]

        # Update the last updated time and persist the change
        missing_item.updated = "NOW"
        collection.save(missing_id, missing_item)


def hashlist_add_update_many(client: "Union[SyncableBadlistClient, SyncableSafelistClient]",
                             collection: "ESCollection",
                             data: "List[Union[dict, BadlistModel, SafelistModel]]") -> Dict[str, Any]:
    """Add/update hashlist items and, when ``client.sync`` is set, sync the datastore with the source.

    Items that no longer exist at the source have that source removed from their ``sources`` so that
    several sources can stay synchronised independently. If no source is left backing an item, the item
    is DISABLED (one source entry is kept) and users can always re-deploy it if desired.

    :param client: Syncable badlist/safelist client; its ``sync`` flag and ``_preprocess_object`` are used
    :param collection: Datastore collection that holds the hashlist items
    :param data: Items to add/update; model objects are replaced in place by their primitive form
    :return: ``{"success": <count>, "errors": <value>}``, accumulated over all batches when batching
    """
    if not data:
        return {"success": 0, "errors": False}

    current_ids: Set[str] = set()
    source: Optional[str] = None

    # Iterate over the list of hashlist items given
    for index, item in enumerate(data):
        if isinstance(item, collection.model_class):
            item = item.as_primitives()

        if not source:
            # The source name is taken from the first item
            source = item["sources"][0]["name"]

        if client.sync:
            # Compute the expected ID and remember it
            current_ids.add(client._preprocess_object(item))

        # Update with JSON-friendly version of data to be sent to API
        data[index] = item

    if client.sync:
        _retire_missing_hashlist_items(collection, source, current_ids)

    # Proceed with adding/updating items
    if len(data) < SIGNATURE_UPDATE_BATCH:
        # Update all of them in a single batch
        return _parent_add_update_many(client, data)

    # Split up data into batches to avoid server timeouts handling requests.
    # The response keeps the same format, but shows the accumulation of all batches.
    response = {"success": 0, "errors": False}
    for start in range(0, len(data), SIGNATURE_UPDATE_BATCH):
        batch_result = _parent_add_update_many(client, data[start:start + SIGNATURE_UPDATE_BATCH])
        response["success"] = response["success"] + batch_result["success"]
        response["errors"] = response["errors"] or batch_result["errors"]

    return response
119
+
120
+
121
+ class SyncableSignatureClient(SignatureClient):
14
122
  def __init__(self, datastore, config=None):
15
123
  super().__init__(datastore, config)
16
124
  self.sync = False
@@ -93,6 +201,19 @@ class SyncableSignature(SignatureClient):
93
201
  class UpdaterClient(object):
94
202
  def __init__(self, datastore) -> None:
95
203
  self.datastore = datastore
96
- self.badlist = BadlistClient(datastore)
97
- self.safelist = SafelistClient(datastore)
98
- self.signature = SyncableSignature(datastore)
204
+ self._sync = False
205
+ self.badlist = SyncableBadlistClient(datastore)
206
+ self.safelist = SyncableSafelistClient(datastore)
207
+ self.signature = SyncableSignatureClient(datastore)
208
+
209
+ @property
210
+ def sync(self):
211
+ return self._sync
212
+
213
+ @sync.setter
214
+ def sync(self, value: bool):
215
+ # Set sync state across clients
216
+ self.badlist.sync = value
217
+ self.safelist.sync = value
218
+ self.signature.sync = value
219
+ self._sync = value
@@ -35,6 +35,9 @@ class SkipSource(RuntimeError):
35
35
  def add_cacert(cert: str) -> None:
36
36
  # Add certificate to requests
37
37
  cafile = certifi.where()
38
+ with open(cafile, 'r') as ca_reader:
39
+ if cert in ca_reader.read():
40
+ return
38
41
  with open(cafile, 'a') as ca_editor:
39
42
  ca_editor.write(f"\n{cert}")
40
43
 
@@ -371,8 +371,8 @@ class ServiceUpdater(ThreadedCoreBase):
371
371
  source = source_obj.as_primitives()
372
372
  uri: str = source['uri']
373
373
  default_classification = source.get('default_classification', classification.UNRESTRICTED)
374
- # Enable signature syncing if the source specifies it
375
- self.client.signature.sync = source.get('sync', False)
374
+ # Enable syncing if the source specifies it
375
+ self.client.sync = source.get('sync', False)
376
376
 
377
377
  try:
378
378
  self.push_status("UPDATING", "Pulling..")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-v4-service
3
- Version: 4.5.1.dev10
3
+ Version: 4.5.1.dev46
4
4
  Summary: Assemblyline 4 - Service base
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-v4-service/
6
6
  Author: CCCS Assemblyline development team
@@ -24,7 +24,7 @@ Requires-Dist: assemblyline-core
24
24
  Requires-Dist: cart
25
25
  Requires-Dist: fuzzywuzzy
26
26
  Requires-Dist: pefile
27
- Requires-Dist: pillow ==10.2.0
27
+ Requires-Dist: pillow !=10.1.0,!=10.2.0
28
28
  Requires-Dist: python-Levenshtein
29
29
  Requires-Dist: regex
30
30
  Provides-Extra: updater
@@ -242,9 +242,7 @@ To test an Assemblyline service in standalone mode, the [run_service_once.py](ht
242
242
  2. From a terminal, run the `run_service_once` script, where `<service path>` is the path to the service module and `<file path>` is the path of the file to be processed
243
243
 
244
244
  ```shell
245
- python3.9 -m assemblyline_v4_service.dev.run_service_once <service path> <file path>
246
- ```
247
- python3.7 -m assemblyline_v4_service.dev.run_service_once <service path> <file path>
245
+ python3.11 -m assemblyline_v4_service.dev.run_service_once <service path> <file path>
248
246
  ```
249
247
 
250
248
 
@@ -261,10 +259,7 @@ To test an Assemblyline service in standalone mode, the [run_service_once.py](ht
261
259
  2. From a terminal, run the `run_service_once` script
262
260
 
263
261
  ```shell
264
- python3.9 -m assemblyline_v4_service.dev.run_service_once assemblyline_result_sample_service.result_sample.ResultSample /home/ubuntu/testfile.doc
265
- ```
266
- python3.7 -m assemblyline_v4_service.dev.run_service_once assemblyline_result_sample_service.result_sample.ResultSample /home/ubuntu/testfile.doc
262
+ python3.11 -m assemblyline_v4_service.dev.run_service_once assemblyline_result_sample_service.result_sample.ResultSample /home/ubuntu/testfile.doc
267
263
  ```
268
264
 
269
-
270
265
  3. The `results.json` file and any extracted/supplementary files will be written to `/home/ubuntu/testfile_resultsample`
@@ -1,4 +1,4 @@
1
- assemblyline_v4_service/VERSION,sha256=MJm_8v8CsbL_j67ZEe8Zv_rM-VewUZr4CgC4PU0CoUg,12
1
+ assemblyline_v4_service/VERSION,sha256=sQM09FS0BK9FrsHaapzAtmW4sNxx-dHBnjL5R7rkljk,12
2
2
  assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  assemblyline_v4_service/healthz.py,sha256=sS1cFkDLw8hUPMpj7tbHXFv8ZmHcazrwZ0l6oQDwwkQ,1575
4
4
  assemblyline_v4_service/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -8,7 +8,7 @@ assemblyline_v4_service/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
8
8
  assemblyline_v4_service/common/api.py,sha256=Xzp8j4HCCfjPvNSGKiZl5ttH2_Itg47cjlH0NXNtth0,6849
9
9
  assemblyline_v4_service/common/base.py,sha256=mKkkzbxVL_wVMy_VieU9mlHYLqZXndga_4dWWbnEnx8,14045
10
10
  assemblyline_v4_service/common/helper.py,sha256=xs9quuf-M1JOdKieBqOmWaOece0CtzXFhhe85xQYmuY,3289
11
- assemblyline_v4_service/common/ocr.py,sha256=XHHD0bOKu7DdBEMQ3sPZxQjp4K4G3js_Ib2GKWD6Xuw,7897
11
+ assemblyline_v4_service/common/ocr.py,sha256=A8OnjpEor-S3OUC_jZzJ-Er3KKAsMdQLEXTtMS81Xbk,8397
12
12
  assemblyline_v4_service/common/ontology_helper.py,sha256=QpwerYoS5hXjWzpx3Pmwv6j2330PQVYqxYGamjcpW3I,7890
13
13
  assemblyline_v4_service/common/request.py,sha256=XXBafAQCV43_OBLXOSHxYoDHmqwERBkNul8fb_X6Ves,11774
14
14
  assemblyline_v4_service/common/result.py,sha256=9AqM6qCYiia_Bpyn_fBFhzNQMcqJbtFSiGjp57fXW2E,32713
@@ -19,11 +19,12 @@ assemblyline_v4_service/dev/run_service_once.py,sha256=4K3ljw0MnfPGw0-6lzc_vtUYg
19
19
  assemblyline_v4_service/updater/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  assemblyline_v4_service/updater/__main__.py,sha256=9Os-u8Tf7MD73JSrUSPmOaErTgfvesNLiEeszU4ujXA,133
21
21
  assemblyline_v4_service/updater/app.py,sha256=Mtmx4bkXfP4nFqqa5q15jW8QIXr4JK84lCovxAVyvPs,3317
22
- assemblyline_v4_service/updater/client.py,sha256=HNmBKJI9VXsRso8IG3JojWsk9ujXJUu_cSJmKh7oG_k,4772
22
+ assemblyline_v4_service/updater/client.py,sha256=oOOIzh-Q-b5uOTo3C4rVSfQxqUO5Y_ogDmUhvWfm5uo,9518
23
23
  assemblyline_v4_service/updater/gunicorn_config.py,sha256=p3j2KPBeD5jvMw9O5i7vAtlRgPSVVxIG9AO0DfN82J8,1247
24
- assemblyline_v4_service/updater/helper.py,sha256=HbH5p6UTdHyIgoctF1c1pQkoqTtzaxfHOi9KXGwn0eM,9435
25
- assemblyline_v4_service/updater/updater.py,sha256=yYDME2ir38Qr-Qe8S0uqcYNxjPRsgUrBByhLwg_rweU,29033
24
+ assemblyline_v4_service/updater/helper.py,sha256=-B35wdjpeY4t1R9SPDrTFHFKHwE3uzy9N69mV6mHy-g,9532
25
+ assemblyline_v4_service/updater/updater.py,sha256=5cJH82VTP3VrwoNE97ubKP2dr90AI_T-AKRistvN5rI,29013
26
26
  test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ test/conftest.py,sha256=W3SieQpZsZpGEmtLqY4aIlxREDSsHceyCrFcFsWUM0U,1851
27
28
  test/test_healthz.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
28
29
  test/test_run_privileged_service.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
29
30
  test/test_run_service.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
@@ -37,8 +38,8 @@ test/test_common/test_request.py,sha256=wxSwnOj-_YOv2SuZjOJsw09q8A7p8GJmJuK4vozq
37
38
  test/test_common/test_result.py,sha256=Wm0Cs5kZRzlZr0jL-l8OTsYAvkoN2eaB3NkeXzvyssI,42208
38
39
  test/test_common/test_task.py,sha256=jnfF68EgJIu30Pz_4jiJHkncfI-3XpGaut5r79KIXOA,18718
39
40
  test/test_common/test_utils.py,sha256=TbnBxqpS_ZC5ptXR9XJX3xtbItD0mTbtiBxxdyP8J5k,5904
40
- assemblyline_v4_service-4.5.1.dev10.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
41
- assemblyline_v4_service-4.5.1.dev10.dist-info/METADATA,sha256=deXIyf91VVK9BRVU09vvD73BwcrirOx6_wXDfLzqIvg,9739
42
- assemblyline_v4_service-4.5.1.dev10.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
43
- assemblyline_v4_service-4.5.1.dev10.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
44
- assemblyline_v4_service-4.5.1.dev10.dist-info/RECORD,,
41
+ assemblyline_v4_service-4.5.1.dev46.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
+ assemblyline_v4_service-4.5.1.dev46.dist-info/METADATA,sha256=WMqxrxQMu_e8Zj5DbSRC_p1nA8VhyrDScDlSGGOk63Q,9498
43
+ assemblyline_v4_service-4.5.1.dev46.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
44
+ assemblyline_v4_service-4.5.1.dev46.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
+ assemblyline_v4_service-4.5.1.dev46.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.42.0)
2
+ Generator: bdist_wheel (0.43.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
test/conftest.py ADDED
@@ -0,0 +1,61 @@
1
+ """
2
+ Pytest configuration file, setup global pytest fixtures and functions here.
3
+ """
4
+ import os
5
+ import pytest
6
+
7
+ from assemblyline.common import forge
8
+ from assemblyline.datastore.helper import AssemblylineDatastore
9
+ from assemblyline.datastore.store import ESStore
10
+
11
+ original_skip = pytest.skip
12
+
13
# Skips go unnoticed in unattended builds, so detect the environment variables such builds set
IN_CI_ENVIRONMENT = any(
    variable in os.environ
    for variable in ('CI', 'BITBUCKET_BUILD_NUMBER', 'AGENT_JOBSTATUS')
)
16
+
17
+
18
def skip_or_fail(message):
    """Fail the current test when running in CI, otherwise skip it, reporting ``message`` either way."""
    # In CI a skip would go unnoticed, so it is escalated to a failure
    outcome = pytest.fail if IN_CI_ENVIRONMENT else original_skip
    outcome(message)
24
+
25
+
26
+ # Replace the built in skip function with our own
27
+ pytest.skip = skip_or_fail
28
+
29
+
30
@pytest.fixture(scope='session')
def config():
    """Session-scoped configuration obtained from ``forge.get_config()`` with test overrides applied."""
    settings = forge.get_config()

    # Logging overrides
    settings.logging.log_level = 'INFO'
    settings.logging.log_as_json = False

    # Metrics overrides: clear the APM server URL and use an export interval of 1
    settings.core.metrics.apm_server.server_url = None
    settings.core.metrics.export_interval = 1

    # Turn the datastore archive on
    settings.datastore.archive.enabled = True

    return settings
39
+
40
+
41
@pytest.fixture(scope='module')
def datastore_connection(config):
    """Module-scoped ``AssemblylineDatastore`` built on an ``ESStore`` for the configured hosts.

    ``pytest.skip`` is called with "Could not connect to datastore" when the store does not answer a ping.
    """
    store = ESStore(config.datastore.hosts)
    if not store.ping():
        pytest.skip("Could not connect to datastore")
    return AssemblylineDatastore(store)
48
+
49
+
50
def _wipe_all_collections(datastore: AssemblylineDatastore) -> AssemblylineDatastore:
    """Wipe every collection listed by ``datastore.ds.get_models()`` and return the same datastore."""
    for name in datastore.ds.get_models():
        datastore.get_collection(name).wipe()
    return datastore


@pytest.fixture(scope='module')
def clean_datastore(datastore_connection: AssemblylineDatastore):
    """Module-scoped datastore whose collections are wiped when the fixture is set up."""
    return _wipe_all_collections(datastore_connection)


@pytest.fixture(scope='function')
def function_clean_datastore(datastore_connection: AssemblylineDatastore):
    """Function-scoped datastore whose collections are wiped before each test that requests it."""
    return _wipe_all_collections(datastore_connection)