assemblyline-v4-service 4.5.0.64__py3-none-any.whl → 4.5.0.66__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 4.5.0.64
1
+ 4.5.0.66
@@ -202,6 +202,9 @@ class ServiceRequest:
202
202
  """
203
203
  return self.task.get_param(name)
204
204
 
205
+ def partial(self) -> None:
206
+ self.task.partial()
207
+
205
208
  @property
206
209
  def result(self) -> Result:
207
210
  """
@@ -70,7 +70,8 @@ class Task:
70
70
  self.min_classification = task.min_classification.value
71
71
  self.max_extracted = task.max_files
72
72
  self.metadata = task.metadata
73
- self.result: Optional[Result] = None
73
+ self.partial_result: bool = False
74
+ self.result: Result = Result()
74
75
  self.safelist_config: Dict[str, Any] = task.safelist_config
75
76
  self.service_config: Dict[str, Any] = dict(task.service_config)
76
77
  self.service_context: Optional[str] = None
@@ -263,11 +264,15 @@ class Task:
263
264
  type=self.file_type,
264
265
  size=self.file_size,
265
266
  drop_file=self.drop_file,
267
+ partial=self.partial_result,
266
268
  temp_submission_data=self.temp_submission_data,
267
269
  )
268
270
 
269
271
  return result
270
272
 
273
+ def partial(self) -> None:
274
+ self.partial_result = True
275
+
271
276
  def save_error(self, stack_info: str, recoverable: bool) -> None:
272
277
  self.error_message = stack_info
273
278
 
@@ -17,6 +17,7 @@ class SyncableBadlistClient(BadlistClient):
17
17
  def __init__(self, datastore, config=None):
18
18
  super().__init__(datastore, config)
19
19
  self.sync = False
20
+ self.classification_override = None
20
21
 
21
22
  def add_update_many(self, data: List[Union[dict, BadlistModel]]) -> Dict[str, Any]:
22
23
  return hashlist_add_update_many(self, self.datastore.badlist, data)
@@ -26,6 +27,7 @@ class SyncableSafelistClient(SafelistClient):
26
27
  def __init__(self, datastore, config=None):
27
28
  super().__init__(datastore, config)
28
29
  self.sync = False
30
+ self.classification_override = None
29
31
 
30
32
  def add_update_many(self, data: List[Union[dict, SafelistModel]]) -> Dict[str, Any]:
31
33
  return hashlist_add_update_many(self, self.datastore.safelist, data)
@@ -55,9 +57,14 @@ def hashlist_add_update_many(client: Union[SyncableBadlistClient, SyncableSafeli
55
57
  # Set the source name
56
58
  source = d["sources"][0]["name"]
57
59
 
60
+ if client.classification_override:
61
+ # Override the classification of the signature based on what's assigned to the client
62
+ d["sources"][0]["classification"] = client.classification_override
63
+
58
64
  if client.sync:
59
65
  # Compute the expected ID and add it to the list
60
66
  current_ids.add(client._preprocess_object(d))
67
+
61
68
  # Update with JSON-friendly version of data to be sent to API
62
69
  data[i] = d
63
70
 
@@ -122,6 +129,7 @@ class SyncableSignatureClient(SignatureClient):
122
129
  def __init__(self, datastore, config=None):
123
130
  super().__init__(datastore, config)
124
131
  self.sync = False
132
+ self.classification_override = None
125
133
 
126
134
  def add_update_many(self, source: str, sig_type: str, data: List[Union[dict, SignatureModel]],
127
135
  dedup_name: bool = True) -> Dict[str, Any]:
@@ -144,6 +152,10 @@ class SyncableSignatureClient(SignatureClient):
144
152
  d['status'] = sig_exists['status']
145
153
  d['state_change_user'] = sig_exists['state_change_user']
146
154
 
155
+ if self.classification_override:
156
+ # Override the classification of the signature based on what's assigned to the client
157
+ d['classification'] = self.classification_override
158
+
147
159
  if self.sync:
148
160
  # Add signature ID to the list
149
161
  current_signature_ids.add(sig_id)
@@ -208,6 +220,7 @@ class UpdaterClient(object):
208
220
  def __init__(self, datastore) -> None:
209
221
  self.datastore = datastore
210
222
  self._sync = False
223
+ self._classification_override = None
211
224
  self.badlist = SyncableBadlistClient(datastore)
212
225
  self.safelist = SyncableSafelistClient(datastore)
213
226
  self.signature = SyncableSignatureClient(datastore)
@@ -223,3 +236,15 @@ class UpdaterClient(object):
223
236
  self.safelist.sync = value
224
237
  self.signature.sync = value
225
238
  self._sync = value
239
+
240
+ @property
241
+ def classification_override(self):
242
+ return self._classification_override
243
+
244
+ @classification_override.setter
245
+ def classification_override(self, value: str):
246
+ # Set the classification override
247
+ self.badlist.classification_override = value
248
+ self.safelist.classification_override = value
249
+ self.signature.classification_override = value
250
+ self._classification_override = value
@@ -88,6 +88,7 @@ def url_download(source: Dict[str, Any], previous_update: int, logger: Logger, o
88
88
  ca_cert = source.get('ca_cert', None)
89
89
  ignore_ssl_errors = source.get('ssl_ignore_errors', False)
90
90
  auth = (username, password) if username and password else None
91
+ fetch_method = source.get('fetch_method', 'GET').lower()
91
92
 
92
93
  proxy = source.get('proxy', None)
93
94
  headers_list = source.get('headers', [])
@@ -134,7 +135,10 @@ def url_download(source: Dict[str, Any], previous_update: int, logger: Logger, o
134
135
  else:
135
136
  headers = {'If-Modified-Since': previous_update}
136
137
 
137
- response = session.get(uri, auth=auth, headers=headers, proxies=proxies, stream=True)
138
+ if fetch_method in ['get', 'post']:
139
+ response = getattr(session, fetch_method)(uri, auth=auth, headers=headers, proxies=proxies, stream=True)
140
+ else:
141
+ raise ValueError(f"Unknown fetch method: {fetch_method}")
138
142
 
139
143
  # Check the response code
140
144
  if response.status_code == requests.codes['not_modified']:
@@ -188,7 +192,13 @@ def git_clone_repo(source: Dict[str, Any], previous_update: int = None, logger=N
188
192
  ignore_ssl_errors = source.get("ssl_ignore_errors", False)
189
193
  ca_cert = source.get("ca_cert")
190
194
  proxy = source.get('proxy', None)
191
- auth = f'{username}:{password}@' if username and password else None
195
+ auth = None
196
+ if username and password:
197
+ # Basic authentication scheme
198
+ auth = f'{username}:{password}@'
199
+ elif password:
200
+ # Token-based authentication
201
+ auth = f'{password}@'
192
202
 
193
203
  git_env = {}
194
204
 
@@ -378,14 +378,40 @@ class ServiceUpdater(ThreadedCoreBase):
378
378
  source_obj = sources[source_name]
379
379
  old_update_time = self.get_source_update_time()
380
380
 
381
- self.push_status("UPDATING", "Starting..")
382
- source = source_obj.as_primitives()
383
- uri: str = source['uri']
384
- default_classification = source.get('default_classification', classification.UNRESTRICTED)
385
- # Enable syncing if the source specifies it
386
- self.client.sync = source.get('sync', False)
387
-
381
+ # Are we ignoring the cache for this source?
382
+ if source_obj.ignore_cache:
383
+ old_update_time = 0
388
384
  try:
385
+
386
+ source = source_obj.as_primitives()
387
+ uri: str = source_obj.uri
388
+
389
+ # If source is not currently enabled/active, skip..
390
+ if not source_obj.enabled:
391
+ raise SkipSource
392
+
393
+ # Is it time for this source to run?
394
+ elapsed_time = time.time() - old_update_time
395
+ update_interval = source.get('update_interval') or service.update_config.update_interval_seconds
396
+ if elapsed_time < update_interval:
397
+ # Too early to run the update for this particular source, skip for now
398
+ raise SkipSource
399
+
400
+
401
+ self.push_status("UPDATING", "Starting..")
402
+ fetch_method = source.get('fetch_method', 'GET')
403
+ default_classification = source.get('default_classification', classification.UNRESTRICTED)
404
+
405
+ # Configure the client as necessary
406
+
407
+ # Enable syncing if the source specifies it
408
+ self.client.sync = source.get('sync', False)
409
+ # Override classification of signatures if specified
410
+ # Reset client back to original classification state between updates
411
+ self.client.classification_override = None
412
+ if source.get('override_classification', False):
413
+ self.client.classification_override = default_classification
414
+
389
415
  self.push_status("UPDATING", "Pulling..")
390
416
  output = None
391
417
  seen_fetch = seen_fetches.get(uri)
@@ -397,21 +423,20 @@ class ServiceUpdater(ThreadedCoreBase):
397
423
  self.log.info(f'Already visited {uri} in this run. Using cached download path..')
398
424
  output = seen_fetches[uri]
399
425
  else:
400
- # Pull sources from external locations (method depends on the URL)
401
- try:
426
+ self.log.info(f"Fetching {source_name} using {fetch_method}")
427
+ # Pull sources from external locations
428
+ if uri.startswith("file:///"):
429
+ # Perform an update using a local mount
430
+ output = uri.split("file://", 1)[1]
431
+ if not os.path.exists(output):
432
+ raise FileNotFoundError(f"{output} doesn't exist within container.")
433
+ elif fetch_method == "GIT" or uri.endswith('.git'):
402
434
  # First we'll attempt by performing a Git clone
403
435
  # (since not all services hint at being a repository in their URL),
404
436
  output = git_clone_repo(source, old_update_time, self.log, update_dir)
405
- except SkipSource:
406
- raise
407
- except Exception as git_ex:
408
- # Should that fail, we'll attempt a direct-download using Python Requests
409
- if not uri.endswith('.git'):
410
- # Proceed with direct download, raise exception as required if necessary
411
- output = url_download(source, old_update_time, self.log, update_dir)
412
- else:
413
- # Raise Git Exception
414
- raise git_ex
437
+ else:
438
+ # Other fetch methods are meant for URL downloads using Requests
439
+ output = url_download(source, old_update_time, self.log, update_dir)
415
440
  # Add output path to the list of seen fetches in this run
416
441
  seen_fetches[uri] = output
417
442
 
@@ -430,7 +455,8 @@ class ServiceUpdater(ThreadedCoreBase):
430
455
 
431
456
  self.push_status("UPDATING", "Importing..")
432
457
  # Import into Assemblyline
433
- self.import_update(validated_files, source_name, default_classification)
458
+ self.import_update(validated_files, source_name, default_classification,
459
+ source.get('configuration') or {})
434
460
  self.push_status("DONE", "Signature(s) Imported.")
435
461
  except SkipSource:
436
462
  # This source hasn't changed, no need to re-import into Assemblyline
@@ -457,7 +483,8 @@ class ServiceUpdater(ThreadedCoreBase):
457
483
  return True
458
484
 
459
485
  # Define how your source update gets imported into Assemblyline
460
- def import_update(self, files_sha256: List[Tuple[str, str]], source_name: str, default_classification=None):
486
+ def import_update(self, files_sha256: List[Tuple[str, str]], source_name: str, default_classification=None,
487
+ configuration: dict = {}, *args, **kwargs):
461
488
  raise NotImplementedError()
462
489
 
463
490
  # Define how to prepare the output directory before being served, must return the path of the directory to serve.
@@ -486,7 +513,10 @@ class ServiceUpdater(ThreadedCoreBase):
486
513
  while self.running:
487
514
  # Stringify and hash the current update configuration
488
515
  service = self._service
489
- update_interval = service.update_config.update_interval_seconds
516
+
517
+ # The update interval (or sleep interval) will be based on the smallest interval across sources
518
+ update_interval = min([service.update_config.update_interval_seconds] +
519
+ [s.update_interval for s in service.update_config.sources if s.update_interval])
490
520
 
491
521
  # Is it time to update yet?
492
522
  if time.time() - self.get_scheduled_update_time() < update_interval \
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-v4-service
3
- Version: 4.5.0.64
3
+ Version: 4.5.0.66
4
4
  Summary: Assemblyline 4 - Service base
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-v4-service/
6
6
  Author: CCCS Assemblyline development team
@@ -1,4 +1,4 @@
1
- assemblyline_v4_service/VERSION,sha256=glDcmo_TI9B72qG6u3dZOf7DlHNMv80lFKQ2kz3j2p0,9
1
+ assemblyline_v4_service/VERSION,sha256=HYTwnZwSWV_SzMwzWkIBOOzMK6Rw4M8JAfuZ40V-_BY,9
2
2
  assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  assemblyline_v4_service/healthz.py,sha256=3QGBg0EZuXC6UN411HFwpLNEop9UvS9feFhvBUTP-k4,1576
4
4
  assemblyline_v4_service/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -10,19 +10,19 @@ assemblyline_v4_service/common/base.py,sha256=4rnK_183qsSfkRkuVp_0wy-haW49umx4jJ
10
10
  assemblyline_v4_service/common/helper.py,sha256=xs9quuf-M1JOdKieBqOmWaOece0CtzXFhhe85xQYmuY,3289
11
11
  assemblyline_v4_service/common/ocr.py,sha256=3fV0PyY3oui_ucAM9dkolP0VRYKACKJuGY4M64DudIE,8841
12
12
  assemblyline_v4_service/common/ontology_helper.py,sha256=9Ad81qbddg_pRMupT8o_KzxbKgpodaRqpc3mPoEKLtw,8494
13
- assemblyline_v4_service/common/request.py,sha256=ZP80rDWIyVotwVDN70L7ujN9RoxZRdmaeUN0DFIvrT4,11732
13
+ assemblyline_v4_service/common/request.py,sha256=W7fqC2xQE3i5i2jlCDyUDp3ZqJQQqSshNW0mQfJMkFg,11792
14
14
  assemblyline_v4_service/common/result.py,sha256=9AqM6qCYiia_Bpyn_fBFhzNQMcqJbtFSiGjp57fXW2E,32713
15
- assemblyline_v4_service/common/task.py,sha256=CJ5Mw1Lre09HGf9rbWfKxQdxmRS_mImnHLlE1oQk5dM,14089
15
+ assemblyline_v4_service/common/task.py,sha256=dJsvRpW0x88CCF_LW6w87jQ_UKTVaOs2Gb117IDNiU8,14233
16
16
  assemblyline_v4_service/common/utils.py,sha256=k2__d-V5LjB6o2IKbjVe7tJWKcKuUHto5TyT5oKhIa0,3890
17
17
  assemblyline_v4_service/dev/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  assemblyline_v4_service/dev/run_service_once.py,sha256=cGwsNGWNp5KcZ4iVqMf76uGHR21faYhpEkEclMwEdcI,10505
19
19
  assemblyline_v4_service/updater/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
20
  assemblyline_v4_service/updater/__main__.py,sha256=9Os-u8Tf7MD73JSrUSPmOaErTgfvesNLiEeszU4ujXA,133
21
21
  assemblyline_v4_service/updater/app.py,sha256=Mtmx4bkXfP4nFqqa5q15jW8QIXr4JK84lCovxAVyvPs,3317
22
- assemblyline_v4_service/updater/client.py,sha256=7_56Ie-2073FlrVWscS-3lIIkg8F-81u97FVNqEJLyQ,9798
22
+ assemblyline_v4_service/updater/client.py,sha256=eBOK8doMu9nsIO6kXWtaBMy4hJIKRtQG0iC_21a5CG4,10849
23
23
  assemblyline_v4_service/updater/gunicorn_config.py,sha256=p3j2KPBeD5jvMw9O5i7vAtlRgPSVVxIG9AO0DfN82J8,1247
24
- assemblyline_v4_service/updater/helper.py,sha256=-B35wdjpeY4t1R9SPDrTFHFKHwE3uzy9N69mV6mHy-g,9532
25
- assemblyline_v4_service/updater/updater.py,sha256=knl8X1xG5wF9nl8j--0WC3BmzQ0PO1Zq7RW58euVjn0,29687
24
+ assemblyline_v4_service/updater/helper.py,sha256=DhxF2TVVpUY6S5dJnjRlTl6eBJrtTqw1uUAcvVWVkhM,9895
25
+ assemblyline_v4_service/updater/updater.py,sha256=8o-vEMMJS388Ci9mVk_6nwFayM5NToliOTFnnPWHTYE,31449
26
26
  test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
27
  test/conftest.py,sha256=W3SieQpZsZpGEmtLqY4aIlxREDSsHceyCrFcFsWUM0U,1851
28
28
  test/test_healthz.py,sha256=DkeLUlrb7rGx3nZ04aADU9HXXu5mZTf_DBwT0xhzIv4,7
@@ -34,12 +34,12 @@ test/test_common/test_base.py,sha256=fuJSSlPxIDHq6HU1xbvaMFitw2z1spOZNHD2SJ4UUic
34
34
  test/test_common/test_helper.py,sha256=sO6YAiBhKTqaxlpLhFYDuy2ZdbuF2cg07Ylzo83ZzQs,2575
35
35
  test/test_common/test_ocr.py,sha256=mt_PgElgwQKJmNrp2nRVx9NjfMedVk40I6IV317vATI,1753
36
36
  test/test_common/test_ontology_helper.py,sha256=Q9-Eqeo8Ih7XlbFmlUAXCtgnfW8JCDqqlYFb56077h4,10331
37
- test/test_common/test_request.py,sha256=zoBURe3QbycWK4I7uvkixcmykrOCqY_TchmxJinNXjU,11822
37
+ test/test_common/test_request.py,sha256=Ceyds8BNO1O0f1kH1VEb84faJcaupvSjVKIrGdHexsc,11842
38
38
  test/test_common/test_result.py,sha256=6BiOKxEPrKBjOY44jv3TY-yiXm0qI1ok_CZBnjP9TM4,45447
39
- test/test_common/test_task.py,sha256=LzEIfFHJEcB_YPucOZGtm1TwyHnqJE-0Qc2MDsH4TN4,18957
39
+ test/test_common/test_task.py,sha256=P44mNcSe-3tJgDk9ppN3KbM7oN4LBVIuhONG-Gveh74,19007
40
40
  test/test_common/test_utils.py,sha256=TbnBxqpS_ZC5ptXR9XJX3xtbItD0mTbtiBxxdyP8J5k,5904
41
- assemblyline_v4_service-4.5.0.64.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
- assemblyline_v4_service-4.5.0.64.dist-info/METADATA,sha256=3KEghl9lwtO8Q0IzJ6EI_l3miqNw74VMVwUGXdkNfk0,9489
43
- assemblyline_v4_service-4.5.0.64.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
44
- assemblyline_v4_service-4.5.0.64.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
- assemblyline_v4_service-4.5.0.64.dist-info/RECORD,,
41
+ assemblyline_v4_service-4.5.0.66.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
42
+ assemblyline_v4_service-4.5.0.66.dist-info/METADATA,sha256=UycdUpsYtr-vDw9QMoQ8AYMCheXMOP4My8sCfhXSpXM,9489
43
+ assemblyline_v4_service-4.5.0.66.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
44
+ assemblyline_v4_service-4.5.0.66.dist-info/top_level.txt,sha256=LpTOEaVCatkrvbVq3EZseMSIa2PQZU-2rhuO_FTpZgY,29
45
+ assemblyline_v4_service-4.5.0.66.dist-info/RECORD,,
@@ -5,7 +5,7 @@ from test.test_common import TESSERACT_LIST, setup_module
5
5
 
6
6
  import pytest
7
7
  from assemblyline_v4_service.common.request import ServiceRequest
8
- from assemblyline_v4_service.common.result import get_heuristic_primitives
8
+ from assemblyline_v4_service.common.result import Result, get_heuristic_primitives
9
9
  from assemblyline_v4_service.common.task import MaxExtractedExceeded, Task
10
10
 
11
11
  from assemblyline.odm.messages.task import Task as ServiceTask
@@ -303,7 +303,7 @@ def test_get_param(service_request):
303
303
 
304
304
 
305
305
  def test_result_getter(service_request):
306
- assert service_request.result is None
306
+ assert isinstance(service_request.result, Result)
307
307
 
308
308
 
309
309
  def test_result_setter(service_request):
@@ -61,7 +61,6 @@ def test_task_init(servicetask):
61
61
  assert t.metadata == {}
62
62
  assert t.md5 == "d41d8cd98f00b204e9800998ecf8427e"
63
63
  assert t.mime is None
64
- assert t.result is None
65
64
  assert isinstance(t.safelist_config, ServiceSafelist)
66
65
  assert t.service_config == {}
67
66
  assert t.service_context is None
@@ -380,6 +379,7 @@ def test_task_get_service_result(servicetask):
380
379
  "service_debug_info": None,
381
380
  },
382
381
  "result": {"score": 0, "sections": []},
382
+ "partial": False,
383
383
  "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
384
384
  "type": "text/plain",
385
385
  "size": 0,
@@ -423,6 +423,7 @@ def test_task_get_service_result(servicetask):
423
423
  "result": {
424
424
  "score": 0,
425
425
  },
426
+ "partial": False,
426
427
  "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
427
428
  "type": "text/plain",
428
429
  "size": 0,
@@ -486,6 +487,7 @@ def test_task_save_result(servicetask):
486
487
  "service_debug_info": None,
487
488
  },
488
489
  "result": {"score": 0, "sections": []},
490
+ "partial": False,
489
491
  "sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
490
492
  "type": "text/plain",
491
493
  "size": 0,