assemblyline-v4-service 4.4.1.dev3__py3-none-any.whl → 4.4.1.dev5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-v4-service might be problematic. Click here for more details.

@@ -1 +1 @@
1
- 4.4.1.dev3
1
+ 4.4.1.dev5
@@ -107,7 +107,7 @@ class TestHelper:
107
107
  for param in self.submission_params},
108
108
  "fileinfo": {k: v for k, v in self.identify.fileinfo(file_path).items() if k in fileinfo_keys},
109
109
  "filename": filename,
110
- "min_classification": "TLP:W",
110
+ "min_classification": "TLP:C",
111
111
  "max_files": 501,
112
112
  "ttl": 3600,
113
113
  "temporary_submission_data": [
@@ -56,8 +56,7 @@ def filter_downloads(update_directory, pattern, default_pattern=".*") -> List[Tu
56
56
  return f_files
57
57
 
58
58
 
59
- def url_download(source: Dict[str, Any], previous_update: int = None,
60
- logger=None, output_dir: str = None) -> List[Tuple[str, str]]:
59
+ def url_download(source: Dict[str, Any], previous_update: int = None, logger=None, output_dir: str = None) -> str:
61
60
  """
62
61
 
63
62
  :param source:
@@ -66,7 +65,6 @@ def url_download(source: Dict[str, Any], previous_update: int = None,
66
65
  """
67
66
  name = source['name']
68
67
  uri = source['uri']
69
- pattern = source.get('pattern', None)
70
68
  username = source.get('username', None)
71
69
  password = source.get('password', None)
72
70
  ca_cert = source.get('ca_cert', None)
@@ -143,9 +141,9 @@ def url_download(source: Dict[str, Any], previous_update: int = None,
143
141
  format = format if format in ["zip", "tar"] else None
144
142
  shutil.unpack_archive(file_path, extract_dir=extract_dir, format=format)
145
143
 
146
- return filter_downloads(extract_dir, pattern)
144
+ return extract_dir
147
145
  else:
148
- return [(file_path, get_sha256_for_file(file_path))]
146
+ return file_path
149
147
  else:
150
148
  logger.warning(f"Download not successful: {response.content}")
151
149
  return []
@@ -161,11 +159,9 @@ def url_download(source: Dict[str, Any], previous_update: int = None,
161
159
  session.close()
162
160
 
163
161
 
164
- def git_clone_repo(source: Dict[str, Any], previous_update: int = None, default_pattern: str = "*",
165
- logger=None, output_dir: str = None) -> List[Tuple[str, str]]:
162
+ def git_clone_repo(source: Dict[str, Any], previous_update: int = None, logger=None, output_dir: str = None) -> str:
166
163
  name = source['name']
167
164
  url = source['uri']
168
- pattern = source.get('pattern', None)
169
165
  key = source.get('private_key', None)
170
166
  username = source.get('username', None)
171
167
  password = source.get('password', None)
@@ -225,7 +221,7 @@ def git_clone_repo(source: Dict[str, Any], previous_update: int = None, default_
225
221
  raise SkipSource()
226
222
  break
227
223
 
228
- return filter_downloads(clone_dir, pattern, default_pattern)
224
+ return clone_dir
229
225
  except SkipSource:
230
226
  # Raise to calling function for handling
231
227
  raise
@@ -32,7 +32,7 @@ from assemblyline.remote.datatypes.lock import Lock
32
32
  from assemblyline.odm.models.user import User
33
33
  from assemblyline.odm.models.user_settings import UserSettings
34
34
 
35
- from assemblyline_v4_service.updater.helper import url_download, git_clone_repo, SkipSource
35
+ from assemblyline_v4_service.updater.helper import url_download, git_clone_repo, SkipSource, filter_downloads
36
36
 
37
37
 
38
38
  if typing.TYPE_CHECKING:
@@ -369,6 +369,9 @@ class ServiceUpdater(ThreadedCoreBase):
369
369
  sources: dict[str, UpdateSource] = {_s['name']: _s for _s in service.update_config.sources}
370
370
  files_sha256: dict[str, dict[str, str]] = {}
371
371
 
372
+ # Map already visited URIs to download paths (avoid re-cloning/re-downloads)
373
+ seen_fetches = dict()
374
+
372
375
  # Go through each source and download file
373
376
  for source_name, source_obj in sources.items():
374
377
  # Set current source for pushing state to UI
@@ -385,20 +388,29 @@ class ServiceUpdater(ThreadedCoreBase):
385
388
  default_classification = source.get('default_classification', classification.UNRESTRICTED)
386
389
  try:
387
390
  self.push_status("UPDATING", "Pulling..")
388
-
389
- # Pull sources from external locations (method depends on the URL)
390
- try:
391
- # First we'll attempt by performing a Git clone
392
- # (since not all services hint at being a repository in their URL),
393
- files = git_clone_repo(source, old_update_time, self.default_pattern, self.log, update_dir)
394
- except Exception as git_ex:
395
- # Should that fail, we'll attempt a direct-download using Python Requests
396
- if not uri.endswith('.git'):
397
- # Proceed with direct download, raise exception as required if necessary
398
- files = url_download(source, old_update_time, self.log, update_dir)
399
- else:
400
- # Raise Git Exception
401
- raise git_ex
391
+ output = None
392
+ if uri in seen_fetches:
393
+ # We've already fetched something from the same URI, re-use downloaded path
394
+ self.log.info(f'Already visited {uri} in this run. Using cached download path..')
395
+ output = seen_fetches[uri]
396
+ else:
397
+ # Pull sources from external locations (method depends on the URL)
398
+ try:
399
+ # First we'll attempt by performing a Git clone
400
+ # (since not all services hint at being a repository in their URL),
401
+ output = git_clone_repo(source, old_update_time, self.log, update_dir)
402
+ except Exception as git_ex:
403
+ # Should that fail, we'll attempt a direct-download using Python Requests
404
+ if not uri.endswith('.git'):
405
+ # Proceed with direct download, raise exception as required if necessary
406
+ output = url_download(source, old_update_time, self.log, update_dir)
407
+ else:
408
+ # Raise Git Exception
409
+ raise git_ex
410
+ # Add output path to the list of seen fetches in this run
411
+ seen_fetches[uri] = output
412
+
413
+ files = filter_downloads(output, source['pattern'], self.default_pattern)
402
414
 
403
415
  # Add to collection of sources for caching purposes
404
416
  self.log.info(f"Found new {self.updater_type} rule files to process for {source_name}!")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-v4-service
3
- Version: 4.4.1.dev3
3
+ Version: 4.4.1.dev5
4
4
  Summary: Assemblyline 4 - Service base
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-v4-service/
6
6
  Author: CCCS Assemblyline development team
@@ -1,4 +1,4 @@
1
- assemblyline_v4_service/VERSION,sha256=wYfUWkgn81gW-y16MRL4vOfEnNDzfQOAq0wnhDXxLak,11
1
+ assemblyline_v4_service/VERSION,sha256=TA5K6_WV8uRs4Zs_JwC03uQVzUZ_CjaprBvWthjQA8w,11
2
2
  assemblyline_v4_service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  assemblyline_v4_service/healthz.py,sha256=3QGBg0EZuXC6UN411HFwpLNEop9UvS9feFhvBUTP-k4,1576
4
4
  assemblyline_v4_service/run_privileged_service.py,sha256=9uTfHetXR5G-EDKMDrgfWUOw34yr64-cj6Cm9eZaCbQ,14547
@@ -39,16 +39,16 @@ assemblyline_v4_service/common/pestudio/xml/strings.xml,sha256=kRU8WbCcU1RckM6oC
39
39
  assemblyline_v4_service/dev/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
40
  assemblyline_v4_service/dev/run_service_once.py,sha256=4gnb09WeKXlyWQKCQdH4SoL4xtfIRWq_9nyIiECrJ7g,10592
41
41
  assemblyline_v4_service/testing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- assemblyline_v4_service/testing/helper.py,sha256=tVfTirHeckUxVAN6cRUZvnH8WG3ccp5IM4sB8n8W0nc,19711
42
+ assemblyline_v4_service/testing/helper.py,sha256=f0-qBtgR0vWZBpEV9sPfcworLtdh4h_CcoAofHlOtZE,19711
43
43
  assemblyline_v4_service/testing/regenerate_results.py,sha256=Cbp2CMAxbF3kz5vxEPPCxrgUp1Vl3Tz6e46aUhg_I4U,1101
44
44
  assemblyline_v4_service/updater/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
45
45
  assemblyline_v4_service/updater/__main__.py,sha256=9Os-u8Tf7MD73JSrUSPmOaErTgfvesNLiEeszU4ujXA,133
46
46
  assemblyline_v4_service/updater/app.py,sha256=IB9UrfUesu88ixC_VKk8IbEVvhYFI7c7gla_IfS_g2g,2780
47
47
  assemblyline_v4_service/updater/gunicorn_config.py,sha256=8Qulsnw9pcsol4x_3oeyRkvovFEe0nEJQnPPemuOl-I,1155
48
- assemblyline_v4_service/updater/helper.py,sha256=I96U4y2RX9NLEJghziB3bn56pfB5jwU0kH3T2zeyFTA,9222
49
- assemblyline_v4_service/updater/updater.py,sha256=1MvpoA4uvxOd0F747rnQOaSI8oTUTtB5_5AV7plbbkA,27248
50
- assemblyline_v4_service-4.4.1.dev3.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
51
- assemblyline_v4_service-4.4.1.dev3.dist-info/METADATA,sha256=V6--cSxBapac8DC3HgbmT31Qs182UWtTcgdEeSnEWPo,9358
52
- assemblyline_v4_service-4.4.1.dev3.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
53
- assemblyline_v4_service-4.4.1.dev3.dist-info/top_level.txt,sha256=Ut5IqePObcxlJ8rv2--dOAzYbxzqlllfiV_51cbqjbA,24
54
- assemblyline_v4_service-4.4.1.dev3.dist-info/RECORD,,
48
+ assemblyline_v4_service/updater/helper.py,sha256=gsJwmdDz1hJYstRfg5jdkq7nH45ppJ0yPqEiH3IDsZE,8931
49
+ assemblyline_v4_service/updater/updater.py,sha256=MDKV_pvtmsJVv5qUH_qlhnWKW0oC-uXsoY2_bWqP9pI,28054
50
+ assemblyline_v4_service-4.4.1.dev5.dist-info/LICENCE.md,sha256=NSkYo9EH8h5oOkzg4VhjAHF4339MqPP2cQ8msTPgl-c,1396
51
+ assemblyline_v4_service-4.4.1.dev5.dist-info/METADATA,sha256=q1lJ0Vv_Eg3mA9tCdr_wHV7wXVAVQF3hdc4OyBaVj-o,9358
52
+ assemblyline_v4_service-4.4.1.dev5.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
53
+ assemblyline_v4_service-4.4.1.dev5.dist-info/top_level.txt,sha256=Ut5IqePObcxlJ8rv2--dOAzYbxzqlllfiV_51cbqjbA,24
54
+ assemblyline_v4_service-4.4.1.dev5.dist-info/RECORD,,