assemblyline-core 4.5.1.dev135__tar.gz → 4.5.1.dev137__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (88)
  1. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/PKG-INFO +1 -1
  2. assemblyline-core-4.5.1.dev137/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/expiry/run_expiry.py +90 -59
  4. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/updater/run_updater.py +7 -2
  5. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core.egg-info/PKG-INFO +1 -1
  6. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_expiry.py +2 -1
  7. assemblyline-core-4.5.1.dev135/assemblyline_core/VERSION +0 -1
  8. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/LICENCE.md +0 -0
  9. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/README.md +0 -0
  10. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/__init__.py +0 -0
  11. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/alerter/__init__.py +0 -0
  12. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/alerter/processing.py +0 -0
  13. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/alerter/run_alerter.py +0 -0
  14. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/archiver/__init__.py +0 -0
  15. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/archiver/run_archiver.py +0 -0
  16. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/badlist_client.py +0 -0
  17. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/__init__.py +0 -0
  18. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/__main__.py +0 -0
  19. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/client.py +0 -0
  20. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/dispatcher.py +0 -0
  21. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/schedules.py +0 -0
  22. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/dispatching/timeout.py +0 -0
  23. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/expiry/__init__.py +0 -0
  24. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/ingester/__init__.py +0 -0
  25. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/ingester/__main__.py +0 -0
  26. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/ingester/constants.py +0 -0
  27. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/ingester/ingester.py +0 -0
  28. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/__init__.py +0 -0
  29. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/es_metrics.py +0 -0
  30. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  31. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/helper.py +0 -0
  32. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/metrics_server.py +0 -0
  33. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  34. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  35. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  36. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/plumber/__init__.py +0 -0
  37. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/plumber/run_plumber.py +0 -0
  38. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/__init__.py +0 -0
  39. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/client.py +0 -0
  40. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/creator/__init__.py +0 -0
  41. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/creator/run.py +0 -0
  42. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/creator/run_worker.py +0 -0
  43. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/loader/__init__.py +0 -0
  44. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/loader/run.py +0 -0
  45. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/loader/run_worker.py +0 -0
  46. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/replay/replay.py +0 -0
  47. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/safelist_client.py +0 -0
  48. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/__init__.py +0 -0
  49. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/collection.py +0 -0
  50. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  51. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  52. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/controllers/interface.py +0 -0
  53. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  54. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/run_scaler.py +0 -0
  55. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/scaler/scaler_server.py +0 -0
  56. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/server_base.py +0 -0
  57. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/signature_client.py +0 -0
  58. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/submission_client.py +0 -0
  59. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/tasking_client.py +0 -0
  60. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/updater/__init__.py +0 -0
  61. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/updater/helper.py +0 -0
  62. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/__init__.py +0 -0
  63. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/crawler.py +0 -0
  64. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/department_map.py +0 -0
  65. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/safelist.py +0 -0
  66. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/stream_map.py +0 -0
  67. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/vacuum/worker.py +0 -0
  68. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/workflow/__init__.py +0 -0
  69. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core/workflow/run_workflow.py +0 -0
  70. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  71. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  72. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core.egg-info/requires.txt +0 -0
  73. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/assemblyline_core.egg-info/top_level.txt +0 -0
  74. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/setup.cfg +0 -0
  75. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/setup.py +0 -0
  76. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_alerter.py +0 -0
  77. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_badlist_client.py +0 -0
  78. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_dispatcher.py +0 -0
  79. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_plumber.py +0 -0
  80. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_replay.py +0 -0
  81. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_safelist_client.py +0 -0
  82. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_scaler.py +0 -0
  83. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_scheduler.py +0 -0
  84. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_signature_client.py +0 -0
  85. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_simulation.py +0 -0
  86. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_vacuum.py +0 -0
  87. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_worker_ingest.py +0 -0
  88. {assemblyline-core-4.5.1.dev135 → assemblyline-core-4.5.1.dev137}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev135
3
+ Version: 4.5.1.dev137
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.5.1.dev137
@@ -2,6 +2,7 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import concurrent.futures
5
+ import threading
5
6
  import functools
6
7
  import elasticapm
7
8
  import time
@@ -12,6 +13,7 @@ from datemath import dm
12
13
  from typing import Callable, Optional, TYPE_CHECKING
13
14
 
14
15
  from assemblyline.common.isotime import epoch_to_iso, now_as_iso
16
+ from assemblyline.datastore.collection import Index
15
17
  from assemblyline_core.server_base import ServerBase
16
18
  from assemblyline_core.dispatching.dispatcher import BAD_SID_HASH
17
19
  from assemblyline.common import forge
@@ -151,19 +153,19 @@ class ExpiryManager(ServerBase):
151
153
  bulk.add_delete_operation(sha256)
152
154
 
153
155
  if len(file_list) > 0:
154
- self.log.info(f' Deleted associated files from the '
156
+ self.log.info(f'[{collection.name}] Deleted associated files from the '
155
157
  f'{"cachestore" if "cache" in collection.name else "filestore"}...')
156
158
  collection.bulk(bulk)
157
159
  self.counter.increment(f'{collection.name}', increment_by=len(file_list))
158
- self.log.info(f" Deleted {len(file_list)} items from the datastore...")
160
+ self.log.info(f"[{collection.name}] Deleted {len(file_list)} items from the datastore...")
159
161
  else:
160
- self.log.warning(' Expiry unable to clean up any of the files in filestore.')
162
+ self.log.warning(f'[{collection.name}] Expiry unable to clean up any of the files in filestore.')
161
163
 
162
164
  def _simple_delete(self, collection, delete_query, number_to_delete):
163
165
  self.heartbeat()
164
166
  collection.delete_by_query(delete_query)
165
167
  self.counter.increment(f'{collection.name}', increment_by=number_to_delete)
166
- self.log.info(f" Deleted {number_to_delete} items from the datastore...")
168
+ self.log.info(f"[{collection.name}] Deleted {number_to_delete} items from the datastore...")
167
169
 
168
170
  def _cleanup_canceled_submission(self, sid):
169
171
  # Allowing us at minimum 5 minutes to cleanup the submission
@@ -172,7 +174,7 @@ class ExpiryManager(ServerBase):
172
174
  self.apm_client.begin_transaction("Delete canceled submissions")
173
175
 
174
176
  # Cleaning up the submission
175
- self.log.info(f"Deleting incomplete submission {sid}...")
177
+ self.log.info(f"[submission] Deleting incomplete submission {sid}...")
176
178
  self.datastore.delete_submission_tree_bulk(sid, self.classification, transport=self.filestore)
177
179
  self.redis_bad_sids.remove(sid)
178
180
 
@@ -188,7 +190,6 @@ class ExpiryManager(ServerBase):
188
190
  # As long as these two things are true, the set returned by this query should be consistent.
189
191
  # The one race condition is that a record might be refreshed while the file
190
192
  # blob would be deleted anyway, leaving a file record with no filestore object
191
- self.log.info(f"Processing collection: {collection.name}")
192
193
  delete_query = f"expiry_ts:{{{start} TO {end}]"
193
194
 
194
195
  # check if we are dealing with an index that needs file cleanup
@@ -202,71 +203,53 @@ class ExpiryManager(ServerBase):
202
203
  # Filter archived documents if archive filestore is the same as the filestore
203
204
  expire_only = []
204
205
  if self.same_storage and self.config.datastore.archive.enabled and collection.name == 'file':
205
- archived_files = self.datastore.file.multiexists_in_archive(delete_objects)
206
+ archived_files = self.datastore.file.multiexists(delete_objects, index_type=Index.ARCHIVE)
206
207
  delete_objects = [k for k, v in archived_files.items() if not v]
207
208
  expire_only = [k for k, v in archived_files.items() if v]
208
209
 
209
210
  delete_tasks = self.fs_hashmap[collection.name](delete_objects, final_date)
210
211
 
211
212
  # Proceed with deletion, but only after all the scheduled deletes for this
212
- self.log.info(f"Scheduled {len(delete_objects)}/{number_to_delete} "
213
- f"files to be removed for: {collection.name}")
213
+ self.log.info(f"[{collection.name}] Scheduled {len(delete_objects)}/{number_to_delete} files to be removed")
214
214
  self._finish_delete(collection, delete_tasks, expire_only)
215
215
 
216
216
  else:
217
217
  # Proceed with deletion
218
218
  self._simple_delete(collection, delete_query, number_to_delete)
219
219
 
220
- def run_expiry_once(self, pool: ThreadPoolExecutor):
221
- busy_iteration = False
222
-
223
- # Delete canceled submissions
224
- # Make sure we're not dedicating more then a quarter of the pool to this operation because it is costly
225
- for submission in self.datastore.submission.search(
226
- "to_be_deleted:true", fl="sid", rows=max(1, int(self.config.core.expiry.workers / 4)))['items']:
227
- if submission.sid not in self.current_submission_cleanup:
228
- self.current_submission_cleanup.add(submission.sid)
229
- pool.submit(self.log_errors(self._cleanup_canceled_submission), submission.sid)
230
-
231
- # Expire data
232
- for collection in self.expirable_collections:
233
- self.heartbeat()
234
-
235
- # Start of expiry transaction
236
- if self.apm_client:
237
- self.apm_client.begin_transaction("Delete expired documents")
220
+ def feed_expiry_jobs(self, collection, start, jobs: list[concurrent.futures.Future],
221
+ pool: ThreadPoolExecutor) -> tuple[str, bool]:
222
+ _process_chunk = self.log_errors(self._process_chunk)
223
+ number_to_delete = 0
224
+ self.heartbeat()
238
225
 
239
- final_date = self._get_final_date()
226
+ # Start of expiry transaction
227
+ if self.apm_client:
228
+ self.apm_client.begin_transaction("Delete expired documents")
240
229
 
241
- # Break down the expiry window into smaller chunks of data
242
- start = "*"
243
- iterations = 0
244
- while iterations < self.config.core.expiry.iteration_max_tasks:
245
- self.heartbeat()
230
+ final_date = self._get_final_date()
246
231
 
247
- # Get the next chunk
248
- end, number_to_delete = self._get_next_chunk(collection, start, final_date)
232
+ # Break down the expiry window into smaller chunks of data
233
+ while len(jobs) < self.config.core.expiry.iteration_max_tasks:
249
234
 
250
- # Check if we got anything
251
- if number_to_delete == 0:
252
- break
235
+ # Get the next chunk
236
+ end, number_to_delete = self._get_next_chunk(collection, start, final_date)
253
237
 
254
- # Tell the outer loop not to sleep between runs
255
- if number_to_delete >= self.expiry_size:
256
- busy_iteration = True
238
+ # Check if we got anything
239
+ if number_to_delete == 0:
240
+ break
257
241
 
258
- # Process the chunk in the threadpool
259
- pool.submit(self.log_errors(self._process_chunk), collection, start, end, final_date, number_to_delete)
242
+ # Process the chunk in the threadpool
243
+ jobs.append(pool.submit(_process_chunk, collection, start, end, final_date, number_to_delete))
260
244
 
261
- # Prepare for next chunk
262
- start = end
263
- iterations += 1
245
+ # Prepare for next chunk
246
+ start = end
264
247
 
265
- # End of expiry transaction
266
- if self.apm_client:
267
- self.apm_client.end_transaction(collection.name, 'deleted')
248
+ # End of expiry transaction
249
+ if self.apm_client:
250
+ self.apm_client.end_transaction(collection.name, 'deleted')
268
251
 
269
- return busy_iteration
252
+ return start, number_to_delete < self.expiry_size
270
253
 
271
254
  def _get_final_date(self):
272
255
  now = now_as_iso()
@@ -287,17 +270,65 @@ class ExpiryManager(ServerBase):
287
270
  return final_date, rows['total']
288
271
 
289
272
  def try_run(self):
290
- while self.running:
291
- try:
292
- busy_iteration = False
273
+ pool = ThreadPoolExecutor(self.config.core.expiry.workers)
274
+ main_threads = []
275
+
276
+ # Launch a thread that will expire submissions that have been deleted
277
+ thread = threading.Thread(target=self.clean_deleted_submissions, args=[pool])
278
+ thread.start()
279
+ main_threads.append(thread)
280
+
281
+ # Launch threads that expire data from each collection of data
282
+ for collection in self.expirable_collections:
283
+ thread = threading.Thread(target=self.run_collection, args=[pool, collection])
284
+ thread.start()
285
+ main_threads.append(thread)
293
286
 
294
- with ThreadPoolExecutor(self.config.core.expiry.workers) as pool:
295
- try:
296
- busy_iteration = self.run_expiry_once(pool)
297
- except Exception as e:
298
- self.log.exception(str(e))
287
+ # Wait for all the threads to exit
288
+ for thread in main_threads:
289
+ thread.join()
299
290
 
300
- if not busy_iteration:
291
+ def clean_deleted_submissions(self, pool):
292
+ """Delete canceled submissions"""
293
+ while self.running:
294
+ # Make sure we're not dedicating more then a quarter of the pool to this operation because it is costly
295
+ for submission in self.datastore.submission.search(
296
+ "to_be_deleted:true", fl="sid", rows=max(1, int(self.config.core.expiry.workers / 4)))['items']:
297
+ if submission.sid not in self.current_submission_cleanup:
298
+ self.current_submission_cleanup.add(submission.sid)
299
+ pool.submit(self.log_errors(self._cleanup_canceled_submission), submission.sid)
300
+ self.sleep_with_heartbeat(self.config.core.expiry.sleep_time)
301
+
302
+ def run_collection(self, pool: concurrent.futures.ThreadPoolExecutor, collection):
303
+ """Feed batches of jobs to delete to the thread pool for the given collection."""
304
+ start = "*"
305
+ jobs: list[concurrent.futures.Future] = []
306
+
307
+ while self.running:
308
+ try:
309
+ try:
310
+ # Fill up 'jobs' with tasks that have been sent to the thread pool
311
+ # 'jobs' may already have items in it, but 'start' makes sure the new
312
+ # task added starts where the last finshed
313
+ start, final_job_small = self.feed_expiry_jobs(collection, start, jobs, pool)
314
+
315
+ # Wait until some of our work finishes and there is room in the queue for more work
316
+ finished, _jobs = concurrent.futures.wait(jobs, return_when=concurrent.futures.FIRST_COMPLETED)
317
+ jobs = list(_jobs)
318
+ for job in finished:
319
+ job.result()
320
+
321
+ # If we have expired all the data reset the start pointer
322
+ if len(jobs) == 0:
323
+ start = '*'
324
+
325
+ except Exception as e:
326
+ self.log.exception(str(e))
327
+ continue
328
+
329
+ # IF the most recent job added to the jobs list is short then
330
+ # all the data is currently queued up to delete and we can sleep
331
+ if final_job_small:
301
332
  self.sleep_with_heartbeat(self.config.core.expiry.sleep_time)
302
333
 
303
334
  except BrokenProcessPool:
@@ -367,8 +367,13 @@ class KubernetesUpdateInterface:
367
367
  status = self.batch_api.read_namespaced_job(namespace=self.namespace, name=name,
368
368
  _request_timeout=API_TIMEOUT).status
369
369
  # Monitor container's waiting status state
370
- pod_waiting_state = self.api.read_namespaced_pod(name=pod_name, namespace=self.namespace,
371
- _request_timeout=API_TIMEOUT).status.container_statuses[0].state.waiting
370
+ pod_waiting_state = False
371
+ pod_container_statuses = self.api.read_namespaced_pod(name=pod_name, namespace=self.namespace,
372
+ _request_timeout=API_TIMEOUT).status.container_statuses
373
+
374
+ if pod_container_statuses:
375
+ pod_waiting_state = pod_container_statuses[0].state.waiting
376
+
372
377
  # Check to see if we've encountered an issue before the container starts
373
378
  if pod_waiting_state and pod_waiting_state.reason == "ImagePullBackOff":
374
379
  # Delete job and raise exception
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.5.1.dev135
3
+ Version: 4.5.1.dev137
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -63,7 +63,8 @@ def test_expire_all(ds_expiry):
63
63
  expiry.running = True
64
64
  expiry.counter = FakeCounter()
65
65
  with concurrent.futures.ThreadPoolExecutor(5) as pool:
66
- expiry.run_expiry_once(pool)
66
+ for collection in expiry.expirable_collections:
67
+ expiry.feed_expiry_jobs(collection=collection, pool=pool, start='*', jobs=[])
67
68
 
68
69
  for k, v in expiry_collections_len.items():
69
70
  assert v == expiry.counter.get(k)
@@ -1 +0,0 @@
1
- 4.5.1.dev135