assemblyline-core 4.4.1.dev223__tar.gz → 4.4.1.dev224__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of assemblyline-core might be problematic. Click here for more details.

Files changed (83)
  1. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/PKG-INFO +1 -1
  2. assemblyline-core-4.4.1.dev224/assemblyline_core/VERSION +1 -0
  3. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/client.py +3 -0
  4. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/dispatcher.py +4 -0
  5. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/ingester.py +62 -2
  6. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/PKG-INFO +1 -1
  7. assemblyline-core-4.4.1.dev223/assemblyline_core/VERSION +0 -1
  8. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/LICENCE.md +0 -0
  9. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/README.md +0 -0
  10. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/__init__.py +0 -0
  11. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/__init__.py +0 -0
  12. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/processing.py +0 -0
  13. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/alerter/run_alerter.py +0 -0
  14. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/archiver/__init__.py +0 -0
  15. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/archiver/run_archiver.py +0 -0
  16. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/__init__.py +0 -0
  17. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/__main__.py +0 -0
  18. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/schedules.py +0 -0
  19. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/dispatching/timeout.py +0 -0
  20. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/expiry/__init__.py +0 -0
  21. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/expiry/run_expiry.py +0 -0
  22. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/__init__.py +0 -0
  23. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/__main__.py +0 -0
  24. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/ingester/constants.py +0 -0
  25. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/__init__.py +0 -0
  26. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/es_metrics.py +0 -0
  27. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/heartbeat_formatter.py +0 -0
  28. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/helper.py +0 -0
  29. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/metrics_server.py +0 -0
  30. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_heartbeat_manager.py +0 -0
  31. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_metrics_aggregator.py +0 -0
  32. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/metrics/run_statistics_aggregator.py +0 -0
  33. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/plumber/__init__.py +0 -0
  34. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/plumber/run_plumber.py +0 -0
  35. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/__init__.py +0 -0
  36. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/client.py +0 -0
  37. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/__init__.py +0 -0
  38. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/run.py +0 -0
  39. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/creator/run_worker.py +0 -0
  40. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/__init__.py +0 -0
  41. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/run.py +0 -0
  42. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/loader/run_worker.py +0 -0
  43. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/replay/replay.py +0 -0
  44. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/safelist_client.py +0 -0
  45. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/__init__.py +0 -0
  46. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/collection.py +0 -0
  47. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/__init__.py +0 -0
  48. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/docker_ctl.py +0 -0
  49. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/interface.py +0 -0
  50. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/controllers/kubernetes_ctl.py +0 -0
  51. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/run_scaler.py +0 -0
  52. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/scaler/scaler_server.py +0 -0
  53. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/server_base.py +0 -0
  54. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/submission_client.py +0 -0
  55. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/tasking_client.py +0 -0
  56. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/__init__.py +0 -0
  57. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/helper.py +0 -0
  58. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/updater/run_updater.py +0 -0
  59. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/__init__.py +0 -0
  60. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/crawler.py +0 -0
  61. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/department_map.py +0 -0
  62. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/safelist.py +0 -0
  63. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/stream_map.py +0 -0
  64. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/vacuum/worker.py +0 -0
  65. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/workflow/__init__.py +0 -0
  66. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core/workflow/run_workflow.py +0 -0
  67. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/SOURCES.txt +0 -0
  68. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/dependency_links.txt +0 -0
  69. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/requires.txt +0 -0
  70. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/assemblyline_core.egg-info/top_level.txt +0 -0
  71. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/setup.cfg +0 -0
  72. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/setup.py +0 -0
  73. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_alerter.py +0 -0
  74. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_dispatcher.py +0 -0
  75. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_expiry.py +0 -0
  76. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_plumber.py +0 -0
  77. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_replay.py +0 -0
  78. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_scaler.py +0 -0
  79. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_scheduler.py +0 -0
  80. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_simulation.py +0 -0
  81. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_vacuum.py +0 -0
  82. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_ingest.py +0 -0
  83. {assemblyline-core-4.4.1.dev223 → assemblyline-core-4.4.1.dev224}/test/test_worker_submit.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.4.1.dev223
3
+ Version: 4.4.1.dev224
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -0,0 +1 @@
1
+ 4.4.1.dev224
@@ -164,6 +164,9 @@ class DispatchClient:
164
164
  dispatcher_id = queue.pop(timeout=5)
165
165
  listed_dispatchers.discard(dispatcher_id)
166
166
 
167
def queued_submissions(self) -> list[dict]:
    """
    Return the current contents of the submission queue.

    The result is whatever ``self.submission_queue.content()`` yields; nothing
    is removed from the queue by this call as far as this method is concerned.
    NOTE(review): callers index each entry as ``entry['submission']['sid']`` --
    confirm the message schema against the queue producer.
    """
    pending = self.submission_queue.content()
    return pending
169
+
167
170
  def outstanding_services(self, sid) -> Optional[dict[str, int]]:
168
171
  """
169
172
  List outstanding services for a given submission and the number of file each
@@ -274,6 +274,10 @@ class Dispatcher(ThreadedCoreBase):
274
274
  def instance_assignment_size(persistent_redis, instance_id):
275
275
  return Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis).length()
276
276
 
277
@staticmethod
def instance_assignment(persistent_redis, instance_id) -> list[str]:
    """
    List the keys of the task-assignment hash belonging to one dispatcher instance.

    The hash is addressed as ``DISPATCH_TASK_ASSIGNMENT + instance_id`` on the
    given persistent redis connection.
    NOTE(review): the ingester compares these keys against submission ids --
    presumably each key is a sid; confirm against the code that fills the hash.
    """
    assignment_table = Hash(DISPATCH_TASK_ASSIGNMENT + instance_id, host=persistent_redis)
    return assignment_table.keys()
280
+
277
281
  @staticmethod
278
282
  def all_queue_lengths(redis, instance_id):
279
283
  return {
@@ -17,8 +17,8 @@ from random import random
17
17
  from typing import Any, Iterable, List, Optional, Tuple
18
18
 
19
19
  import elasticapm
20
- from assemblyline.common.postprocess import ActionWorker
21
20
 
21
+ from assemblyline.common.postprocess import ActionWorker
22
22
  from assemblyline_core.server_base import ThreadedCoreBase
23
23
  from assemblyline.common.metrics import MetricsFactory
24
24
  from assemblyline.common.str_utils import dotdump, safe_str
@@ -41,6 +41,7 @@ from assemblyline.odm.models.submission import SubmissionParams, Submission as D
41
41
  from assemblyline.odm.models.alert import EXTENDED_SCAN_VALUES
42
42
  from assemblyline.odm.messages.submission import Submission as MessageSubmission, SubmissionMessage
43
43
 
44
+ from assemblyline_core.dispatching.dispatcher import Dispatcher
44
45
  from assemblyline_core.submission_client import SubmissionClient
45
46
  from .constants import INGEST_QUEUE_NAME, drop_chance, COMPLETE_QUEUE_NAME
46
47
 
@@ -189,7 +190,8 @@ class Ingester(ThreadedCoreBase):
189
190
  def try_run(self):
190
191
  threads_to_maintain = {
191
192
  'Retries': self.handle_retries,
192
- 'Timeouts': self.handle_timeouts
193
+ 'Timeouts': self.handle_timeouts,
194
+ 'Missing': self.handle_missing,
193
195
  }
194
196
  threads_to_maintain.update({f'Complete_{n}': self.handle_complete for n in range(COMPLETE_THREADS)})
195
197
  threads_to_maintain.update({f'Ingest_{n}': self.handle_ingest for n in range(INGEST_THREADS)})
@@ -483,6 +485,64 @@ class Ingester(ThreadedCoreBase):
483
485
  self.counter.increment_execution_time('cpu_seconds', time.process_time() - cpu_mark)
484
486
  self.counter.increment_execution_time('busy_seconds', time.time() - time_mark)
485
487
 
488
def handle_missing(self) -> None:
    """
    Recover submissions that fell through between ingester and dispatcher.

    Messages get dropped or only partially processed when ingester and dispatcher
    containers scale up and down. This loop checks for submissions that are in two
    invalid states:
     - finished but still listed as being scanned by ingester
       (message probably dropped by ingester)
     - listed by ingester but unknown by dispatcher
       (message could have been dropped on either end)

    Loading all the info needed to do these checks is a bit slow, but doing them
    every 5 or 15 minutes per ingester shouldn't be noteworthy. While these missing
    messages are bound to happen from time to time they should be rare. With that in
    mind, a warning is raised whenever this worker processes something so that if a
    constant stream of items are falling through and getting processed here it might
    stand out.

    The loop runs until ``self.sleep`` returns a falsy value.
    """
    # Submission ids that were in an unknown state at the end of the previous pass
    last_round: set[str] = set()

    # Come back after 5 minutes when items were deferred, otherwise after 15 minutes
    while self.sleep(300 if last_round else 900):
        # Get the current set of outstanding tasks. This is read BEFORE the dispatcher
        # state so the two snapshots are taken in the same order as they always were.
        outstanding: dict[str, dict] = self.scanning.items()

        # Collect every sid that is waiting in the dispatcher queue
        # or is assigned to a dispatcher instance
        known_sids: set[str] = {
            data['submission']['sid']
            for data in self.submit_client.dispatcher.queued_submissions()
        }
        for dis in Dispatcher.all_instances(self.redis_persist):
            known_sids.update(Dispatcher.instance_assignment(self.redis_persist, dis))

        unprocessed: set[str] = set()
        for data in outstanding.values():
            # Skip outstanding tasks currently assigned or in queue
            if data["submission"]["sid"] in known_sids:
                continue

            task = IngestTask(data)
            sid = task.submission.sid

            # Check if its already complete in the database
            from_db = self.datastore.submission.get_if_exists(sid)
            if from_db and from_db.state == "completed":
                # %-style placeholder: logging merges args with `msg % args`,
                # a '{}' placeholder would make the record fail to format.
                self.log.warning("Completing a hanging finished submission [%s]", sid)
                self.completed(from_db)

            # Check for items that have been in an unknown state since the last round
            # and put it back in processing
            elif sid in last_round:
                self.log.warning("Recovering a submission dispatcher hasn't processed [%s]", sid)
                self.submit(task)

            # Otherwise defer looking at this until next iteration
            else:
                unprocessed.add(sid)

        # store items for next round
        last_round = unprocessed
545
+
486
546
  def get_groups_from_user(self, username: str) -> List[str]:
487
547
  # Reset the group cache at the top of each hour
488
548
  if time.time()//HOUR_IN_SECONDS > self._user_groups_reset:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: assemblyline-core
3
- Version: 4.4.1.dev223
3
+ Version: 4.4.1.dev224
4
4
  Summary: Assemblyline 4 - Core components
5
5
  Home-page: https://github.com/CybercentreCanada/assemblyline-core/
6
6
  Author: CCCS Assemblyline development team
@@ -1 +0,0 @@
1
- 4.4.1.dev223