ciocore 5.1.1__py2.py3-none-any.whl → 10.0.0b3__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. ciocore/VERSION +1 -1
  2. ciocore/__init__.py +23 -1
  3. ciocore/api_client.py +655 -160
  4. ciocore/auth/__init__.py +5 -3
  5. ciocore/cli.py +501 -0
  6. ciocore/common.py +15 -13
  7. ciocore/conductor_submit.py +77 -60
  8. ciocore/config.py +127 -13
  9. ciocore/data.py +162 -77
  10. ciocore/docsite/404.html +746 -0
  11. ciocore/docsite/apidoc/api_client/index.html +3605 -0
  12. ciocore/docsite/apidoc/apidoc/index.html +909 -0
  13. ciocore/docsite/apidoc/config/index.html +1652 -0
  14. ciocore/docsite/apidoc/data/index.html +1553 -0
  15. ciocore/docsite/apidoc/hardware_set/index.html +2460 -0
  16. ciocore/docsite/apidoc/package_environment/index.html +1507 -0
  17. ciocore/docsite/apidoc/package_tree/index.html +2386 -0
  18. ciocore/docsite/assets/_mkdocstrings.css +16 -0
  19. ciocore/docsite/assets/images/favicon.png +0 -0
  20. ciocore/docsite/assets/javascripts/bundle.471ce7a9.min.js +29 -0
  21. ciocore/docsite/assets/javascripts/bundle.471ce7a9.min.js.map +7 -0
  22. ciocore/docsite/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
  23. ciocore/docsite/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
  24. ciocore/docsite/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
  25. ciocore/docsite/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
  26. ciocore/docsite/assets/javascripts/lunr/min/lunr.el.min.js +1 -0
  27. ciocore/docsite/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
  28. ciocore/docsite/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
  29. ciocore/docsite/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
  30. ciocore/docsite/assets/javascripts/lunr/min/lunr.he.min.js +1 -0
  31. ciocore/docsite/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
  32. ciocore/docsite/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
  33. ciocore/docsite/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
  34. ciocore/docsite/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
  35. ciocore/docsite/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
  36. ciocore/docsite/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
  37. ciocore/docsite/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
  38. ciocore/docsite/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
  39. ciocore/docsite/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
  40. ciocore/docsite/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
  41. ciocore/docsite/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
  42. ciocore/docsite/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
  43. ciocore/docsite/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
  44. ciocore/docsite/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
  45. ciocore/docsite/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
  46. ciocore/docsite/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
  47. ciocore/docsite/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
  48. ciocore/docsite/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
  49. ciocore/docsite/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
  50. ciocore/docsite/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
  51. ciocore/docsite/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
  52. ciocore/docsite/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
  53. ciocore/docsite/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
  54. ciocore/docsite/assets/javascripts/lunr/tinyseg.js +206 -0
  55. ciocore/docsite/assets/javascripts/lunr/wordcut.js +6708 -0
  56. ciocore/docsite/assets/javascripts/workers/search.b8dbb3d2.min.js +42 -0
  57. ciocore/docsite/assets/javascripts/workers/search.b8dbb3d2.min.js.map +7 -0
  58. ciocore/docsite/assets/stylesheets/main.3cba04c6.min.css +1 -0
  59. ciocore/docsite/assets/stylesheets/main.3cba04c6.min.css.map +1 -0
  60. ciocore/docsite/assets/stylesheets/palette.06af60db.min.css +1 -0
  61. ciocore/docsite/assets/stylesheets/palette.06af60db.min.css.map +1 -0
  62. ciocore/docsite/cmdline/docs/index.html +871 -0
  63. ciocore/docsite/cmdline/downloader/index.html +934 -0
  64. ciocore/docsite/cmdline/packages/index.html +878 -0
  65. ciocore/docsite/cmdline/uploader/index.html +995 -0
  66. ciocore/docsite/how-to-guides/index.html +869 -0
  67. ciocore/docsite/index.html +895 -0
  68. ciocore/docsite/logo.png +0 -0
  69. ciocore/docsite/objects.inv +0 -0
  70. ciocore/docsite/search/search_index.json +1 -0
  71. ciocore/docsite/sitemap.xml +3 -0
  72. ciocore/docsite/sitemap.xml.gz +0 -0
  73. ciocore/docsite/stylesheets/extra.css +26 -0
  74. ciocore/docsite/stylesheets/tables.css +167 -0
  75. ciocore/downloader/base_downloader.py +644 -0
  76. ciocore/downloader/download_runner_base.py +47 -0
  77. ciocore/downloader/job_downloader.py +119 -0
  78. ciocore/{downloader.py → downloader/legacy_downloader.py} +12 -9
  79. ciocore/downloader/log.py +73 -0
  80. ciocore/downloader/logging_download_runner.py +87 -0
  81. ciocore/downloader/perpetual_downloader.py +63 -0
  82. ciocore/downloader/registry.py +97 -0
  83. ciocore/downloader/reporter.py +135 -0
  84. ciocore/exceptions.py +8 -2
  85. ciocore/file_utils.py +51 -50
  86. ciocore/hardware_set.py +449 -0
  87. ciocore/loggeria.py +89 -20
  88. ciocore/package_environment.py +110 -48
  89. ciocore/package_query.py +182 -0
  90. ciocore/package_tree.py +319 -258
  91. ciocore/retry.py +0 -0
  92. ciocore/uploader/_uploader.py +547 -364
  93. ciocore/uploader/thread_queue_job.py +176 -0
  94. ciocore/uploader/upload_stats/__init__.py +3 -4
  95. ciocore/uploader/upload_stats/stats_formats.py +10 -4
  96. ciocore/validator.py +34 -2
  97. ciocore/worker.py +174 -151
  98. ciocore-10.0.0b3.dist-info/METADATA +928 -0
  99. ciocore-10.0.0b3.dist-info/RECORD +128 -0
  100. {ciocore-5.1.1.dist-info → ciocore-10.0.0b3.dist-info}/WHEEL +1 -1
  101. ciocore-10.0.0b3.dist-info/entry_points.txt +2 -0
  102. tests/instance_type_fixtures.py +175 -0
  103. tests/package_fixtures.py +205 -0
  104. tests/test_api_client.py +297 -12
  105. tests/test_base_downloader.py +104 -0
  106. tests/test_cli.py +149 -0
  107. tests/test_common.py +1 -7
  108. tests/test_config.py +40 -18
  109. tests/test_data.py +162 -173
  110. tests/test_downloader.py +118 -0
  111. tests/test_hardware_set.py +139 -0
  112. tests/test_job_downloader.py +213 -0
  113. tests/test_package_query.py +38 -0
  114. tests/test_package_tree.py +91 -291
  115. tests/test_submit.py +44 -18
  116. tests/test_uploader.py +1 -4
  117. ciocore/__about__.py +0 -10
  118. ciocore/cli/conductor.py +0 -191
  119. ciocore/compat.py +0 -15
  120. ciocore-5.1.1.data/scripts/conductor +0 -19
  121. ciocore-5.1.1.data/scripts/conductor.bat +0 -13
  122. ciocore-5.1.1.dist-info/METADATA +0 -408
  123. ciocore-5.1.1.dist-info/RECORD +0 -47
  124. tests/mocks/api_client_mock.py +0 -51
  125. /ciocore/{cli → downloader}/__init__.py +0 -0
  126. {ciocore-5.1.1.dist-info → ciocore-10.0.0b3.dist-info}/top_level.txt +0 -0
ciocore/downloader/job_downloader.py ADDED
@@ -0,0 +1,119 @@
+ """
+ Job Downloader
+
+ Download output files from a Conductor job.
+
+ ENDPOINT
+ The outputs of a Conductor job are described in the response from the /jobs/{job_id}/downloads endpoint. The response is a list of tasks. Each task has a list of files. Each file (dict) has a signed URL plus other fields such as the md5 and fields describing the original path, size, and more.
+
+ PAGING
+ A job may contain thousands of tasks, each with several files. To reduce the time it takes to get started, this downloader requests download information in batches, or pages. The number of tasks in each page is controlled by the page_size parameter. As soon as the first page of tasks is retrieved, we start downloading its files in threads. While those files are downloading, we fetch the next page of tasks. When the current page of tasks is exhausted, we start downloading the files in the next page of tasks. We continue until all tasks have been downloaded.
+
+ The get_some_tasks method is responsible for fetching the next page of tasks. It is called by the base class. It returns a list of tasks and a locator. In this implementation, the locator is a dictionary containing the index of the current job and the cursor for the next page of tasks for that job. A new locator is returned to the calling method so that it can be passed back to this method the next time it is called. When the calling method receives a falsy value for the locator, it knows that there are no more tasks to download.
+
+ See the documentation for the base downloader for more information about the locator and other behavior.
+ """
+
+ import json
+ import logging
+ from cioseq.sequence import Sequence
+ from ciocore.downloader.base_downloader import BaseDownloader
+ from ciocore.downloader.log import LOGGER_NAME
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class JobDownloader(BaseDownloader):
+     CLIENT_NAME = "JobDownloader"
+
+     def __init__(self, jobs, *args, **kwargs):
+         """Initialize the downloader."""
+         super().__init__(*args, **kwargs)
+         logger.debug("Initializing paged job downloader")
+         self.jobs = flatten(jobs)
+         self.location = None  # location is not used in this downloader
+
+     def get_some_tasks(self, locator):
+         """Fetch the next page of tasks from the server.
+
+         locator: a dictionary containing the index of the current job and the cursor for the next page of tasks for that job.
+
+         What is a locator? It is the information needed to request the next page of tasks: the index of the current job and the cursor for the next page. It is provided to this method as a parameter, and when we're done, a new locator is returned to the run loop. The run loop passes it back to us the next time this method is called.
+
+         On the first call, the provided locator is None. In that case, we start with the first job, and no cursor.
+
+         We return the locator to the run loop in the base class, along with any tasks to be downloaded. If we return a falsy locator, the run loop is exited, since it means we downloaded everything OR there was an error fetching tasks.
+
+         If we got to the end of the current job, we increment the job index and reset the cursor to None. The next time this method is called, we'll start with the next job.
+
+         If we got a next_cursor from the request, we return it in the locator along with the current job index. This is what we'll be given on the next call.
+         """
+
+         if not locator:
+             locator = {}
+
+         job_index = locator.get("job_index", 0)
+         cursor = locator.get("cursor", None)
+         if job_index >= len(self.jobs):
+             # return no tasks and no locator. Ends the download.
+             return [], None
+
+         # we have a job to download
+         job_info = self.jobs[job_index]
+         job_id = job_info["job_id"]
+         task_ids = job_info["task_ids"]
+         url = f"/jobs/{job_id}/downloads"
+         data = json.dumps({"tids": task_ids})
+         params = {"limit": self.page_size, "start_cursor": cursor}
+         try:
+             response, code = self.client.make_request(
+                 url, verb="POST", params=params, data=data, use_api_key=True
+             )
+             if code != 201:
+                 # we have an error. Return null locator to end the download
+                 raise Exception(f"Code: {code}")
+         except Exception as exc:
+             logger.error("Error fetching download info for job ID: %s : %s : %s", job_id, url, exc)
+             return [], None
+         page = json.loads(response)
+         tasks = page.get("downloads", [])
+
+         tasks = self.filter(tasks)
+
+         next_cursor = page.get("next_cursor")
+
+         if not next_cursor:
+             # we're done with this job
+             job_index += 1
+
+         return tasks, {"job_index": job_index, "cursor": next_cursor}
+
+
+ def flatten(job_specs):
+     """Create a list of job objects with keys: job_id and task_ids.
+
+     See tests/test_downloader.py for examples.
+
+     Example input: ["1234", "1235:12-15"]
+
+     Example result:
+     [
+         {"job_id": "01234", "task_ids": None},
+         {"job_id": "01235", "task_ids": ["012", "013", "014", "015"]}
+     ]
+     """
+     result = []
+     for job_spec in job_specs:
+         if ":" in job_spec:
+             job_id, range_spec = job_spec.split(":")
+             try:
+                 seq = Sequence.create(range_spec)
+                 task_ids = seq.expand("###")
+             except (ValueError, TypeError):
+                 task_ids = None
+         else:
+             job_id, task_ids = job_spec, None
+             task_ids = None
+         result.append({"job_id": job_id.zfill(5), "task_ids": task_ids})
+     return result
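To make the job-spec format and the paging flow above concrete, here is a minimal usage sketch. It assumes, per the module docstring, that page_size is accepted through the BaseDownloader constructor and that the base class exposes a run() loop that repeatedly calls get_some_tasks; neither is shown in this diff.

    from ciocore.downloader.job_downloader import JobDownloader, flatten

    # flatten() turns job-spec strings into job/task dicts:
    #   "1234"       -> {"job_id": "01234", "task_ids": None}          (whole job)
    #   "1235:12-15" -> {"job_id": "01235", "task_ids": ["012", ...]}  (tasks 12-15 only)
    specs = ["1234", "1235:12-15"]

    # page_size kwarg and run() are assumptions about BaseDownloader, not shown here.
    downloader = JobDownloader(specs, page_size=50)
    downloader.run()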
ciocore/{downloader.py → downloader/legacy_downloader.py} RENAMED
@@ -393,7 +393,7 @@ class Downloader(object):
      def start_reporter_thread(self, download_data):
          reporter_thread_name = "ReporterThread"
          current_thread_name = threading.current_thread().name
-         thread_number_match = re.match("Thread-(\d+)", current_thread_name)
+         thread_number_match = re.match(r"Thread-(\d+)", current_thread_name)
          if thread_number_match:
              reporter_thread_name += "-%s" % thread_number_match.groups()[0]
 
@@ -493,7 +493,6 @@ class Downloader(object):
          downloads = _get_job_download(endpoint, self.api_client, job_id, tid)
          if downloads:
              for task_download in downloads.get("downloads", []):
-                 print("putting in queue: %s" % task_download)
                  self.pending_queue.put(task_download, block=True)
 
      @common.dec_catch_exception(raise_=True)
@@ -1230,9 +1229,14 @@ def run_downloader(args):
 
      # Set up logging
      log_level_name = args.get("log_level")
-     log_level = loggeria.LEVEL_MAP.get(log_level_name)
-     log_dirpath = args.get("log_dir")
-     set_logging(log_level, log_dirpath)
+
+     loggeria.setup_conductor_logging(
+         logger_level=loggeria.LEVEL_MAP.get(log_level_name),
+         log_dirpath=args.get("log_dir"),
+         log_filename="conductor_downloader.log",
+         console_formatter=LOG_FORMATTER,
+         file_formatter=LOG_FORMATTER,
+     )
 
      api_client.ApiClient.register_client(client_name = Downloader.CLIENT_NAME, client_version=ciocore.__version__)
 
@@ -1256,14 +1260,13 @@
 
 
  def set_logging(level=None, log_dirpath=None):
-     log_filepath = None
-     if log_dirpath:
-         log_filepath = os.path.join(log_dirpath, "conductor_dl_log")
+
      loggeria.setup_conductor_logging(
          logger_level=level,
+         log_dirpath=log_dirpath,
+         log_filename="conductor_downloader.log",
          console_formatter=LOG_FORMATTER,
          file_formatter=LOG_FORMATTER,
-         log_filepath=log_filepath,
      )
 
 
ciocore/downloader/log.py ADDED
@@ -0,0 +1,73 @@
+ import logging
+ import colorlog
+ import sys
+ LOGGER_NAME = "cw.download"
+
+ LOG_COLORS = {
+     'DEBUG': 'purple',
+     'INFO': 'blue',
+     'WARNING': 'yellow',
+     'ERROR': 'red',
+     'CRITICAL': 'red,bg_white',
+ }
+
+ DEBUG_FORMATTER = colorlog.ColoredFormatter(
+     "%(log_color)s%(asctime)s %(name)s %(levelname)8s %(filename)s:%(lineno)d %(threadName)s> %(message)s",
+     datefmt="%Y-%m-%d %H:%M:%S",
+     log_colors=LOG_COLORS,
+ )
+
+ INFO_FORMATTER = colorlog.ColoredFormatter(
+     '%(log_color)s%(levelname)s:%(name)s> %(message)s',
+     log_colors=LOG_COLORS,
+ )
+
+ LEVEL_MAP = {
+     "DEBUG": logging.DEBUG,
+     "INFO": logging.INFO,
+     "WARNING": logging.WARNING,
+     "ERROR": logging.ERROR,
+     "CRITICAL": logging.CRITICAL,
+     "NOTSET": logging.NOTSET,
+ }
+
+ class GracefulLogger(logging.Logger):
+     def setLevel(self, level):
+         super().setLevel(level)
+
+         # Define formatters based on level
+         formatter = DEBUG_FORMATTER if level == logging.DEBUG else INFO_FORMATTER
+         for handler in self.handlers:
+             handler.setFormatter(formatter)
+
+
+ class GracefulStreamHandler(colorlog.StreamHandler):
+     """
+     A custom StreamHandler that suppresses BrokenPipeError.
+
+     This handler extends the standard logging.StreamHandler to gracefully handle
+     BrokenPipeErrors that can occur when output streams are closed prematurely.
+     It overrides the emit method to catch and ignore BrokenPipeError, allowing
+     the program to continue without interruption.
+     """
+
+     def emit(self, record):
+         """
+         Overrides the StreamHandler.emit method to gracefully handle BrokenPipeError.
+
+         Args:
+             record (logging.LogRecord): The log record to be emitted.
+         """
+         try:
+             super().emit(record)
+         except BrokenPipeError:
+             pass
+
+ logging.setLoggerClass(GracefulLogger)
+ logger = colorlog.getLogger(LOGGER_NAME)
+ logger.propagate = False
+
+ if not any(isinstance(handler, GracefulStreamHandler) for handler in logger.handlers):
+     stream_handler = GracefulStreamHandler(sys.stdout)
+     logger.addHandler(stream_handler)
+
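The module above configures a shared "cw.download" logger at import time, so the other downloader modules only look it up by name. A small sketch of how it is consumed (the LEVEL_MAP lookup mirrors how the legacy downloader maps the log-level argument):

    import logging
    from ciocore.downloader.log import LOGGER_NAME, LEVEL_MAP

    logger = logging.getLogger(LOGGER_NAME)   # the GracefulLogger configured above
    logger.setLevel(LEVEL_MAP["DEBUG"])       # setLevel also swaps in DEBUG_FORMATTER
    logger.debug("timestamped, thread-aware output")
    logger.setLevel(LEVEL_MAP["INFO"])        # back to the compact INFO_FORMATTER
    logger.info("compact output")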
ciocore/downloader/logging_download_runner.py ADDED
@@ -0,0 +1,87 @@
+ """
+ Logging Download Runner
+
+ This module contains the LoggingDownloadRunner class.
+
+ The LoggingDownloadRunner is a derived class of DownloadRunnerBase.
+
+ It registers callbacks that are called when certain events occur during the download.
+ It uses these callbacks to display progress via the logging module.
+ """
+
+ import logging
+ from ciocore.downloader.download_runner_base import DownloadRunnerBase
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class LoggingDownloadRunner(DownloadRunnerBase):
+     CLIENT_NAME = "LoggingDownloadRunner"
+
+     def __init__(self, jobids=None, location=None, **kwargs):
+
+         super().__init__(jobids, location, **kwargs)
+
+         logger.debug("Assigning callbacks")
+         self.downloader.on("start", self.on_start)
+         self.downloader.on("start_task", self.on_start_task)
+         self.downloader.on("progress", self.on_progress)
+         self.downloader.on("file_done", self.on_file_done)
+         self.downloader.on("task_done", self.on_task_done)
+         self.downloader.on("done", self.on_done)
+
+     def on_start(self, evt):
+         logger.info("Starting download")
+
+     def on_start_task(self, evt):
+         logger.info("Starting task %s:%s", evt["job_id"], evt["task_id"])
+
+     def on_progress(self, evt):
+         percent = 0
+         if evt["size"] and evt["progress_bytes"]:
+             percent = round(evt["progress_bytes"] / evt["size"] * 100, 2)
+         logger.info("Progress: %s %.2f%%", evt["filepath"], percent)
+
+     def on_file_done(self, evt):
+         if evt["error"]:
+             logger.warning(
+                 "File done with error: %s:%s:%s %s",
+                 evt["job_id"],
+                 evt["task_id"],
+                 evt["filepath"],
+                 evt["error"],
+             )
+         else:
+             logger.info(
+                 "File done %s:%s:%s", evt["job_id"], evt["task_id"], evt["filepath"]
+             )
+
+     def on_task_done(self, evt):
+         if evt["preexisting"]:
+             logger.info(
+                 "Task already existed locally %s:%s", evt["job_id"], evt["task_id"]
+             )
+         else:
+             logger.info("Task done %s:%s", evt["job_id"], evt["task_id"])
+
+     def on_done(self, evt):
+         """
+         When the job is done, check to see if any tasks were not completed.
+         """
+         logger.info("Download finished")
+         empty = True
+         for job_id, task_id, task in evt["registry"].each():
+             if task["completed_files"] < task["filecount"]:
+                 logger.warning(
+                     "Task not fully downloaded %s:%s: %s/%s files.",
+                     job_id,
+                     task_id,
+                     task["completed_files"],
+                     task["filecount"],
+                 )
+                 empty = False
+
+         if empty:
+             logger.info("No failed tasks.")
ciocore/downloader/perpetual_downloader.py ADDED
@@ -0,0 +1,63 @@
+ """
+ Perpetual Downloader
+
+ Not yet tested
+ """
+ import json
+ import logging
+ import time
+ import sys
+ from ciocore.downloader.base_downloader import BaseDownloader
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+ def spinning_cursor():
+     while True:
+         for cursor in '|/-\\':
+             yield cursor
+
+ class PerpetualDownloader(BaseDownloader):
+     CLIENT_NAME = "PerpetualDownloader"
+     POLL_INTERVAL = 15
+     URL = "/downloads/next"
+     spinner = spinning_cursor()
+
+     def __init__(self, location, *args, **kwargs):
+         """Initialize the downloader."""
+         super().__init__(*args, **kwargs)
+         self.location = location
+         logger.debug("Initializing perpetual downloader")
+
+     def get_some_tasks(self, _):
+         """Fetch the next batch of tasks from the server.
+
+         Always set the return locator to True to signal that we should keep running this function.
+
+         This function never throws an error. If something goes wrong, it just sets the task array to be empty.
+
+         If the tasks array is empty for any reason (error, filter, no tasks ready, etc.), it waits for POLL_INTERVAL seconds before trying again.
+         """
+         logger.debug("Fetching the next page of tasks")
+         params = {"count": self.page_size, "location": self.location}
+         tasks = []
+         try:
+             response, code = self.client.make_request(
+                 self.URL, params=params, use_api_key=True
+             )
+             if code <= 201:
+                 tasks = json.loads(response).get("data", [])
+                 tasks = self.filter(tasks)
+         except Exception as exc:
+             logger.error("Error fetching download info from: %s : %s", self.URL, exc)
+
+         if not tasks:
+             for _ in range(self.POLL_INTERVAL):
+                 spin_char = next(self.spinner)
+                 line = f"Listening for files to download... ({spin_char})"
+                 sys.stdout.write(line)
+                 sys.stdout.flush()
+                 sys.stdout.write('\b' * len(line))
+                 time.sleep(1)
+
+         return tasks, True
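Where JobDownloader pulls a fixed set of jobs, PerpetualDownloader polls /downloads/next for anything submitted with a given location tag. A usage sketch; the location value is made up, and run() plus any extra BaseDownloader kwargs are assumptions:

    from ciocore.downloader.perpetual_downloader import PerpetualDownloader

    # Keeps polling every POLL_INTERVAL seconds when nothing is ready.
    downloader = PerpetualDownloader("workstation-render-01")   # hypothetical location tag
    downloader.run()   # assumed BaseDownloader entry point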
ciocore/downloader/registry.py ADDED
@@ -0,0 +1,97 @@
+ import copy
+
+ import threading
+ import logging
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class Registry(object):
+
+     def __init__(self):
+         self.data = {}
+         self.lock = threading.Lock()
+
+     def get_copy(self):
+         """
+         Get a copy of the registry.
+
+         Use a lock to ensure the registry is not modified while we're copying it.
+         """
+         with self.lock:
+             return copy.deepcopy(self.data)
+
+     def each(self):
+         """
+         Iterate over all tasks in the registry.
+
+         Use a lock to ensure the registry is not modified while we're iterating over it.
+         """
+         with self.lock:
+             for job_id, job in self.data.items():
+                 for task_id, task in job.items():
+                     yield job_id, task_id, task
+
+     def register_task(self, task_info):
+         """
+         Register a task as active.
+
+         The registry is accessed in a thread-safe manner using a lock.
+         """
+         job_id = task_info["job_id"]
+         task_id = task_info["task_id"]
+         with self.lock:
+             if job_id not in self.data:
+                 self.data[job_id] = {}
+
+             if task_id in self.data[job_id]:
+                 logger.debug(
+                     "Task %s for job %s is already in registry. Skipping.",
+                     task_id,
+                     job_id,
+                 )
+                 return False
+
+             self.data[job_id][task_id] = {
+                 "download_id": task_info["download_id"],
+                 "filecount": len(task_info["files"]),
+                 "completed_files": 0,
+                 "preexisting_files": 0,
+                 "size": task_info["size"],
+             }
+             return True
+
+     def update_task(self, file_done_event):
+         """
+         Update the registry each time a file is done.
+
+         Access the registry in a thread-safe manner using a lock.
+
+         Steps:
+         1. Get the task from the registry.
+         2. Increment the completed_files count.
+         3. If the file was preexisting, increment the preexisting_files count too.
+         4. If the task is now complete, remove it from the registry.
+         5. Return a copy of the task so that the event dispatcher can let handlers know the task is done.
+         """
+
+         job_id = file_done_event["job_id"]
+         task_id = file_done_event["task_id"]
+         with self.lock:
+             task = self.data.get(job_id, {}).get(task_id)
+             if not task:
+                 return None
+             task["completed_files"] += 1
+             if file_done_event["preexisting"]:
+                 task["preexisting_files"] += 1
+
+             task_copy = task.copy()
+
+             # Only really need ==, but I'm paranoid
+             if task["completed_files"] >= task["filecount"]:
+                 del self.data[job_id][task_id]
+
+             return task_copy
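The registry's lifecycle is: register a task when its download info arrives, then update it once per finished file; when the last file lands, the task is removed and a copy is returned so a task_done event can be emitted. A self-contained sketch with made-up payloads, limited to the keys these methods actually read:

    from ciocore.downloader.registry import Registry

    registry = Registry()

    registry.register_task({
        "job_id": "01234",
        "task_id": "001",
        "download_id": "dl-abc",                          # hypothetical ID
        "files": [{"path": "a.exr"}, {"path": "b.exr"}],  # real payloads carry more fields
        "size": 2048,
    })

    done = registry.update_task(
        {"job_id": "01234", "task_id": "001", "preexisting": False}
    )
    print(done["completed_files"], "of", done["filecount"])   # -> 1 of 2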
ciocore/downloader/reporter.py ADDED
@@ -0,0 +1,135 @@
+ """
+ This module contains the Reporter class.
+
+ It registers callbacks with the provided downloader instance that allow it to report "downloaded" or "pending" status back to the server.
+
+ It is set up in the download_runner_base module. Classes that derive from DownloadRunnerBase, such as LoggingDownloadRunner, do not need to be concerned with the details of the Reporter class.
+ """
+
+ import json
+ import logging
+ from concurrent.futures import ThreadPoolExecutor
+
+ from ciocore import api_client
+ from ciocore.downloader.log import LOGGER_NAME
+
+ STATUS_ENDPOINT = "/downloads/status"
+ STATUS_DOWNLOADED = "downloaded"
+ STATUS_PENDING = "pending"
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+ class Reporter(object):
+
+     def __init__(self, downloader, client=api_client.ApiClient(), num_threads=1):
+
+         self.downloader = downloader
+
+         self.num_threads = num_threads
+         self.client = client
+         self.executor = None
+
+         logger.debug("Assigning reporter callbacks")
+         self.downloader.on("task_done", self.on_task_done)
+         self.downloader.on("done", self.on_done)
+
+     def __enter__(self):
+         self.executor = ThreadPoolExecutor(max_workers=self.num_threads)
+         return self  # Optionally return this reporter
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.executor.shutdown()
+         # Handle exceptions from inside the with block
+         if exc_type:
+             logger.exception("Error running downloader: %s", exc_value)
+             # return False to propagate the exception
+             return False
+
+     def report_task_status(
+         self, download_id, status=STATUS_DOWNLOADED, bytes_in_task=0
+     ):
+         """
+         Make a request to the server to report the status of a task.
+
+         If the user interrupted the download, then we set the task status to pending to be safe.
+         """
+         if self.downloader.interrupt_flag.is_set():
+             status = STATUS_PENDING
+
+         bytes_to_download = 0 if status == STATUS_DOWNLOADED else bytes_in_task
+
+         data = {
+             "download_id": download_id,
+             "status": status,
+             "bytes_downloaded": 0,
+             "bytes_to_download": bytes_to_download,
+         }
+         json_data = json.dumps(data)
+         try:
+             self.client.make_request(STATUS_ENDPOINT, data=json_data, use_api_key=True)
+         except Exception as exc:
+             data["error"] = str(exc)
+         return data
+
+     def on_task_done(self, evt):
+         """
+         Callback to run on a task-done event. Report status back to the server.
+
+         Note, the task may consist entirely of preexisting files. Nevertheless, we report the task as downloaded.
+         """
+
+         future = self.executor.submit(
+             self.report_task_status,
+             evt["download_id"],
+             status=STATUS_DOWNLOADED,
+             bytes_in_task=evt["size"],
+         )
+         future.add_done_callback(
+             lambda f, job_id=evt["job_id"], task_id=evt["task_id"]: log_report_result(
+                 f.result(), job_id, task_id
+             )
+         )
+
+     def on_done(self, evt):
+         """
+         When the job is done, check to see if any tasks were not completed.
+
+         If we find any, then report them back to the server as pending.
+         """
+         logger.debug("Download done. Reporting remaining task statuses to server")
+         for job_id, task_id, task in evt["registry"].each():
+             if task["completed_files"] < task["filecount"]:
+                 future = self.executor.submit(
+                     self.report_task_status,
+                     task["download_id"],
+                     status=STATUS_PENDING,
+                     bytes_in_task=task["size"],
+                 )
+                 future.add_done_callback(
+                     lambda f, job_id=job_id, task_id=task_id: log_report_result(
+                         f.result(), job_id, task_id
+                     )
+                 )
+
+
+ def log_report_result(report_result, job_id, task_id):
+     """Log the report result."""
+     if report_result.get("error"):
+         logger.error(
+             "Error reporting task to server: %s:%s (%s) %s",
+             job_id,
+             task_id,
+             report_result["download_id"],
+             report_result["error"],
+         )
+         return
+     logger.debug(
+         "Reported task to server: %s:%s (%s) %s",
+         job_id,
+         task_id,
+         report_result["download_id"],
+         report_result["status"],
+     )
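Reporter is normally wired up by DownloadRunnerBase, but its context-manager shape implies usage roughly like the following. The downloader's run() and interrupt_flag come from BaseDownloader, which is not shown in this diff, so treat this as a sketch:

    from ciocore.downloader.job_downloader import JobDownloader
    from ciocore.downloader.reporter import Reporter

    downloader = JobDownloader(["1234"])
    with Reporter(downloader, num_threads=2):   # registers task_done/done callbacks
        downloader.run()                        # assumed entry point; statuses are sent
                                                # from the reporter's thread pool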
ciocore/exceptions.py CHANGED
@@ -49,13 +49,19 @@ class InvalidPathException(Exception):
      pass
 
 
- class UploaderMissingFile(Exception):
+ class UploadError(Exception):
+     """
+     An upload failed
+     """
+
+
+ class UploaderMissingFile(UploadError):
      """
      A file is missing
      """
 
 
- class UploaderFileModified(Exception):
+ class UploaderFileModified(UploadError):
      """
      Something wrong with a local file
      """