ciocore 7.0.2b5__py2.py3-none-any.whl → 8.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (111)
  1. ciocore/VERSION +1 -1
  2. ciocore/__init__.py +23 -1
  3. ciocore/api_client.py +422 -156
  4. ciocore/cli.py +503 -0
  5. ciocore/common.py +10 -1
  6. ciocore/config.py +86 -53
  7. ciocore/data.py +20 -73
  8. ciocore/docsite/404.html +723 -0
  9. ciocore/docsite/apidoc/api_client/index.html +3203 -0
  10. ciocore/docsite/apidoc/apidoc/index.html +868 -0
  11. ciocore/docsite/apidoc/config/index.html +1591 -0
  12. ciocore/docsite/apidoc/data/index.html +1480 -0
  13. ciocore/docsite/apidoc/hardware_set/index.html +2367 -0
  14. ciocore/docsite/apidoc/package_environment/index.html +1450 -0
  15. ciocore/docsite/apidoc/package_tree/index.html +2310 -0
  16. ciocore/docsite/assets/_mkdocstrings.css +16 -0
  17. ciocore/docsite/assets/images/favicon.png +0 -0
  18. ciocore/docsite/assets/javascripts/bundle.4e31edb1.min.js +29 -0
  19. ciocore/docsite/assets/javascripts/bundle.4e31edb1.min.js.map +8 -0
  20. ciocore/docsite/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
  21. ciocore/docsite/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
  22. ciocore/docsite/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
  23. ciocore/docsite/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
  24. ciocore/docsite/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
  25. ciocore/docsite/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
  26. ciocore/docsite/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
  27. ciocore/docsite/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
  28. ciocore/docsite/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
  29. ciocore/docsite/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
  30. ciocore/docsite/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
  31. ciocore/docsite/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
  32. ciocore/docsite/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
  33. ciocore/docsite/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
  34. ciocore/docsite/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
  35. ciocore/docsite/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
  36. ciocore/docsite/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
  37. ciocore/docsite/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
  38. ciocore/docsite/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
  39. ciocore/docsite/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
  40. ciocore/docsite/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
  41. ciocore/docsite/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
  42. ciocore/docsite/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
  43. ciocore/docsite/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
  44. ciocore/docsite/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
  45. ciocore/docsite/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
  46. ciocore/docsite/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
  47. ciocore/docsite/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
  48. ciocore/docsite/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
  49. ciocore/docsite/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
  50. ciocore/docsite/assets/javascripts/lunr/tinyseg.js +206 -0
  51. ciocore/docsite/assets/javascripts/lunr/wordcut.js +6708 -0
  52. ciocore/docsite/assets/javascripts/workers/search.dfff1995.min.js +42 -0
  53. ciocore/docsite/assets/javascripts/workers/search.dfff1995.min.js.map +8 -0
  54. ciocore/docsite/assets/stylesheets/main.83068744.min.css +1 -0
  55. ciocore/docsite/assets/stylesheets/main.83068744.min.css.map +1 -0
  56. ciocore/docsite/assets/stylesheets/palette.ecc896b0.min.css +1 -0
  57. ciocore/docsite/assets/stylesheets/palette.ecc896b0.min.css.map +1 -0
  58. ciocore/docsite/cmdline/docs/index.html +834 -0
  59. ciocore/docsite/cmdline/downloader/index.html +897 -0
  60. ciocore/docsite/cmdline/packages/index.html +841 -0
  61. ciocore/docsite/cmdline/uploader/index.html +950 -0
  62. ciocore/docsite/how-to-guides/index.html +831 -0
  63. ciocore/docsite/index.html +853 -0
  64. ciocore/docsite/logo.png +0 -0
  65. ciocore/docsite/objects.inv +0 -0
  66. ciocore/docsite/search/search_index.json +1 -0
  67. ciocore/docsite/sitemap.xml +3 -0
  68. ciocore/docsite/sitemap.xml.gz +0 -0
  69. ciocore/docsite/stylesheets/extra.css +26 -0
  70. ciocore/docsite/stylesheets/tables.css +167 -0
  71. ciocore/downloader/__init__.py +0 -0
  72. ciocore/downloader/base_downloader.py +644 -0
  73. ciocore/downloader/download_runner_base.py +47 -0
  74. ciocore/downloader/job_downloader.py +119 -0
  75. ciocore/{downloader.py → downloader/legacy_downloader.py} +0 -1
  76. ciocore/downloader/log.py +73 -0
  77. ciocore/downloader/logging_download_runner.py +87 -0
  78. ciocore/downloader/perpetual_downloader.py +63 -0
  79. ciocore/downloader/registry.py +97 -0
  80. ciocore/downloader/reporter.py +135 -0
  81. ciocore/file_utils.py +3 -3
  82. ciocore/hardware_set.py +0 -4
  83. ciocore/package_environment.py +67 -75
  84. ciocore/package_query.py +171 -0
  85. ciocore/package_tree.py +300 -377
  86. ciocore/retry.py +0 -0
  87. ciocore/uploader/_uploader.py +205 -152
  88. {ciocore-7.0.2b5.dist-info → ciocore-8.0.0.dist-info}/METADATA +34 -16
  89. ciocore-8.0.0.dist-info/RECORD +127 -0
  90. {ciocore-7.0.2b5.dist-info → ciocore-8.0.0.dist-info}/WHEEL +1 -1
  91. ciocore-8.0.0.dist-info/entry_points.txt +2 -0
  92. tests/extra_env_fixtures.py +57 -0
  93. tests/instance_type_fixtures.py +42 -8
  94. tests/project_fixtures.py +8 -0
  95. tests/test_api_client.py +121 -2
  96. tests/test_base_downloader.py +104 -0
  97. tests/test_cli.py +163 -0
  98. tests/test_common.py +8 -8
  99. tests/test_config.py +23 -9
  100. tests/test_data.py +144 -160
  101. tests/test_downloader.py +118 -0
  102. tests/test_hardware_set.py +69 -20
  103. tests/test_job_downloader.py +213 -0
  104. ciocore/__about__.py +0 -10
  105. ciocore/cli/__init__.py +0 -3
  106. ciocore/cli/conductor.py +0 -210
  107. ciocore-7.0.2b5.data/scripts/conductor +0 -19
  108. ciocore-7.0.2b5.data/scripts/conductor.bat +0 -13
  109. ciocore-7.0.2b5.dist-info/RECORD +0 -51
  110. tests/mocks/api_client_mock.py +0 -31
  111. {ciocore-7.0.2b5.dist-info → ciocore-8.0.0.dist-info}/top_level.txt +0 -0
ciocore/downloader/job_downloader.py ADDED
@@ -0,0 +1,119 @@
+ """
+ Job Downloader
+
+ Download output files from a Conductor job.
+
+ ENDPOINT
+ The outputs of a Conductor job are described in the response from the /jobs/{job_id}/downloads endpoint. The response is a list of tasks. Each task has a list of files. Each file (dict) has a signed URL plus other fields such as the md5 and fields describing the original path, size, and more.
+
+ PAGING
+ A job may contain thousands of tasks, each with several files. To reduce the time it takes to get started, this downloader makes requests for download information in batches, or pages. The number of tasks in each page is controlled by the page_size parameter. As soon as the first page of tasks is retrieved, we start downloading the files in threads. While the files are downloading, we fetch the next page of tasks. When the current page of tasks is exhausted, we start downloading the files in the next page of tasks. We continue until all tasks have been downloaded.
+
+ The get_some_tasks method is responsible for fetching the next page of tasks. It is called by the base class. It returns a list of tasks, and a locator. For this implementation, the locator is a dictionary containing the index of the current job, and the cursor for the next page of tasks for the job. A new locator is returned to the calling method so that it can be passed back to this method the next time it is called. When the calling method receives a falsey value for the locator, it knows that there are no more tasks to download.
+
+ See the documentation for the base downloader for more information about the locator and other behavior.
+ """
+
+ import json
+ import logging
+ from cioseq.sequence import Sequence
+ from ciocore.downloader.base_downloader import BaseDownloader
+ from ciocore.downloader.log import LOGGER_NAME
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class JobDownloader(BaseDownloader):
+     CLIENT_NAME = "JobDownloader"
+
+     def __init__(self, jobs, *args, **kwargs):
+
+         super().__init__(*args, **kwargs)
+         """Initialize the downloader."""
+         logger.debug("Initializing paged job downloader")
+         self.jobs = flatten(jobs)
+         self.location = None  # location is not used in this downloader
+
+     def get_some_tasks(self, locator):
+         """Fetch the next page of tasks from the server.
+
+         locator: a dictionary containing the index of the current job, and the cursor for the next page of tasks for the job.
+
+
+         # What is a locator? It's the information needed to request the next page of tasks. It consists of the index of the current job, and the cursor for the next page of tasks. It is provided to this method as a parameter, and when we're done, a new locator is returned to the run loop. The run loop passes it back to us the next time it is called.
+
+         # On the first call, the provided locator is None. In that case, we start with the first job, and no cursor.
+
+         # We return the locator to the run loop in the base class, along with any tasks to be downloaded. If we return a falsy locator, the run loop is exited, since it means we downloaded everything OR there was an error fetching tasks.
+
+         # If we got to the end of the current job, we increment the job index and reset the cursor to None. The next time this method is called, we'll start with the next job.
+
+         # If we got a next_cursor from the request, we return it in the locator along with the current job index. This is what we'll be given on the next call.
+         """
+
+         if not locator:
+             locator = {}
+
+         job_index = locator.get("job_index", 0)
+         cursor = locator.get("cursor", None)
+         if job_index >= len(self.jobs):
+             # return no tasks and no locator. Ends the download.
+             return [], None
+
+         # we have a job to download
+         job_info = self.jobs[job_index]
+         job_id = job_info["job_id"]
+         task_ids = job_info["task_ids"]
+         url = f"/jobs/{job_id}/downloads"
+         data = json.dumps({"tids": task_ids})
+         params = {"limit": self.page_size, "start_cursor": cursor}
+         try:
+             response, code = self.client.make_request(
+                 url, verb="POST", params=params, data=data, use_api_key=True
+             )
+             if code != 201:
+                 # we have an error. Return null locator to end the download
+                 raise Exception(f"Code: {code}")
+         except Exception as exc:
+             logger.error("Error fetching download info for job ID: %s : %s : %s", job_id, url, exc)
+             return [], None
+         page = json.loads(response)
+         tasks = page.get("downloads", [])
+
+         tasks = self.filter(tasks)
+
+         next_cursor = page.get("next_cursor")
+
+         if not next_cursor:
+             # we're done with this job
+             job_index += 1
+
+         return tasks, {"job_index": job_index, "cursor": next_cursor}
+
+
+ def flatten(job_specs):
+     """Create a list of job objects with keys: job_id and tasks.
+
+     See tests/test_downloader.py for examples.
+
+     Example input: ["1234", "1235:12-15"]
+
+     Example result:
+     [
+         {"job_id": "01234", "task_ids":None},
+         {"job_id": "01235", "task_ids":["012","013","014","015"]}
+     ]
+     """
+     result = []
+     for job_spec in job_specs:
+         if ":" in job_spec:
+             job_id, range_spec = job_spec.split(":")
+             try:
+                 seq = Sequence.create(range_spec)
+                 task_ids = seq.expand("###")
+             except (ValueError, TypeError):
+                 task_ids = None
+         else:
+             job_id, task_ids = job_spec, None
+             task_ids = None
+         result.append({"job_id": job_id.zfill(5), "task_ids": task_ids})
+     return result
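
The locator contract described in the docstring above can be summarized with a small sketch. This is only an illustration of the calling convention: the real run loop lives in BaseDownloader, which is not part of this diff, and the drive() helper below is hypothetical.

    # Hypothetical sketch of the paging contract; not the actual BaseDownloader run loop.
    from ciocore.downloader.job_downloader import JobDownloader

    def drive(downloader):
        locator = None  # first call: no locator, so we start at job 0 with no cursor
        while True:
            tasks, locator = downloader.get_some_tasks(locator)
            for task in tasks:
                ...  # hand each task's files to the download threads
            if not locator:
                break  # a falsey locator means everything was fetched (or a fetch failed)

    # Job specs are flattened to zero-padded ids, e.g.
    # drive(JobDownloader(["1234", "1235:12-15"]))  # assumes BaseDownloader's defaults suffice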
ciocore/{downloader.py → downloader/legacy_downloader.py} RENAMED
@@ -493,7 +493,6 @@ class Downloader(object):
          downloads = _get_job_download(endpoint, self.api_client, job_id, tid)
          if downloads:
              for task_download in downloads.get("downloads", []):
-                 print("putting in queue: %s" % task_download)
                  self.pending_queue.put(task_download, block=True)
 
      @common.dec_catch_exception(raise_=True)
ciocore/downloader/log.py ADDED
@@ -0,0 +1,73 @@
+ import logging
+ import colorlog
+ import sys
+ LOGGER_NAME = "cw.download"
+
+ LOG_COLORS ={
+     'DEBUG': 'purple',
+     'INFO': 'blue',
+     'WARNING': 'yellow',
+     'ERROR': 'red',
+     'CRITICAL': 'red,bg_white',
+ }
+
+ DEBUG_FORMATTER = colorlog.ColoredFormatter(
+     "%(log_color)s%(asctime)s %(name)s %(levelname)8s %(filename)s:%(lineno)d %(threadName)s> %(message)s",
+     datefmt="%Y-%m-%d %H:%M:%S",
+     log_colors=LOG_COLORS,
+ )
+
+ INFO_FORMATTER = colorlog.ColoredFormatter(
+     '%(log_color)s%(levelname)s:%(name)s> %(message)s',
+     log_colors=LOG_COLORS,
+ )
+
+ LEVEL_MAP = {
+     "DEBUG": logging.DEBUG,
+     "INFO": logging.INFO,
+     "WARNING": logging.WARNING,
+     "ERROR": logging.ERROR,
+     "CRITICAL": logging.CRITICAL,
+     "NOTSET": logging.NOTSET,
+ }
+
+ class GracefulLogger(logging.Logger):
+     def setLevel(self, level):
+         super().setLevel(level)
+
+         # Define formatters based on level
+         formatter = DEBUG_FORMATTER if level == logging.DEBUG else INFO_FORMATTER
+         for handler in self.handlers:
+             handler.setFormatter(formatter)
+
+
+ class GracefulStreamHandler(colorlog.StreamHandler):
+     """
+     A custom StreamHandler that suppresses BrokenPipeError.
+
+     This handler extends the standard logging.StreamHandler to gracefully handle
+     BrokenPipeErrors that can occur when output streams are closed prematurely.
+     It overrides the emit method to catch and ignore BrokenPipeError, allowing
+     the program to continue without interruption.
+     """
+
+     def emit(self, record):
+         """
+         Overrides the StreamHandler.emit method to gracefully handle BrokenPipeError.
+
+         Args:
+             record (logging.LogRecord): The log record to be emitted.
+         """
+         try:
+             super().emit(record)
+         except BrokenPipeError:
+             pass
+
+ logging.setLoggerClass(GracefulLogger)
+ logger = colorlog.getLogger(LOGGER_NAME)
+ logger.propagate = False
+
+ if not any(isinstance(handler, GracefulStreamHandler) for handler in logger.handlers):
+     stream_handler = GracefulStreamHandler(sys.stdout)
+     logger.addHandler(stream_handler)
+
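
Because importing this module installs a GracefulLogger with a GracefulStreamHandler on stdout, callers elsewhere in the package only fetch the logger by name; calling setLevel also swaps the formatter, as GracefulLogger above shows. A minimal usage sketch (assumes colorlog is installed, as the module requires):

    import logging
    from ciocore.downloader.log import LOGGER_NAME  # importing wires up the handler

    logger = logging.getLogger(LOGGER_NAME)  # the GracefulLogger created above
    logger.setLevel(logging.DEBUG)           # switches to the timestamped DEBUG_FORMATTER
    logger.debug("verbose diagnostics")
    logger.setLevel(logging.INFO)            # back to the compact INFO_FORMATTER
    logger.info("normal progress output")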
ciocore/downloader/logging_download_runner.py ADDED
@@ -0,0 +1,87 @@
+ """
+ Logging Download runner
+
+ This module contains the LoggingDownloadRunner class.
+
+ The LoggingDownloadRunner is a derived class of DownloadRunnerBase.
+
+ It registers callbacks that are called when certain events occur during the download.
+ It uses these callbacks to display progress via the logging module.
+
+ """
+
+ import logging
+ from ciocore.downloader.download_runner_base import DownloadRunnerBase
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class LoggingDownloadRunner(DownloadRunnerBase):
+     CLIENT_NAME = "LoggingDownloadRunner"
+
+     def __init__(self, jobids=None, location=None, **kwargs):
+
+         super().__init__(jobids, location, **kwargs)
+
+         logger.debug("Assigning callbacks")
+         self.downloader.on("start", self.on_start)
+         self.downloader.on("start_task", self.on_start_task)
+         self.downloader.on("progress", self.on_progress)
+         self.downloader.on("file_done", self.on_file_done)
+         self.downloader.on("task_done", self.on_task_done)
+         self.downloader.on("done", self.on_done)
+
+     def on_start(self, evt):
+         logger.info("Starting download")
+
+     def on_start_task(self, evt):
+         logger.info("Starting task %s:%s", evt["job_id"], evt["task_id"])
+
+     def on_progress(self, evt):
+         percent = 0
+         if evt["size"] and evt["progress_bytes"]:
+             percent = round(evt["progress_bytes"] / evt["size"] * 100, 2)
+         logger.info("Progress: %s %.2f%%", evt["filepath"], percent)
+
+     def on_file_done(self, evt):
+         if evt["error"]:
+             logger.warning(
+                 "File done with error: %s:%s:%s %s",
+                 evt["job_id"],
+                 evt["task_id"],
+                 evt["filepath"],
+                 evt["error"],
+             )
+         else:
+             logger.info(
+                 "File done %s:%s:%s", evt["job_id"], evt["task_id"], evt["filepath"]
+             )
+
+     def on_task_done(self, evt):
+         if evt["preexisting"]:
+             logger.info(
+                 "Task already existed locally %s:%s", evt["job_id"], evt["task_id"]
+             )
+         else:
+             logger.info("Task done %s:%s", evt["job_id"], evt["task_id"])
+
+     def on_done(self, evt):
+         """
+         When the job is done, check to see if any tasks were not completed.
+         """
+         logger.info("Download finished")
+         empty = True
+         for job_id, task_id, task in evt["registry"].each():
+             if task["completed_files"] < task["filecount"]:
+                 logger.warning(
+                     "Task not fully downloaded %s:%s: %s/%s files.",
+                     job_id,
+                     task_id,
+                     task["completed_files"],
+                     task["filecount"],
+                 )
+                 empty = False
+
+         if empty:
+             logger.info("No failed tasks.")
ciocore/downloader/perpetual_downloader.py ADDED
@@ -0,0 +1,63 @@
+ """
+ Perpetual Downloader
+
+ Not yet tested
+ """
+ import json
+ import logging
+ import time
+ import sys
+ from ciocore.downloader.base_downloader import BaseDownloader
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+ def spinning_cursor():
+     while True:
+         for cursor in '|/-\\':
+             yield cursor
+
+ class PerpetualDownloader(BaseDownloader):
+     CLIENT_NAME = "PerpetualDownloader"
+     POLL_INTERVAL = 15
+     URL = "/downloads/next"
+     spinner = spinning_cursor()
+
+     def __init__(self, location, *args, **kwargs):
+         """Initialize the downloader."""
+         super().__init__(*args, **kwargs)
+         self.location = location
+         logger.debug("Initializing perpetual downloader")
+
+     def get_some_tasks(self, _):
+         """Fetch the next batch of tasks from the server.
+
+         Always set the return locator to True to signal that we should keep running this function.
+
+         This function never throws an error. If something goes wrong, it just sets the task array to be empty.
+
+         If tasks array is empty for any reason (error, filter, no tasks ready, etc.), it waits for POLL_INTERVAL seconds before trying again.
+         """
+         logger.debug("Fetching the next page of tasks")
+         params = {"count": self.page_size, "location": self.location}
+         tasks = []
+         try:
+             response, code = self.client.make_request(
+                 self.URL, params=params, use_api_key=True
+             )
+             if code <= 201:
+                 tasks = json.loads(response).get("data", [])
+                 tasks = self.filter(tasks)
+         except Exception as exc:
+             logger.error("Error fetching download info from: %s : %s", self.URL, exc)
+
+         if not tasks:
+             for _ in range(self.POLL_INTERVAL):
+                 spin_char = next(self.spinner)
+                 line = f"Listening for files to download... ({spin_char})"
+                 sys.stdout.write(line)
+                 sys.stdout.flush()
+                 sys.stdout.write('\b' * len(line))
+                 time.sleep(1)
+
+         return tasks, True
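
Unlike JobDownloader, this class always returns a truthy locator, so the base-class run loop keeps polling /downloads/next indefinitely; tuning happens through the class attributes above. A hedged construction sketch (the subclass and location tag are made up, and the base-class keyword arguments are not visible in this diff):

    from ciocore.downloader.perpetual_downloader import PerpetualDownloader

    class PatientPerpetualDownloader(PerpetualDownloader):
        # Hypothetical subclass: poll once a minute instead of every 15 seconds.
        POLL_INTERVAL = 60

    downloader = PatientPerpetualDownloader("render-node-01")  # made-up location tag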
ciocore/downloader/registry.py ADDED
@@ -0,0 +1,97 @@
+ import copy
+
+ import threading
+ import logging
+ from ciocore.downloader.log import LOGGER_NAME
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+
+ class Registry(object):
+
+     def __init__(self):
+         self.data = {}
+         self.lock = threading.Lock()
+
+     def get_copy(self):
+         """
+         Get a copy of the registry.
+
+         Use a lock to ensure the registry is not modified while we're copying it.
+         """
+         with self.lock:
+             return copy.deepcopy(self.data)
+
+     def each(self):
+         """
+         Iterate over all tasks in the registry.
+
+         Use a lock to ensure the registry is not modified while we're iterating over it.
+         """
+         with self.lock:
+             for job_id, job in self.data.items():
+                 for task_id, task in job.items():
+                     yield job_id, task_id, task
+
+     def register_task(self, task_info):
+         """
+         Register a task as active
+
+         The registry is accessed in a thread-safe manner using a lock.
+         """
+         job_id = task_info["job_id"]
+         task_id = task_info["task_id"]
+         with self.lock:
+             if job_id not in self.data:
+                 self.data[job_id] = {}
+
+             if task_id in self.data[job_id]:
+                 logger.debug(
+                     "Task %s for job %s is already in registry. Skipping.",
+                     task_id,
+                     job_id,
+                 )
+                 return False
+
+             self.data[job_id][task_id] = {
+                 "download_id": task_info["download_id"],
+                 "filecount": len(task_info["files"]),
+                 "completed_files": 0,
+                 "preexisting_files": 0,
+                 "size": task_info["size"],
+             }
+             return True
+
+     def update_task(self, file_done_event):
+         """
+         Update the registry each time a file is done.
+
+         Access the registry in a thread-safe manner using a lock.
+
+         Steps:
+         1. Get the task from the registry
+         2. Increment the completed_files count
+         3. If the file was preexisting, increment the preexisting_files count too
+         4. If the task is now complete:
+         c. Remove the task from the registry
+         5. Return the task copy so that the event_dispatcher can let handlers know the task is done.
+
+         """
+
+         job_id = file_done_event["job_id"]
+         task_id = file_done_event["task_id"]
+         with self.lock:
+             task = self.data.get(job_id, {}).get(task_id)
+             if not task:
+                 return None
+             task["completed_files"] += 1
+             if file_done_event["preexisting"]:
+                 task["preexisting_files"] += 1
+
+             task_copy = task.copy()
+
+             # Only really need ==, but I'm paranoid
+             if task["completed_files"] >= task["filecount"]:
+                 del self.data[job_id][task_id]
+
+             return task_copy
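
The registry keys tasks by job_id and task_id and tracks per-task file counts, which is how the on_done handlers above and below detect incomplete tasks. A small round-trip sketch using only the fields the class actually reads; the concrete ids and sizes are made up:

    from ciocore.downloader.registry import Registry

    registry = Registry()
    registry.register_task({
        "job_id": "01234",
        "task_id": "001",
        "download_id": "dl-0001",                    # made-up id
        "files": [{"url": "..."}, {"url": "..."}],   # only the length is used (filecount)
        "size": 2048,                                # made-up byte count
    })

    # Each file-done event bumps completed_files; once all files are in, the task is
    # removed from the registry and a copy of its final state is returned.
    registry.update_task({"job_id": "01234", "task_id": "001", "preexisting": False})
    done = registry.update_task({"job_id": "01234", "task_id": "001", "preexisting": True})
    assert done["completed_files"] == 2 and done["preexisting_files"] == 1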
ciocore/downloader/reporter.py ADDED
@@ -0,0 +1,135 @@
+ """
+ This module contains the Reporter class.
+
+ It registers callbacks with the with the provided downloader instance that allow it to report "downloaded" or "pending" status back to the server.
+
+ It is set up in the download_runner_base module. Classes that derive from DownloadRunnerBase, such as LoggingDownloadRunner, do not need to be concerned with the details of the Reporter class.
+ """
+
+ import json
+ import logging
+ from concurrent.futures import ThreadPoolExecutor
+
+ from ciocore import api_client
+ from ciocore.downloader.log import LOGGER_NAME
+
+ STATUS_ENDPOINT = "/downloads/status"
+ STATUS_DOWNLOADED = "downloaded"
+ STATUS_PENDING = "pending"
+
+ logger = logging.getLogger(LOGGER_NAME)
+
+ class Reporter(object):
+
+     def __init__(self, downloader, client=api_client.ApiClient(), num_threads=1):
+
+         self.downloader = downloader
+
+         self.num_threads = num_threads
+         self.client = client
+         self.executor = None
+
+         logger.debug("Assigning reporter callbacks")
+         self.downloader.on("task_done", self.on_task_done)
+         self.downloader.on("done", self.on_done)
+
+     def __enter__(self):
+         self.executor = ThreadPoolExecutor(max_workers=self.num_threads)
+         return self  # Optionally return this reporter
+
+     def __exit__(self, exc_type, exc_value, traceback):
+         self.executor.shutdown()
+         # Handle exceptions, from inside the with block
+         if exc_type:
+             logger.exception("Error running downloader: %s", exc_value)
+             # return False to propagate the exception
+             return False
+
+
+
+     def report_task_status(
+         self, download_id, status=STATUS_DOWNLOADED, bytes_in_task=0
+     ):
+         """
+         Make a request to the server to report the status of a task.
+
+         If the user interrupted the download, then we set the task status to pending to be safe.
+         """
+         if self.downloader.interrupt_flag.is_set():
+             status = STATUS_PENDING
+
+         bytes_to_download = 0 if status == STATUS_DOWNLOADED else bytes_in_task
+
+         data = {
+             "download_id": download_id,
+             "status": status,
+             "bytes_downloaded": 0,
+             "bytes_to_download": bytes_to_download,
+         }
+         json_data = json.dumps(data)
+         try:
+             self.client.make_request(STATUS_ENDPOINT, data=json_data, use_api_key=True)
+         except Exception as exc:
+             data["error"] = str(exc)
+         return data
+
+     def on_task_done(self, evt):
+         """
+         Callback to run on a task-done event. Report status back to the server.
+
+         Note, the task may consist entirely of preexisting files. Nevertheless, we report the task as downloaded.
+         """
+
+         future = self.executor.submit(
+             self.report_task_status,
+             evt["download_id"],
+             status=STATUS_DOWNLOADED,
+             bytes_in_task=evt["size"],
+         )
+         future.add_done_callback(
+             lambda f, job_id=evt["job_id"], task_id=evt["task_id"]: log_report_result(
+                 f.result(), job_id, task_id
+             )
+         )
+
+     def on_done(self, evt):
+         """
+         When the job is done, check to see if any tasks were not completed.
+
+         If we find any, then report them back to the server as pending.
+         """
+         logger.debug("Download done. Reporting remaining task statuses to server")
+         for job_id, task_id, task in evt["registry"].each():
+             if task["completed_files"] < task["filecount"]:
+
+                 future = self.executor.submit(
+                     self.report_task_status,
+                     task["download_id"],
+                     status=STATUS_PENDING,
+                     bytes_in_task=task["size"],
+                 )
+                 future.add_done_callback(
+                     lambda f, job_id=job_id, task_id=task_id: log_report_result(
+                         f.result(), job_id, task_id
+                     )
+                 )
+
+
+ def log_report_result(report_result, job_id, task_id):
+     """Log the report result."""
+     if report_result.get("error"):
+         logger.error(
+             "Error reporting task to server: %s:%s (%s) %s",
+             job_id,
+             task_id,
+             report_result["download_id"],
+             report_result["error"],
+         )
+         return
+     logger.debug(
+         "Reported task to server: %s:%s (%s) %s",
+         job_id,
+         task_id,
+         report_result["download_id"],
+         report_result["status"],
+     )
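
Reporter is written as a context manager: __enter__ creates the thread pool that posts statuses and __exit__ shuts it down, so it is meant to wrap the downloader's run. The actual wiring lives in download_runner_base, which is not part of this diff, so the sketch below is hedged; in particular the run() entry point on the downloader is assumed, not shown here.

    from ciocore.downloader.job_downloader import JobDownloader
    from ciocore.downloader.reporter import Reporter

    downloader = JobDownloader(["01234"])      # job id is made up
    with Reporter(downloader, num_threads=2):  # callbacks are attached in Reporter.__init__
        downloader.run()                       # assumed BaseDownloader entry point (not in this diff)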
ciocore/file_utils.py CHANGED
@@ -256,13 +256,13 @@ def get_common_dirpath(paths):
  
  
  def _is_valid_path(path_str):
-     """
+     r"""
      This is dirty/inaccurate helper function to determine whether the given "path" is considered
      valid. If so, return True.
  
      If the given path_str is any of the following characters, then it's to be considered invalid:
  
-     On linux\mac:
+     On linux/mac:
          /
          //
          lettered drive (e.g. x:\)
@@ -453,7 +453,7 @@ def get_tx_path(filepath, existing_only=False):
  
  
  def strip_drive_letter(filepath):
-     """
+     r"""
      If the given filepath has a drive letter, remove it and return the rest of the path
  
      C:\cat.txt --> \cat.txt
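
Both hunks only promote docstrings that contain literal backslashes (Windows path examples) to raw strings. In a normal string literal, sequences such as \c or \m are not valid escapes, and recent Python versions warn about them at compile time, so the r prefix keeps those docstrings verbatim and warning-free. A quick illustration:

    # "\c" is not a recognized escape, so recent Pythons warn about the plain literal
    # at compile time; the raw string is silent and keeps the backslash as-is.
    plain = "C:\cat.txt"   # warns (DeprecationWarning / SyntaxWarning, depending on version)
    raw = r"C:\cat.txt"    # no warning
    assert plain == raw     # both currently evaluate to the same text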
ciocore/hardware_set.py CHANGED
@@ -66,7 +66,6 @@ class HardwareSet(object):
  
      def __init__(self, instance_types):
          """Initialize the HardwareSet with a list of instance types.
-
          Typically, you would access the HardwareSet through the ciocore.data.data() function, which initializes it for you. However, you can also initialize it directly with a list of instance types straight from ciocore.api_client. The difference being that the latter contains all instance types, whereas the former contains only the instance types compatible with the products you have specified, as well as being cached.
  
          Args:
@@ -162,7 +161,6 @@ class HardwareSet(object):
  
          Returns:
              dict: The instance type or None if not found.
-
          Example:
              >>> from ciocore import data as coredata
              >>> coredata.init()
@@ -197,7 +195,6 @@ class HardwareSet(object):
  
          Returns:
              dict: The category or None if not found.
-
          Example:
              >>> from ciocore import data as coredata
              >>> coredata.init()
@@ -352,7 +349,6 @@ class HardwareSet(object):
              for category in (it.get("categories") or [])
          ]
          result = {}
-
          for it in instance_types:
              is_gpu = it.get("gpu", False)
              if categories: