ciocore 5.1.1__py2.py3-none-any.whl → 10.0.0b3__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ciocore/VERSION +1 -1
- ciocore/__init__.py +23 -1
- ciocore/api_client.py +655 -160
- ciocore/auth/__init__.py +5 -3
- ciocore/cli.py +501 -0
- ciocore/common.py +15 -13
- ciocore/conductor_submit.py +77 -60
- ciocore/config.py +127 -13
- ciocore/data.py +162 -77
- ciocore/docsite/404.html +746 -0
- ciocore/docsite/apidoc/api_client/index.html +3605 -0
- ciocore/docsite/apidoc/apidoc/index.html +909 -0
- ciocore/docsite/apidoc/config/index.html +1652 -0
- ciocore/docsite/apidoc/data/index.html +1553 -0
- ciocore/docsite/apidoc/hardware_set/index.html +2460 -0
- ciocore/docsite/apidoc/package_environment/index.html +1507 -0
- ciocore/docsite/apidoc/package_tree/index.html +2386 -0
- ciocore/docsite/assets/_mkdocstrings.css +16 -0
- ciocore/docsite/assets/images/favicon.png +0 -0
- ciocore/docsite/assets/javascripts/bundle.471ce7a9.min.js +29 -0
- ciocore/docsite/assets/javascripts/bundle.471ce7a9.min.js.map +7 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.el.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.he.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/tinyseg.js +206 -0
- ciocore/docsite/assets/javascripts/lunr/wordcut.js +6708 -0
- ciocore/docsite/assets/javascripts/workers/search.b8dbb3d2.min.js +42 -0
- ciocore/docsite/assets/javascripts/workers/search.b8dbb3d2.min.js.map +7 -0
- ciocore/docsite/assets/stylesheets/main.3cba04c6.min.css +1 -0
- ciocore/docsite/assets/stylesheets/main.3cba04c6.min.css.map +1 -0
- ciocore/docsite/assets/stylesheets/palette.06af60db.min.css +1 -0
- ciocore/docsite/assets/stylesheets/palette.06af60db.min.css.map +1 -0
- ciocore/docsite/cmdline/docs/index.html +871 -0
- ciocore/docsite/cmdline/downloader/index.html +934 -0
- ciocore/docsite/cmdline/packages/index.html +878 -0
- ciocore/docsite/cmdline/uploader/index.html +995 -0
- ciocore/docsite/how-to-guides/index.html +869 -0
- ciocore/docsite/index.html +895 -0
- ciocore/docsite/logo.png +0 -0
- ciocore/docsite/objects.inv +0 -0
- ciocore/docsite/search/search_index.json +1 -0
- ciocore/docsite/sitemap.xml +3 -0
- ciocore/docsite/sitemap.xml.gz +0 -0
- ciocore/docsite/stylesheets/extra.css +26 -0
- ciocore/docsite/stylesheets/tables.css +167 -0
- ciocore/downloader/base_downloader.py +644 -0
- ciocore/downloader/download_runner_base.py +47 -0
- ciocore/downloader/job_downloader.py +119 -0
- ciocore/{downloader.py → downloader/legacy_downloader.py} +12 -9
- ciocore/downloader/log.py +73 -0
- ciocore/downloader/logging_download_runner.py +87 -0
- ciocore/downloader/perpetual_downloader.py +63 -0
- ciocore/downloader/registry.py +97 -0
- ciocore/downloader/reporter.py +135 -0
- ciocore/exceptions.py +8 -2
- ciocore/file_utils.py +51 -50
- ciocore/hardware_set.py +449 -0
- ciocore/loggeria.py +89 -20
- ciocore/package_environment.py +110 -48
- ciocore/package_query.py +182 -0
- ciocore/package_tree.py +319 -258
- ciocore/retry.py +0 -0
- ciocore/uploader/_uploader.py +547 -364
- ciocore/uploader/thread_queue_job.py +176 -0
- ciocore/uploader/upload_stats/__init__.py +3 -4
- ciocore/uploader/upload_stats/stats_formats.py +10 -4
- ciocore/validator.py +34 -2
- ciocore/worker.py +174 -151
- ciocore-10.0.0b3.dist-info/METADATA +928 -0
- ciocore-10.0.0b3.dist-info/RECORD +128 -0
- {ciocore-5.1.1.dist-info → ciocore-10.0.0b3.dist-info}/WHEEL +1 -1
- ciocore-10.0.0b3.dist-info/entry_points.txt +2 -0
- tests/instance_type_fixtures.py +175 -0
- tests/package_fixtures.py +205 -0
- tests/test_api_client.py +297 -12
- tests/test_base_downloader.py +104 -0
- tests/test_cli.py +149 -0
- tests/test_common.py +1 -7
- tests/test_config.py +40 -18
- tests/test_data.py +162 -173
- tests/test_downloader.py +118 -0
- tests/test_hardware_set.py +139 -0
- tests/test_job_downloader.py +213 -0
- tests/test_package_query.py +38 -0
- tests/test_package_tree.py +91 -291
- tests/test_submit.py +44 -18
- tests/test_uploader.py +1 -4
- ciocore/__about__.py +0 -10
- ciocore/cli/conductor.py +0 -191
- ciocore/compat.py +0 -15
- ciocore-5.1.1.data/scripts/conductor +0 -19
- ciocore-5.1.1.data/scripts/conductor.bat +0 -13
- ciocore-5.1.1.dist-info/METADATA +0 -408
- ciocore-5.1.1.dist-info/RECORD +0 -47
- tests/mocks/api_client_mock.py +0 -51
- /ciocore/{cli → downloader}/__init__.py +0 -0
- {ciocore-5.1.1.dist-info → ciocore-10.0.0b3.dist-info}/top_level.txt +0 -0
ciocore/downloader/job_downloader.py
ADDED
@@ -0,0 +1,119 @@
+"""
+Job Downloader
+
+Download output files from a Conductor job.
+
+ENDPOINT
+The outputs of a Conductor job are described in the response from the /jobs/{job_id}/downloads endpoint. The response is a list of tasks. Each task has a list of files. Each file (dict) has a signed URL plus other fields such as the md5 and fields describing the original path, size, and more.
+
+PAGING
+A job may contain thousands of tasks, each with several files. To reduce the time it takes to get started, this downloader makes requests for download information in batches, or pages. The number of tasks in each page is controlled by the page_size parameter. As soon as the first page of tasks is retrieved, we start downloading the files in threads. While the files are downloading, we fetch the next page of tasks. When the current page of tasks is exhausted, we start downloading the files in the next page of tasks. We continue until all tasks have been downloaded.
+
+The get_some_tasks method is responsible for fetching the next page of tasks. It is called by the base class. It returns a list of tasks, and a locator. For this implementation, the locator is a dictionary containing the index of the current job, and the cursor for the next page of tasks for the job. A new locator is returned to the calling method so that it can be passed back to this method the next time it is called. When the calling method receives a falsey value for the locator, it knows that there are no more tasks to download.
+
+See the documentation for the base downloader for more information about the locator and other behavior.
+"""
+
+import json
+import logging
+from cioseq.sequence import Sequence
+from ciocore.downloader.base_downloader import BaseDownloader
+from ciocore.downloader.log import LOGGER_NAME
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class JobDownloader(BaseDownloader):
+    CLIENT_NAME = "JobDownloader"
+
+    def __init__(self, jobs, *args, **kwargs):
+
+        super().__init__(*args, **kwargs)
+        """Initialize the downloader."""
+        logger.debug("Initializing paged job downloader")
+        self.jobs = flatten(jobs)
+        self.location = None  # location is not used in this downloader
+
+    def get_some_tasks(self, locator):
+        """Fetch the next page of tasks from the server.
+
+        locator: a dictionary containing the index of the current job, and the cursor for the next page of tasks for the job.
+
+
+        # What is a locator? It's the information needed to request the next page of tasks. It consists of the index of the current job, and the cursor for the next page of tasks. It is provided to this method as a parameter, and when we're done, a new locator is returned to the run loop. The run loop passes it back to us the next time it is called.
+
+        # On the first call, the provided locator is None. In that case, we start with the first job, and no cursor.
+
+        # We return the locator to the run loop in the base class, along with any tasks to be downloaded. If we return a falsy locator, the run loop is exited, since it means we downloaded everything OR there was an error fetching tasks.
+
+        # If we got to the end of the current job, we increment the job index and reset the cursor to None. The next time this method is called, we'll start with the next job.
+
+        # If we got a next_cursor from the request, we return it in the locator along with the current job index. This is what we'll be given on the next call.
+        """
+
+        if not locator:
+            locator = {}
+
+        job_index = locator.get("job_index", 0)
+        cursor = locator.get("cursor", None)
+        if job_index >= len(self.jobs):
+            # return no tasks and no locator. Ends the download.
+            return [], None
+
+        # we have a job to download
+        job_info = self.jobs[job_index]
+        job_id = job_info["job_id"]
+        task_ids = job_info["task_ids"]
+        url = f"/jobs/{job_id}/downloads"
+        data = json.dumps({"tids": task_ids})
+        params = {"limit": self.page_size, "start_cursor": cursor}
+        try:
+            response, code = self.client.make_request(
+                url, verb="POST", params=params, data=data, use_api_key=True
+            )
+            if code != 201:
+                # we have an error. Return null locator to end the download
+                raise Exception(f"Code: {code}")
+        except Exception as exc:
+            logger.error("Error fetching download info for job ID: %s : %s : %s", job_id, url, exc)
+            return [], None
+        page = json.loads(response)
+        tasks = page.get("downloads", [])
+
+        tasks = self.filter(tasks)
+
+        next_cursor = page.get("next_cursor")
+
+        if not next_cursor:
+            # we're done with this job
+            job_index += 1
+
+        return tasks, {"job_index": job_index, "cursor": next_cursor}
+
+
+def flatten(job_specs):
+    """Create a list of job objects with keys: job_id and tasks.
+
+    See tests/test_downloader.py for examples.
+
+    Example input: ["1234", "1235:12-15"]
+
+    Example result:
+    [
+        {"job_id": "01234", "task_ids":None},
+        {"job_id": "01235", "task_ids":["012","013","014","015"]}
+    ]
+    """
+    result = []
+    for job_spec in job_specs:
+        if ":" in job_spec:
+            job_id, range_spec = job_spec.split(":")
+            try:
+                seq = Sequence.create(range_spec)
+                task_ids = seq.expand("###")
+            except (ValueError, TypeError):
+                task_ids = None
+        else:
+            job_id, task_ids = job_spec, None
+            task_ids = None
+        result.append({"job_id": job_id.zfill(5), "task_ids": task_ids})
+    return result
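The module docstring above describes a paging contract built around a "locator": get_some_tasks returns a batch of tasks plus the state needed to request the next batch, and a falsy locator ends the run loop. Below is a minimal, self-contained sketch of that contract using made-up names (fetch_page, drive) rather than the real BaseDownloader API.

# Minimal sketch of the locator-based paging contract described in the docstring
# above. fetch_page and drive are illustrative stand-ins, not ciocore code; the
# real run loop lives in BaseDownloader and hands tasks to download threads.

def fetch_page(pages, locator):
    """Stand-in for get_some_tasks(): return (tasks, new_locator)."""
    index = 0 if locator is None else locator["index"]
    if index >= len(pages):
        return [], None          # a falsy locator tells the caller to stop
    return pages[index], {"index": index + 1}

def drive(pages):
    """Stand-in for the base-class run loop."""
    locator, seen = None, []
    while True:
        tasks, locator = fetch_page(pages, locator)
        seen.extend(tasks)       # the real loop queues these for download threads
        if not locator:
            return seen

assert drive([["task1", "task2"], ["task3"]]) == ["task1", "task2", "task3"]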
ciocore/{downloader.py → downloader/legacy_downloader.py}
RENAMED
@@ -393,7 +393,7 @@ class Downloader(object):
     def start_reporter_thread(self, download_data):
         reporter_thread_name = "ReporterThread"
         current_thread_name = threading.current_thread().name
-        thread_number_match = re.match("Thread-(\d+)", current_thread_name)
+        thread_number_match = re.match(r"Thread-(\d+)", current_thread_name)
         if thread_number_match:
             reporter_thread_name += "-%s" % thread_number_match.groups()[0]

@@ -493,7 +493,6 @@ class Downloader(object):
         downloads = _get_job_download(endpoint, self.api_client, job_id, tid)
         if downloads:
             for task_download in downloads.get("downloads", []):
-                print("putting in queue: %s" % task_download)
                 self.pending_queue.put(task_download, block=True)

     @common.dec_catch_exception(raise_=True)
@@ -1230,9 +1229,14 @@ def run_downloader(args):

     # Set up logging
     log_level_name = args.get("log_level")
-
-
-
+
+    loggeria.setup_conductor_logging(
+        logger_level=loggeria.LEVEL_MAP.get(log_level_name),
+        log_dirpath=args.get("log_dir"),
+        log_filename="conductor_downloader.log",
+        console_formatter=LOG_FORMATTER,
+        file_formatter=LOG_FORMATTER,
+    )

     api_client.ApiClient.register_client(client_name = Downloader.CLIENT_NAME, client_version=ciocore.__version__)

@@ -1256,14 +1260,13 @@ def run_downloader(args):


 def set_logging(level=None, log_dirpath=None):
-
-    if log_dirpath:
-        log_filepath = os.path.join(log_dirpath, "conductor_dl_log")
+
     loggeria.setup_conductor_logging(
         logger_level=level,
+        log_dirpath=log_dirpath,
+        log_filename="conductor_downloader.log",
         console_formatter=LOG_FORMATTER,
         file_formatter=LOG_FORMATTER,
-        log_filepath=log_filepath,
     )


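The first hunk above converts the thread-name pattern to a raw string so the \d escape is not interpreted by Python. A small standalone check of the same naming scheme; reporter_name is an illustrative helper, not part of ciocore.

import re

# Derive a reporter thread name from a worker thread's default name,
# e.g. "Thread-3" -> "ReporterThread-3"; anything else keeps the base name.
def reporter_name(current_thread_name, base="ReporterThread"):
    match = re.match(r"Thread-(\d+)", current_thread_name)
    return f"{base}-{match.group(1)}" if match else base

assert reporter_name("Thread-7") == "ReporterThread-7"
assert reporter_name("MainThread") == "ReporterThread"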
ciocore/downloader/log.py
ADDED
@@ -0,0 +1,73 @@
+import logging
+import colorlog
+import sys
+LOGGER_NAME = "cw.download"
+
+LOG_COLORS ={
+    'DEBUG': 'purple',
+    'INFO': 'blue',
+    'WARNING': 'yellow',
+    'ERROR': 'red',
+    'CRITICAL': 'red,bg_white',
+}
+
+DEBUG_FORMATTER = colorlog.ColoredFormatter(
+    "%(log_color)s%(asctime)s %(name)s %(levelname)8s %(filename)s:%(lineno)d %(threadName)s> %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    log_colors=LOG_COLORS,
+)
+
+INFO_FORMATTER = colorlog.ColoredFormatter(
+    '%(log_color)s%(levelname)s:%(name)s> %(message)s',
+    log_colors=LOG_COLORS,
+)
+
+LEVEL_MAP = {
+    "DEBUG": logging.DEBUG,
+    "INFO": logging.INFO,
+    "WARNING": logging.WARNING,
+    "ERROR": logging.ERROR,
+    "CRITICAL": logging.CRITICAL,
+    "NOTSET": logging.NOTSET,
+}
+
+class GracefulLogger(logging.Logger):
+    def setLevel(self, level):
+        super().setLevel(level)
+
+        # Define formatters based on level
+        formatter = DEBUG_FORMATTER if level == logging.DEBUG else INFO_FORMATTER
+        for handler in self.handlers:
+            handler.setFormatter(formatter)
+
+
+class GracefulStreamHandler(colorlog.StreamHandler):
+    """
+    A custom StreamHandler that suppresses BrokenPipeError.
+
+    This handler extends the standard logging.StreamHandler to gracefully handle
+    BrokenPipeErrors that can occur when output streams are closed prematurely.
+    It overrides the emit method to catch and ignore BrokenPipeError, allowing
+    the program to continue without interruption.
+    """
+
+    def emit(self, record):
+        """
+        Overrides the StreamHandler.emit method to gracefully handle BrokenPipeError.
+
+        Args:
+            record (logging.LogRecord): The log record to be emitted.
+        """
+        try:
+            super().emit(record)
+        except BrokenPipeError:
+            pass
+
+logging.setLoggerClass(GracefulLogger)
+logger = colorlog.getLogger(LOGGER_NAME)
+logger.propagate = False
+
+if not any(isinstance(handler, GracefulStreamHandler) for handler in logger.handlers):
+    stream_handler = GracefulStreamHandler(sys.stdout)
+    logger.addHandler(stream_handler)
+
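Because log.py installs GracefulLogger and a colorized stream handler at import time, downstream modules only need to look the logger up by name. A short sketch, assuming ciocore 10.0.0b3 is installed so the module above is importable:

import logging
from ciocore.downloader.log import LOGGER_NAME

logger = logging.getLogger(LOGGER_NAME)   # same GracefulLogger instance created above
logger.setLevel(logging.DEBUG)            # setLevel also swaps handlers to DEBUG_FORMATTER
logger.debug("verbose, colorized output with file and line info")
logger.setLevel(logging.INFO)             # back to the terser INFO_FORMATTER
logger.info("normal output")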
ciocore/downloader/logging_download_runner.py
ADDED
@@ -0,0 +1,87 @@
+"""
+Logging Download runner
+
+This module contains the LoggingDownloadRunner class.
+
+The LoggingDownloadRunner is a derived class of DownloadRunnerBase.
+
+It registers callbacks that are called when certain events occur during the download.
+It uses these callbacks to display progress via the logging module.
+
+"""
+
+import logging
+from ciocore.downloader.download_runner_base import DownloadRunnerBase
+from ciocore.downloader.log import LOGGER_NAME
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class LoggingDownloadRunner(DownloadRunnerBase):
+    CLIENT_NAME = "LoggingDownloadRunner"
+
+    def __init__(self, jobids=None, location=None, **kwargs):
+
+        super().__init__(jobids, location, **kwargs)
+
+        logger.debug("Assigning callbacks")
+        self.downloader.on("start", self.on_start)
+        self.downloader.on("start_task", self.on_start_task)
+        self.downloader.on("progress", self.on_progress)
+        self.downloader.on("file_done", self.on_file_done)
+        self.downloader.on("task_done", self.on_task_done)
+        self.downloader.on("done", self.on_done)
+
+    def on_start(self, evt):
+        logger.info("Starting download")
+
+    def on_start_task(self, evt):
+        logger.info("Starting task %s:%s", evt["job_id"], evt["task_id"])
+
+    def on_progress(self, evt):
+        percent = 0
+        if evt["size"] and evt["progress_bytes"]:
+            percent = round(evt["progress_bytes"] / evt["size"] * 100, 2)
+        logger.info("Progress: %s %.2f%%", evt["filepath"], percent)
+
+    def on_file_done(self, evt):
+        if evt["error"]:
+            logger.warning(
+                "File done with error: %s:%s:%s %s",
+                evt["job_id"],
+                evt["task_id"],
+                evt["filepath"],
+                evt["error"],
+            )
+        else:
+            logger.info(
+                "File done %s:%s:%s", evt["job_id"], evt["task_id"], evt["filepath"]
+            )
+
+    def on_task_done(self, evt):
+        if evt["preexisting"]:
+            logger.info(
+                "Task already existed locally %s:%s", evt["job_id"], evt["task_id"]
+            )
+        else:
+            logger.info("Task done %s:%s", evt["job_id"], evt["task_id"])
+
+    def on_done(self, evt):
+        """
+        When the job is done, check to see if any tasks were not completed.
+        """
+        logger.info("Download finished")
+        empty = True
+        for job_id, task_id, task in evt["registry"].each():
+            if task["completed_files"] < task["filecount"]:
+                logger.warning(
+                    "Task not fully downloaded %s:%s: %s/%s files.",
+                    job_id,
+                    task_id,
+                    task["completed_files"],
+                    task["filecount"],
+                )
+                empty = False
+
+        if empty:
+            logger.info("No failed tasks.")
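The runner wires its handlers through an on(event_name, callback) interface on the downloader. A self-contained sketch of that observer shape with a toy emitter; the Emitter class is illustrative, and only the on() signature is taken from the code above.

from collections import defaultdict

class Emitter:
    """Toy event emitter: register callbacks with on(), fire them with emit()."""
    def __init__(self):
        self._callbacks = defaultdict(list)

    def on(self, name, callback):
        self._callbacks[name].append(callback)

    def emit(self, name, evt):
        for callback in self._callbacks[name]:
            callback(evt)

emitter = Emitter()
emitter.on("file_done", lambda evt: print("done:", evt["filepath"]))
emitter.emit("file_done", {"filepath": "/renders/beauty.0001.exr"})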
ciocore/downloader/perpetual_downloader.py
ADDED
@@ -0,0 +1,63 @@
+"""
+Perpetual Downloader
+
+Not yet tested
+"""
+import json
+import logging
+import time
+import sys
+from ciocore.downloader.base_downloader import BaseDownloader
+from ciocore.downloader.log import LOGGER_NAME
+
+logger = logging.getLogger(LOGGER_NAME)
+
+def spinning_cursor():
+    while True:
+        for cursor in '|/-\\':
+            yield cursor
+
+class PerpetualDownloader(BaseDownloader):
+    CLIENT_NAME = "PerpetualDownloader"
+    POLL_INTERVAL = 15
+    URL = "/downloads/next"
+    spinner = spinning_cursor()
+
+    def __init__(self, location, *args, **kwargs):
+        """Initialize the downloader."""
+        super().__init__(*args, **kwargs)
+        self.location = location
+        logger.debug("Initializing perpetual downloader")
+
+    def get_some_tasks(self, _):
+        """Fetch the next batch of tasks from the server.
+
+        Always set the return locator to True to signal that we should keep running this function.
+
+        This function never throws an error. If something goes wrong, it just sets the task array to be empty.
+
+        If tasks array is empty for any reason (error, filter, no tasks ready, etc.), it waits for POLL_INTERVAL seconds before trying again.
+        """
+        logger.debug("Fetching the next page of tasks")
+        params = {"count": self.page_size, "location": self.location}
+        tasks = []
+        try:
+            response, code = self.client.make_request(
+                self.URL, params=params, use_api_key=True
+            )
+            if code <= 201:
+                tasks = json.loads(response).get("data", [])
+                tasks = self.filter(tasks)
+        except Exception as exc:
+            logger.error("Error fetching download info from: %s : %s", self.URL, exc)
+
+        if not tasks:
+            for _ in range(self.POLL_INTERVAL):
+                spin_char = next(self.spinner)
+                line = f"Listening for files to download... ({spin_char})"
+                sys.stdout.write(line)
+                sys.stdout.flush()
+                sys.stdout.write('\b' * len(line))
+                time.sleep(1)
+
+        return tasks, True
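PerpetualDownloader always returns a truthy locator, so the run loop keeps calling it; when nothing is ready it idles behind a spinner for POLL_INTERVAL seconds. A standalone sketch of that poll-and-sleep shape; fetch and handle are stand-ins, and max_rounds plus the short sleep exist only so the sketch terminates quickly.

import time

def poll_forever(fetch, handle, poll_interval=15, max_rounds=3):
    """Keep asking for work; sleep when there is none or when fetching fails."""
    for _ in range(max_rounds):
        try:
            tasks = fetch()
        except Exception:
            tasks = []                 # errors are swallowed, mirroring get_some_tasks
        if tasks:
            handle(tasks)
        else:
            time.sleep(poll_interval)  # the real loop redraws a spinner once per second

poll_forever(lambda: [{"task_id": "001"}], print, poll_interval=0.01)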
ciocore/downloader/registry.py
ADDED
@@ -0,0 +1,97 @@
+import copy
+
+import threading
+import logging
+from ciocore.downloader.log import LOGGER_NAME
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class Registry(object):
+
+    def __init__(self):
+        self.data = {}
+        self.lock = threading.Lock()
+
+    def get_copy(self):
+        """
+        Get a copy of the registry.
+
+        Use a lock to ensure the registry is not modified while we're copying it.
+        """
+        with self.lock:
+            return copy.deepcopy(self.data)
+
+    def each(self):
+        """
+        Iterate over all tasks in the registry.
+
+        Use a lock to ensure the registry is not modified while we're iterating over it.
+        """
+        with self.lock:
+            for job_id, job in self.data.items():
+                for task_id, task in job.items():
+                    yield job_id, task_id, task
+
+    def register_task(self, task_info):
+        """
+        Register a task as active
+
+        The registry is accessed in a thread-safe manner using a lock.
+        """
+        job_id = task_info["job_id"]
+        task_id = task_info["task_id"]
+        with self.lock:
+            if job_id not in self.data:
+                self.data[job_id] = {}
+
+            if task_id in self.data[job_id]:
+                logger.debug(
+                    "Task %s for job %s is already in registry. Skipping.",
+                    task_id,
+                    job_id,
+                )
+                return False
+
+            self.data[job_id][task_id] = {
+                "download_id": task_info["download_id"],
+                "filecount": len(task_info["files"]),
+                "completed_files": 0,
+                "preexisting_files": 0,
+                "size": task_info["size"],
+            }
+            return True
+
+    def update_task(self, file_done_event):
+        """
+        Update the registry each time a file is done.
+
+        Access the registry in a thread-safe manner using a lock.
+
+        Steps:
+        1. Get the task from the registry
+        2. Increment the completed_files count
+        3. If the file was preexisting, increment the preexisting_files count too
+        4. If the task is now complete:
+            c. Remove the task from the registry
+        5. Return the task copy so that the event_dispatcher can let handlers know the task is done.
+
+        """
+
+        job_id = file_done_event["job_id"]
+        task_id = file_done_event["task_id"]
+        with self.lock:
+            task = self.data.get(job_id, {}).get(task_id)
+            if not task:
+                return None
+            task["completed_files"] += 1
+            if file_done_event["preexisting"]:
+                task["preexisting_files"] += 1
+
+            task_copy = task.copy()
+
+            # Only really need ==, but I'm paranoid
+            if task["completed_files"] >= task["filecount"]:
+                del self.data[job_id][task_id]
+
+            return task_copy
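The docstrings above describe a register/update lifecycle for each task. A quick walk-through of that lifecycle, assuming ciocore 10.0.0b3 is installed so Registry is importable; the task and event dicts are invented for the example.

from ciocore.downloader.registry import Registry

registry = Registry()
registry.register_task({
    "job_id": "01234",
    "task_id": "001",
    "download_id": "dl-1",
    "files": [{"path": "a.exr"}, {"path": "b.exr"}],  # filecount becomes 2
    "size": 2048,
})

# First file finishes: the task stays registered (1 of 2 files complete).
registry.update_task({"job_id": "01234", "task_id": "001", "preexisting": False})

# Second file finishes: update_task removes the task and returns a copy of its record.
done = registry.update_task({"job_id": "01234", "task_id": "001", "preexisting": True})
assert done["completed_files"] == 2 and done["preexisting_files"] == 1
assert list(registry.each()) == []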
ciocore/downloader/reporter.py
ADDED
@@ -0,0 +1,135 @@
+"""
+This module contains the Reporter class.
+
+It registers callbacks with the with the provided downloader instance that allow it to report "downloaded" or "pending" status back to the server.
+
+It is set up in the download_runner_base module. Classes that derive from DownloadRunnerBase, such as LoggingDownloadRunner, do not need to be concerned with the details of the Reporter class.
+"""
+
+import json
+import logging
+from concurrent.futures import ThreadPoolExecutor
+
+from ciocore import api_client
+from ciocore.downloader.log import LOGGER_NAME
+
+STATUS_ENDPOINT = "/downloads/status"
+STATUS_DOWNLOADED = "downloaded"
+STATUS_PENDING = "pending"
+
+logger = logging.getLogger(LOGGER_NAME)
+
+class Reporter(object):
+
+    def __init__(self, downloader, client=api_client.ApiClient(), num_threads=1):
+
+        self.downloader = downloader
+
+        self.num_threads = num_threads
+        self.client = client
+        self.executor = None
+
+        logger.debug("Assigning reporter callbacks")
+        self.downloader.on("task_done", self.on_task_done)
+        self.downloader.on("done", self.on_done)
+
+    def __enter__(self):
+        self.executor = ThreadPoolExecutor(max_workers=self.num_threads)
+        return self  # Optionally return this reporter
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.executor.shutdown()
+        # Handle exceptions, from inside the with block
+        if exc_type:
+            logger.exception("Error running downloader: %s", exc_value)
+            # return False to propagate the exception
+            return False
+
+
+
+    def report_task_status(
+        self, download_id, status=STATUS_DOWNLOADED, bytes_in_task=0
+    ):
+        """
+        Make a request to the server to report the status of a task.
+
+        If the user interrupted the download, then we set the task status to pending to be safe.
+        """
+        if self.downloader.interrupt_flag.is_set():
+            status = STATUS_PENDING
+
+        bytes_to_download = 0 if status == STATUS_DOWNLOADED else bytes_in_task
+
+        data = {
+            "download_id": download_id,
+            "status": status,
+            "bytes_downloaded": 0,
+            "bytes_to_download": bytes_to_download,
+        }
+        json_data = json.dumps(data)
+        try:
+            self.client.make_request(STATUS_ENDPOINT, data=json_data, use_api_key=True)
+        except Exception as exc:
+            data["error"] = str(exc)
+        return data
+
+    def on_task_done(self, evt):
+        """
+        Callback to run on a task-done event. Report status back to the server.
+
+        Note, the task may consist entirely of preexisting files. Nevertheless, we report the task as downloaded.
+        """
+
+        future = self.executor.submit(
+            self.report_task_status,
+            evt["download_id"],
+            status=STATUS_DOWNLOADED,
+            bytes_in_task=evt["size"],
+        )
+        future.add_done_callback(
+            lambda f, job_id=evt["job_id"], task_id=evt["task_id"]: log_report_result(
+                f.result(), job_id, task_id
+            )
+        )
+
+    def on_done(self, evt):
+        """
+        When the job is done, check to see if any tasks were not completed.
+
+        If we find any, then report them back to the server as pending.
+        """
+        logger.debug("Download done. Reporting remaining task statuses to server")
+        for job_id, task_id, task in evt["registry"].each():
+            if task["completed_files"] < task["filecount"]:
+
+                future = self.executor.submit(
+                    self.report_task_status,
+                    task["download_id"],
+                    status=STATUS_PENDING,
+                    bytes_in_task=task["size"],
+                )
+                future.add_done_callback(
+                    lambda f, job_id=job_id, task_id=task_id: log_report_result(
+                        f.result(), job_id, task_id
+                    )
+                )
+
+
+def log_report_result(report_result, job_id, task_id):
+    """Log the report result."""
+    if report_result.get("error"):
+        logger.error(
+            "Error reporting task to server: %s:%s (%s) %s",
+            job_id,
+            task_id,
+            report_result["download_id"],
+            report_result["error"],
+        )
+        return
+    logger.debug(
+        "Reported task to server: %s:%s (%s) %s",
+        job_id,
+        task_id,
+        report_result["download_id"],
+        report_result["status"],
+    )
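Reporter pushes each status report onto a small ThreadPoolExecutor and logs the outcome from a done-callback, binding job_id and task_id as lambda defaults. A self-contained sketch of that submit/add_done_callback pattern with stand-in functions; nothing here contacts a server.

from concurrent.futures import ThreadPoolExecutor

def report_task_status(download_id, status="downloaded"):
    """Stand-in for Reporter.report_task_status: pretend to POST and echo the payload."""
    return {"download_id": download_id, "status": status}

def log_report_result(result, job_id, task_id):
    print(f"reported {job_id}:{task_id} ({result['download_id']}) {result['status']}")

with ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(report_task_status, "dl-1", status="downloaded")
    # Bind job_id/task_id as defaults so each callback logs the right task.
    future.add_done_callback(
        lambda f, job_id="01234", task_id="001": log_report_result(f.result(), job_id, task_id)
    )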
ciocore/exceptions.py
CHANGED
@@ -49,13 +49,19 @@ class InvalidPathException(Exception):
     pass


-class
+class UploadError(Exception):
+    """
+    An upload failed
+    """
+
+
+class UploaderMissingFile(UploadError):
     """
     A file is missing
     """


-class UploaderFileModified(
+class UploaderFileModified(UploadError):
     """
     Something wrong with a local file
     """