ciocore 7.0.2b4__py2.py3-none-any.whl → 8.0.0__py2.py3-none-any.whl
This diff reflects the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of ciocore might be problematic.
- ciocore/VERSION +1 -1
- ciocore/__init__.py +23 -1
- ciocore/api_client.py +422 -156
- ciocore/cli.py +503 -0
- ciocore/common.py +10 -1
- ciocore/config.py +86 -54
- ciocore/data.py +23 -70
- ciocore/docsite/404.html +723 -0
- ciocore/docsite/apidoc/api_client/index.html +3203 -0
- ciocore/docsite/apidoc/apidoc/index.html +868 -0
- ciocore/docsite/apidoc/config/index.html +1591 -0
- ciocore/docsite/apidoc/data/index.html +1480 -0
- ciocore/docsite/apidoc/hardware_set/index.html +2367 -0
- ciocore/docsite/apidoc/package_environment/index.html +1450 -0
- ciocore/docsite/apidoc/package_tree/index.html +2310 -0
- ciocore/docsite/assets/_mkdocstrings.css +16 -0
- ciocore/docsite/assets/images/favicon.png +0 -0
- ciocore/docsite/assets/javascripts/bundle.4e31edb1.min.js +29 -0
- ciocore/docsite/assets/javascripts/bundle.4e31edb1.min.js.map +8 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ar.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.da.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.de.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.du.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.es.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.fi.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.fr.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hu.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.hy.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.it.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ja.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.jp.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.kn.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ko.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.multi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.nl.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.no.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.pt.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ro.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ru.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.sa.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.stemmer.support.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.sv.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.ta.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.te.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.th.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.tr.min.js +18 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.vi.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/min/lunr.zh.min.js +1 -0
- ciocore/docsite/assets/javascripts/lunr/tinyseg.js +206 -0
- ciocore/docsite/assets/javascripts/lunr/wordcut.js +6708 -0
- ciocore/docsite/assets/javascripts/workers/search.dfff1995.min.js +42 -0
- ciocore/docsite/assets/javascripts/workers/search.dfff1995.min.js.map +8 -0
- ciocore/docsite/assets/stylesheets/main.83068744.min.css +1 -0
- ciocore/docsite/assets/stylesheets/main.83068744.min.css.map +1 -0
- ciocore/docsite/assets/stylesheets/palette.ecc896b0.min.css +1 -0
- ciocore/docsite/assets/stylesheets/palette.ecc896b0.min.css.map +1 -0
- ciocore/docsite/cmdline/docs/index.html +834 -0
- ciocore/docsite/cmdline/downloader/index.html +897 -0
- ciocore/docsite/cmdline/packages/index.html +841 -0
- ciocore/docsite/cmdline/uploader/index.html +950 -0
- ciocore/docsite/how-to-guides/index.html +831 -0
- ciocore/docsite/index.html +853 -0
- ciocore/docsite/logo.png +0 -0
- ciocore/docsite/objects.inv +0 -0
- ciocore/docsite/search/search_index.json +1 -0
- ciocore/docsite/sitemap.xml +3 -0
- ciocore/docsite/sitemap.xml.gz +0 -0
- ciocore/docsite/stylesheets/extra.css +26 -0
- ciocore/docsite/stylesheets/tables.css +167 -0
- ciocore/downloader/__init__.py +0 -0
- ciocore/downloader/base_downloader.py +644 -0
- ciocore/downloader/download_runner_base.py +47 -0
- ciocore/downloader/job_downloader.py +119 -0
- ciocore/{downloader.py → downloader/legacy_downloader.py} +0 -1
- ciocore/downloader/log.py +73 -0
- ciocore/downloader/logging_download_runner.py +87 -0
- ciocore/downloader/perpetual_downloader.py +63 -0
- ciocore/downloader/registry.py +97 -0
- ciocore/downloader/reporter.py +135 -0
- ciocore/file_utils.py +3 -3
- ciocore/hardware_set.py +0 -4
- ciocore/package_environment.py +67 -75
- ciocore/package_query.py +171 -0
- ciocore/package_tree.py +300 -377
- ciocore/retry.py +0 -0
- ciocore/uploader/_uploader.py +205 -152
- {ciocore-7.0.2b4.dist-info → ciocore-8.0.0.dist-info}/METADATA +33 -12
- ciocore-8.0.0.dist-info/RECORD +127 -0
- {ciocore-7.0.2b4.dist-info → ciocore-8.0.0.dist-info}/WHEEL +1 -1
- ciocore-8.0.0.dist-info/entry_points.txt +2 -0
- tests/extra_env_fixtures.py +57 -0
- tests/instance_type_fixtures.py +42 -8
- tests/project_fixtures.py +8 -0
- tests/test_api_client.py +125 -4
- tests/test_base_downloader.py +104 -0
- tests/test_cli.py +163 -0
- tests/test_common.py +8 -8
- tests/test_config.py +23 -9
- tests/test_data.py +148 -160
- tests/test_downloader.py +118 -0
- tests/test_hardware_set.py +70 -20
- tests/test_job_downloader.py +213 -0
- tests/test_submit.py +9 -2
- ciocore/__about__.py +0 -10
- ciocore/cli/__init__.py +0 -3
- ciocore/cli/conductor.py +0 -210
- ciocore-7.0.2b4.data/scripts/conductor +0 -19
- ciocore-7.0.2b4.data/scripts/conductor.bat +0 -13
- ciocore-7.0.2b4.dist-info/RECORD +0 -51
- tests/mocks/api_client_mock.py +0 -31
- {ciocore-7.0.2b4.dist-info → ciocore-8.0.0.dist-info}/top_level.txt +0 -0
ciocore/uploader/_uploader.py
CHANGED
@@ -15,8 +15,15 @@ except ImportError:
 
 import ciocore
 from ciocore import config
-from ciocore import …
-…
+from ciocore import (
+    api_client,
+    client_db,
+    common,
+    file_utils,
+    loggeria,
+    worker,
+    exceptions,
+)
 
 from .upload_stats import UploadStats
 
@@ -46,19 +53,23 @@ class MD5Worker(worker.ThreadWorker):
 
         # if a submission time md5 was provided then check against it
         if submission_time_md5:
-            logger.info("Enforcing md5 match: %s for: %s", submission_time_md5, filename)
+            logger.info(
+                "Enforcing md5 match: %s for: %s", submission_time_md5, filename
+            )
             if current_md5 != submission_time_md5:
                 message = "MD5 of %s has changed since submission\n" % filename
                 message += "submitted md5: %s\n" % submission_time_md5
                 message += "current md5: %s\n" % current_md5
-                message += "This is likely due to the file being written to after the user"
+                message += (
+                    "This is likely due to the file being written to after the user"
+                )
                 message += " submitted the job but before it got uploaded to conductor"
                 logger.error(message)
                 raise Exception(message)
         self.metric_store.set_dict("file_md5s", filename, current_md5)
         self.metric_store.set_dict("file_md5s_cache_hit", filename, cache_hit)
         size_bytes = os.path.getsize(filename)
-
+
         return (filename, current_md5, size_bytes)
 
     def get_md5(self, filepath):
@@ -68,9 +79,9 @@ class MD5Worker(worker.ThreadWorker):
         Use the sqlite db cache to retrive this (if the cache is valid), otherwise generate the md5
         from scratch
         """
-
+
         cache_hit = True
-
+
         # If md5 caching is disable, then just generate the md5 from scratch
         if not self.md5_caching:
             cache_hit = False
@@ -96,7 +107,9 @@ class MD5Worker(worker.ThreadWorker):
         """
        Store the given file_info into the database
         """
-        client_db.FilesDB.add_file(file_info, db_filepath=self.database_filepath, thread_safe=True)
+        client_db.FilesDB.add_file(
+            file_info, db_filepath=self.database_filepath, thread_safe=True
+        )
 
 
 class MD5OutputWorker(worker.ThreadWorker):
@@ -127,46 +140,45 @@ class MD5OutputWorker(worker.ThreadWorker):
 
     @common.dec_catch_exception(raise_=True)
     def target(self, thread_int):
-
         while not common.SIGINT_EXIT:
-
             job = None
 
             try:
                 logger.debug("Worker querying for job")
                 job = self.in_queue.get(block=True, timeout=self.wait_time)
                 logger.debug("Got job")
-                queue_size = self.in_queue.qsize()
-
+                queue_size = self.in_queue.qsize()
+
             except:
-
                 logger.debug("No jobs available")
-
+
                 if self._job_counter.value >= self.task_count:
-
                     if self.batch:
                         self.ship_batch()
-
+
                     logger.debug("Worker has completed all of its tasks (%s)", job)
                     self.thread_complete_counter.decrement()
                     break
-
+
                 elif self._job_counter.value == 0:
                     logger.debug("Worker waiting for first job")
-
+
                     time.sleep(1)
                 continue
-
-            logger.debug("Worker got job %…
+
+            logger.debug("Worker got job %s", job)
             self._job_counter.increment()
-            logger.debug(…
-                …
-                …
-                …
+            logger.debug(
+                "Processing Job '%s' #%s on %s. %s tasks remaining in queue",
+                job,
+                self._job_counter.value,
+                self,
+                queue_size,
+            )
 
             try:
                 self.check_for_poison_pill(job)
-
+
                 # add file info to the batch list
                 self.batch.append(
                     {
@@ -175,24 +187,25 @@ class MD5OutputWorker(worker.ThreadWorker):
                         "size": job[2],
                     }
                 )
-
+
                 # if the batch is self.batch_size, ship it
                 if len(self.batch) == self.batch_size:
                     self.ship_batch()
-
+
                 # mark this task as done
                 self.mark_done()
-
+
             except Exception as exception:
                 logger.exception('CAUGHT EXCEPTION on job "%s" [%s]:\n', job, self)
 
                 # if there is no error queue to dump data into, then simply raise the exception
                 if self.error_queue is None:
                     raise
-
+
                 self.error_queue.put(sys.exc_info())
                 # exit the while loop to stop the thread
-                break
+                break
+
 
 class HttpBatchWorker(worker.ThreadWorker):
     """
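The MD5OutputWorker above drains (filename, md5, size) tuples from its input queue and ships them to the next stage in fixed-size batches, flushing any partial batch on shutdown. A minimal sketch of that batch-and-ship pattern, using a plain queue.Queue; drain_in_batches, batch_size, and ship() are illustrative stand-ins, not the ciocore API:

    import queue

    def drain_in_batches(in_queue, ship, batch_size=20, wait_time=2):
        """Collect (filename, md5, size) tuples and ship them in fixed-size batches."""
        batch = []
        while True:
            try:
                job = in_queue.get(block=True, timeout=wait_time)
            except queue.Empty:
                break  # queue drained; flush whatever is left below
            batch.append({"path": job[0], "md5": job[1], "size": job[2]})
            if len(batch) == batch_size:
                ship(batch)  # full batch: hand it to the next stage
                batch = []
        if batch:
            ship(batch)  # partial batch on shutdown, as target() does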
@@ -265,8 +278,10 @@ class HttpBatchWorker(worker.ThreadWorker):
             return url_list
         if response_code == 204:
             return None
-        raise Exception(
-            …
+        raise Exception(
+            "%s Failed request to: %s\n%s" % (response_code, uri_path, response_str)
+        )
+
     def do_work(self, job, thread_int):
         logger.debug("getting upload urls for %s", job)
         result = self.make_request(job)
@@ -274,19 +289,19 @@ class HttpBatchWorker(worker.ThreadWorker):
         # Determine which files have already been uploaded by looking at the difference between
         # the file paths in job and the file paths returned by the request. Only files that need
         # to be uploaded are returned by the request
-        incoming_file_paths = set([…
+        incoming_file_paths = set([item["path"] for item in job])
 
         if result:
-
             for item_type in result.values():
                 for item in item_type:
-                    incoming_file_paths.remove(item[…
+                    incoming_file_paths.remove(item["filePath"])
 
         for path in incoming_file_paths:
             self.metric_store.increment("already_uploaded", True, path)
-
+
         return result
 
+
 """
 This worker subscribes to a queue of list of file uploads (multipart and singlepart).
 
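The already-uploaded accounting in do_work above reduces to a set difference: the server hands back upload URLs only for files that still need uploading, so whatever remains of the submitted paths was already on the server. The same logic in isolation, with made-up sample data rather than a real response payload:

    job = [{"path": "/shots/a.exr"}, {"path": "/shots/b.exr"}]
    result = {"singlePartURLs": [{"filePath": "/shots/b.exr"}], "multiPartURLs": []}

    incoming_file_paths = set(item["path"] for item in job)
    for item_type in result.values():
        for item in item_type:
            incoming_file_paths.discard(item["filePath"])

    # whatever the server did not hand back an upload URL for is already uploaded
    print(incoming_file_paths)  # {'/shots/a.exr'}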
@@ -319,24 +334,24 @@ class FileStatWorker(worker.ThreadWorker):
             path = singlepart_upload["filePath"]
             file_size = singlepart_upload["fileSize"]
             upload_url = singlepart_upload["preSignedURL"]
-
+
             self.metric_store.increment("bytes_to_upload", file_size, path)
             self.metric_store.increment("num_files_to_upload")
             logger.debug("Singlepart, adding task %s", path)
-
+
             self.put_job((path, file_size, upload_url, SINGLEPART))
-
+
         # iterate through multipart
         for multipart_upload in job.get("multiPartURLs", []):
             path = multipart_upload["filePath"]
             file_size = multipart_upload["fileSize"]
-
+
             self.metric_store.increment("bytes_to_upload", file_size, path)
             self.metric_store.increment("num_files_to_upload")
             logger.debug("Multipart, adding task %s", path)
             self.put_job((path, file_size, multipart_upload, MULTIPART))
 
-        # make sure we return None, so no message is automatically added to the out_queue
+        # make sure we return None, so no message is automatically added to the out_queue
         return None
 
 
@@ -367,10 +382,9 @@ class UploadWorker(worker.ThreadWorker):
         self.metric_store.increment("bytes_uploaded", len(data), filename)
 
     def do_work(self, job, thread_int):
-
         if job:
             kms_key_name = None
-
+
             try:
                 filename = job[0]
                 file_size = job[1]
@@ -380,37 +394,44 @@ class UploadWorker(worker.ThreadWorker):
             except Exception:
                 logger.error("Issue with job (%s): %s", len(job), job)
                 raise
-
+
             if len(job) > 4:
                 kms_key_name = job[4]
-
+
             md5 = self.metric_store.get_dict("file_md5s", filename)
-
+
             try:
                 if upload_type == SINGLEPART:
-                    return self.do_singlepart_upload(upload, filename, file_size, md5, kms_key_name)
+                    return self.do_singlepart_upload(
+                        upload, filename, file_size, md5, kms_key_name
+                    )
                 elif upload_type == MULTIPART:
                     return self.do_multipart_upload(upload, filename, md5)
-
-                raise Exception(
-                    …
+
+                raise Exception(
+                    "upload_type is '%s' expected %s or %s"
+                    % (upload_type, SINGLEPART, MULTIPART)
+                )
+
             except Exception as err_msg:
-
                 real_md5 = common.get_base64_md5(filename)
 
                 if isinstance(err_msg, requests.exceptions.HTTPError):
-                    error_message = "Upload of {} failed with a response code {} ({}) (expected '{}', got '{}')"
-                    …
+                    error_message = f"Upload of {filename} failed with a response code {err_msg.response.status_code} ({err_msg.response.reason}) (expected '{md5}', got '{real_md5}')"
                 else:
-                    error_message = …
-                    …
+                    error_message = (
+                        f"Upload of {filename} failed. (expected '{md5}', got '{real_md5}') {str(err_msg)}"
+                    )
+
                 logger.error(error_message)
                 raise exceptions.UploadError(error_message)
-
+
         return worker.EMPTY_JOB
 
     @common.DecRetry(retry_exceptions=api_client.CONNECTION_EXCEPTIONS, tries=5)
-    def do_singlepart_upload(self, upload_url, filename, file_size, md5, kms_key_name=None):
+    def do_singlepart_upload(
+        self, upload_url, filename, file_size, md5, kms_key_name=None
+    ):
         """
         Note that for GCS we don't rely on the make_request's own retry mechanism because we need to
         recreate the chunked_reader generator before retrying the request. Instead, we wrap this
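The docstring above points at why retries are wrapped around do_singlepart_upload itself rather than left to make_request: a streaming request body is a generator that a failed attempt consumes, so every retry must rebuild it before sending again. A generic sketch of that idea, not ciocore's DecRetry implementation:

    import time

    def send_with_fresh_body(make_body, send, tries=5, delay=1):
        """Call send(body), rebuilding the streaming body before every attempt."""
        for attempt in range(tries):
            try:
                return send(make_body())  # a brand-new generator per attempt
            except ConnectionError:
                if attempt == tries - 1:
                    raise
                time.sleep(delay * 2 ** attempt)  # simple exponential backoff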
@@ -450,10 +471,8 @@ class UploadWorker(worker.ThreadWorker):
 
             return response
         else:
-            headers = {"Content-MD5": md5,
-                       …
-            }
-
+            headers = {"Content-MD5": md5, "Content-Type": "application/octet-stream"}
+
             if kms_key_name:
                 headers["x-goog-encryption-kms-key-name"] = kms_key_name
 
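The non-GCS branch above amounts to a single PUT with the file's base64 MD5 as a checksum header, plus an optional KMS header for encrypted GCS buckets. A sketch of that request using requests directly; the URL, path, and md5 values are placeholders:

    import requests

    def put_singlepart(upload_url, filepath, md5, kms_key_name=None):
        headers = {"Content-MD5": md5, "Content-Type": "application/octet-stream"}
        if kms_key_name:
            # only set for uploads to KMS-encrypted GCS buckets
            headers["x-goog-encryption-kms-key-name"] = kms_key_name
        with open(filepath, "rb") as f:
            response = requests.put(upload_url, data=f, headers=headers)
        response.raise_for_status()  # surfaces as HTTPError, as handled in do_work above
        return response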
@@ -543,30 +562,34 @@ class UploadWorker(worker.ThreadWorker):
         response.close()
 
         return response.headers
-
+
     def is_complete(self):
         # Get the number of files already uploaded as they are not passed to the Upload
         # worker
         file_store = self.metric_store.get("files")
-
+
         if isinstance(file_store, dict):
-            already_completed_uploads = len([x for x in file_store.values() if x["already_uploaded"]])
+            already_completed_uploads = len(
+                [x for x in file_store.values() if x["already_uploaded"]]
+            )
             queue_size = self.out_queue.qsize()
-            logger.debug(…
-                …
-                …
-                …
+            logger.debug(
+                "Is complete? out_queue_size=%s, completed_uploads=%s, task_count=%s",
+                queue_size,
+                already_completed_uploads,
+                self.task_count,
+            )
+
             return (queue_size + already_completed_uploads) >= self.task_count
-
+
         else:
             logger.debug("Is complete?: files not initialized yet")
             return False
 
 
 class Uploader(object):
-
     sleep_time = 10
-
+
     CLIENT_NAME = "Uploader"
 
     def __init__(self, args=None):
@@ -581,24 +604,22 @@ class Uploader(object):
         self.cancel = False
         self.error_messages = []
         self.num_files_to_process = 0
-
+
         self.report_status_thread = None
         self.monitor_status_thread = None
-
+
     def emit_progress(self, upload_stats):
-
-        if self.progress_callback:
+        if self.progress_callback:
             self.progress_callback(upload_stats)
 
     def prepare_workers(self):
         logger.debug("preparing workers...")
-
+
         if isinstance(threading.current_thread(), threading._MainThread):
             common.register_sigint_signal_handler()
         self.manager = None
 
     def create_manager(self, project=None):
-
         job_description = [
             (
                 MD5Worker,
@@ -609,20 +630,14 @@ class Uploader(object):
                     "md5_caching": self.args["md5_caching"],
                 },
             ),
-            (
-                MD5OutputWorker, [], {"thread_count": 1}
-            ),
+            (MD5OutputWorker, [], {"thread_count": 1}),
             (
                 HttpBatchWorker,
                 [],
                 {"thread_count": self.args["thread_count"], "project": project},
             ),
-            (
-                FileStatWorker, [], {"thread_count": 1}
-            ),
-            (
-                UploadWorker, [], {"thread_count": self.args["thread_count"]}
-            ),
+            (FileStatWorker, [], {"thread_count": 1}),
+            (UploadWorker, [], {"thread_count": self.args["thread_count"]}),
         ]
 
         manager = worker.JobManager(job_description)
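The job_description above wires five worker pools into a pipeline, each stage reading from the previous stage's output queue: MD5Worker → MD5OutputWorker → HttpBatchWorker → FileStatWorker → UploadWorker. A self-contained two-stage miniature of that architecture; the stage functions and queues here are illustrative, not the ciocore worker API:

    import hashlib
    import queue
    import threading

    files_q, md5_q = queue.Queue(), queue.Queue()

    def md5_stage():
        while True:
            path = files_q.get()
            if path is None:  # poison pill: tell the next stage to stop too
                md5_q.put(None)
                return
            with open(path, "rb") as f:
                md5_q.put((path, hashlib.md5(f.read()).hexdigest()))

    def upload_stage():
        while True:
            item = md5_q.get()
            if item is None:
                return
            print("would upload %s (md5 %s)" % item)

    threads = [threading.Thread(target=md5_stage), threading.Thread(target=upload_stage)]
    for t in threads:
        t.start()
    files_q.put(__file__)  # hash this very script as demo input
    files_q.put(None)
    for t in threads:
        t.join()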
@@ -634,7 +649,6 @@ class Uploader(object):
         logger.debug("started report_status thread")
         update_interval = 15
         while True:
-
             # don't report status if we are doing a local_upload
             if not self.upload_id:
                 logger.debug("not updating status as we were not provided an upload_id")
@@ -661,7 +675,7 @@ class Uploader(object):
                 logger.error("could not report status:")
                 logger.error(traceback.print_exc())
                 logger.error(traceback.format_exc())
-
+
             else:
                 break
 
@@ -669,7 +683,9 @@ class Uploader(object):
 
     def create_report_status_thread(self):
         logger.debug("creating reporter thread")
-        self.report_status_thread = threading.Thread(name="ReporterThread", target=self.report_status)
+        self.report_status_thread = threading.Thread(
+            name="ReporterThread", target=self.report_status
+        )
         self.report_status_thread.daemon = True
         self.report_status_thread.start()
 
@@ -684,19 +700,27 @@ class Uploader(object):
         while True:
             if self.working:
                 try:
-                    upload_stats = UploadStats.create(self.manager.metric_store, self.num_files_to_process, self.job_start_time)
+                    upload_stats = UploadStats.create(
+                        self.manager.metric_store,
+                        self.num_files_to_process,
+                        self.job_start_time,
+                    )
                     progress_handler(upload_stats)
                 except Exception as e:
                     print(e)
                     print(traceback.format_exc())
-
+
             else:
                 break
             sleep()
 
     def create_monitor_status_thread(self):
         logger.debug("creating console status thread")
-        self.monitor_status_thread = threading.Thread(name="PrintStatusThread", target=self.monitor_status, args=(self.emit_progress,))
+        self.monitor_status_thread = threading.Thread(
+            name="PrintStatusThread",
+            target=self.monitor_status,
+            args=(self.emit_progress,),
+        )
 
         # make sure threads don't stop the program from exiting
         self.monitor_status_thread.daemon = True
@@ -705,11 +729,17 @@ class Uploader(object):
         self.monitor_status_thread.start()
 
     def mark_upload_finished(self, upload_id, upload_files):
-        …
-        …
+        data = {
+            "upload_id": upload_id,
+            "status": "server_pending",
+            "upload_files": upload_files,
+        }
 
         self.api_client.make_request(
-            "/uploads/%s/finish" % upload_id, …
+            "/uploads/%s/finish" % upload_id,
+            data=json.dumps(data),
+            verb="POST",
+            use_api_key=True,
         )
         return True
 
@@ -718,11 +748,19 @@ class Uploader(object):
 
         # report error_message to the app
         self.api_client.make_request(
-            "/uploads/%s/fail" % upload_id, …
+            "/uploads/%s/fail" % upload_id,
+            data=error_message,
+            verb="POST",
+            use_api_key=True,
         )
 
         return True
 
+    def assets_only(self, *paths):
+        processed_filepaths = file_utils.process_upload_filepaths(paths)
+        file_map = {path: None for path in processed_filepaths}
+        self.handle_upload_response(project=None, upload_files=file_map)
+
     def handle_upload_response(self, project, upload_files, upload_id=None):
         """
         This is a really confusing method and should probably be split into to clear logic
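The new assets_only() method above gives callers a direct way to push an explicit set of paths through the same upload pipeline without a server-side upload record. A usage sketch, assuming the ciocore.uploader package re-exports Uploader and that args carries the keys resolve_args() would normally fill in; the paths and values are placeholders:

    from ciocore.uploader import Uploader

    args = {"thread_count": 4, "md5_caching": True,
            "database_filepath": None, "location": None}
    uploader = Uploader(args)
    uploader.assets_only("/path/to/scene.ma", "/path/to/textures")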
@@ -732,7 +770,6 @@ class Uploader(object):
         only be fed uploads by the app which have valid projects attached to them.
         """
         try:
-
             logger.info("%s", " NEXT UPLOAD ".center(30, "#"))
             logger.info("project: %s", project)
             logger.info("upload_id is %s", upload_id)
@@ -744,7 +781,7 @@ class Uploader(object):
 
             # reset counters
             self.num_files_to_process = len(upload_files)
-            logger.debug(…
+            logger.debug("Processing %s files", self.num_files_to_process)
             self.job_start_time = datetime.datetime.now()
             self.upload_id = upload_id
             self.job_failed = False
@@ -753,7 +790,7 @@ class Uploader(object):
             self.working = True
 
             self.prepare_workers()
-
+
             # create worker pools
             self.manager = self.create_manager(project)
 
@@ -767,39 +804,43 @@ class Uploader(object):
                 self.manager.add_task((path, md5))
 
             logger.info("creating console status thread...")
-            self.create_monitor_status_thread()
+            self.create_monitor_status_thread()
 
-            #wait for work to finish
+            # wait for work to finish
             while not self.manager.is_complete():
                 logger.debug("Manager is running, cancel requested?: %s", self.cancel)
-
+
                 if self.cancel or self.manager.error or common.SIGINT_EXIT:
                     self.error_messages = self.manager.stop_work()
                     logger.debug("Manager sucesfully stopped")
                     break
-
+
                 time.sleep(5)
-
+
             # Shutdown the manager once all jobs are done
-            if not self.cancel and not self.manager.error:
-                logger.debug("Waiting for Manager to join")
+            if not self.cancel and not self.manager.error:
+                logger.debug("Waiting for Manager to join")
                 self.manager.join()
 
-            upload_stats = UploadStats.create(self.manager.metric_store, self.num_files_to_process, self.job_start_time)
+            upload_stats = UploadStats.create(
+                self.manager.metric_store,
+                self.num_files_to_process,
+                self.job_start_time,
+            )
             logger.info(upload_stats.get_formatted_text())
-            self.emit_progress(upload_stats)
-
+            self.emit_progress(upload_stats)
+
             logger.debug("error_message: %s", self.error_messages)
 
             # signal to the reporter to stop working
             self.working = False
             logger.info("done uploading files")
-
+
             logger.debug("Waiting for reporter status thread to join")
             self.report_status_thread.join()
-
+
             logger.debug("Waiting for print status thread to join")
-            self.monitor_status_thread.join()
+            self.monitor_status_thread.join()
 
             # Despite storing lots of data about new uploads, we will only send back the things
             # that have changed, to keep payloads small.
@@ -807,7 +848,7 @@ class Uploader(object):
             if self.upload_id and not self.error_messages:
                 md5s = self.return_md5s()
                 for path in md5s:
-                    finished_upload_files[path] = …
+                    finished_upload_files[path] = {"source": path, "md5": md5s[path]}
 
                 self.mark_upload_finished(self.upload_id, finished_upload_files)
 
@@ -815,13 +856,12 @@ class Uploader(object):
             self.error_messages.append(sys.exc_info())
 
     def main(self, run_one_loop=False):
-
         def show_ouput(upload_stats):
             print(upload_stats.get_formatted_text())
             logger.info("File Progress: %s", upload_stats.file_progress)
-
+
         self.progress_callback = show_ouput
-
+
         logger.info("Uploader Started. Checking for uploads...")
 
         waiting_for_uploads_flag = False
@@ -833,10 +873,12 @@ class Uploader(object):
                 data["location"] = self.location
                 logger.debug("Data: %s", data)
                 resp_str, resp_code = self.api_client.make_request(
-                    "/uploads/client/next", …
+                    "/uploads/client/next",
+                    data=json.dumps(data),
+                    verb="PUT",
+                    use_api_key=True,
                 )
                 if resp_code == 204:
-
                     if not waiting_for_uploads_flag:
                         sys.stdout.write("\nWaiting for jobs to upload ")
                         sys.stdout.flush()
@@ -847,19 +889,21 @@ class Uploader(object):
                     time.sleep(self.sleep_time)
                     waiting_for_uploads_flag = True
                     continue
-
+
                 elif resp_code != 201:
-                    logger.error(…
+                    logger.error(
+                        "received invalid response code from app %s", resp_code
+                    )
                     logger.error("response is %s", resp_str)
                     time.sleep(self.sleep_time)
                     continue
-
+
                 print("") # to make a newline after the 204 loop
 
                 try:
                     json_data = json.loads(resp_str)
                     upload = json_data.get("data", {})
-
+
                 except ValueError:
                     logger.error("response was not valid json: %s", resp_str)
                     time.sleep(self.sleep_time)
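The main loop above encodes a small status-code contract with the app: 204 means nothing is queued yet, 201 carries an upload payload, and anything else is an error worth logging before re-polling. The same loop schematically, with fetch_next() standing in for the api_client.make_request("/uploads/client/next", ...) call:

    import json
    import time

    def poll_for_uploads(fetch_next, handle, sleep_time=10):
        while True:
            resp_str, resp_code = fetch_next()
            if resp_code == 204:
                time.sleep(sleep_time)  # nothing queued yet; wait and re-poll
                continue
            if resp_code != 201:
                print("unexpected response %s: %s" % (resp_code, resp_str))
                time.sleep(sleep_time)
                continue
            handle(json.loads(resp_str).get("data", {}))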
@@ -870,28 +914,34 @@ class Uploader(object):
                 project = upload["project"]
 
                 self.handle_upload_response(project, upload_files, upload_id)
-
+
                 logger.debug("Upload of entity %s completed.", upload_id)
-                upload_stats = UploadStats.create(self.manager.metric_store, self.num_files_to_process, self.job_start_time)
+                upload_stats = UploadStats.create(
+                    self.manager.metric_store,
+                    self.num_files_to_process,
+                    self.job_start_time,
+                )
                 show_ouput(upload_stats)
                 logger.debug(self.manager.worker_queue_status_text())
-
+
                 error_messages = []
-
+
                 for exception in self.error_messages:
                     error_messages.append(str(exception[1]))
-
+
                 if error_messages:
-                    self.mark_upload_failed(…
-                    …
+                    self.mark_upload_failed(
+                        error_message="\n".join(error_messages), upload_id=upload_id
+                    )
+
                     log_file = loggeria.LOG_PATH
                     sys.stderr.write("Error uploading files:\n")
-
+
                     for err_msg in error_messages:
                         sys.stderr.write("\t%s\n", err_msg)
 
-                    sys.stderr.write("\nSee log %s for more details\n\n", log_file)
-
+                    sys.stderr.write("\nSee log %s for more details\n\n", log_file)
+
                 self.error_messages = []
 
                 waiting_for_uploads_flag = False
@@ -899,8 +949,8 @@ class Uploader(object):
             except KeyboardInterrupt:
                 logger.info("ctrl-c exit")
                 break
-            except Exception as …
-                logger.exception("Caught exception:\n%s", …
+            except Exception as err_msg:
+                logger.exception("Caught exception:\n%s", err_msg)
                 time.sleep(self.sleep_time)
                 continue
 
@@ -913,6 +963,7 @@ class Uploader(object):
         """
         return self.manager.metric_store.get_dict("file_md5s")
 
+
 def run_uploader(args):
     """
     Start the uploader process. This process will run indefinitely, polling
@@ -921,32 +972,34 @@ def run_uploader(args):
     # convert the Namespace object to a dictionary
     args_dict = vars(args)
     cfg = config.config().config
-
-    api_client.ApiClient.register_client(client_name=Uploader.CLIENT_NAME, client_version=ciocore.version)
+
+    api_client.ApiClient.register_client(
+        client_name=Uploader.CLIENT_NAME, client_version=ciocore.version
+    )
 
     # Set up logging
     log_level_name = args_dict.get("log_level") or cfg["log_level"]
-
+
     loggeria.setup_conductor_logging(
         logger_level=loggeria.LEVEL_MAP.get(log_level_name),
         log_dirpath=args_dict.get("log_dir"),
         log_filename="conductor_uploader.log",
-        disable_console_logging…
-        use_system_log=False
-    )
+        disable_console_logging=not args_dict["log_to_console"],
+        use_system_log=False,
+    )
 
     print("Logging to %s", loggeria.LOG_PATH)
-
+
     logger.debug("Uploader parsed_args is %s", args_dict)
-
+
     resolved_args = resolve_args(args_dict)
     uploader = Uploader(resolved_args)
-
+
     if args.paths:
-        processed_filepaths = …
+        processed_filepaths = file_utils.process_upload_filepaths(args.paths[0])
         file_map = {path: None for path in processed_filepaths}
         uploader.handle_upload_response(project=None, upload_files=file_map)
-
+
     else:
         uploader.main()
 
@@ -972,7 +1025,7 @@ def resolve_args(args):
     Resolve all arguments, reconciling differences between command line args and config.yml args.
     See resolve_arg function.
     """
-
+
     args["md5_caching"] = resolve_arg("md5_caching", args)
     args["database_filepath"] = resolve_arg("database_filepath", args)
     args["location"] = resolve_arg("location", args)
@@ -980,18 +1033,18 @@ def resolve_args(args):
 
     return args
 
+
 def resolve_arg(key, args):
     """
     If the key doesn't exist (or is None), grab it from the config.
     """
-
+
     cfg = config.config().config
-    config_value = cfg.get(key)
-
+    config_value = cfg.get(key)
+
     value = args.get(key, config_value)
-
+
     if value is None:
         value = config_value
-
+
     return value
-
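resolve_arg above gives command-line values precedence over config.yml, falling back to the config whenever the argument is absent or explicitly None. The same precedence rule in isolation, with a plain dict standing in for the config object:

    def resolve(key, args, cfg):
        value = args.get(key, cfg.get(key))
        if value is None:         # present on the command line but left unset
            value = cfg.get(key)  # fall back to config.yml
        return value

    cfg = {"md5_caching": True, "location": None}
    assert resolve("md5_caching", {"md5_caching": None}, cfg) is True
    assert resolve("location", {}, cfg) is None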