geoseeq 0.5.6a16__py3-none-any.whl → 0.6.0__py3-none-any.whl
- geoseeq/cli/download.py +1 -0
- geoseeq/cli/main.py +3 -1
- geoseeq/cli/project.py +96 -0
- geoseeq/cli/raw.py +59 -0
- geoseeq/cli/upload/upload.py +2 -0
- geoseeq/cli/upload/upload_reads.py +1 -0
- geoseeq/result/file_chunker.py +50 -0
- geoseeq/result/file_download.py +2 -3
- geoseeq/result/file_upload.py +55 -142
- geoseeq/result/resumable_upload_tracker.py +100 -0
- geoseeq/upload_download_manager.py +11 -3
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/METADATA +1 -1
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/RECORD +17 -13
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/LICENSE +0 -0
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/WHEEL +0 -0
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/top_level.txt +0 -0
geoseeq/cli/download.py
CHANGED
geoseeq/cli/main.py
CHANGED
@@ -18,6 +18,7 @@ from .shared_params.opts_and_args import overwrite_option, yes_option
 from .detail import cli_detail
 from .run import cli_app
 from .get_eula import cli_eula
+from .project import cli_project
 
 logger = logging.getLogger('geoseeq_api')
 handler = logging.StreamHandler()
@@ -53,7 +54,7 @@ def version():
     Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
     Run `geoseeq eula show` to view the EULA.
     """
-    click.echo('0.5.6a16')
+    click.echo('0.6.0') # remember to update setup
 
 
 @main.group('advanced')
@@ -65,6 +66,7 @@ cli_advanced.add_command(cli_copy)
 cli_advanced.add_command(cli_user)
 cli_advanced.add_command(cli_detail)
 cli_advanced.add_command(cli_upload_advanced)
+cli_advanced.add_command(cli_project)
 
 @cli_advanced.group('experimental')
 def cli_experimental():
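A minimal, self-contained sketch of what the two added lines above wire up: the new `project` group is registered under the existing `advanced` group, so its subcommands become reachable as `geoseeq advanced project ...`. The group bodies here are stand-ins, not the package's actual definitions.

    import click

    @click.group()
    def main():
        pass

    @main.group('advanced')
    def cli_advanced():
        pass

    @click.group('project')
    def cli_project():
        """Stand-in for the group defined in geoseeq/cli/project.py."""
        pass

    # Mirrors the added line in main.py
    cli_advanced.add_command(cli_project)

    if __name__ == '__main__':
        main()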
geoseeq/cli/project.py
ADDED
@@ -0,0 +1,96 @@
+import json
+import logging
+from os import makedirs, getcwd
+from os.path import dirname, join
+
+import click
+import pandas as pd
+from multiprocessing import Pool
+from .shared_params import (
+    handle_project_id,
+    handle_folder_id,
+    project_id_arg,
+    sample_ids_arg,
+    handle_multiple_sample_ids,
+    handle_multiple_result_file_ids,
+    use_common_state,
+    flatten_list_of_els_and_files,
+    yes_option,
+    module_option,
+    ignore_errors_option,
+    folder_ids_arg,
+)
+from geoseeq.result.file_download import download_url
+from geoseeq.utils import download_ftp
+from geoseeq.id_constructors import (
+    result_file_from_uuid,
+    result_file_from_name,
+)
+from geoseeq.knex import GeoseeqNotFoundError
+from .progress_bar import PBarManager
+from .utils import convert_size
+from geoseeq.constants import FASTQ_MODULE_NAMES
+from geoseeq.result import ResultFile
+from geoseeq.upload_download_manager import GeoSeeqDownloadManager
+from geoseeq.file_system.filesystem_download import (
+    ProjectOnFilesystem,
+    FILE_STATUS_MODIFIED_REMOTE,
+    FILE_STATUS_MODIFIED_LOCAL,
+    FILE_STATUS_NEW_LOCAL,
+    FILE_STATUS_NEW_REMOTE,
+    FILE_STATUS_IS_LOCAL_STUB,
+)
+
+
+logger = logging.getLogger('geoseeq_api')
+
+
+@click.group("project")
+def cli_project():
+    """Download data from GeoSeeq."""
+    pass
+
+
+@cli_project.command("clone")
+@use_common_state
+@click.option('--use-stubs/--full-files', default=True, help='Download full files or stubs')
+@click.option('--target-dir', '-d', default=None, help='Directory to download the project to')
+@project_id_arg
+def cli_clone_project(state, use_stubs, target_dir, project_id):
+    """Clone a project to the local filesystem.
+    """
+    knex = state.get_knex().set_auth_required()
+    proj = handle_project_id(knex, project_id)
+    logger.info(f"Found project \"{proj.name}\"")
+    if target_dir is None:
+        target_dir = proj.name
+
+    project = ProjectOnFilesystem(proj, target_dir)
+    project.download(use_stubs=use_stubs)
+
+
+@cli_project.command("status")
+@use_common_state
+def cli_project_status(state):
+    """Check the status of a project on the local filesystem.
+    """
+    project = ProjectOnFilesystem.from_path(getcwd(), recursive=True)
+
+    objs_by_status = {
+        FILE_STATUS_MODIFIED_LOCAL: [],
+        FILE_STATUS_MODIFIED_REMOTE: [],
+        FILE_STATUS_NEW_LOCAL: [],
+        FILE_STATUS_NEW_REMOTE: [],
+        FILE_STATUS_IS_LOCAL_STUB: [],
+    }
+    for obj_type, status, local_path, obj in project.list_abnormal_objects():
+        objs_by_status[status].append((obj_type, local_path, obj))
+
+    print(f"Project: {project.project.name}")
+    for status, objs in objs_by_status.items():
+        print(f"Status: {status}")
+        for obj_type, local_path, obj in objs:
+            if status in (FILE_STATUS_MODIFIED_LOCAL, FILE_STATUS_NEW_LOCAL):
+                print(f"  {obj_type}: {project.path_from_project_root(local_path)} -> {obj}")
+            else:
+                print(f"  {obj_type}: {obj} -> {project.path_from_project_root(local_path)}")
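A hedged sketch of the clone flow above run outside of click; the knex construction is elided and the project id is a placeholder, so treat this as an illustration of the call order, not a verified snippet.

    from geoseeq.cli.shared_params import handle_project_id
    from geoseeq.file_system.filesystem_download import ProjectOnFilesystem

    knex = ...  # an authenticated Knex, normally supplied by the CLI's common state
    proj = handle_project_id(knex, "<project-id-or-grn>")  # placeholder id
    local = ProjectOnFilesystem(proj, proj.name)  # clone into ./<project name>
    local.download(use_stubs=True)                # stubs by default, like --use-stubs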
geoseeq/cli/raw.py
ADDED
@@ -0,0 +1,59 @@
+import click
+import json
+from .shared_params import use_common_state, overwrite_option
+from geoseeq import GeoseeqNotFoundError
+from geoseeq.blob_constructors import (
+    sample_result_file_from_uuid,
+    project_result_file_from_uuid,
+    sample_result_folder_from_uuid,
+    project_result_folder_from_uuid,
+)
+
+
+@click.group('raw')
+def cli_raw():
+    """Low-level commands for interacting with the API."""
+    pass
+
+
+@cli_raw.command('get-file-data')
+@use_common_state
+@click.argument('file_ids', nargs=-1)
+def cli_get_file_data(state, file_ids):
+    """Print the raw stored data in a result file object."""
+    knex = state.get_knex()
+    for file_id in file_ids:
+        file_id = file_id.split(':')[-1]
+        try:
+            result_file = sample_result_file_from_uuid(knex, file_id)
+        except GeoseeqNotFoundError:
+            result_file = project_result_file_from_uuid(knex, file_id)
+        print(json.dumps(result_file.stored_data, indent=2), file=state.outfile)
+
+
+@cli_raw.command('create-raw-file')
+@use_common_state
+@overwrite_option
+@click.argument('folder_id')
+@click.argument('result_filename')
+@click.argument('filename', type=click.File('r'))
+def cli_get_file_data(state, overwrite, folder_id, result_filename, filename):
+    """Print the raw stored data in a result file object."""
+    knex = state.get_knex()
+
+    folder_id = folder_id.split(':')[-1]
+    try:
+        result_folder = sample_result_folder_from_uuid(knex, folder_id)
+    except GeoseeqNotFoundError:
+        result_folder = project_result_folder_from_uuid(knex, folder_id)
+    blob = json.load(filename)
+    result_file = result_folder.result_file(result_filename)
+    if overwrite:
+        result_file.idem()
+        result_file.stored_data = blob
+        result_file.save()
+    else:
+        result_file.create()
+    click.echo(f'Created file {result_file.uuid}', file=state.outfile)
+
+
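A hedged sketch of what `geoseeq raw get-file-data <id>` does programmatically; the id is a placeholder and the knex construction is again elided. All imports below appear in the new module itself.

    import json
    from geoseeq import GeoseeqNotFoundError
    from geoseeq.blob_constructors import (
        sample_result_file_from_uuid,
        project_result_file_from_uuid,
    )

    knex = ...  # an authenticated Knex
    file_id = "grn:gsr1:<uuid>".split(':')[-1]  # GRN prefixes are stripped, only the UUID is used
    try:
        result_file = sample_result_file_from_uuid(knex, file_id)
    except GeoseeqNotFoundError:  # fall back to project-scoped files
        result_file = project_result_file_from_uuid(knex, file_id)
    print(json.dumps(result_file.stored_data, indent=2))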
geoseeq/cli/upload/upload.py
CHANGED
@@ -122,6 +122,7 @@ def cli_upload_file(state, cores, threads_per_upload, num_retries, chunk_size_mb
         use_cache=state.use_cache,
         num_retries=num_retries,
         ignore_errors=ignore_errors,
+        use_atomic_upload=True,
         session=None, #knex.new_session(),
         chunk_size_mb=chunk_size_mb if chunk_size_mb > 0 else None,
     )
@@ -160,6 +161,7 @@ def cli_upload_folder(state, cores, yes, private, recursive, hidden, no_new_vers
         overwrite=True,
         use_cache=state.use_cache,
         no_new_versions=no_new_versions,
+        use_atomic_upload=True,
     )
     for folder_name in folder_names:
         result_folder = root_obj.result_folder(folder_name).idem()
geoseeq/cli/upload/upload_reads.py
CHANGED
@@ -98,6 +98,7 @@ def _do_upload(groups, module_name, link_type, lib, filepaths, overwrite, no_new
         progress_tracker_factory=PBarManager().get_new_bar,
         use_cache=state.use_cache,
         no_new_versions=no_new_versions,
+        use_atomic_upload=True,
     )
     for group in groups:
         sample = lib.sample(group['sample_name']).idem()
geoseeq/result/file_chunker.py
ADDED
@@ -0,0 +1,50 @@
+
+from os.path import getsize
+import logging
+
+logger = logging.getLogger("geoseeq_api")  # Same name as calling module
+logger.addHandler(logging.NullHandler())
+
+
+class FileChunker:
+
+    def __init__(self, filepath, chunk_size):
+        self.filepath = filepath
+        self.chunk_size = chunk_size
+        self.file_size = getsize(filepath)
+        self.n_parts = int(self.file_size / self.chunk_size) + 1
+        self.loaded_parts = []
+
+    def load_all_chunks(self):
+        if len(self.loaded_parts) != self.n_parts:
+            with open(self.filepath, "rb") as f:
+                f.seek(0)
+                for i in range(self.n_parts):
+                    chunk = f.read(self.chunk_size)
+                    self.loaded_parts.append(chunk)
+        return self  # convenience for chaining
+
+    def chunk_is_preloaded(self, num):
+        return len(self.loaded_parts) > num and self.loaded_parts[num]
+
+    def read_one_chunk(self, num):
+        if not self.chunk_is_preloaded(num):
+            logger.debug(f"Reading chunk {num} from {self.filepath}")
+            with open(self.filepath, "rb") as f:
+                f.seek(num * self.chunk_size)
+                chunk = f.read(self.chunk_size)
+                return chunk
+        return self.loaded_parts[num]
+
+    def get_chunk(self, num):
+        if self.chunk_is_preloaded(num):
+            return self.loaded_parts[num]
+        return self.read_one_chunk(num)
+
+    def get_chunk_size(self, num):
+        if num < (self.n_parts - 1):  # all but the last chunk
+            return self.chunk_size
+        if self.chunk_is_preloaded(num):  # last chunk, pre-loaded
+            return len(self.loaded_parts[num])
+        return len(self.read_one_chunk(num))  # last chunk, not pre-loaded
+
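A quick, runnable sketch (not part of the diff) of how FileChunker slices a file for multipart upload; the temporary file exists only for illustration.

    import tempfile
    from geoseeq.result.file_chunker import FileChunker

    FIVE_MB = 5 * 1024 * 1024
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b"x" * (FIVE_MB + 123))  # just over one chunk

    chunker = FileChunker(tmp.name, chunk_size=FIVE_MB)
    print(chunker.n_parts)            # 2
    print(chunker.get_chunk_size(0))  # 5242880: every chunk but the last is full-sized
    print(chunker.get_chunk_size(1))  # 123: the last chunk is read to measure it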
geoseeq/result/file_download.py
CHANGED
@@ -12,10 +12,10 @@ from geoseeq.constants import FIVE_MB
 logger = logging.getLogger("geoseeq_api")  # Same name as calling module
 
 
-def _download_head(url, filename, head=None, progress_tracker=None):
+def _download_head(url, filename, head=None, start=0, progress_tracker=None):
     headers = None
     if head and head > 0:
-        headers = {"Range": f"bytes=0-{head}"}
+        headers = {"Range": f"bytes={start}-{head}"}
     response = requests.get(url, stream=True, headers=headers)
     response.raise_for_status()
     total_size_in_bytes = int(response.headers.get('content-length', 0))
@@ -67,7 +67,6 @@ def download_url(url, kind='guess', filename=None, head=None, progress_tracker=N
     raise ValueError(f"Unknown download kind: {kind}")
 
 
-
 class ResultFileDownload:
     """Abstract class that handles download methods for result files."""
 
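The new `start` parameter generalizes the previously fixed beginning of the byte range. A hedged illustration (placeholder URL) of the Range header `_download_head` now builds:

    import requests

    url = "https://example.com/big-file"   # placeholder
    headers = {"Range": "bytes=100-1099"}  # what start=100, head=1099 produce
    resp = requests.get(url, stream=True, headers=headers)
    # A server that honors Range replies 206 Partial Content with 1000 bytes.
    print(resp.status_code, resp.headers.get("content-length"))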
geoseeq/result/file_upload.py
CHANGED
@@ -13,130 +13,21 @@ from geoseeq.utils import md5_checksum
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from .utils import *
 from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
-
-class FileChunker:
-
-    def __init__(self, filepath, chunk_size):
-        self.filepath = filepath
-        self.chunk_size = chunk_size
-        self.file_size = getsize(filepath)
-        self.n_parts = int(self.file_size / self.chunk_size) + 1
-        self.loaded_parts = []
-
-    def load_all_chunks(self):
-        if len(self.loaded_parts) != self.n_parts:
-            with open(self.filepath, "rb") as f:
-                f.seek(0)
-                for i in range(self.n_parts):
-                    chunk = f.read(self.chunk_size)
-                    self.loaded_parts.append(chunk)
-        return self  # convenience for chaining
-
-    def chunk_is_preloaded(self, num):
-        return len(self.loaded_parts) > num and self.loaded_parts[num]
-
-    def read_one_chunk(self, num):
-        if not self.chunk_is_preloaded(num):
-            logger.debug(f"Reading chunk {num} from {self.filepath}")
-            with open(self.filepath, "rb") as f:
-                f.seek(num * self.chunk_size)
-                chunk = f.read(self.chunk_size)
-                return chunk
-        return self.loaded_parts[num]
-
-    def get_chunk(self, num):
-        if self.chunk_is_preloaded(num):
-            return self.loaded_parts[num]
-        return self.read_one_chunk(num)
-
-    def get_chunk_size(self, num):
-        if num < (self.n_parts - 1):  # all but the last chunk
-            return self.chunk_size
-        if self.chunk_is_preloaded(num):  # last chunk, pre-loaded
-            return len(self.loaded_parts[num])
-        return len(self.read_one_chunk(num))  # last chunk, not pre-loaded
-
-
-class ResumableUploadTracker:
-
-    def __init__(self, filepath, chunk_size, tracker_file_prefix="gs_resumable_upload_tracker"):
-        self.open, self.upload_started = True, False
-        self.upload_id, self.urls = None, None
-        self.filepath = filepath
-        self.tracker_file = join(
-            GEOSEEQ_CACHE_DIR, 'upload',
-            tracker_file_prefix + f".{chunk_size}.{getsize(filepath)}." + basename(filepath)
-        )
-        try:
-            os.makedirs(dirname(self.tracker_file), exist_ok=True)
-        except Exception as e:
-            logger.warning(f'Could not create resumable upload tracker directory. {e}')
-            self.open = False
-        self._loaded_parts = {}
-        self._load_parts_from_file()
-
-    def start_upload(self, upload_id, urls):
-        if not self.open:
-            return
-        if self.upload_started:
-            raise GeoseeqGeneralError("Upload has already started.")
-        blob = dict(upload_id=upload_id, urls=urls, start_time=time.time())
-        serialized = json.dumps(blob)
-        with open(self.tracker_file, "w") as f:
-            f.write(serialized + "\n")
-        self.upload_id, self.urls = upload_id, urls
-        self.upload_started = True
-
-    def add_part(self, part_upload_info):
-        if not self.open:
-            return
-        part_id = part_upload_info["PartNumber"]
-        serialized = json.dumps(part_upload_info)
-        with open(self.tracker_file, "a") as f:
-            f.write(serialized + "\n")
-        self._loaded_parts[part_id] = part_upload_info
-        if len(self._loaded_parts) == len(self.urls):
-            self.cleanup()
-            self.open = False
-
-    def _load_parts_from_file(self):
-        if not isfile(self.tracker_file):
-            return
-        with open(self.tracker_file, "r") as f:
-            header_blob = json.loads(f.readline())
-            self.upload_id, self.urls = header_blob["upload_id"], header_blob["urls"]
-            start_time = header_blob["start_time"]
-            if (time.time() - start_time) > (60 * 60 * 23):
-                logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
-                os.remove(self.tracker_file)
-                return
-            self.upload_started = True
-            for line in f:
-                blob = json.loads(line)
-                part_id = blob["PartNumber"]
-                self._loaded_parts[part_id] = blob
-
-    def part_has_been_uploaded(self, part_number):
-        if not self.open:
-            return False
-        return part_number in self._loaded_parts
-
-    def get_part_info(self, part_number):
-        return self._loaded_parts[part_number]
-
-    def cleanup(self):
-        if not self.open:
-            return
-        try:
-            os.remove(self.tracker_file)
-        except FileNotFoundError:
-            pass
+from .file_chunker import FileChunker
+from .resumable_upload_tracker import ResumableUploadTracker
 
 
 class ResultFileUpload:
     """Abstract class that handles upload methods for result files."""
 
-    def _create_multipart_upload(self, filepath, file_size, optional_fields):
+    def _result_type(self, atomic=False):
+        if self.is_sample_result:
+            return "sample"
+        if atomic:
+            return "project"
+        return "group"
+
+    def _create_multipart_upload(self, filepath, file_size, optional_fields, atomic=False):
         optional_fields = optional_fields if optional_fields else {}
         optional_fields.update(
             {
@@ -147,23 +38,31 @@ class ResultFileUpload:
         data = {
             "filename": basename(filepath),
             "optional_fields": optional_fields,
-            "result_type": "sample" if self.is_sample_result else "group",
+            "result_type": self._result_type(atomic),
         }
-        response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload", json=data)
+        url = f"/ar_fields/{self.uuid}/create_upload"
+        if atomic:
+            data["fieldname"] = self.name
+            url = f"/ars/{self.parent.uuid}/create_atomic_upload"
+        response = self.knex.post(url, json=data)
         return response
 
-    def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields):
+    def _prep_multipart_upload(self, filepath, file_size, chunk_size, optional_fields, atomic=False):
         n_parts = int(file_size / chunk_size) + 1
-        response = self._create_multipart_upload(filepath, file_size, optional_fields)
+        response = self._create_multipart_upload(filepath, file_size, optional_fields, atomic=atomic)
         upload_id = response["upload_id"]
-        parts = list(range(1, n_parts + 1))
         data = {
-            "parts": parts,
+            "parts": list(range(1, n_parts + 1)),
             "stance": "upload-multipart",
             "upload_id": upload_id,
-            "result_type": "sample" if self.is_sample_result else "group",
+            "result_type": self._result_type(atomic),
        }
-        response = self.knex.post(f"/ar_fields/{self.uuid}/create_upload_urls", json=data)
+        url = f"/ar_fields/{self.uuid}/create_upload_urls"
+        if atomic:
+            data["uuid"] = response["uuid"]
+            data["fieldname"] = self.name
+            url = f"ars/{self.parent.uuid}/create_atomic_upload_urls"
+        response = self.knex.post(url, json=data)
         urls = response
         return upload_id, urls
 
@@ -204,16 +103,17 @@ class ResultFileUpload:
             resumable_upload_tracker.add_part(blob)
         return blob
 
-    def _finish_multipart_upload(self, upload_id, complete_parts):
-        response = self.knex.post(
-            f"/ar_fields/{self.uuid}/complete_upload",
-            json={
-                "parts": complete_parts,
-                "upload_id": upload_id,
-                "result_type": "sample" if self.is_sample_result else "group",
-            },
-            json_response=False,
-        )
+    def _finish_multipart_upload(self, upload_id, complete_parts, atomic=False):
+        data = {
+            "parts": complete_parts,
+            "upload_id": upload_id,
+            "result_type": self._result_type(atomic),
+        }
+        url = f"/ar_fields/{self.uuid}/complete_upload"
+        if atomic:
+            data["fieldname"] = self.name
+            url = f"/ars/{self.parent.uuid}/complete_atomic_upload"
+        response = self.knex.post(url, json=data, json_response=False)
         response.raise_for_status()
 
     def _upload_parts(self, file_chunker, urls, max_retries, session, progress_tracker, threads, resumable_upload_tracker=None):
@@ -257,6 +157,7 @@ class ResultFileUpload:
         progress_tracker=None,
         threads=1,
         use_cache=True,
+        use_atomic_upload=False,
     ):
         """Upload a file to S3 using the multipart upload process."""
         logger.info(f"Uploading {filepath} to S3 using multipart upload.")
@@ -267,15 +168,21 @@ class ResultFileUpload:
         logger.debug(f"Using chunk size of {chunk_size} bytes.")
         resumable_upload_tracker = None
         if use_cache and file_size > 10 * FIVE_MB:  # only use resumable upload tracker for larger files
-            resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size)
+            upload_target_uuid = self.parent.uuid if use_atomic_upload else self.uuid
+            resumable_upload_tracker = ResumableUploadTracker(filepath, chunk_size, upload_target_uuid)
+
         if resumable_upload_tracker and resumable_upload_tracker.upload_started:
+            # a resumable upload for this file has already started
+            resumable_upload_exists_and_is_valid = True
             upload_id, urls = resumable_upload_tracker.upload_id, resumable_upload_tracker.urls
+            use_atomic_upload = resumable_upload_tracker.is_atomic_upload
             logger.info(f'Resuming upload for "{filepath}", upload_id: "{upload_id}"')
         else:
-            upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields)
+            upload_id, urls = self._prep_multipart_upload(filepath, file_size, chunk_size, optional_fields, atomic=use_atomic_upload)
             if resumable_upload_tracker:
                 logger.info(f'Creating new resumable upload for "{filepath}", upload_id: "{upload_id}"')
-                resumable_upload_tracker.start_upload(upload_id, urls)
+                resumable_upload_tracker.start_upload(upload_id, urls, is_atomic_upload=use_atomic_upload)
+
         logger.info(f'Starting upload for "{filepath}"')
         complete_parts = []
         file_chunker = FileChunker(filepath, chunk_size)
@@ -294,14 +201,20 @@ class ResultFileUpload:
             threads,
             resumable_upload_tracker=resumable_upload_tracker
         )
-        self._finish_multipart_upload(upload_id, complete_parts)
+        self._finish_multipart_upload(upload_id, complete_parts, atomic=use_atomic_upload)
         logger.info(f'Finished Upload for "{filepath}"')
+        if use_atomic_upload:
+            # if this was an atomic upload then this result may not have existed on the server before
+            self.get()
         return self
 
     def upload_file(self, filepath, multipart_thresh=FIVE_MB, overwrite=True, no_new_versions=False, **kwargs):
         if self.exists() and not overwrite:
             raise GeoseeqGeneralError(f"Overwrite is set to False and file {self.uuid} already exists.")
-        self.idem()
+        if not kwargs.get("use_atomic_upload", False):
+            self.idem()
+        else:
+            self.parent.idem()
         if no_new_versions and self.has_downloadable_file():
             raise GeoseeqGeneralError(f"File {self} already has a downloadable file. Not uploading a new version.")
         resolved_path = Path(filepath).resolve()
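A hedged sketch of the new atomic path: with use_atomic_upload=True, upload_file() only ensures the parent folder exists (parent.idem()), the server materializes the result file when the multipart upload completes, and self.get() then refreshes the local object. Object construction is elided here.

    result_folder = ...  # an existing ResultFolder object
    result_file = result_folder.result_file("report.json")  # placeholder name
    result_file.upload_file("./report.json", use_atomic_upload=True)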
geoseeq/result/resumable_upload_tracker.py
ADDED
@@ -0,0 +1,100 @@
+
+import time
+import json
+import os
+from os.path import basename, getsize, join, dirname, isfile, getctime
+from pathlib import Path
+from random import random
+import requests
+
+from geoseeq.knex import GeoseeqGeneralError
+from geoseeq.constants import FIVE_MB
+from geoseeq.utils import md5_checksum
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from .utils import *
+from geoseeq.file_system_cache import GEOSEEQ_CACHE_DIR
+from .file_chunker import FileChunker
+
+
+class ResumableUploadTracker:
+
+    def __init__(self, filepath, chunk_size, upload_target_uuid, tracker_file_prefix="gs_resumable_upload_tracker"):
+        self.open, self.upload_started = True, False
+        self.upload_id, self.urls, self.is_atomic_upload = None, None, None
+        self.upload_target_uuid = upload_target_uuid
+        self.filepath = filepath
+        self.tracker_file_dir = join(GEOSEEQ_CACHE_DIR, 'upload')
+        self.tracker_file = join(
+            self.tracker_file_dir,
+            tracker_file_prefix + f".{upload_target_uuid}.{chunk_size}.{getsize(filepath)}." + basename(filepath)
+        )
+        try:
+            os.makedirs(self.tracker_file_dir, exist_ok=True)
+        except Exception as e:
+            logger.warning(f'Could not create resumable upload tracker directory. {e}')
+            self.open = False
+        self._loaded_parts = {}
+        self._load_parts_from_file()
+
+    def start_upload(self, upload_id, urls, is_atomic_upload=False):
+        if not self.open:
+            return
+        if self.upload_started:
+            raise GeoseeqGeneralError("Upload has already started.")
+        self.upload_started = True
+        blob = dict(upload_id=upload_id,
+                    urls=urls,
+                    is_atomic_upload=is_atomic_upload,
+                    upload_target_uuid=self.upload_target_uuid,
+                    start_time=time.time())
+        serialized = json.dumps(blob)
+        with open(self.tracker_file, "w") as f:
+            f.write(serialized + "\n")
+        self.upload_id, self.urls, self.is_atomic_upload = upload_id, urls, is_atomic_upload
+
+    def add_part(self, part_upload_info):
+        if not self.open:
+            return
+        part_id = part_upload_info["PartNumber"]
+        serialized = json.dumps(part_upload_info)
+        with open(self.tracker_file, "a") as f:
+            f.write(serialized + "\n")
+        self._loaded_parts[part_id] = part_upload_info
+        if len(self._loaded_parts) == len(self.urls):
+            self.cleanup()
+            self.open = False
+
+    def _load_parts_from_file(self):
+        if not isfile(self.tracker_file):
+            return
+        with open(self.tracker_file, "r") as f:
+            header_blob = json.loads(f.readline())
+            self.upload_id, self.urls, self.is_atomic_upload = (
+                header_blob["upload_id"], header_blob["urls"], header_blob["is_atomic_upload"]
+            )
+            start_time = header_blob["start_time"]
+            if (time.time() - start_time) > (60 * 60 * 23):
+                logger.warning(f"Tracker file {self.tracker_file} is too old. Deleting.")
+                os.remove(self.tracker_file)
+                return
+            self.upload_started = True
+            for line in f:
+                blob = json.loads(line)
+                part_id = blob["PartNumber"]
+                self._loaded_parts[part_id] = blob
+
+    def part_has_been_uploaded(self, part_number):
+        if not self.open:
+            return False
+        return part_number in self._loaded_parts
+
+    def get_part_info(self, part_number):
+        return self._loaded_parts[part_number]
+
+    def cleanup(self):
+        if not self.open:
+            return
+        try:
+            os.remove(self.tracker_file)
+        except FileNotFoundError:
+            pass
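A hedged illustration (values made up) of the tracker file the class writes under GEOSEEQ_CACHE_DIR/upload: one JSON header line, then one JSON line per completed part, which is what _load_parts_from_file reads back to resume.

    import json

    header = {"upload_id": "abc123", "urls": {"1": "https://..."},
              "is_atomic_upload": True, "upload_target_uuid": "<uuid>",
              "start_time": 1700000000.0}
    part = {"PartNumber": 1, "ETag": "\"d41d8cd98f00b204e9800998ecf8427e\""}
    print(json.dumps(header))  # first line of the tracker file
    print(json.dumps(part))    # appended after each uploaded part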
geoseeq/upload_download_manager.py
CHANGED
@@ -22,7 +22,7 @@ def _upload_one_file(args):
     (result_file, filepath, session, progress_tracker,
      link_type, overwrite, log_level, parallel_uploads,
      use_cache, no_new_versions, threads_per_upload,
-     num_retries, ignore_errors, chunk_size_mb) = args
+     num_retries, ignore_errors, chunk_size_mb, use_atomic_upload) = args
     chunk_size = chunk_size_mb * 1024 * 1024 if chunk_size_mb else None
     if parallel_uploads:
         _make_in_process_logger(log_level)
@@ -34,6 +34,7 @@ def _upload_one_file(args):
             session=session, overwrite=overwrite, progress_tracker=progress_tracker,
             threads=threads_per_upload, use_cache=use_cache, chunk_size=chunk_size,
             no_new_versions=no_new_versions, max_retries=num_retries,
+            use_atomic_upload=use_atomic_upload
         )
     else:
         result_file.link_file(link_type, filepath)
@@ -59,6 +60,7 @@ class GeoSeeqUploadManager:
                  num_retries=3,
                  ignore_errors=False,
                  chunk_size_mb=5,
+                 use_atomic_upload=True,
                  use_cache=True):
         self.session = session
         self.n_parallel_uploads = n_parallel_uploads
@@ -73,12 +75,18 @@ class GeoSeeqUploadManager:
         self.num_retries = num_retries
         self.ignore_errors = ignore_errors
         self.chunk_size_mb = chunk_size_mb
+        self.use_atomic_upload = use_atomic_upload
 
     def add_result_file(self, result_file, local_path):
         self._result_files.append((result_file, local_path))
 
     def add_local_file_to_result_folder(self, result_folder, local_path, geoseeq_file_name=None):
-        geoseeq_file_name = geoseeq_file_name or local_path
+        if not geoseeq_file_name:
+            if local_path.startswith("/"):  # if local path is an absolute path use the basename
+                geoseeq_file_name = basename(local_path)
+            else:
+                # remove "./" and "../" from local path to get a geoseeq file name
+                geoseeq_file_name = local_path.replace("./", "").replace("../", "")
         result_file = result_folder.result_file(geoseeq_file_name)
         self.add_result_file(result_file, local_path)
 
@@ -99,7 +107,7 @@ class GeoSeeqUploadManager:
                 self.link_type, self.overwrite, self.log_level,
                 self.n_parallel_uploads > 1, self.use_cache, self.no_new_versions,
                 self.threads_per_upload, self.num_retries, self.ignore_errors,
-                self.chunk_size_mb,
+                self.chunk_size_mb, self.use_atomic_upload
             ) for result_file, local_path in self._result_files
         ]
        out = []
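A hedged sketch of the manager-level flag; the folder object and the final "run the uploads" call are elided rather than guessed, since the diff only shows how the argument tuple is built.

    from geoseeq.upload_download_manager import GeoSeeqUploadManager

    manager = GeoSeeqUploadManager(use_atomic_upload=True)  # now the default
    result_folder = ...  # an existing ResultFolder object
    manager.add_local_file_to_result_folder(result_folder, "./data/reads.fastq.gz")
    # relative "./" and "../" are stripped, so the GeoSeeq name is "data/reads.fastq.gz";
    # an absolute path would be reduced to its basename instead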
{geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/RECORD
CHANGED
@@ -11,7 +11,7 @@ geoseeq/project.py,sha256=-9Y2ik0-BpT3iqh89v8VQBbdadhI58oaUP9oZK8oetc,13741
 geoseeq/remote_object.py,sha256=Es-JlAz8iLRmCpAzh1MOwUh2MqtbuQM-p8wHIBAqNlQ,7131
 geoseeq/sample.py,sha256=whgEVk6GnDJJLjn5uTOqFqRtVxZD3BgjTo7brAC5noU,7981
 geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
-geoseeq/upload_download_manager.py,sha256=
+geoseeq/upload_download_manager.py,sha256=DNI4nce0MCds-wGoTA3fP_msz3kGOAoJNItoUv7L0uQ,8751
 geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
 geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
 geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
@@ -19,12 +19,14 @@ geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
 geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
 geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
 geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
-geoseeq/cli/download.py,sha256=
+geoseeq/cli/download.py,sha256=N_Wrg9d1kY9eJ6C1l0xc_YFjiri8gkXBo9JiuHx9xxE,17766
 geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
 geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
-geoseeq/cli/main.py,sha256=
+geoseeq/cli/main.py,sha256=Vze6p8cNGsMQmsr5bkhglOxWPIPqxk0BM6417iKvhb4,3791
 geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
 geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
+geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
+geoseeq/cli/raw.py,sha256=EASkIBr3AhBg6FOiLElw8Nuj_okQqf9vBXLdow7JQGw,1884
 geoseeq/cli/run.py,sha256=bx2AV6VIqOSTlxUda78xl0XxcZ8TXlQx02-e7iLQPwI,3838
 geoseeq/cli/search.py,sha256=wgyprEf_Tm5i_rYl9KTxrmFrD4ohy7qS5ttjg7V3xRY,1015
 geoseeq/cli/user.py,sha256=fARv_5vx-QYT765Hsjgwr6J5ddA_viCPQdv9iUoVX6w,925
@@ -37,9 +39,9 @@ geoseeq/cli/shared_params/id_handlers.py,sha256=501K9sCVkI0YGDQ62vXk_DM5lMMDrdB5
 geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
 geoseeq/cli/shared_params/opts_and_args.py,sha256=LrDkv9WtUryM4uUMXPRk04-EBcTQ7q5V6Yu-XRDUvvA,2083
 geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
-geoseeq/cli/upload/upload.py,sha256=
+geoseeq/cli/upload/upload.py,sha256=JZkhe1q3KOp7-tKyzwi860TQhZoNDnZs4yB2PJhOjl0,10081
 geoseeq/cli/upload/upload_advanced.py,sha256=Jq5eGe-wOdrzxGWVwaFPg0BAJcW0YSx_eHEmYjJeKuA,3434
-geoseeq/cli/upload/upload_reads.py,sha256=
+geoseeq/cli/upload/upload_reads.py,sha256=dvmOVq0lJSbpQDyWkNEnZmhkMvfEByV-i8xD75Ai4dA,10706
 geoseeq/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geoseeq/contrib/ncbi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geoseeq/contrib/ncbi/api.py,sha256=WQeLoGA_-Zha-QeSO8_i7HpvXyD8UkV0qc5okm11KiA,1056
@@ -63,10 +65,12 @@ geoseeq/plotting/map/map.py,sha256=h2QPLGqe-SamhfaTij53S9cQIiO8orCJUAUh0hRicSM,3
 geoseeq/plotting/map/overlay.py,sha256=4VmxqOESTQra9tPr8b8OLEUhJSit9lNipabeSznEYwE,1795
 geoseeq/result/__init__.py,sha256=IFHIyRV8ZzuKIfwfze1SXgcKwNMcSgMAknLHMkwjXIU,356
 geoseeq/result/bioinfo.py,sha256=QQtbyogrdro9avJSN0713sxLVnVeA24mFw3hWtKDKyw,1782
-geoseeq/result/file_download.py,sha256=
-geoseeq/result/file_upload.py,sha256=
+geoseeq/result/file_chunker.py,sha256=bXq1csuRtqMB5sbH-AfWo6gdPwrivv5DJPuHVj-h08w,1758
+geoseeq/result/file_download.py,sha256=gV9-C_CMPpOWYi21eagsoiri53yzRHQx351nLBUj4WM,5790
+geoseeq/result/file_upload.py,sha256=xs1DrI-h4ZP7xN8HPBc3SFpcPAxR5HAolraP1Zu7tvE,10648
 geoseeq/result/result_file.py,sha256=1Yj9fkZhds3J-tay6eNH2-EHi00MovHGV1M80_ckHD8,8677
 geoseeq/result/result_folder.py,sha256=6porOXPh7Tpxw3oX5yMRPYQzNCGYqszqmFJd3SwQmTc,11122
+geoseeq/result/resumable_upload_tracker.py,sha256=2aI09gYz2yw63jEXqs8lmCRKQ79TIc3YuPETvP0Jeek,3811
 geoseeq/result/utils.py,sha256=C-CxGzB3WddlnRiqFSkrY78I_m0yFgNqsTBRzGU-y8Q,2772
 geoseeq/vc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 geoseeq/vc/checksum.py,sha256=y8rh1asUZNbE_NLiFO0-9hImLNiTOc2YXQBRKORWK7k,710
@@ -80,9 +84,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
 tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
 tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
-geoseeq-0.5.6a16.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
-geoseeq-0.5.6a16.dist-info/METADATA,sha256=
-geoseeq-0.5.6a16.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-geoseeq-0.5.6a16.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
-geoseeq-0.5.6a16.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
-geoseeq-0.5.6a16.dist-info/RECORD,,
+geoseeq-0.6.0.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
+geoseeq-0.6.0.dist-info/METADATA,sha256=mDqowxeSFM0nNuY_354pumCtTUpztbhhRe1Dv2rqn5g,4803
+geoseeq-0.6.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+geoseeq-0.6.0.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
+geoseeq-0.6.0.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
+geoseeq-0.6.0.dist-info/RECORD,,
{geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/LICENSE
File without changes
{geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/WHEEL
File without changes
{geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/entry_points.txt
File without changes
{geoseeq-0.5.6a16.dist-info → geoseeq-0.6.0.dist-info}/top_level.txt
File without changes