PyPI - rda-python-dsquasar - Versions diffs - 2.0.8__tar.gz → 2.0.10__tar.gz - Mend

rda-python-dsquasar 2.0.8.tar.gz → 2.0.10.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

{rda_python_dsquasar-2.0.8 → rda_python_dsquasar-2.0.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rda_python_dsquasar
-Version: 2.0.8
+Version: 2.0.10
 Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
 Author-email: Zaihua Ji <zji@ucar.edu>
 Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar

{rda_python_dsquasar-2.0.8 → rda_python_dsquasar-2.0.10}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "rda_python_dsquasar"
-version = "2.0.8"
+version = "2.0.10"
 authors = [
   { name="Zaihua Ji", email="zji@ucar.edu" },
 ]

{rda_python_dsquasar-2.0.8 → rda_python_dsquasar-2.0.10}/src/rda_python_dsquasar/taccrec.py RENAMED Viewed

@@ -144,14 +144,14 @@ def get_uid_from_logname(db_params):
 def main():
     parser = argparse.ArgumentParser(description='Insert tar file summary into tfile table.')
-    parser.add_argument('--member-list', help='Path to tar member list file (from tar -tvf)')
-    parser.add_argument('--member-list-file', help='File containing list of tar member list files (one per line)')
-    parser.add_argument('--db-host', default='rda-db.ucar.edu', help='Database host (default: rda-db.ucar.edu)')
-    parser.add_argument('--db-port', default=5432, type=int, help='Database port (default: 5432)')
-    parser.add_argument('--db-name', default='rdadb', help='Database name (default: rdadb)')
-    parser.add_argument('--db-user', default='dssdb', help='Database user (default: dssdb)')
-    parser.add_argument('--db-password', help='Database password (optional, use .pgpass if omitted)')
-    parser.add_argument('--no-update', action='store_true', help='If tfile exists, skip all updates including wfile tables (default: False)')
+    parser.add_argument('-ml', '--member-list', help='Path to tar member list file (from tar -tvf)')
+    parser.add_argument('-mf', '--member-list-file', help='File containing list of tar member list files (one per line)')
+    parser.add_argument('-ht', '--db-host', default='rda-db.ucar.edu', help='Database host (default: rda-db.ucar.edu)')
+    parser.add_argument('-pt', '--db-port', default=5432, type=int, help='Database port (default: 5432)')
+    parser.add_argument('-db', '--db-name', default='rdadb', help='Database name (default: rdadb)')
+    parser.add_argument('-us', '--db-user', default='dssdb', help='Database user (default: dssdb)')
+    parser.add_argument('-pw', '--db-password', help='Database password (optional, use .pgpass if omitted)')
+    parser.add_argument('-nu', '--no-update', action='store_true', help='If tfile exists, skip all updates including wfile tables (default: False)')
     args = parser.parse_args()
     if args.member_list_file:
         with open(args.member_list_file, 'r') as f:

{rda_python_dsquasar-2.0.8 → rda_python_dsquasar-2.0.10}/src/rda_python_dsquasar/tacctar.py RENAMED Viewed

@@ -69,14 +69,44 @@ def group_files_by_size(files, min_size, max_size):
             batches.append(current_batch)
     return batches
-def tar_batches(dirpath, batches, output_dir, dataset_path=None, dataset_name=None, tar_batch=True):
+def write_mbr_and_archive_tar(tar_file, mbr_file, archive_dir=None):
     """
-    If tar_batch is True, create tar files for each batch. Otherwise, dump file list to .batch files.
+    Write tar member list file in tar -tvf format and move tar file to archive_dir if provided.
     """
+    import pwd, grp, time, shutil
+    # Write member list file
+    with tarfile.open(tar_file, "r") as tar:
+        with open(mbr_file, "w") as mf:
+            for member in tar.getmembers():
+                mode = oct(member.mode)[-4:]
+                typechar = '-' if member.isfile() else 'd' if member.isdir() else 'l' if member.issym() else '?'
+                uname = member.uname or (pwd.getpwuid(member.uid).pw_name if hasattr(member, 'uid') else '')
+                gname = member.gname or (grp.getgrgid(member.gid).gr_name if hasattr(member, 'gid') else '')
+                size = member.size
+                mtime = time.strftime("%Y-%m-%d %H:%M", time.localtime(member.mtime))
+                mf.write(f"{typechar}{mode} {uname}/{gname} {size:9d} {mtime} {member.name}\n")
+    # Move tar file to archive_dir if specified
+    if archive_dir:
+        archive_dir_abs = os.path.abspath(archive_dir)
+        os.makedirs(archive_dir_abs, exist_ok=True)
+        dest_tar_file = os.path.join(archive_dir_abs, os.path.basename(tar_file))
+        try:
+            shutil.move(tar_file, dest_tar_file)
+            logging.info(f"Moved tar file {tar_file} to archive directory {dest_tar_file}.")
+        except Exception as e:
+            logging.error(f"Failed to move tar file {tar_file} to archive directory {archive_dir_abs}: {e}")
+def tar_batches(dirpath, batches, output_dir, dataset_path=None, dataset_name=None, tar_batch=True, archive_dir=None):
+    """
+    If tar_batch is True, create tar files for each batch and move to archive_dir if provided. Otherwise, dump file list to .batch files.
+    Always create a .mbr file for each tar file.
+    """
+    import shutil
     for idx, batch in enumerate(batches, 1):
         num_files = len(batch)
         tar_name = os.path.join(output_dir, f"{dataset_name}_part{idx}_{num_files}files.tar")
         batch_name = tar_name.replace(".tar", ".batch")
+        mbr_file = tar_name + '.mbr'
         if tar_batch:
             logging.info(f"Creating tar: {tar_name} with {num_files} files.")
             with tarfile.open(tar_name, "w") as tar:
@@ -87,6 +117,7 @@ def tar_batches(dirpath, batches, output_dir, dataset_path=None, dataset_name=No
                         tar.add(f, arcname=arcname)
                     except Exception as e:
                         logging.warning(f"Failed to add {f} to tar: {e}")
+            write_mbr_and_archive_tar(tar_name, mbr_file, archive_dir)
         else:
             logging.info(f"Writing batch file list: {batch_name} with {num_files} files.")
             with open(batch_name, "w") as bf:
@@ -95,7 +126,7 @@ def tar_batches(dirpath, batches, output_dir, dataset_path=None, dataset_name=No
                     arcname = os.path.join(dataset_name, arcname)
                     bf.write(arcname + "\n")
-def process_directory_tree(dataset_path, output_dir, db_params=None, tar_batch=True, dsid=None):
+def process_directory_tree(dataset_path, output_dir, db_params=None, tar_batch=True, dsid=None, archive_dir=None):
     dataset_name = dsid if dsid else os.path.basename(os.path.abspath(dataset_path))
     wfile_table = f"dssdb.wfile_{dataset_name}"
     all_files = []
@@ -130,7 +161,7 @@ def process_directory_tree(dataset_path, output_dir, db_params=None, tar_batch=T
     if not all_files:
         return
     batches = group_files_by_size(all_files, ONE_TB, THREE_TB)
-    tar_batches(dataset_path, batches, output_dir, dataset_path=dataset_path, dataset_name=dataset_name, tar_batch=tar_batch)
+    tar_batches(dataset_path, batches, output_dir, dataset_path=dataset_path, dataset_name=dataset_name, tar_batch=tar_batch, archive_dir=archive_dir)
 def read_directories_from_file(input_file, tar_root=None):
     dataset_ids = []
@@ -186,7 +217,8 @@ def collect_all_files(dataset_dirs, db_params=None):
 def find_common_root(paths):
     return os.path.commonpath(paths) if paths else ''
-def tar_batches_across_dirs(files, batches, output_dir, tar_root, dataset_paths, tar_batch=True, dataset_ids=None):
+def tar_batches_across_dirs(files, batches, output_dir, tar_root, dataset_paths, tar_batch=True, dataset_ids=None, archive_dir=None):
+    import shutil
     dataset_dir_paths = [Path(d).resolve() for d in dataset_paths]
     dsid_map = {str(Path(d).resolve()): dsid for dsid, d in zip(dataset_ids, dataset_paths)} if dataset_ids else {}
     for idx, batch in enumerate(batches, 1):
@@ -208,6 +240,7 @@ def tar_batches_across_dirs(files, batches, output_dir, tar_root, dataset_paths,
             prefix = "_".join(sorted(batch_dataset_names)) if batch_dataset_names else "batch"
         tar_name = os.path.join(output_dir, f"{prefix}_part{idx}_{num_files}files.tar")
         batch_name = tar_name.replace(".tar", ".batch")
+        mbr_file = tar_name + '.mbr'
         if tar_batch:
             logging.info(f"Creating tar: {tar_name} with {num_files} files.")
             with tarfile.open(tar_name, "w") as tar:
@@ -217,6 +250,7 @@ def tar_batches_across_dirs(files, batches, output_dir, tar_root, dataset_paths,
                         tar.add(f, arcname=arcname)
                     except Exception as e:
                         logging.warning(f"Failed to add {f} to tar: {e}")
+            write_mbr_and_archive_tar(tar_name, mbr_file, archive_dir)
         else:
             logging.info(f"Writing batch file list: {batch_name} with {num_files} files.")
             with open(batch_name, "w") as bf:
@@ -227,14 +261,7 @@ def tar_batches_across_dirs(files, batches, output_dir, tar_root, dataset_paths,
 def get_batch_size(batch):
     return sum(get_file_size(f) for f in batch)
-def tar_batch_file(batch_file, tar_root=None):
-    """
-    Read a filelist in batch_file and tar the filelist into a tar file named like batch file name by replacing .batch with .tar.
-    Prepend path tar_root to the file names in each batch file for the full path.
-    After tarring, dump a member file detail list (like 'tar -tvf') into a .mbr file named as tarfilename+'.mbr'.
-    If the .mbr file already exists, skip the tar action.
-    """
-    import pwd, grp, time
+def tar_batch_file(batch_file, tar_root=None, archive_dir=None):
     tar_file = batch_file.replace('.batch', '.tar')
     mbr_file = tar_file + '.mbr'
     if os.path.exists(mbr_file):
@@ -250,30 +277,25 @@ def tar_batch_file(batch_file, tar_root=None):
                 tar.add(abs_path, arcname=rel_path)
             except Exception as e:
                 logging.warning(f"Failed to add {abs_path} as {rel_path} to tar: {e}")
-    # Write member list file in tar -tvf format
-    with tarfile.open(tar_file, "r") as tar:
-        with open(mbr_file, "w") as mf:
-            for member in tar.getmembers():
-                mode = oct(member.mode)[-4:]
-                typechar = '-' if member.isfile() else 'd' if member.isdir() else 'l' if member.issym() else '?'
-                uname = member.uname or (pwd.getpwuid(member.uid).pw_name if hasattr(member, 'uid') else '')
-                gname = member.gname or (grp.getgrgid(member.gid).gr_name if hasattr(member, 'gid') else '')
-                size = member.size
-                mtime = time.strftime("%Y-%m-%d %H:%M", time.localtime(member.mtime))
-                # Format: -rw-r--r-- user/group size date name
-                mf.write(f"{typechar}{mode} {uname}/{gname} {size:9d} {mtime} {member.name}\n")
+    write_mbr_and_archive_tar(tar_file, mbr_file, archive_dir)
 def main():
     import argparse
     parser = argparse.ArgumentParser(description="Tar files from a list of dataset IDs into 1-3TB tar files.")
-    parser.add_argument('--batch-input-file', type=str, default=None, help='A file containing a list of batch file names, one per line. Each batch file should contain relative file names to be tarred.')
-    parser.add_argument('--batch-files', nargs='*', default=None, help='List of batch files to tar. Each batch file should contain relative file names to be tarred.')
-    parser.add_argument('--tar-root', type=str, required=True, help='Root directory for relative tar member file names (arcname). REQUIRED for all modes.')
-    parser.add_argument("--input-file", help="File containing list of dataset IDs to process (one per line)")
-    parser.add_argument("--output-dir", help="Directory to store tar files (default: current directory)")
-    parser.add_argument('--check-tarred', action='store_true', default=False, help='Skip files already tarred (tid > 0 in wfile_<dataset_name>)')
-    parser.add_argument('--tar-batch', action='store_true', default=False, help='Tar files for each batch. If not set, dump file list to .batch files instead.')
-    parser.add_argument("dataset_ids", nargs='*', help="Dataset IDs to process")
+    parser.add_argument('-bi', '--batch-input-file', type=str, default=None, help='A file containing a list of batch file names, one per line. Each batch file should contain relative file names to be tarred.')
+    parser.add_argument('-bf', '--batch-files', nargs='*', default=None, help='List of batch files to tar. Each batch file should contain relative file names to be tarred.')
+    parser.add_argument('-tr', '--tar-root', type=str, required=True, help='Root directory for relative tar member file names (arcname). REQUIRED for all modes.')
+    parser.add_argument('-if', '--input-file', help='File containing list of dataset IDs to process (one per line)')
+    parser.add_argument('-od', '--output-dir', help='Directory to store tar files (default: current directory)')
+    parser.add_argument('-ct', '--check-tarred', action='store_true', default=False, help='Skip files already tarred (tid > 0 in wfile_<dataset_name>)')
+    parser.add_argument('-tb', '--tar-batch', action='store_true', default=False, help='Tar files for each batch. If not set, dump file list to .batch files instead.')
+    parser.add_argument('-ds', '--dataset-ids', nargs='*', help='Dataset IDs to process')
+    parser.add_argument('-ht', '--db-host', type=str, default='rda-db.ucar.edu', help='Database host for tarred check')
+    parser.add_argument('-pt', '--db-port', type=int, default=5432, help='Database port for tarred check')
+    parser.add_argument('-db', '--db-name', type=str, default='rdadb', help='Database name for tarred check')
+    parser.add_argument('-us', '--db-user', type=str, default='dssdb', help='Database user for tarred check')
+    parser.add_argument('-pw', '--db-password', type=str, default=None, help='Database password for tarred check')
+    parser.add_argument('-ad', '--archive-dir', type=str, default=None, help='Directory to move tar files to after creation (optional)')
     args = parser.parse_args()
     output_dir = args.output_dir if args.output_dir else os.getcwd()
     os.makedirs(output_dir, exist_ok=True)
@@ -281,11 +303,11 @@ def main():
     db_params = None
     if args.check_tarred:
         db_params = {
-            'host': args.db_host if hasattr(args, 'db_host') else 'rda-db.ucar.edu',
-            'port': args.db_port if hasattr(args, 'db_port') else 5432,
-            'dbname': args.db_name if hasattr(args, 'db_name') else 'rdadb',
-            'user': args.db_user if hasattr(args, 'db_user') else 'dssdb',
-            'password': args.db_password if hasattr(args, 'db_password') else None
+            'host': args.db_host,
+            'port': args.db_port,
+            'dbname': args.db_name,
+            'user': args.db_user,
+            'password': args.db_password
         }
     # Batch tar mode
     if args.batch_input_file or args.batch_files:
@@ -296,7 +318,7 @@ def main():
         if args.batch_files:
             batch_files.extend(args.batch_files)
         for batch_file in batch_files:
-            tar_batch_file(batch_file, tar_root=args.tar_root)
+            tar_batch_file(batch_file, tar_root=args.tar_root, archive_dir=args.archive_dir)
         return
     # Directory tree processing mode
     if args.input_file:
@@ -310,7 +332,7 @@ def main():
             logging.info(f"Last batch size ({get_batch_size(batches[-1])} bytes) < 1TB, appending to previous batch.")
             batches[-2].extend(batches[-1])
             batches.pop()
-        tar_batches_across_dirs(files, batches, output_dir, args.tar_root, dataset_paths, tar_batch=args.tar_batch, dataset_ids=None)
+        tar_batches_across_dirs(files, batches, output_dir, args.tar_root, dataset_paths, tar_batch=args.tar_batch, dataset_ids=None, archive_dir=args.archive_dir)
         return
     elif args.dataset_ids:
         for dsid in args.dataset_ids:
@@ -318,7 +340,7 @@ def main():
             if not os.path.isdir(dataset_path):
                 logging.warning(f"Dataset directory does not exist: {dataset_path}")
                 continue
-            process_directory_tree(dataset_path, output_dir, db_params=db_params, tar_batch=args.tar_batch, dsid=dsid)
+            process_directory_tree(dataset_path, output_dir, db_params=db_params, tar_batch=args.tar_batch, dsid=dsid, archive_dir=args.archive_dir)
         return
     else:
         print("Error: Must provide either --input-file or dataset_ids or --batch-files or --batch-input-file.")

{rda_python_dsquasar-2.0.8 → rda_python_dsquasar-2.0.10}/src/rda_python_dsquasar.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rda_python_dsquasar
-Version: 2.0.8
+Version: 2.0.10
 Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
 Author-email: Zaihua Ji <zji@ucar.edu>
 Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar