rda-python-dsquasar 2.0.5__tar.gz → 2.0.6__tar.gz

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (18)
  1. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/PKG-INFO +1 -1
  2. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/pyproject.toml +1 -1
  3. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/taccrec.py +57 -6
  4. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/tacctar.py +19 -11
  5. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/PKG-INFO +1 -1
  6. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/LICENSE +0 -0
  7. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/README.md +0 -0
  8. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/setup.cfg +0 -0
  9. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/__init__.py +0 -0
  10. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/ds_quasar.py +0 -0
  11. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/dsquasar.py +0 -0
  12. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/dstacc.py +0 -0
  13. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/SOURCES.txt +0 -0
  14. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/dependency_links.txt +0 -0
  15. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/entry_points.txt +0 -0
  16. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/requires.txt +0 -0
  17. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/top_level.txt +0 -0
  18. {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/test/test_dsquasar.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_dsquasar
3
- Version: 2.0.5
3
+ Version: 2.0.6
4
4
  Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "rda_python_dsquasar"
7
- version = "2.0.5"
7
+ version = "2.0.6"
8
8
  authors = [
9
9
  { name="Zaihua Ji", email="zji@ucar.edu" },
10
10
  ]
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env python3
1
2
  import os
2
3
  import tarfile
3
4
  import argparse
@@ -93,12 +94,12 @@ def insert_tfile_row(summary, db_params, extra, update_on_conflict=False, member
93
94
  try:
94
95
  cur.execute(sql, values)
95
96
  conn.commit()
96
- # Retrieve tidx for the just-inserted tfile row
97
- cur.execute(f"SELECT tidx FROM {table_name} WHERE tfile=%s", (summary['tfile'],))
97
+ # Retrieve tid for the just-inserted tfile row
98
+ cur.execute(f"SELECT tid FROM {table_name} WHERE tfile=%s", (summary['tfile'],))
98
99
  row = cur.fetchone()
99
- tidx = row[0] if row else None
100
+ tid = row[0] if row else None
100
101
  # Update wfile tables if member_details provided
101
- if member_details and tidx is not None:
102
+ if member_details and tid is not None:
102
103
  for m in member_details:
103
104
  name = m['name']
104
105
  if '/' in name:
@@ -111,8 +112,8 @@ def insert_tfile_row(summary, db_params, extra, update_on_conflict=False, member
111
112
  exists = cur.fetchone()[0]
112
113
  if not exists:
113
114
  continue
114
- # Update tidx if record exists
115
- cur.execute(f"UPDATE {wfile_table} SET tidx=%s WHERE wfile=%s", (tidx, wfile))
115
+ # Update tid if record exists
116
+ cur.execute(f"UPDATE {wfile_table} SET tid=%s WHERE wfile=%s", (tid, wfile))
116
117
  conn.commit()
117
118
  except Exception as e:
118
119
  print(f"Database error: {e}")
@@ -144,6 +145,7 @@ def get_uid_from_logname(db_params):
144
145
  def main():
145
146
  parser = argparse.ArgumentParser(description='Insert tar file summary into tfile table.')
146
147
  parser.add_argument('--member-list', help='Path to tar member list file (from tar -tvf)')
148
+ parser.add_argument('--member-list-file', help='File containing list of tar member list files (one per line)')
147
149
  parser.add_argument('--db-host', default='rda-db.ucar.edu', help='Database host (default: rda-db.ucar.edu)')
148
150
  parser.add_argument('--db-port', default=5432, type=int, help='Database port (default: 5432)')
149
151
  parser.add_argument('--db-name', default='rdadb', help='Database name (default: rdadb)')
@@ -151,6 +153,55 @@ def main():
151
153
  parser.add_argument('--db-password', help='Database password (optional, use .pgpass if omitted)')
152
154
  parser.add_argument('--no-update', action='store_true', help='If tfile exists, skip all updates including wfile tables (default: False)')
153
155
  args = parser.parse_args()
156
+ if args.member_list_file:
157
+ with open(args.member_list_file, 'r') as f:
158
+ for line in f:
159
+ member_list_path = line.strip()
160
+ if not member_list_path or not os.path.isfile(member_list_path):
161
+ print(f"Error: member list file '{member_list_path}' is invalid or does not exist.")
162
+ continue
163
+ summary, member_details = get_tar_summary_and_details(member_list_path)
164
+ if summary is None:
165
+ continue
166
+ db_params = {
167
+ 'host': args.db_host,
168
+ 'port': args.db_port,
169
+ 'dbname': args.db_name,
170
+ 'user': args.db_user
171
+ }
172
+ if args.db_password:
173
+ db_params['password'] = args.db_password
174
+ # Check if tfile exists if --no-update is set
175
+ if args.no_update:
176
+ try:
177
+ conn = psycopg2.connect(**db_params)
178
+ cur = conn.cursor()
179
+ cur.execute("SELECT 1 FROM dssdb.tfile WHERE tfile=%s LIMIT 1", (summary['tfile'],))
180
+ exists = cur.fetchone()
181
+ cur.close()
182
+ conn.close()
183
+ if exists:
184
+ print(f"tfile '{summary['tfile']}' already exists in dssdb.tfile. Skipping all updates.")
185
+ continue
186
+ except Exception as e:
187
+ print(f"Database error during tfile existence check: {e}")
188
+ continue
189
+ try:
190
+ uid = get_uid_from_logname(db_params)
191
+ except Exception as e:
192
+ print(f"Error getting uid from dssgrp: {e}")
193
+ continue
194
+ extra = {
195
+ 'uid': uid,
196
+ 'dsid': summary['dsid'],
197
+ 'data_format': '',
198
+ 'disp_order': 0,
199
+ 'dsids': summary['dsids'],
200
+ 'note': summary['note']
201
+ }
202
+ insert_tfile_row(summary, db_params, extra, update_on_conflict=True, member_details=member_details if not args.no_update else None)
203
+ print(f"Inserted tar summary for {summary['tfile']} into tfile.")
204
+ return
154
205
  if not args.member_list or not os.path.isfile(args.member_list):
155
206
  print('Error: --member-list argument is required and must point to a valid file.')
156
207
  return
@@ -68,25 +68,33 @@ def group_files_by_size(files, min_size, max_size):
68
68
  batches.append(current_batch)
69
69
  return batches
70
70
 
71
- def tar_batches(dirpath, batches, output_dir):
72
- src_dir_name = Path(dirpath).name
71
+ def tar_batches(dirpath, batches, output_dir, root_path=None, root_dirname=None):
72
+ # Use root_dirname for tar file naming and arcname
73
73
  for idx, batch in enumerate(batches, 1):
74
- tar_name = os.path.join(output_dir, f"{src_dir_name}_part{idx}.tar")
75
- logging.info(f"Creating tar: {tar_name} with {len(batch)} files.")
74
+ num_files = len(batch)
75
+ tar_name = os.path.join(output_dir, f"{root_dirname}_part{idx}_{num_files}files.tar")
76
+ logging.info(f"Creating tar: {tar_name} with {num_files} files.")
76
77
  with tarfile.open(tar_name, "w") as tar:
77
78
  for f in batch:
78
79
  try:
79
- tar.add(f, arcname=os.path.relpath(f, dirpath))
80
+ # arcname should be relative to root_path, and always start with root_dirname
81
+ arcname = os.path.relpath(f, root_path)
82
+ arcname = os.path.join(root_dirname, arcname)
83
+ tar.add(f, arcname=arcname)
80
84
  except Exception as e:
81
85
  logging.warning(f"Failed to add {f} to tar: {e}")
82
86
 
83
- def process_directory_tree(root_dir, output_dir):
84
- for dirpath, dirnames, filenames in os.walk(root_dir):
87
+ def process_directory_tree(root_path, output_dir):
88
+ root_dirname = os.path.basename(os.path.abspath(root_path))
89
+ # Gather all files under root_path first
90
+ all_files = []
91
+ for dirpath, dirnames, filenames in os.walk(root_path):
85
92
  abs_files = [os.path.join(dirpath, f) for f in filenames]
86
- if not abs_files:
87
- continue
88
- batches = group_files_by_size(abs_files, ONE_TB, THREE_TB)
89
- tar_batches(dirpath, batches, output_dir)
93
+ all_files.extend(abs_files)
94
+ if not all_files:
95
+ return
96
+ batches = group_files_by_size(all_files, ONE_TB, THREE_TB)
97
+ tar_batches(root_path, batches, output_dir, root_path=root_path, root_dirname=root_dirname)
90
98
 
91
99
  def read_directories_from_file(input_file):
92
100
  dirs = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rda_python_dsquasar
3
- Version: 2.0.5
3
+ Version: 2.0.6
4
4
  Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
5
5
  Author-email: Zaihua Ji <zji@ucar.edu>
6
6
  Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar