rda-python-dsquasar 2.0.5__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/PKG-INFO +1 -1
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/pyproject.toml +1 -1
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/taccrec.py +57 -6
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/tacctar.py +19 -11
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/PKG-INFO +1 -1
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/LICENSE +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/README.md +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/setup.cfg +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/__init__.py +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/ds_quasar.py +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/dsquasar.py +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/dstacc.py +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/SOURCES.txt +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/dependency_links.txt +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/entry_points.txt +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/requires.txt +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/top_level.txt +0 -0
- {rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/test/test_dsquasar.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rda_python_dsquasar
|
|
3
|
-
Version: 2.0.5
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
|
|
5
5
|
Author-email: Zaihua Ji <zji@ucar.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
1
2
|
import os
|
|
2
3
|
import tarfile
|
|
3
4
|
import argparse
|
|
@@ -93,12 +94,12 @@ def insert_tfile_row(summary, db_params, extra, update_on_conflict=False, member
|
|
|
93
94
|
try:
|
|
94
95
|
cur.execute(sql, values)
|
|
95
96
|
conn.commit()
|
|
96
|
-
# Retrieve
|
|
97
|
-
cur.execute(f"SELECT
|
|
97
|
+
# Retrieve tid for the just-inserted tfile row
|
|
98
|
+
cur.execute(f"SELECT tid FROM {table_name} WHERE tfile=%s", (summary['tfile'],))
|
|
98
99
|
row = cur.fetchone()
|
|
99
|
-
|
|
100
|
+
tid = row[0] if row else None
|
|
100
101
|
# Update wfile tables if member_details provided
|
|
101
|
-
if member_details and
|
|
102
|
+
if member_details and tid is not None:
|
|
102
103
|
for m in member_details:
|
|
103
104
|
name = m['name']
|
|
104
105
|
if '/' in name:
|
|
@@ -111,8 +112,8 @@ def insert_tfile_row(summary, db_params, extra, update_on_conflict=False, member
|
|
|
111
112
|
exists = cur.fetchone()[0]
|
|
112
113
|
if not exists:
|
|
113
114
|
continue
|
|
114
|
-
# Update
|
|
115
|
-
cur.execute(f"UPDATE {wfile_table} SET
|
|
115
|
+
# Update tid if record exists
|
|
116
|
+
cur.execute(f"UPDATE {wfile_table} SET tid=%s WHERE wfile=%s", (tid, wfile))
|
|
116
117
|
conn.commit()
|
|
117
118
|
except Exception as e:
|
|
118
119
|
print(f"Database error: {e}")
|
|
@@ -144,6 +145,7 @@ def get_uid_from_logname(db_params):
|
|
|
144
145
|
def main():
|
|
145
146
|
parser = argparse.ArgumentParser(description='Insert tar file summary into tfile table.')
|
|
146
147
|
parser.add_argument('--member-list', help='Path to tar member list file (from tar -tvf)')
|
|
148
|
+
parser.add_argument('--member-list-file', help='File containing list of tar member list files (one per line)')
|
|
147
149
|
parser.add_argument('--db-host', default='rda-db.ucar.edu', help='Database host (default: rda-db.ucar.edu)')
|
|
148
150
|
parser.add_argument('--db-port', default=5432, type=int, help='Database port (default: 5432)')
|
|
149
151
|
parser.add_argument('--db-name', default='rdadb', help='Database name (default: rdadb)')
|
|
@@ -151,6 +153,55 @@ def main():
|
|
|
151
153
|
parser.add_argument('--db-password', help='Database password (optional, use .pgpass if omitted)')
|
|
152
154
|
parser.add_argument('--no-update', action='store_true', help='If tfile exists, skip all updates including wfile tables (default: False)')
|
|
153
155
|
args = parser.parse_args()
|
|
156
|
+
if args.member_list_file:
|
|
157
|
+
with open(args.member_list_file, 'r') as f:
|
|
158
|
+
for line in f:
|
|
159
|
+
member_list_path = line.strip()
|
|
160
|
+
if not member_list_path or not os.path.isfile(member_list_path):
|
|
161
|
+
print(f"Error: member list file '{member_list_path}' is invalid or does not exist.")
|
|
162
|
+
continue
|
|
163
|
+
summary, member_details = get_tar_summary_and_details(member_list_path)
|
|
164
|
+
if summary is None:
|
|
165
|
+
continue
|
|
166
|
+
db_params = {
|
|
167
|
+
'host': args.db_host,
|
|
168
|
+
'port': args.db_port,
|
|
169
|
+
'dbname': args.db_name,
|
|
170
|
+
'user': args.db_user
|
|
171
|
+
}
|
|
172
|
+
if args.db_password:
|
|
173
|
+
db_params['password'] = args.db_password
|
|
174
|
+
# Check if tfile exists if --no-update is set
|
|
175
|
+
if args.no_update:
|
|
176
|
+
try:
|
|
177
|
+
conn = psycopg2.connect(**db_params)
|
|
178
|
+
cur = conn.cursor()
|
|
179
|
+
cur.execute("SELECT 1 FROM dssdb.tfile WHERE tfile=%s LIMIT 1", (summary['tfile'],))
|
|
180
|
+
exists = cur.fetchone()
|
|
181
|
+
cur.close()
|
|
182
|
+
conn.close()
|
|
183
|
+
if exists:
|
|
184
|
+
print(f"tfile '{summary['tfile']}' already exists in dssdb.tfile. Skipping all updates.")
|
|
185
|
+
continue
|
|
186
|
+
except Exception as e:
|
|
187
|
+
print(f"Database error during tfile existence check: {e}")
|
|
188
|
+
continue
|
|
189
|
+
try:
|
|
190
|
+
uid = get_uid_from_logname(db_params)
|
|
191
|
+
except Exception as e:
|
|
192
|
+
print(f"Error getting uid from dssgrp: {e}")
|
|
193
|
+
continue
|
|
194
|
+
extra = {
|
|
195
|
+
'uid': uid,
|
|
196
|
+
'dsid': summary['dsid'],
|
|
197
|
+
'data_format': '',
|
|
198
|
+
'disp_order': 0,
|
|
199
|
+
'dsids': summary['dsids'],
|
|
200
|
+
'note': summary['note']
|
|
201
|
+
}
|
|
202
|
+
insert_tfile_row(summary, db_params, extra, update_on_conflict=True, member_details=member_details if not args.no_update else None)
|
|
203
|
+
print(f"Inserted tar summary for {summary['tfile']} into tfile.")
|
|
204
|
+
return
|
|
154
205
|
if not args.member_list or not os.path.isfile(args.member_list):
|
|
155
206
|
print('Error: --member-list argument is required and must point to a valid file.')
|
|
156
207
|
return
|
|
@@ -68,25 +68,33 @@ def group_files_by_size(files, min_size, max_size):
|
|
|
68
68
|
batches.append(current_batch)
|
|
69
69
|
return batches
|
|
70
70
|
|
|
71
|
-
def tar_batches(dirpath, batches, output_dir):
|
|
72
|
-
|
|
71
|
+
def tar_batches(dirpath, batches, output_dir, root_path=None, root_dirname=None):
|
|
72
|
+
# Use root_dirname for tar file naming and arcname
|
|
73
73
|
for idx, batch in enumerate(batches, 1):
|
|
74
|
-
|
|
75
|
-
|
|
74
|
+
num_files = len(batch)
|
|
75
|
+
tar_name = os.path.join(output_dir, f"{root_dirname}_part{idx}_{num_files}files.tar")
|
|
76
|
+
logging.info(f"Creating tar: {tar_name} with {num_files} files.")
|
|
76
77
|
with tarfile.open(tar_name, "w") as tar:
|
|
77
78
|
for f in batch:
|
|
78
79
|
try:
|
|
79
|
-
|
|
80
|
+
# arcname should be relative to root_path, and always start with root_dirname
|
|
81
|
+
arcname = os.path.relpath(f, root_path)
|
|
82
|
+
arcname = os.path.join(root_dirname, arcname)
|
|
83
|
+
tar.add(f, arcname=arcname)
|
|
80
84
|
except Exception as e:
|
|
81
85
|
logging.warning(f"Failed to add {f} to tar: {e}")
|
|
82
86
|
|
|
83
|
-
def process_directory_tree(
|
|
84
|
-
|
|
87
|
+
def process_directory_tree(root_path, output_dir):
|
|
88
|
+
root_dirname = os.path.basename(os.path.abspath(root_path))
|
|
89
|
+
# Gather all files under root_path first
|
|
90
|
+
all_files = []
|
|
91
|
+
for dirpath, dirnames, filenames in os.walk(root_path):
|
|
85
92
|
abs_files = [os.path.join(dirpath, f) for f in filenames]
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
93
|
+
all_files.extend(abs_files)
|
|
94
|
+
if not all_files:
|
|
95
|
+
return
|
|
96
|
+
batches = group_files_by_size(all_files, ONE_TB, THREE_TB)
|
|
97
|
+
tar_batches(root_path, batches, output_dir, root_path=root_path, root_dirname=root_dirname)
|
|
90
98
|
|
|
91
99
|
def read_directories_from_file(input_file):
|
|
92
100
|
dirs = []
|
{rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: rda_python_dsquasar
|
|
3
|
-
Version: 2.0.5
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: RDA Python package to backup and recover RDA data archives to and from GLOBUS Quasar backup server
|
|
5
5
|
Author-email: Zaihua Ji <zji@ucar.edu>
|
|
6
6
|
Project-URL: Homepage, https://github.com/NCAR/rda-python-dsquasar
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar/ds_quasar.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{rda_python_dsquasar-2.0.5 → rda_python_dsquasar-2.0.6}/src/rda_python_dsquasar.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|