rda-python-globus 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 NSF National Center for Atmospheric Research
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,119 @@
1
+ Metadata-Version: 2.4
2
+ Name: rda_python_globus
3
+ Version: 1.0.0
4
+ Summary: Tools for managing Globus transfers supporting the NSF NCAR Research Data Archive
5
+ Author-email: Thomas Cram <tcram@ucar.edu>
6
+ Project-URL: Homepage, https://github.com/NCAR/rda-python-globus
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Development Status :: 5 - Production/Stable
11
+ Requires-Python: >=3.7
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: globus_sdk
15
+ Requires-Dist: click
16
+ Requires-Dist: pyyaml
17
+ Requires-Dist: six
18
+ Dynamic: license-file
19
+
20
+ # dsglobus
21
+
22
+ This application is a command-line tool for Globus data transfer and management of files
23
+ archived in the NSF NCAR Research Data Archive.
24
+
25
+ ## Installation
26
+
27
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to
28
+ install `rda_python_globus`.
29
+
30
+ From within your Python virtual environment:
31
+ - run `pip install git+https://github.com/NCAR/rda-python-globus`
32
+
33
+ After installation, the cli command `dsglobus` will be available in
34
+ the /bin directory of your virtual environment.
35
+
36
+ ## Command-line usage
37
+
38
+ The `dsglobus` app is run with the following subcommands. Each supports a
39
+ `--help/-h` option for details and examples on its usage:
40
+ ```
41
+ dsglobus transfer --help
42
+ dsglobus get-task --help
43
+ dsglobus task-list --help
44
+ dsglobus cancel-task --help
45
+ dsglobus ls --help
46
+ dsglobus mkdir --help
47
+ dsglobus rename --help
48
+ dsglobus delete --help
49
+ ```
50
+
51
+ ### Example usage
52
+ 1. Transfer a single file from the `NCAR RDA GLADE` endpoint to the `NCAR RDA Quasar`
53
+ endpoint:
54
+ ```
55
+ $ dsglobus transfer \
56
+ --source-endpoint rda-glade \
57
+ --destination-endpoint rda-quasar \
58
+ --source-file /data/d999009/file.txt \
59
+ --destination-file /d999009/file.txt
60
+ ```
61
+ 2. Multiple files can be transferred with a single `dsglobus transfer` call by
62
+ passing a JSON formatted list of files. To transfer a batch of files from a JSON file:
63
+ ```
64
+ $ dsglobus transfer \
65
+ --source-endpoint SOURCE_ENDPOINT \
66
+ --destination-endpoint DESTINATION_ENDPOINT \
67
+ --batch /path/to/batch.json
68
+ ```
69
+ where the contents of `batch.json` are formatted with `source_file/destination_file`
70
+ pairs as:
71
+ ```
72
+ {
73
+ "files": [
74
+ {"source_file": "/data/d999009/file1.tar", "destination_file": "/d999009/file1.tar"},
75
+ {"source_file": "/data/d999009/file2.tar", "destination_file": "/d999009/file2.tar"},
76
+ {"source_file": "/data/d999009/file3.tar", "destination_file": "/d999009/file3.tar"}
77
+ ]
78
+ }
79
+ ```
80
+
81
+ ### Listing contents of a directory on a Globus endpoint
82
+
83
+ A listing of files on a Globus endpoint can be retrieved via the `dsglobus ls` command. This
84
+ command supports filtering the results subject to the following rules:
85
+
86
+ - Filter patterns must start with `=`, `~`, `!`, or `!~`. If none of these are given, `=` will
87
+ be used
88
+ - `=` does exact matching
89
+ - `~` does regex matching, supporting globs (`*`)
90
+ - `!` does inverse `=` matching
91
+ - `!~` does inverse `~` matching
92
+ - `~*.txt` matches all `.txt` files, for example
93
+
94
+ Examples:
95
+ ```
96
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '~*.txt' # all txt files
97
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '!~file1.*' # not starting in "file1."
98
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '~*ile3.tx*' # anything with "ile3.tx"
99
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '=file2.txt' # only "file2.txt"
100
+ $ dsglobus ls -ep <endpoint> -p <path> --filter 'file2.txt' # same as '=file2.txt'
101
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '!=file2.txt' # anything but "file2.txt"
102
+ ```
103
+
104
+ ## Customizing and extending dsglobus
105
+
106
+ This app can be modified and adapted to be used on other Globus clients and endpoints with
107
+ minimal effort. Simply update the client ID, token storage, endpoint IDs, endpoint aliases, and
108
+ other configuration parameters in `rda_python_globus/lib/config.py` to adapt the app to your use
109
+ case and specific needs.
110
+
111
+ ## Resources
112
+
113
+ This app is adapted from the fully featured [Globus Command Line Interface (CLI)](https://docs.globus.org/cli/)
114
+ and uses the
115
+ [`TransferClient` class from the Globus SDK](https://globus-sdk-python.readthedocs.io/en/stable/services/transfer.html).
116
+
117
+ The full [Globus Transfer documentation](https://docs.globus.org/api/transfer/) offers full
118
+ details about the service and reference documentation for all of
119
+ its supported methods and features.
@@ -0,0 +1,100 @@
1
+ # dsglobus
2
+
3
+ This application is a command-line tool for Globus data transfer and management of files
4
+ archived in the NSF NCAR Research Data Archive.
5
+
6
+ ## Installation
7
+
8
+ Use the package manager [pip](https://pip.pypa.io/en/stable/) to
9
+ install `rda_python_globus`.
10
+
11
+ From within your Python virtual environment:
12
+ - run `pip install git+https://github.com/NCAR/rda-python-globus`
13
+
14
+ After installation, the cli command `dsglobus` will be available in
15
+ the /bin directory of your virtual environment.
16
+
17
+ ## Command-line usage
18
+
19
+ The `dsglobus` app is run with the following subcommands. Each supports a
20
+ `--help/-h` option for details and examples on its usage:
21
+ ```
22
+ dsglobus transfer --help
23
+ dsglobus get-task --help
24
+ dsglobus task-list --help
25
+ dsglobus cancel-task --help
26
+ dsglobus ls --help
27
+ dsglobus mkdir --help
28
+ dsglobus rename --help
29
+ dsglobus delete --help
30
+ ```
31
+
32
+ ### Example usage
33
+ 1. Transfer a single file from the `NCAR RDA GLADE` endpoint to the `NCAR RDA Quasar`
34
+ endpoint:
35
+ ```
36
+ $ dsglobus transfer \
37
+ --source-endpoint rda-glade \
38
+ --destination-endpoint rda-quasar \
39
+ --source-file /data/d999009/file.txt \
40
+ --destination-file /d999009/file.txt
41
+ ```
42
+ 2. Multiple files can be transferred with a single `dsglobus transfer` call by
43
+ passing a JSON formatted list of files. To transfer a batch of files from a JSON file:
44
+ ```
45
+ $ dsglobus transfer \
46
+ --source-endpoint SOURCE_ENDPOINT \
47
+ --destination-endpoint DESTINATION_ENDPOINT \
48
+ --batch /path/to/batch.json
49
+ ```
50
+ where the contents of `batch.json` are formatted with `source_file/destination_file`
51
+ pairs as:
52
+ ```
53
+ {
54
+ "files": [
55
+ {"source_file": "/data/d999009/file1.tar", "destination_file": "/d999009/file1.tar"},
56
+ {"source_file": "/data/d999009/file2.tar", "destination_file": "/d999009/file2.tar"},
57
+ {"source_file": "/data/d999009/file3.tar", "destination_file": "/d999009/file3.tar"}
58
+ ]
59
+ }
60
+ ```
61
+
62
+ ### Listing contents of a directory on a Globus endpoint
63
+
64
+ A listing of files on a Globus endpoint can be retrieved via the `dsglobus ls` command. This
65
+ command supports filtering the results subject to the following rules:
66
+
67
+ - Filter patterns must start with `=`, `~`, `!`, or `!~`. If none of these are given, `=` will
68
+ be used
69
+ - `=` does exact matching
70
+ - `~` does regex matching, supporting globs (`*`)
71
+ - `!` does inverse `=` matching
72
+ - `!~` does inverse `~` matching
73
+ - `~*.txt` matches all `.txt` files, for example
74
+
75
+ Examples:
76
+ ```
77
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '~*.txt' # all txt files
78
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '!~file1.*' # not starting in "file1."
79
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '~*ile3.tx*' # anything with "ile3.tx"
80
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '=file2.txt' # only "file2.txt"
81
+ $ dsglobus ls -ep <endpoint> -p <path> --filter 'file2.txt' # same as '=file2.txt'
82
+ $ dsglobus ls -ep <endpoint> -p <path> --filter '!=file2.txt' # anything but "file2.txt"
83
+ ```
84
+
85
+ ## Customizing and extending dsglobus
86
+
87
+ This app can be modified and adapted to be used on other Globus clients and endpoints with
88
+ minimal effort. Simply update the client ID, token storage, endpoint IDs, endpoint aliases, and
89
+ other configuration parameters in `rda_python_globus/lib/config.py` to adapt the app to your use
90
+ case and specific needs.
91
+
92
+ ## Resources
93
+
94
+ This app is adapted from the fully featured [Globus Command Line Interface (CLI)](https://docs.globus.org/cli/)
95
+ and uses the
96
+ [`TransferClient` class from the Globus SDK](https://globus-sdk-python.readthedocs.io/en/stable/services/transfer.html).
97
+
98
+ The full [Globus Transfer documentation](https://docs.globus.org/api/transfer/) offers full
99
+ details about the service and reference documentation for all of
100
+ its supported methods and features.
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = [
3
+ "setuptools>=61.0",
4
+ ]
5
+ build-backend = "setuptools.build_meta"
6
+
7
+ [project]
8
+ name = "rda_python_globus"
9
+ version = "1.0.0"
10
+ authors = [
11
+ { name="Thomas Cram", email="tcram@ucar.edu" },
12
+ ]
13
+ description = "Tools for managing Globus transfers supporting the NSF NCAR Research Data Archive"
14
+ readme = "README.md"
15
+ requires-python = ">=3.7"
16
+ classifiers = [
17
+ "Programming Language :: Python :: 3",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Development Status :: 5 - Production/Stable",
21
+ ]
22
+ dependencies = [
23
+ "globus_sdk",
24
+ "click",
25
+ "pyyaml",
26
+ "six",
27
+ ]
28
+
29
+ [project.urls]
30
+ "Homepage" = "https://github.com/NCAR/rda-python-globus"
31
+
32
+ [project.scripts]
33
+ dsglobus = "rda_python_globus:cli"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .main import cli
2
+
3
+ __all__ = ("cli",)
@@ -0,0 +1,327 @@
1
+ import sys
2
+ import click
3
+ import textwrap
4
+ import typing as t
5
+ from globus_sdk import DeleteData, GlobusAPIError, NetworkError
6
+
7
+ from .lib import (
8
+ common_options,
9
+ task_submission_options,
10
+ path_options,
11
+ endpoint_options,
12
+ transfer_client,
13
+ process_json_stream,
14
+ )
15
+
16
+ import logging
17
+ logger = logging.getLogger(__name__)
18
+
19
def add_batch_to_delete_data(batch, delete_data):
    """Queue every entry of a JSON batch stream on a delete-data object.

    ``batch`` is decoded by ``process_json_stream`` and each resulting entry
    is passed to ``delete_data.add_item``.  The same ``delete_data`` object is
    returned so the call can be used in an assignment.
    """
    for target in process_json_stream(batch):
        delete_data.add_item(target)
    return delete_data
26
+
27
# NOTE: the epilog and the function docstring are rendered by click as help
# text, so their wording is user-facing and must not be edited casually.
@click.command(
    "mkdir",
    short_help="Create a directory on a Globus endpoint.",
    epilog='''
\b
=== Examples ===
\b
1. Create a directory on the RDA Quasar endpoint:
\b
$ dsglobus mkdir \\
--endpoint rda-quasar \\
--path /d999009/new_directory
'''
)
@endpoint_options
@path_options
@common_options
def mkdir_command(
    endpoint: str,
    path: str,
) -> None:
    """
    Create a directory on a Globus endpoint. Directory path is relative to the endpoint host path.
    """
    client = transfer_client()

    # Only the API call itself is guarded; API and network failures are
    # logged and converted into a click abort.
    try:
        response = client.operation_mkdir(endpoint, path=path)
    except (GlobusAPIError, NetworkError) as err:
        logger.error(f"Error creating directory: {err}")
        raise click.Abort()

    # Report the message returned by the service.
    click.echo(f"{response['message']}")
58
+
59
# NOTE: the epilog, option help strings and the function docstring are
# rendered by click as help text, so their wording is user-facing.
@click.command(
    "rename",
    short_help="Rename a file or directory on a Globus endpoint.",
    epilog='''
\b
=== Examples ===
\b
1. Rename a single file on the RDA Quasar endpoint:
\b
$ dsglobus rename \\
--endpoint rda-quasar \\
--old-path /d999009/old_file.txt \\
--new-path /d999009/new_file.txt
\b
2. Rename a directory on the RDA Quasar endpoint:
\b
$ dsglobus rename \\
--endpoint rda-quasar \\
--old-path /d999009/old_directory \\
--new-path /d999009/new_directory
\b
3. Rename a batch of files/directories on the RDA Quasar endpoint:
\b
$ dsglobus rename \\
--endpoint rda-quasar \\
--batch /path/to/batch.json
\b
The batch file should contain a JSON array of file paths to rename.
\b
Example batch file contents:
\b
[
{
"old_path": "/d999009/file_old.txt",
"new_path": "/d999009/file_new.txt"
},
{
"old_path": "/d999009/file2_old.txt",
"new_path": "/d999009/file2_new.txt"
},
{
"old_path": "/d999009/old_directory/",
"new_path": "/d999009/new_directory/"
}
]
\b
4. The batch files can also be read from stdin using '-':
\b
$ dsglobus rename \\
--endpoint rda-quasar \\
--batch -
[
{
"old_path": "/d999009/file_old.txt",
"new_path": "/d999009/file_new.txt"
},
{
"old_path": "/d999009/file2_old.txt",
"new_path": "/d999009/file2_new.txt"
},
{
"old_path": "/d999009/old_directory/",
"new_path": "/d999009/new_directory/"
}
]
<Ctrl+D>
'''
)
@click.option(
    "--old-path",
    "-op",
    type=str,
    help="Old file or directory path on the endpoint. Ignored if --batch is used.",
)
@click.option(
    "--new-path",
    "-np",
    type=str,
    help="New file or directory path on the endpoint. Ignored if --batch is used.",
)
@click.option(
    "--batch",
    type=click.File('r'),
    help=textwrap.dedent("""\
        Accept a batch of multiple file/directory name pairs from a file.
        Use '-' to read from stdin, and close the stream with 'Ctrl+D'.
        See examples below.
    """),
)
@endpoint_options
@common_options
def rename_command(
    endpoint: str,
    old_path: str,
    new_path: str,
    batch: t.TextIO
) -> None:
    """
    Rename a file or directory on a Globus endpoint. Path is relative to the endpoint host path.
    """
    if old_path is None and new_path is None and batch is None:
        raise click.UsageError('--old-path and --new-path, or --batch is required.')

    if batch:
        # Handle unparsable batch input the same way delete_command does,
        # instead of letting a traceback escape.
        # NOTE(review): presumably process_json_stream signals bad input with
        # ValueError (delete_command catches that) - confirm against the helper.
        try:
            files = list(process_json_stream(batch))
        except ValueError as e:
            logger.error(f"Error processing batch file: {e}")
            raise click.Abort()
    else:
        if old_path is None or new_path is None:
            raise click.UsageError('--old-path and --new-path are required if --batch is not used.')
        files = [
            {
                "old_path": old_path,
                "new_path": new_path
            }
        ]

    # Validate every entry before touching the endpoint so that a malformed
    # entry cannot abort the run after earlier renames were already applied.
    try:
        rename_pairs = [(entry["old_path"], entry["new_path"]) for entry in files]
    except (KeyError, TypeError) as e:
        logger.error(
            "Error processing batch file: each entry must be an object "
            f"with 'old_path' and 'new_path' keys ({e!r})"
        )
        raise click.Abort()

    tc = transfer_client()
    for old_path, new_path in rename_pairs:
        try:
            res = tc.operation_rename(endpoint, oldpath=old_path, newpath=new_path)
            click.echo(f"old path: {old_path}\nnew path: {new_path}\n{res['message']}")
        except (GlobusAPIError, NetworkError) as e:
            # Stop at the first failed rename; remaining pairs are not attempted.
            logger.error(f"Error renaming file/directory: {e}")
            raise click.Abort()
184
+
185
# NOTE: the epilog, option help strings and the function docstring are
# rendered by click as help text, so their wording is user-facing.
@click.command(
    "delete",
    short_help="Delete files and/or directories on a Globus endpoint.",
    epilog='''
\b
=== Examples ===
\b
1. Delete a single file on the RDA Quasar endpoint:
\b
$ dsglobus delete \\
--endpoint rda-quasar \\
--target-file /d999009/file.txt

\b
2. Delete a directory on the RDA Quasar endpoint. --recursive is required
to delete a directory and its contents:
\b
$ dsglobus delete \\
--endpoint rda-quasar \\
--target-file /d999009/dir \\
--recursive
\b
3. Delete a batch of files/directories on the RDA Quasar endpoint:
\b
$ dsglobus delete \\
--endpoint rda-quasar \\
--batch /path/to/batch.json \\
--recursive
\b
The batch file should contain a JSON array of file paths to delete.
\b
Example batch file contents:
\b
[
"/d999009/file1.txt",
"/d999009/file2.txt",
"/d999009/dir1",
"/d999009/dir2"
]
\b
4. The batch files can also be read from stdin using '-':
\b
$ dsglobus delete \\
--endpoint rda-quasar \\
--recursive \\
--batch -
[
"/d999009/file1.txt",
"/d999009/file2.txt",
"/d999009/dir1",
"/d999009/dir2"
]
<Ctrl+D>
'''
)
@click.option(
    "--target-file",
    "-tf",
    type=str,
    help="File or directory to delete on the endpoint. Ignored if --batch is used.",
)
@click.option(
    "--batch",
    type=click.File('r'),
    help=textwrap.dedent("""\
        Accept a batch of files/directories from a file.
        Use '-' to read from stdin, and close the stream with 'Ctrl+D'.
        See examples below.
    """),
)
@click.option(
    "--recursive",
    "-r",
    is_flag=True,
    default=False,
    show_default=True,
    help="Recursively delete directories and their contents. Required if deleting a directory.",
)
@endpoint_options
@task_submission_options
@common_options
def delete_command(
    endpoint: str,
    target_file: str,
    label: str,
    batch: t.TextIO,
    dry_run: bool,
    recursive: bool,
) -> None:
    """
    Delete files and/or directories on a Globus endpoint. Directory
    path is relative to the endpoint host path.
    """
    tc = transfer_client()
    delete_data = DeleteData(tc, endpoint, label=label, recursive=recursive)

    # If a batch file is provided, read the file and add to delete data
    if batch:
        try:
            delete_data = add_batch_to_delete_data(batch, delete_data)
        except ValueError as e:
            logger.error(f"Error processing batch file: {e}")
            raise click.Abort()
    else:
        if target_file is None:
            raise click.UsageError('--target-file is required if --batch is not used.')
        # Add the target file to delete data
        try:
            delete_data.add_item(target_file)
        except ValueError as e:
            logger.error(f"Error adding target file: {e}")
            raise click.Abort()

    # If dry run is specified, print the delete data and exit
    if dry_run:
        click.echo("Dry run: delete data to be submitted:")
        data = delete_data.data
        click.echo(f"Endpoint: {data['endpoint']}")
        # A missing label is reported as "None".
        click.echo(f"Label: {data.get('label')}")
        click.echo("Files to delete:")
        for item in data["DATA"]:
            click.echo(f"  {item}")
        click.echo("\n")

        # A completed dry run is a success, so report exit status 0; a
        # non-zero status would make calling scripts treat it as a failure.
        sys.exit(0)

    # Submit the task
    try:
        delete_response = tc.submit_delete(delete_data)
        task_id = delete_response["task_id"]
    except (GlobusAPIError, NetworkError) as e:
        logger.error(f"Error submitting task: {e}")
        raise click.Abort()
    click.echo(f'Task ID: {task_id}\n{delete_response["message"]}')
323
+
324
def add_commands(group):
    """Attach the mkdir, rename and delete commands to ``group``.

    Each command is registered through ``group.add_command`` in the order
    mkdir, rename, delete.
    """
    for command in (mkdir_command, rename_command, delete_command):
        group.add_command(command)