geoseeq 0.6.8a2__py3-none-any.whl → 0.6.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- geoseeq/cli/download.py +29 -3
- geoseeq/cli/main.py +1 -1
- geoseeq/cli/shared_params/id_handlers.py +31 -3
- geoseeq/knex.py +2 -1
- geoseeq/project.py +8 -3
- geoseeq/sample.py +11 -12
- geoseeq/utils.py +13 -5
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/METADATA +1 -1
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/RECORD +13 -13
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/WHEEL +1 -1
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/top_level.txt +0 -0
geoseeq/cli/download.py
CHANGED
@@ -98,6 +98,7 @@ def cli_download_metadata(state, sample_ids):
|
|
98
98
|
|
99
99
|
cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
|
100
100
|
head_option = click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
|
101
|
+
alt_id_option = click.option('--alt-sample-id', default=None, help='Specify an alternate sample id from the project metadata to id samples')
|
101
102
|
|
102
103
|
@cli_download.command("files")
|
103
104
|
@use_common_state
|
@@ -113,6 +114,7 @@ head_option = click.option('--head', default=None, type=int, help='Download the
|
|
113
114
|
@click.option("--extension", multiple=True, help="Only download files with this extension. e.g. 'fastq.gz', 'bam', 'csv'")
|
114
115
|
@click.option("--with-versions/--without-versions", default=False, help="Download all versions of a file, not just the latest")
|
115
116
|
@ignore_errors_option
|
117
|
+
@alt_id_option
|
116
118
|
@project_id_arg
|
117
119
|
@sample_ids_arg
|
118
120
|
def cli_download_files(
|
@@ -129,6 +131,7 @@ def cli_download_files(
|
|
129
131
|
with_versions,
|
130
132
|
download,
|
131
133
|
ignore_errors,
|
134
|
+
alt_sample_id,
|
132
135
|
project_id,
|
133
136
|
sample_ids,
|
134
137
|
):
|
@@ -164,6 +167,13 @@ def cli_download_files(
|
|
164
167
|
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
|
165
168
|
--folder-type sample --extension '.contigs.fasta' # filter for contig files
|
166
169
|
|
170
|
+
\b
|
171
|
+
# Download files from a sample in the metasub project using an alternate sample id called "barcode"
|
172
|
+
$ geoseeq download files 'MetaSUB Consortium/Cell Paper' `# specify the project` \\
|
173
|
+
235183938 `# the alternate sample name (in this case a barcode number)` \\
|
174
|
+
--alt-sample-id barcode `# specify the alternate sample id column name` \\
|
175
|
+
--folder-type 'sample' `# only download files from sample folders`
|
176
|
+
|
167
177
|
---
|
168
178
|
|
169
179
|
Command Arguments:
|
@@ -184,7 +194,7 @@ def cli_download_files(
|
|
184
194
|
samples = []
|
185
195
|
if sample_ids:
|
186
196
|
logger.info(f"Fetching info for {len(sample_ids)} samples.")
|
187
|
-
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
|
197
|
+
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
|
188
198
|
|
189
199
|
response = proj.bulk_find_files(
|
190
200
|
sample_uuids=[s.uuid for s in samples],
|
@@ -377,9 +387,21 @@ def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, i
|
|
377
387
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
378
388
|
@module_option(FASTQ_MODULE_NAMES, use_default=False)
|
379
389
|
@ignore_errors_option
|
390
|
+
@alt_id_option
|
380
391
|
@project_id_arg
|
381
392
|
@sample_ids_arg
|
382
|
-
def cli_download_fastqs(state,
|
393
|
+
def cli_download_fastqs(state,
|
394
|
+
cores,
|
395
|
+
target_dir,
|
396
|
+
yes,
|
397
|
+
first,
|
398
|
+
download,
|
399
|
+
module_name,
|
400
|
+
ignore_errors,
|
401
|
+
alt_sample_id,
|
402
|
+
project_id,
|
403
|
+
sample_ids
|
404
|
+
):
|
383
405
|
"""Download fastq files from a GeoSeeq project.
|
384
406
|
|
385
407
|
This command will download fastq files from a GeoSeeq project. You can filter
|
@@ -401,6 +423,10 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
|
|
401
423
|
# Download all fastq files from two samples in "My Org/My Project"
|
402
424
|
$ geoseeq download fastqs "My Org/My Project" S1 S2
|
403
425
|
|
426
|
+
\b
|
427
|
+
# Download all fastq files from a single sample using an alternate sample id called "barcode"
|
428
|
+
$ geoseeq download fastqs 'MetaSUB Consortium/Cell Paper' 235183938 --alt-sample-id barcode
|
429
|
+
|
404
430
|
---
|
405
431
|
|
406
432
|
Command Arguments:
|
@@ -422,7 +448,7 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
|
|
422
448
|
samples = []
|
423
449
|
if sample_ids:
|
424
450
|
logger.info(f"Fetching info for {len(sample_ids)} samples.")
|
425
|
-
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
|
451
|
+
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
|
426
452
|
else:
|
427
453
|
logger.info("Fetching info for all samples in project.")
|
428
454
|
samples = proj.get_samples()
|
geoseeq/cli/main.py
CHANGED
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.
|
57
|
+
click.echo('0.6.10') # remember to update pyproject.toml
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
@@ -133,7 +133,30 @@ def handle_folder_id(knex, folder_id, yes=False, private=True, create=True):
|
|
133
133
|
raise ValueError('sample_folder_id must be a UUID, an organization name and project name, or a GRN')
|
134
134
|
|
135
135
|
|
136
|
-
def
|
136
|
+
def map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids):
|
137
|
+
"""Return a list of sample UUIDs
|
138
|
+
|
139
|
+
`proj` is a project object
|
140
|
+
`alternate_id_col` is the name of the column containing alternate IDs
|
141
|
+
`sample_ids` is a list of alternate IDs
|
142
|
+
"""
|
143
|
+
metadata = proj.get_sample_metadata()
|
144
|
+
if alternate_id_col not in metadata:
|
145
|
+
raise ValueError(f'Column "{alternate_id_col}" not found in project metadata')
|
146
|
+
alt_col_df = metadata[["uuid", alternate_id_col]]
|
147
|
+
# filter to the alt ids in our list- it is possible alt_id_col as a whole is not
|
148
|
+
# unique but that our list of alt ids is
|
149
|
+
alt_col_df = alt_col_df[alt_col_df[alternate_id_col].isin(sample_ids)]
|
150
|
+
if alt_col_df.shape[0] == 0:
|
151
|
+
raise ValueError(f'No samples found with the given alternate IDs in list')
|
152
|
+
if alt_col_df.shape[0] < len(sample_ids):
|
153
|
+
raise ValueError(f'Not all alternate IDs in list are found')
|
154
|
+
if alt_col_df.shape[0] > len(sample_ids):
|
155
|
+
raise ValueError(f'More than one sample found with the same alternate ID')
|
156
|
+
return list(alt_col_df['uuid'])
|
157
|
+
|
158
|
+
|
159
|
+
def handle_multiple_sample_ids(knex, sample_ids, proj=None, alternate_id_col=None):
|
137
160
|
"""Return a list of fetched sample objects
|
138
161
|
|
139
162
|
`sample_ids` may have three different structures:
|
@@ -144,7 +167,9 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
|
|
144
167
|
Any sample may in fact be a file containing sample IDs, in which case the file will be read line by line
|
145
168
|
and each element will be a sample ID
|
146
169
|
|
147
|
-
If `
|
170
|
+
If `proj` is provided then `alternate_id_col` may also be provided.
|
171
|
+
If so then alternate IDs will be used to fetch samples. If alternate ids are
|
172
|
+
not present or not unique then fail.
|
148
173
|
"""
|
149
174
|
project_as_arg = bool(proj)
|
150
175
|
if proj or (proj := el_is_project_id(knex, sample_ids[0])):
|
@@ -155,7 +180,10 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
|
|
155
180
|
return list(proj.get_samples(cache=False))
|
156
181
|
else:
|
157
182
|
samples = []
|
158
|
-
|
183
|
+
sample_ids = flatten_list_of_els_and_files(sample_ids)
|
184
|
+
if alternate_id_col:
|
185
|
+
sample_ids = map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids)
|
186
|
+
for el in sample_ids:
|
159
187
|
if is_grn_or_uuid(el):
|
160
188
|
el = el.split(':')[-1]
|
161
189
|
samples.append(sample_from_uuid(knex, el))
|
geoseeq/knex.py
CHANGED
geoseeq/project.py
CHANGED
@@ -256,9 +256,14 @@ class Project(RemoteObject):
|
|
256
256
|
|
257
257
|
def get_sample_metadata(self):
|
258
258
|
"""Return a pandas dataframe with sample metadata."""
|
259
|
-
url = f"sample_groups/{self.uuid}/
|
260
|
-
|
261
|
-
|
259
|
+
url = f"sample_groups/{self.uuid}/samples-list?page=1&page_size=500&&"
|
260
|
+
rows = []
|
261
|
+
while url:
|
262
|
+
blob = self.knex.get(url)
|
263
|
+
rows.extend(blob["results"])
|
264
|
+
url = blob["next"]
|
265
|
+
return pd.DataFrame(rows)
|
266
|
+
|
262
267
|
|
263
268
|
@property
|
264
269
|
def n_samples(self):
|
geoseeq/sample.py
CHANGED
@@ -205,18 +205,17 @@ class Sample(RemoteObject):
|
|
205
205
|
files[read_type] = {}
|
206
206
|
for folder_name, file_grns in folders.items():
|
207
207
|
files[read_type][folder_name] = []
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
[
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
)
|
208
|
+
if read_type in ["short_read::paired_end"]:
|
209
|
+
files[read_type][folder_name].append(
|
210
|
+
[
|
211
|
+
self._grn_to_file(file_grns[0]),
|
212
|
+
self._grn_to_file(file_grns[1]),
|
213
|
+
]
|
214
|
+
)
|
215
|
+
else:
|
216
|
+
files[read_type][folder_name].append(
|
217
|
+
self._grn_to_file(file_grns[0])
|
218
|
+
)
|
220
219
|
return files
|
221
220
|
|
222
221
|
def get_one_fasta(self):
|
geoseeq/utils.py
CHANGED
@@ -16,11 +16,19 @@ logger.addHandler(logging.NullHandler()) # No output unless configured by calli
|
|
16
16
|
def load_auth_profile(profile=""):
|
17
17
|
"""Return an endpoit and a token"""
|
18
18
|
profile = profile or "__default__"
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
19
|
+
try:
|
20
|
+
with open(PROFILES_PATH, "r") as f:
|
21
|
+
profiles = json.load(f)
|
22
|
+
if profile in profiles:
|
23
|
+
return profiles[profile]["endpoint"], profiles[profile]["token"]
|
24
|
+
raise KeyError(f"Profile {profile} not found.")
|
25
|
+
except FileNotFoundError:
|
26
|
+
endpoint, token = environ.get("GEOSEEQ_ENDPOINT", DEFAULT_ENDPOINT), environ.get("GEOSEEQ_API_TOKEN", None)
|
27
|
+
if token:
|
28
|
+
logger.debug("Using environment variables for authentication.")
|
29
|
+
else:
|
30
|
+
logger.warning("Accessing anonymously, functionality may be limited. Configure profiles or set GEOSEEQ_API_TOKEN to authenticate.")
|
31
|
+
return endpoint, token
|
24
32
|
|
25
33
|
|
26
34
|
def set_profile(token, endpoint=DEFAULT_ENDPOINT, profile="", overwrite=False):
|
@@ -4,25 +4,25 @@ geoseeq/blob_constructors.py,sha256=AkWpDQY0EdGMxF1p6eRspyHKubcUdiW4it-_Q7S2QWk,
|
|
4
4
|
geoseeq/bulk_creators.py,sha256=pdn-Dv7yv5SFv-PfDuQbuOnw2W4-BfIfRJVRAhM8U6s,2115
|
5
5
|
geoseeq/constants.py,sha256=z_ninEd7WsS5DaLntdR-sqAFib6Ie22jlhPKzLvLerw,449
|
6
6
|
geoseeq/file_system_cache.py,sha256=HzVZWtwLD2fjWWSo_UfWmGeBltm9He4lP_OqzKwNGWg,4138
|
7
|
-
geoseeq/knex.py,sha256=
|
7
|
+
geoseeq/knex.py,sha256=GXPsV65w1co1OvpB4pngH4f5I1p1AAxDWX7e2QkMXw0,7987
|
8
8
|
geoseeq/organization.py,sha256=bJkYL8_D-k6IYAaii2ZbxjwYnXy6lvu6iLXscxKlA3w,2542
|
9
9
|
geoseeq/pipeline.py,sha256=89mhWaecsKnm6tyRkdkaVp4dmZh62_v42Ze0oXf8OTY,9873
|
10
|
-
geoseeq/project.py,sha256=
|
10
|
+
geoseeq/project.py,sha256=kN6m1N4Tlud7saU03Sbir-oIBnXet_Cwi2OVVdaeag0,13929
|
11
11
|
geoseeq/remote_object.py,sha256=GYN6PKU7Zz3htIdpFjfZiFejzGqqJHbJyKlefM1Eixk,7151
|
12
|
-
geoseeq/sample.py,sha256=
|
12
|
+
geoseeq/sample.py,sha256=OU4H-U8XxsFosfa9wcWWrHq9NVT3nDKZcvPtPGGlLlk,8310
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
14
|
geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
|
-
geoseeq/utils.py,sha256=
|
16
|
+
geoseeq/utils.py,sha256=ZXpWb2MetUIeLrExiXb7IaOXYrW1pvrdP3o0KWzbwCs,4035
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
18
18
|
geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
|
19
19
|
geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
|
20
20
|
geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
|
21
21
|
geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
22
|
-
geoseeq/cli/download.py,sha256=
|
22
|
+
geoseeq/cli/download.py,sha256=W3OswqpHg1thzW6CJ7IcSS0Te2LA2WfgYISQMSl4GQg,18921
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=NPW0EHw2JSdyQ5_nCSPiCsEsy0ZKJ1u7WJa9RQfMmqI,3918
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -35,7 +35,7 @@ geoseeq/cli/view.py,sha256=P-o2YKBkTrPzSI-JOv7xROc63HLSUygZNZsjp9TGvSw,6783
|
|
35
35
|
geoseeq/cli/shared_params/__init__.py,sha256=ckNHGCBJUpJbQmcYi7lW-lsC0xKud6CCMznwcG5Vte4,325
|
36
36
|
geoseeq/cli/shared_params/common_state.py,sha256=jiHZtL3TATMjEoqhbO7HT8KkLJr1QPsy7ZHT4qcoQ1E,4095
|
37
37
|
geoseeq/cli/shared_params/config.py,sha256=HQ0xQh_jdt3EKI5VXYqQXzo-s8Rm6YlziMyVX-kg598,1072
|
38
|
-
geoseeq/cli/shared_params/id_handlers.py,sha256=
|
38
|
+
geoseeq/cli/shared_params/id_handlers.py,sha256=KtzflnplYVkXsyqI5Ej6r-_BwQnuXVHPr7JcYumTKNc,10700
|
39
39
|
geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
|
40
40
|
geoseeq/cli/shared_params/opts_and_args.py,sha256=_DcJ-TqgrbBaeDd-kuHEx2gLZPQN6EHZYWh8Ag-d8Vg,2091
|
41
41
|
geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
|
@@ -85,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
85
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
87
87
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
88
|
-
geoseeq-0.6.
|
89
|
-
geoseeq-0.6.
|
90
|
-
geoseeq-0.6.
|
91
|
-
geoseeq-0.6.
|
92
|
-
geoseeq-0.6.
|
93
|
-
geoseeq-0.6.
|
88
|
+
geoseeq-0.6.10.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
89
|
+
geoseeq-0.6.10.dist-info/METADATA,sha256=moKunTjFSy4nQVg0X-Es5JmhsyPwtU9DZr-LIR6bCUE,4916
|
90
|
+
geoseeq-0.6.10.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
91
|
+
geoseeq-0.6.10.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
92
|
+
geoseeq-0.6.10.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
93
|
+
geoseeq-0.6.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|