geoseeq 0.6.8a2__py3-none-any.whl → 0.6.10__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- geoseeq/cli/download.py +29 -3
- geoseeq/cli/main.py +1 -1
- geoseeq/cli/shared_params/id_handlers.py +31 -3
- geoseeq/knex.py +2 -1
- geoseeq/project.py +8 -3
- geoseeq/sample.py +11 -12
- geoseeq/utils.py +13 -5
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/METADATA +1 -1
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/RECORD +13 -13
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/WHEEL +1 -1
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/LICENSE +0 -0
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/entry_points.txt +0 -0
- {geoseeq-0.6.8a2.dist-info → geoseeq-0.6.10.dist-info}/top_level.txt +0 -0
geoseeq/cli/download.py
CHANGED
@@ -98,6 +98,7 @@ def cli_download_metadata(state, sample_ids):
|
|
98
98
|
|
99
99
|
cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
|
100
100
|
head_option = click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
|
101
|
+
alt_id_option = click.option('--alt-sample-id', default=None, help='Specify an alternate sample id from the project metadata to id samples')
|
101
102
|
|
102
103
|
@cli_download.command("files")
|
103
104
|
@use_common_state
|
@@ -113,6 +114,7 @@ head_option = click.option('--head', default=None, type=int, help='Download the
|
|
113
114
|
@click.option("--extension", multiple=True, help="Only download files with this extension. e.g. 'fastq.gz', 'bam', 'csv'")
|
114
115
|
@click.option("--with-versions/--without-versions", default=False, help="Download all versions of a file, not just the latest")
|
115
116
|
@ignore_errors_option
|
117
|
+
@alt_id_option
|
116
118
|
@project_id_arg
|
117
119
|
@sample_ids_arg
|
118
120
|
def cli_download_files(
|
@@ -129,6 +131,7 @@ def cli_download_files(
|
|
129
131
|
with_versions,
|
130
132
|
download,
|
131
133
|
ignore_errors,
|
134
|
+
alt_sample_id,
|
132
135
|
project_id,
|
133
136
|
sample_ids,
|
134
137
|
):
|
@@ -164,6 +167,13 @@ def cli_download_files(
|
|
164
167
|
haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
|
165
168
|
--folder-type sample --extension '.contigs.fasta' # filter for contig files
|
166
169
|
|
170
|
+
\b
|
171
|
+
# Download files from a sample in the metasub project using an alternate sample id called "barcode"
|
172
|
+
$ geoseeq download files 'MetaSUB Consortium/Cell Paper' `# specify the project` \\
|
173
|
+
235183938 `# the alternate sample name (in this case a barcode number)` \\
|
174
|
+
--alt-sample-id barcode `# specify the alternate sample id column name` \\
|
175
|
+
--folder-type 'sample' `# only download files from sample folders`
|
176
|
+
|
167
177
|
---
|
168
178
|
|
169
179
|
Command Arguments:
|
@@ -184,7 +194,7 @@ def cli_download_files(
|
|
184
194
|
samples = []
|
185
195
|
if sample_ids:
|
186
196
|
logger.info(f"Fetching info for {len(sample_ids)} samples.")
|
187
|
-
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
|
197
|
+
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
|
188
198
|
|
189
199
|
response = proj.bulk_find_files(
|
190
200
|
sample_uuids=[s.uuid for s in samples],
|
@@ -377,9 +387,21 @@ def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, i
|
|
377
387
|
@click.option("--download/--urls-only", default=True, help="Download files or just print urls")
|
378
388
|
@module_option(FASTQ_MODULE_NAMES, use_default=False)
|
379
389
|
@ignore_errors_option
|
390
|
+
@alt_id_option
|
380
391
|
@project_id_arg
|
381
392
|
@sample_ids_arg
|
382
|
-
def cli_download_fastqs(state,
|
393
|
+
def cli_download_fastqs(state,
|
394
|
+
cores,
|
395
|
+
target_dir,
|
396
|
+
yes,
|
397
|
+
first,
|
398
|
+
download,
|
399
|
+
module_name,
|
400
|
+
ignore_errors,
|
401
|
+
alt_sample_id,
|
402
|
+
project_id,
|
403
|
+
sample_ids
|
404
|
+
):
|
383
405
|
"""Download fastq files from a GeoSeeq project.
|
384
406
|
|
385
407
|
This command will download fastq files from a GeoSeeq project. You can filter
|
@@ -401,6 +423,10 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
|
|
401
423
|
# Download all fastq files from two samples in "My Org/My Project"
|
402
424
|
$ geoseeq download fastqs "My Org/My Project" S1 S2
|
403
425
|
|
426
|
+
\b
|
427
|
+
# Download all fastq files from a single sample using an alternate sample id called "barcode"
|
428
|
+
$ geoseeq download fastqs 'MetaSUB Consortium/Cell Paper' 235183938 --alt-sample-id barcode
|
429
|
+
|
404
430
|
---
|
405
431
|
|
406
432
|
Command Arguments:
|
@@ -422,7 +448,7 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
|
|
422
448
|
samples = []
|
423
449
|
if sample_ids:
|
424
450
|
logger.info(f"Fetching info for {len(sample_ids)} samples.")
|
425
|
-
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
|
451
|
+
samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
|
426
452
|
else:
|
427
453
|
logger.info("Fetching info for all samples in project.")
|
428
454
|
samples = proj.get_samples()
|
geoseeq/cli/main.py
CHANGED
@@ -54,7 +54,7 @@ def version():
|
|
54
54
|
Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
|
55
55
|
Run `geoseeq eula show` to view the EULA.
|
56
56
|
"""
|
57
|
-
click.echo('0.6.
|
57
|
+
click.echo('0.6.10') # remember to update pyproject.toml
|
58
58
|
|
59
59
|
|
60
60
|
@main.group('advanced')
|
@@ -133,7 +133,30 @@ def handle_folder_id(knex, folder_id, yes=False, private=True, create=True):
|
|
133
133
|
raise ValueError('sample_folder_id must be a UUID, an organization name and project name, or a GRN')
|
134
134
|
|
135
135
|
|
136
|
-
def
|
136
|
+
def map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids):
    """Return the UUIDs of the samples identified by alternate IDs.

    `proj` is a project object; its `get_sample_metadata()` must return a
        dataframe with a `uuid` column
    `alternate_id_col` is the name of the metadata column containing alternate IDs
    `sample_ids` is a list of alternate IDs

    Alternate IDs are compared by their string form so that IDs typed on the
    command line (always strings) also match numeric metadata columns.
    An ID repeated in `sample_ids` is only looked up once.

    UUIDs are returned in the order the samples appear in the metadata.

    Raises ValueError if the column does not exist, if any requested ID
    matches no sample, or if any requested ID matches more than one sample.
    """
    metadata = proj.get_sample_metadata()
    if alternate_id_col not in metadata:
        raise ValueError(f'Column "{alternate_id_col}" not found in project metadata')

    # dict.fromkeys de-duplicates while keeping the caller's order.
    wanted = list(dict.fromkeys(str(el) for el in sample_ids))

    alt_values = metadata[alternate_id_col]
    alt_keys = alt_values.astype(str)
    # Only filter to the requested IDs: the column as a whole may contain
    # duplicates as long as the requested IDs are unique. Missing values are
    # excluded so their string form can never match a requested ID.
    is_match = alt_values.notna() & alt_keys.isin(wanted)
    matched_keys = alt_keys[is_match]

    if matched_keys.shape[0] == 0:
        raise ValueError('No samples found with the given alternate IDs in list')

    # Check each ID individually rather than comparing row counts: a missing
    # ID and a duplicated ID would otherwise cancel each other out.
    found = set(matched_keys)
    missing = [el for el in wanted if el not in found]
    if missing:
        raise ValueError(f'Not all alternate IDs in list are found: {missing}')

    repeated = sorted(set(matched_keys[matched_keys.duplicated()]))
    if repeated:
        raise ValueError(f'More than one sample found with the same alternate ID: {repeated}')

    return list(metadata.loc[is_match, 'uuid'])
|
157
|
+
|
158
|
+
|
159
|
+
def handle_multiple_sample_ids(knex, sample_ids, proj=None, alternate_id_col=None):
|
137
160
|
"""Return a list of fetched sample objects
|
138
161
|
|
139
162
|
`sample_ids` may have three different structures:
|
@@ -144,7 +167,9 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
|
|
144
167
|
Any sample may in fact be a file containing sample IDs, in which case the file will be read line by line
|
145
168
|
and each element will be a sample ID
|
146
169
|
|
147
|
-
If `
|
170
|
+
If `proj` is provided then `alternate_id_col` may also be provided.
|
171
|
+
If so then alternate IDs will be used to fetch samples. If alternate ids are
|
172
|
+
not present or not unique then fail.
|
148
173
|
"""
|
149
174
|
project_as_arg = bool(proj)
|
150
175
|
if proj or (proj := el_is_project_id(knex, sample_ids[0])):
|
@@ -155,7 +180,10 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
|
|
155
180
|
return list(proj.get_samples(cache=False))
|
156
181
|
else:
|
157
182
|
samples = []
|
158
|
-
|
183
|
+
sample_ids = flatten_list_of_els_and_files(sample_ids)
|
184
|
+
if alternate_id_col:
|
185
|
+
sample_ids = map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids)
|
186
|
+
for el in sample_ids:
|
159
187
|
if is_grn_or_uuid(el):
|
160
188
|
el = el.split(':')[-1]
|
161
189
|
samples.append(sample_from_uuid(knex, el))
|
geoseeq/knex.py
CHANGED
geoseeq/project.py
CHANGED
@@ -256,9 +256,14 @@ class Project(RemoteObject):
|
|
256
256
|
|
257
257
|
def get_sample_metadata(self, page_size=500):
    """Return a pandas dataframe with sample metadata.

    Pages through the project's `samples-list` endpoint, collecting the
    `results` of every page into one dataframe with one row per result.

    `page_size` is the number of rows requested per page (default 500,
    the value that was previously hard-coded).

    Returns an empty dataframe when no page contains any results.
    """
    # NOTE(review): the trailing `&&` only adds empty query parameters; it is
    # kept unchanged from the original request -- confirm whether it is needed.
    url = f"sample_groups/{self.uuid}/samples-list?page=1&page_size={page_size}&&"
    rows = []
    while url:
        blob = self.knex.get(url)
        rows.extend(blob["results"])
        # A falsy or absent "next" ends the pagination.
        url = blob.get("next")
    return pd.DataFrame(rows)
|
266
|
+
|
262
267
|
|
263
268
|
@property
|
264
269
|
def n_samples(self):
|
geoseeq/sample.py
CHANGED
@@ -205,18 +205,17 @@ class Sample(RemoteObject):
|
|
205
205
|
files[read_type] = {}
|
206
206
|
for folder_name, file_grns in folders.items():
|
207
207
|
files[read_type][folder_name] = []
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
[
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
)
|
208
|
+
if read_type in ["short_read::paired_end"]:
|
209
|
+
files[read_type][folder_name].append(
|
210
|
+
[
|
211
|
+
self._grn_to_file(file_grns[0]),
|
212
|
+
self._grn_to_file(file_grns[1]),
|
213
|
+
]
|
214
|
+
)
|
215
|
+
else:
|
216
|
+
files[read_type][folder_name].append(
|
217
|
+
self._grn_to_file(file_grns[0])
|
218
|
+
)
|
220
219
|
return files
|
221
220
|
|
222
221
|
def get_one_fasta(self):
|
geoseeq/utils.py
CHANGED
@@ -16,11 +16,19 @@ logger.addHandler(logging.NullHandler()) # No output unless configured by calli
|
|
16
16
|
def load_auth_profile(profile=""):
    """Return an (endpoint, token) pair used to authenticate.

    `profile` names an entry in the profiles file at PROFILES_PATH; an empty
    value selects "__default__".

    If the profiles file does not exist, the pair is taken from the
    GEOSEEQ_ENDPOINT and GEOSEEQ_API_TOKEN environment variables instead
    (the endpoint falls back to DEFAULT_ENDPOINT, the token to None).

    Raises KeyError if the profiles file exists but has no such profile.
    """
    profile = profile or "__default__"
    try:
        with open(PROFILES_PATH, "r") as f:
            profiles = json.load(f)
    except FileNotFoundError:
        # No profiles file: fall back to the environment.
        endpoint = environ.get("GEOSEEQ_ENDPOINT", DEFAULT_ENDPOINT)
        token = environ.get("GEOSEEQ_API_TOKEN", None)
        if not token:
            logger.warning("Accessing anonymously, functionality may be limited. Configure profiles or set GEOSEEQ_API_TOKEN to authenticate.")
        else:
            logger.debug("Using environment variables for authentication.")
        return endpoint, token

    if profile not in profiles:
        raise KeyError(f"Profile {profile} not found.")
    entry = profiles[profile]
    return entry["endpoint"], entry["token"]
|
24
32
|
|
25
33
|
|
26
34
|
def set_profile(token, endpoint=DEFAULT_ENDPOINT, profile="", overwrite=False):
|
@@ -4,25 +4,25 @@ geoseeq/blob_constructors.py,sha256=AkWpDQY0EdGMxF1p6eRspyHKubcUdiW4it-_Q7S2QWk,
|
|
4
4
|
geoseeq/bulk_creators.py,sha256=pdn-Dv7yv5SFv-PfDuQbuOnw2W4-BfIfRJVRAhM8U6s,2115
|
5
5
|
geoseeq/constants.py,sha256=z_ninEd7WsS5DaLntdR-sqAFib6Ie22jlhPKzLvLerw,449
|
6
6
|
geoseeq/file_system_cache.py,sha256=HzVZWtwLD2fjWWSo_UfWmGeBltm9He4lP_OqzKwNGWg,4138
|
7
|
-
geoseeq/knex.py,sha256=
|
7
|
+
geoseeq/knex.py,sha256=GXPsV65w1co1OvpB4pngH4f5I1p1AAxDWX7e2QkMXw0,7987
|
8
8
|
geoseeq/organization.py,sha256=bJkYL8_D-k6IYAaii2ZbxjwYnXy6lvu6iLXscxKlA3w,2542
|
9
9
|
geoseeq/pipeline.py,sha256=89mhWaecsKnm6tyRkdkaVp4dmZh62_v42Ze0oXf8OTY,9873
|
10
|
-
geoseeq/project.py,sha256=
|
10
|
+
geoseeq/project.py,sha256=kN6m1N4Tlud7saU03Sbir-oIBnXet_Cwi2OVVdaeag0,13929
|
11
11
|
geoseeq/remote_object.py,sha256=GYN6PKU7Zz3htIdpFjfZiFejzGqqJHbJyKlefM1Eixk,7151
|
12
|
-
geoseeq/sample.py,sha256=
|
12
|
+
geoseeq/sample.py,sha256=OU4H-U8XxsFosfa9wcWWrHq9NVT3nDKZcvPtPGGlLlk,8310
|
13
13
|
geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
|
14
14
|
geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
|
15
15
|
geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
|
16
|
-
geoseeq/utils.py,sha256=
|
16
|
+
geoseeq/utils.py,sha256=ZXpWb2MetUIeLrExiXb7IaOXYrW1pvrdP3o0KWzbwCs,4035
|
17
17
|
geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
|
18
18
|
geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
|
19
19
|
geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
|
20
20
|
geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
|
21
21
|
geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
|
22
|
-
geoseeq/cli/download.py,sha256=
|
22
|
+
geoseeq/cli/download.py,sha256=W3OswqpHg1thzW6CJ7IcSS0Te2LA2WfgYISQMSl4GQg,18921
|
23
23
|
geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
|
24
24
|
geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
|
25
|
-
geoseeq/cli/main.py,sha256=
|
25
|
+
geoseeq/cli/main.py,sha256=NPW0EHw2JSdyQ5_nCSPiCsEsy0ZKJ1u7WJa9RQfMmqI,3918
|
26
26
|
geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
|
27
27
|
geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
|
28
28
|
geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
|
@@ -35,7 +35,7 @@ geoseeq/cli/view.py,sha256=P-o2YKBkTrPzSI-JOv7xROc63HLSUygZNZsjp9TGvSw,6783
|
|
35
35
|
geoseeq/cli/shared_params/__init__.py,sha256=ckNHGCBJUpJbQmcYi7lW-lsC0xKud6CCMznwcG5Vte4,325
|
36
36
|
geoseeq/cli/shared_params/common_state.py,sha256=jiHZtL3TATMjEoqhbO7HT8KkLJr1QPsy7ZHT4qcoQ1E,4095
|
37
37
|
geoseeq/cli/shared_params/config.py,sha256=HQ0xQh_jdt3EKI5VXYqQXzo-s8Rm6YlziMyVX-kg598,1072
|
38
|
-
geoseeq/cli/shared_params/id_handlers.py,sha256=
|
38
|
+
geoseeq/cli/shared_params/id_handlers.py,sha256=KtzflnplYVkXsyqI5Ej6r-_BwQnuXVHPr7JcYumTKNc,10700
|
39
39
|
geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
|
40
40
|
geoseeq/cli/shared_params/opts_and_args.py,sha256=_DcJ-TqgrbBaeDd-kuHEx2gLZPQN6EHZYWh8Ag-d8Vg,2091
|
41
41
|
geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
|
@@ -85,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
|
|
85
85
|
tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
|
87
87
|
tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
|
88
|
-
geoseeq-0.6.
|
89
|
-
geoseeq-0.6.
|
90
|
-
geoseeq-0.6.
|
91
|
-
geoseeq-0.6.
|
92
|
-
geoseeq-0.6.
|
93
|
-
geoseeq-0.6.
|
88
|
+
geoseeq-0.6.10.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
|
89
|
+
geoseeq-0.6.10.dist-info/METADATA,sha256=moKunTjFSy4nQVg0X-Es5JmhsyPwtU9DZr-LIR6bCUE,4916
|
90
|
+
geoseeq-0.6.10.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
91
|
+
geoseeq-0.6.10.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
|
92
|
+
geoseeq-0.6.10.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
|
93
|
+
geoseeq-0.6.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|