geoseeq 0.6.8a2__py3-none-any.whl → 0.6.10__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
geoseeq/cli/download.py CHANGED
@@ -98,6 +98,7 @@ def cli_download_metadata(state, sample_ids):
98
98
 
99
99
  cores_option = click.option('--cores', default=1, help='Number of downloads to run in parallel')
100
100
  head_option = click.option('--head', default=None, type=int, help='Download the first N bytes of each file')
101
+ alt_id_option = click.option('--alt-sample-id', default=None, help='Specify an alternate sample id from the project metadata to id samples')
101
102
 
102
103
  @cli_download.command("files")
103
104
  @use_common_state
@@ -113,6 +114,7 @@ head_option = click.option('--head', default=None, type=int, help='Download the
113
114
  @click.option("--extension", multiple=True, help="Only download files with this extension. e.g. 'fastq.gz', 'bam', 'csv'")
114
115
  @click.option("--with-versions/--without-versions", default=False, help="Download all versions of a file, not just the latest")
115
116
  @ignore_errors_option
117
+ @alt_id_option
116
118
  @project_id_arg
117
119
  @sample_ids_arg
118
120
  def cli_download_files(
@@ -129,6 +131,7 @@ def cli_download_files(
129
131
  with_versions,
130
132
  download,
131
133
  ignore_errors,
134
+ alt_sample_id,
132
135
  project_id,
133
136
  sample_ids,
134
137
  ):
@@ -164,6 +167,13 @@ def cli_download_files(
164
167
  haib17CEM4890_H2NYMCCXY_SL254769 haib17CEM4890_H2NYMCCXY_SL254773 `# specify the samples by name` \\
165
168
  --folder-type sample --extension '.contigs.fasta' # filter for contig files
166
169
 
170
+ \b
171
+ # Download files from a sample in the metasub project using an alternate sample id called "barcode"
172
+ $ geoseeq download files 'MetaSUB Consortium/Cell Paper' `# specify the project` \\
173
+ 235183938 `# the alternate sample name (in this case a barcode number)` \\
174
+ --alt-sample-id barcode `# specify the alternate sample id column name` \\
175
+ --folder-type 'sample' `# only download files from sample folders`
176
+
167
177
  ---
168
178
 
169
179
  Command Arguments:
@@ -184,7 +194,7 @@ def cli_download_files(
184
194
  samples = []
185
195
  if sample_ids:
186
196
  logger.info(f"Fetching info for {len(sample_ids)} samples.")
187
- samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
197
+ samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
188
198
 
189
199
  response = proj.bulk_find_files(
190
200
  sample_uuids=[s.uuid for s in samples],
@@ -377,9 +387,21 @@ def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, i
377
387
  @click.option("--download/--urls-only", default=True, help="Download files or just print urls")
378
388
  @module_option(FASTQ_MODULE_NAMES, use_default=False)
379
389
  @ignore_errors_option
390
+ @alt_id_option
380
391
  @project_id_arg
381
392
  @sample_ids_arg
382
- def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_name, ignore_errors, project_id, sample_ids):
393
+ def cli_download_fastqs(state,
394
+ cores,
395
+ target_dir,
396
+ yes,
397
+ first,
398
+ download,
399
+ module_name,
400
+ ignore_errors,
401
+ alt_sample_id,
402
+ project_id,
403
+ sample_ids
404
+ ):
383
405
  """Download fastq files from a GeoSeeq project.
384
406
 
385
407
  This command will download fastq files from a GeoSeeq project. You can filter
@@ -401,6 +423,10 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
401
423
  # Download all fastq files from two samples in "My Org/My Project"
402
424
  $ geoseeq download fastqs "My Org/My Project" S1 S2
403
425
 
426
+ \b
427
+ # Download all fastq files from a single sample using an alternate sample id called "barcode"
428
+ $ geoseeq download fastqs 'MetaSUB Consortium/Cell Paper' 235183938 --alt-sample-id barcode
429
+
404
430
  ---
405
431
 
406
432
  Command Arguments:
@@ -422,7 +448,7 @@ def cli_download_fastqs(state, cores, target_dir, yes, first, download, module_n
422
448
  samples = []
423
449
  if sample_ids:
424
450
  logger.info(f"Fetching info for {len(sample_ids)} samples.")
425
- samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj)
451
+ samples = handle_multiple_sample_ids(knex, sample_ids, proj=proj, alternate_id_col=alt_sample_id)
426
452
  else:
427
453
  logger.info("Fetching info for all samples in project.")
428
454
  samples = proj.get_samples()
geoseeq/cli/main.py CHANGED
@@ -54,7 +54,7 @@ def version():
54
54
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
55
55
  Run `geoseeq eula show` to view the EULA.
56
56
  """
57
- click.echo('0.6.8a2') # remember to update setup
57
+ click.echo('0.6.10') # remember to update pyproject.toml
58
58
 
59
59
 
60
60
  @main.group('advanced')
@@ -133,7 +133,30 @@ def handle_folder_id(knex, folder_id, yes=False, private=True, create=True):
133
133
  raise ValueError('sample_folder_id must be a UUID, an organization name and project name, or a GRN')
134
134
 
135
135
 
136
- def handle_multiple_sample_ids(knex, sample_ids, proj=None):
136
def map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids):
    """Return a list of sample UUIDs, one per distinct alternate ID requested.

    `proj` is a project object; the table returned by its `get_sample_metadata()`
        is read for a `uuid` column and the `alternate_id_col` column
    `alternate_id_col` is the name of the column containing alternate IDs
    `sample_ids` is an iterable of alternate IDs

    UUIDs are returned in the order the alternate IDs were given. An ID that is
    listed more than once is only resolved once. IDs are compared by their string
    form so that IDs typed on the command line match numeric metadata columns.

    Raises ValueError if the column does not exist, if any requested ID has no
    sample, or if any requested ID belongs to more than one sample.
    """
    metadata = proj.get_sample_metadata()
    if alternate_id_col not in metadata:
        raise ValueError(f'Column "{alternate_id_col}" not found in project metadata')

    # dict.fromkeys drops repeated IDs while keeping the caller's order; it also
    # materialises `sample_ids`, which may be a one-shot iterator.
    wanted = list(dict.fromkeys(_alt_id_key(el) for el in sample_ids))
    wanted_lookup = set(wanted)

    # Samples with no value in the alternate ID column can never be selected.
    candidates = metadata[["uuid", alternate_id_col]].dropna(subset=[alternate_id_col])

    # Only the requested IDs are collected: the column as a whole may contain
    # duplicates as long as the IDs we were asked for are unambiguous.
    uuids_by_alt_id = {}
    for uuid, alt_id in candidates.itertuples(index=False, name=None):
        key = _alt_id_key(alt_id)
        if key in wanted_lookup:
            uuids_by_alt_id.setdefault(key, []).append(uuid)

    if not uuids_by_alt_id:
        raise ValueError('No samples found with the given alternate IDs in list')

    # Check each ID on its own. Comparing row counts lets a missing ID be masked
    # by a duplicated one, and miscounts IDs that were listed twice.
    missing = [key for key in wanted if key not in uuids_by_alt_id]
    if missing:
        raise ValueError(
            f'Not all alternate IDs in list are found: {", ".join(missing)}'
        )
    ambiguous = [key for key in wanted if len(uuids_by_alt_id[key]) > 1]
    if ambiguous:
        raise ValueError(
            f'More than one sample found with the same alternate ID: {", ".join(ambiguous)}'
        )
    return [uuids_by_alt_id[key][0] for key in wanted]


def _alt_id_key(value):
    """Return the string form of an alternate ID used for comparisons."""
    # pandas stores an integer column that has missing values as floats, so
    # 235183938 comes back as 235183938.0; undo that before stringifying.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return str(value)
157
+
158
+
159
+ def handle_multiple_sample_ids(knex, sample_ids, proj=None, alternate_id_col=None):
137
160
  """Return a list of fetched sample objects
138
161
 
139
162
  `sample_ids` may have three different structures:
@@ -144,7 +167,9 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
144
167
  Any sample may in fact be a file containing sample IDs, in which case the file will be read line by line
145
168
  and each element will be a sample ID
146
169
 
147
- If `one_project` is True, all samples must be from the same project
170
+ If `proj` is provided then `alternate_id_col` may also be provided.
171
+ If so then alternate IDs will be used to fetch samples. If alternate ids are
172
+ not present or not unique then fail.
148
173
  """
149
174
  project_as_arg = bool(proj)
150
175
  if proj or (proj := el_is_project_id(knex, sample_ids[0])):
@@ -155,7 +180,10 @@ def handle_multiple_sample_ids(knex, sample_ids, proj=None):
155
180
  return list(proj.get_samples(cache=False))
156
181
  else:
157
182
  samples = []
158
- for el in flatten_list_of_els_and_files(sample_ids):
183
+ sample_ids = flatten_list_of_els_and_files(sample_ids)
184
+ if alternate_id_col:
185
+ sample_ids = map_alternate_ids_to_uuids(proj, alternate_id_col, sample_ids)
186
+ for el in sample_ids:
159
187
  if is_grn_or_uuid(el):
160
188
  el = el.split(':')[-1]
161
189
  samples.append(sample_from_uuid(knex, el))
geoseeq/knex.py CHANGED
@@ -214,7 +214,8 @@ class Knex:
214
214
  """Return a knex authenticated with a profile."""
215
215
  endpoint, token = load_auth_profile(profile)
216
216
  knex = cls(endpoint)
217
- knex.add_api_token(token)
217
+ if token:
218
+ knex.add_api_token(token)
218
219
  return knex
219
220
 
220
221
 
geoseeq/project.py CHANGED
@@ -256,9 +256,14 @@ class Project(RemoteObject):
256
256
 
257
257
  def get_sample_metadata(self):
258
258
  """Return a pandas dataframe with sample metadata."""
259
- url = f"sample_groups/{self.uuid}/metadata"
260
- blob = self.knex.get(url)
261
- return pd.DataFrame.from_dict(blob, orient="index")
259
+ url = f"sample_groups/{self.uuid}/samples-list?page=1&page_size=500&&"
260
+ rows = []
261
+ while url:
262
+ blob = self.knex.get(url)
263
+ rows.extend(blob["results"])
264
+ url = blob["next"]
265
+ return pd.DataFrame(rows)
266
+
262
267
 
263
268
  @property
264
269
  def n_samples(self):
geoseeq/sample.py CHANGED
@@ -205,18 +205,17 @@ class Sample(RemoteObject):
205
205
  files[read_type] = {}
206
206
  for folder_name, file_grns in folders.items():
207
207
  files[read_type][folder_name] = []
208
- for file_grn in file_grns:
209
- if read_type in ["short_read::paired_end"]:
210
- files[read_type][folder_name].append(
211
- [
212
- self._grn_to_file(file_grn[0]),
213
- self._grn_to_file(file_grn[1]),
214
- ]
215
- )
216
- else:
217
- files[read_type][folder_name].append(
218
- self._grn_to_file(file_grn[0])
219
- )
208
+ if read_type in ["short_read::paired_end"]:
209
+ files[read_type][folder_name].append(
210
+ [
211
+ self._grn_to_file(file_grns[0]),
212
+ self._grn_to_file(file_grns[1]),
213
+ ]
214
+ )
215
+ else:
216
+ files[read_type][folder_name].append(
217
+ self._grn_to_file(file_grns[0])
218
+ )
220
219
  return files
221
220
 
222
221
  def get_one_fasta(self):
geoseeq/utils.py CHANGED
@@ -16,11 +16,19 @@ logger.addHandler(logging.NullHandler()) # No output unless configured by calli
16
16
  def load_auth_profile(profile=""):
17
17
  """Return an endpoint and a token"""
18
18
  profile = profile or "__default__"
19
- with open(PROFILES_PATH, "r") as f:
20
- profiles = json.load(f)
21
- if profile in profiles:
22
- return profiles[profile]["endpoint"], profiles[profile]["token"]
23
- raise KeyError(f"Profile {profile} not found.")
19
+ try:
20
+ with open(PROFILES_PATH, "r") as f:
21
+ profiles = json.load(f)
22
+ if profile in profiles:
23
+ return profiles[profile]["endpoint"], profiles[profile]["token"]
24
+ raise KeyError(f"Profile {profile} not found.")
25
+ except FileNotFoundError:
26
+ endpoint, token = environ.get("GEOSEEQ_ENDPOINT", DEFAULT_ENDPOINT), environ.get("GEOSEEQ_API_TOKEN", None)
27
+ if token:
28
+ logger.debug("Using environment variables for authentication.")
29
+ else:
30
+ logger.warning("Accessing anonymously, functionality may be limited. Configure profiles or set GEOSEEQ_API_TOKEN to authenticate.")
31
+ return endpoint, token
24
32
 
25
33
 
26
34
  def set_profile(token, endpoint=DEFAULT_ENDPOINT, profile="", overwrite=False):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geoseeq
3
- Version: 0.6.8a2
3
+ Version: 0.6.10
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -4,25 +4,25 @@ geoseeq/blob_constructors.py,sha256=AkWpDQY0EdGMxF1p6eRspyHKubcUdiW4it-_Q7S2QWk,
4
4
  geoseeq/bulk_creators.py,sha256=pdn-Dv7yv5SFv-PfDuQbuOnw2W4-BfIfRJVRAhM8U6s,2115
5
5
  geoseeq/constants.py,sha256=z_ninEd7WsS5DaLntdR-sqAFib6Ie22jlhPKzLvLerw,449
6
6
  geoseeq/file_system_cache.py,sha256=HzVZWtwLD2fjWWSo_UfWmGeBltm9He4lP_OqzKwNGWg,4138
7
- geoseeq/knex.py,sha256=SlK3Z9Y51APecIeJep4eNvFqlwKpQzvtokBnKe0L5Oc,7965
7
+ geoseeq/knex.py,sha256=GXPsV65w1co1OvpB4pngH4f5I1p1AAxDWX7e2QkMXw0,7987
8
8
  geoseeq/organization.py,sha256=bJkYL8_D-k6IYAaii2ZbxjwYnXy6lvu6iLXscxKlA3w,2542
9
9
  geoseeq/pipeline.py,sha256=89mhWaecsKnm6tyRkdkaVp4dmZh62_v42Ze0oXf8OTY,9873
10
- geoseeq/project.py,sha256=pVx4etzkYmYAYwcPJsjN9PrI-7GZEkAaz2Q5GFdng1s,13810
10
+ geoseeq/project.py,sha256=kN6m1N4Tlud7saU03Sbir-oIBnXet_Cwi2OVVdaeag0,13929
11
11
  geoseeq/remote_object.py,sha256=GYN6PKU7Zz3htIdpFjfZiFejzGqqJHbJyKlefM1Eixk,7151
12
- geoseeq/sample.py,sha256=TNtYL8ph2pG0R5ukDC4J2AwdA0xDK9Bx7dVQmQKIMQs,8394
12
+ geoseeq/sample.py,sha256=OU4H-U8XxsFosfa9wcWWrHq9NVT3nDKZcvPtPGGlLlk,8310
13
13
  geoseeq/search.py,sha256=gawad6Cx5FxJBPlYkXWb-UKAO-UC0_yhvyU9Ca1kaNI,3388
14
14
  geoseeq/upload_download_manager.py,sha256=FMRqLLg77o1qFbWZc5Yc86a2pjeZrrn1rHJr1iaxKCU,8757
15
15
  geoseeq/user.py,sha256=tol8i1UGLRrbMw5jeJDnna1ikRgrCDd50Jxz0a1lSgg,690
16
- geoseeq/utils.py,sha256=PDRiEQIZYTcfEV9AYvloQVvfqs5JaebcFZodAa2SUW8,3577
16
+ geoseeq/utils.py,sha256=ZXpWb2MetUIeLrExiXb7IaOXYrW1pvrdP3o0KWzbwCs,4035
17
17
  geoseeq/work_orders.py,sha256=5uLVVfdKE8qh4gGaHkdBpXJGRTujuSg59knWCqEET4A,8071
18
18
  geoseeq/cli/__init__.py,sha256=4WnK87K5seRK3SGJAxNWnQTqyg5uBhdhrOrzB1D4b3M,24
19
19
  geoseeq/cli/constants.py,sha256=Do5AUf9lMO9_P8KpFJ3XwwFBAWsxSjZ6sx9_QEGyC_c,176
20
20
  geoseeq/cli/copy.py,sha256=02U9kdrAIbbM8MlRMLL6p-LMYFSuRObE3h5jyvcL__M,2275
21
21
  geoseeq/cli/detail.py,sha256=q8Suu-j2k18knfSVFG-SWWGNsKM-n8y9RMA3LcIIi9Y,4132
22
- geoseeq/cli/download.py,sha256=QTNA7qFjCdRJg2vKbAm5yH8WGlcF5fb5bSjm5QiI4XE,17768
22
+ geoseeq/cli/download.py,sha256=W3OswqpHg1thzW6CJ7IcSS0Te2LA2WfgYISQMSl4GQg,18921
23
23
  geoseeq/cli/fastq_utils.py,sha256=-bmeQLaiMBm57zWOF0R5OlWTU0_3sh1JBC1RYw2BOFM,3083
24
24
  geoseeq/cli/get_eula.py,sha256=79mbUwyiF7O1r0g6UTxG9kJGQEqKuH805E6eLkPC6Y4,997
25
- geoseeq/cli/main.py,sha256=iZtDhmUGO7zKTG6pA5WG1FosxwR8BYp6n6b5tSJhfhM,3910
25
+ geoseeq/cli/main.py,sha256=NPW0EHw2JSdyQ5_nCSPiCsEsy0ZKJ1u7WJa9RQfMmqI,3918
26
26
  geoseeq/cli/manage.py,sha256=wGXAcVaXqE5JQEU8Jh6OlHr02nB396bpS_SFcOZdrEo,5929
27
27
  geoseeq/cli/progress_bar.py,sha256=p1Xl01nkYxSBZCB30ue2verIIi22W93m3ZAMAxipD0g,738
28
28
  geoseeq/cli/project.py,sha256=V5SdXm2Hwo2lxrkpwRDedw-mAE4XnM2uwT-Gj1D90VQ,3030
@@ -35,7 +35,7 @@ geoseeq/cli/view.py,sha256=P-o2YKBkTrPzSI-JOv7xROc63HLSUygZNZsjp9TGvSw,6783
35
35
  geoseeq/cli/shared_params/__init__.py,sha256=ckNHGCBJUpJbQmcYi7lW-lsC0xKud6CCMznwcG5Vte4,325
36
36
  geoseeq/cli/shared_params/common_state.py,sha256=jiHZtL3TATMjEoqhbO7HT8KkLJr1QPsy7ZHT4qcoQ1E,4095
37
37
  geoseeq/cli/shared_params/config.py,sha256=HQ0xQh_jdt3EKI5VXYqQXzo-s8Rm6YlziMyVX-kg598,1072
38
- geoseeq/cli/shared_params/id_handlers.py,sha256=501K9sCVkI0YGDQ62vXk_DM5lMMDrdB5spIS3cw9x9U,9299
38
+ geoseeq/cli/shared_params/id_handlers.py,sha256=KtzflnplYVkXsyqI5Ej6r-_BwQnuXVHPr7JcYumTKNc,10700
39
39
  geoseeq/cli/shared_params/obj_getters.py,sha256=ZSkt6LnDkVFlNVYKgLrjzg60-6BthZMr3eeD3HNqzac,2741
40
40
  geoseeq/cli/shared_params/opts_and_args.py,sha256=_DcJ-TqgrbBaeDd-kuHEx2gLZPQN6EHZYWh8Ag-d8Vg,2091
41
41
  geoseeq/cli/upload/__init__.py,sha256=3C9_S9t7chmYU-2ot89NV03x-EtmsjibulErKaU9w1k,627
@@ -85,9 +85,9 @@ geoseeq/vc/vc_stub.py,sha256=IQr8dI0zsWKVAeY_5ybDD6n49_3othcgfHS3P0O9tuY,3110
85
85
  tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  tests/test_api_client.py,sha256=TS5njc5pcPP_Ycy-ljcfPVT1hQRBsFVdQ0lCqBmoesU,12810
87
87
  tests/test_plotting.py,sha256=TcTu-2ARr8sxZJ7wPQxmbs3-gHw7uRvsgrhhhg0qKik,784
88
- geoseeq-0.6.8a2.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
89
- geoseeq-0.6.8a2.dist-info/METADATA,sha256=Z44188Ooqx6AxBedAxOXv-m1xPrMIiGKwLloBh09Veo,4917
90
- geoseeq-0.6.8a2.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
91
- geoseeq-0.6.8a2.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
92
- geoseeq-0.6.8a2.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
93
- geoseeq-0.6.8a2.dist-info/RECORD,,
88
+ geoseeq-0.6.10.dist-info/LICENSE,sha256=IuhIl1XCxXLPLJT_coN1CNqQU4Khlq7x4IdW7ioOJD8,1067
89
+ geoseeq-0.6.10.dist-info/METADATA,sha256=moKunTjFSy4nQVg0X-Es5JmhsyPwtU9DZr-LIR6bCUE,4916
90
+ geoseeq-0.6.10.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
91
+ geoseeq-0.6.10.dist-info/entry_points.txt,sha256=yF-6KDM8zXib4Al0qn49TX-qM7PUkWUIcYtsgt36rjM,45
92
+ geoseeq-0.6.10.dist-info/top_level.txt,sha256=zZk7mmeaqAYqFJG8nq2DTgSQPbflRjJwkDIhNURPDEU,14
93
+ geoseeq-0.6.10.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.2.0)
2
+ Generator: setuptools (75.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5