geoseeq 0.6.14.dev7__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/PKG-INFO +3 -2
  2. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/download.py +2 -2
  3. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/main.py +1 -1
  4. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/upload/__init__.py +8 -3
  5. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/upload/upload.py +114 -0
  6. geoseeq-0.7.0/geoseeq/cli/upload/upload_advanced.py +322 -0
  7. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/upload/upload_reads.py +13 -4
  8. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/constants.py +4 -1
  9. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/from_ids.py +20 -14
  10. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/utils.py +17 -0
  11. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/result_file.py +2 -1
  12. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/result_folder.py +8 -0
  13. geoseeq-0.7.0/geoseeq/result/smart_objects.py +40 -0
  14. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/smart_table.py +3 -2
  15. geoseeq-0.7.0/geoseeq/smart_tree.py +57 -0
  16. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/upload_download_manager.py +16 -6
  17. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/PKG-INFO +3 -2
  18. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/SOURCES.txt +2 -0
  19. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/pyproject.toml +1 -1
  20. geoseeq-0.6.14.dev7/geoseeq/cli/upload/upload_advanced.py +0 -92
  21. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/LICENSE +0 -0
  22. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/README.md +0 -0
  23. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/__init__.py +0 -0
  24. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/app.py +0 -0
  25. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/blob_constructors.py +0 -0
  26. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/bulk_creators.py +0 -0
  27. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/__init__.py +0 -0
  28. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/constants.py +0 -0
  29. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/copy.py +0 -0
  30. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/detail.py +0 -0
  31. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/fastq_utils.py +0 -0
  32. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/find_grn.py +0 -0
  33. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/get_eula.py +0 -0
  34. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/manage.py +0 -0
  35. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/progress_bar.py +0 -0
  36. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/project.py +0 -0
  37. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/raw.py +0 -0
  38. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/run.py +0 -0
  39. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/search.py +0 -0
  40. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/__init__.py +0 -0
  41. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/common_state.py +0 -0
  42. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/config.py +0 -0
  43. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  44. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  45. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  46. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/user.py +0 -0
  47. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/utils.py +0 -0
  48. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/cli/view.py +0 -0
  49. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/__init__.py +0 -0
  50. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/ncbi/__init__.py +0 -0
  51. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/ncbi/api.py +0 -0
  52. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  53. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/ncbi/cli.py +0 -0
  54. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  55. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/file_system_cache.py +0 -0
  56. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/__init__.py +0 -0
  57. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/from_blobs.py +0 -0
  58. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/from_names.py +0 -0
  59. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/from_uuids.py +0 -0
  60. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/id_constructors/resolvers.py +0 -0
  61. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/knex.py +0 -0
  62. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/organization.py +0 -0
  63. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/pipeline.py +0 -0
  64. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/__init__.py +0 -0
  65. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/constants.py +0 -0
  66. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/highcharts.py +0 -0
  67. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/map/__init__.py +0 -0
  68. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/map/base_layer.py +0 -0
  69. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/map/map.py +0 -0
  70. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/map/overlay.py +0 -0
  71. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/plotting/selectable.py +0 -0
  72. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/project.py +0 -0
  73. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/remote_object.py +0 -0
  74. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/__init__.py +0 -0
  75. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/bioinfo.py +0 -0
  76. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/file_chunker.py +0 -0
  77. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/file_download.py +0 -0
  78. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/file_upload.py +0 -0
  79. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/resumable_download_tracker.py +0 -0
  80. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/resumable_upload_tracker.py +0 -0
  81. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/result/utils.py +0 -0
  82. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/sample.py +0 -0
  83. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/search.py +0 -0
  84. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/user.py +0 -0
  85. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/utils.py +0 -0
  86. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/__init__.py +0 -0
  87. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/checksum.py +0 -0
  88. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/cli.py +0 -0
  89. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/clone.py +0 -0
  90. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/constants.py +0 -0
  91. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/vc_cache.py +0 -0
  92. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/vc_dir.py +0 -0
  93. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/vc_sample.py +0 -0
  94. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/vc/vc_stub.py +0 -0
  95. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq/work_orders.py +0 -0
  96. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/dependency_links.txt +0 -0
  97. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/entry_points.txt +0 -0
  98. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/requires.txt +0 -0
  99. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/geoseeq.egg-info/top_level.txt +0 -0
  100. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/setup.cfg +0 -0
  101. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/setup.py +0 -0
  102. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/tests/__init__.py +0 -0
  103. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/tests/test_api_client.py +0 -0
  104. {geoseeq-0.6.14.dev7 → geoseeq-0.7.0}/tests/test_plotting.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: geoseeq
3
- Version: 0.6.14.dev7
3
+ Version: 0.7.0
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Author: David C. Danko
6
6
  Author-email: "David C. Danko" <dcdanko@biotia.io>
@@ -18,6 +18,7 @@ Requires-Dist: pandas
18
18
  Requires-Dist: biopython
19
19
  Requires-Dist: tqdm
20
20
  Dynamic: author
21
+ Dynamic: license-file
21
22
 
22
23
  # Geoseeq API Client
23
24
 
@@ -414,7 +414,7 @@ def _make_read_configs(download_results, config_dir="."):
414
414
  "reads_1": ["small.fq.gz"],
415
415
  "reads_2": [],
416
416
  "fastq_checksum": "",
417
- "data_type": "short-read",
417
+ "data_type": "single",
418
418
  "bdx_result_dir": "results",
419
419
  "geoseeq_uuid": "05bf22e9-9d25-42db-af25-31bc538a7006"
420
420
  }
@@ -428,7 +428,7 @@ def _make_read_configs(download_results, config_dir="."):
428
428
  "reads_1": [],
429
429
  "reads_2": [],
430
430
  "fastq_checksum": "",
431
- "data_type": "short-read",
431
+ "data_type": read_type,
432
432
  "bdx_result_dir": "results",
433
433
  "geoseeq_uuid": sample.uuid,
434
434
  }
@@ -55,7 +55,7 @@ def version():
55
55
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
56
56
  Run `geoseeq eula show` to view the EULA.
57
57
  """
58
- click.echo('0.6.14dev7') # remember to update pyproject.toml
58
+ click.echo('0.7.0') # remember to update pyproject.toml
59
59
 
60
60
 
61
61
  @main.group('advanced')
@@ -4,9 +4,11 @@ from .upload import (
4
4
  cli_upload_file,
5
5
  cli_upload_folder,
6
6
  cli_metadata,
7
+ cli_upload_smart_table,
8
+ cli_upload_smart_tree,
7
9
  )
8
10
  from .upload_reads import cli_upload_reads_wizard
9
- from .upload_advanced import cli_find_urls_for_reads
11
+ from .upload_advanced import cli_find_urls_for_reads, cli_upload_from_config
10
12
 
11
13
  @click.group('upload')
12
14
  def cli_upload():
@@ -17,10 +19,13 @@ cli_upload.add_command(cli_upload_reads_wizard)
17
19
  cli_upload.add_command(cli_upload_file)
18
20
  cli_upload.add_command(cli_upload_folder)
19
21
  cli_upload.add_command(cli_metadata)
22
+ cli_upload.add_command(cli_upload_smart_table)
23
+ cli_upload.add_command(cli_upload_smart_tree)
20
24
 
21
- @click.group('upload')
25
+ @cli_upload.group('advanced')
22
26
  def cli_upload_advanced():
23
27
  """Advanced tools to upload files to GeoSeeq."""
24
28
  pass
25
29
 
26
- cli_upload_advanced.add_command(cli_find_urls_for_reads)
30
+ cli_upload_advanced.add_command(cli_find_urls_for_reads)
31
+ cli_upload_advanced.add_command(cli_upload_from_config)
@@ -263,3 +263,117 @@ def cli_metadata(state, overwrite, yes, private, create, index_col, encoding, pr
263
263
  sample.metadata = new_meta
264
264
  sample.idem()
265
265
  click.echo(f'Wrote metadata for {len(samples)} samples')
266
+
267
+
268
@click.command('smart-table')
@use_common_state
@overwrite_option
@yes_option
@private_option
@click.option('-n', '--geoseeq-file-name', default=None,
              help='Specify a different name for the file on GeoSeeq than the local file name.',
              show_default=True)
@folder_id_arg
@click.argument('file_path', type=click.Path(exists=True), nargs=1)
def cli_upload_smart_table(state, overwrite, yes, private, folder_id, geoseeq_file_name, file_path):
    """Upload a smart table to GeoSeeq.

    This command uploads a smart table to a project or sample on GeoSeeq. It can be used to upload
    a single file to a folder at once.

    ---

    Example Usage:

    \b
    # Upload a smart table from a file
    $ geoseeq upload smart-table "My Org/My Project/My Sample/My Folder" /path/to/my_table.csv

    \b
    # Upload a smart table from a file but name it "My Smart Table" on GeoSeeq
    $ geoseeq upload smart-table "My Org/My Project/My Sample/My Folder" /path/to/my_table.csv -n "My Smart Table"

    ---

    Command Arguments:

    [FOLDER_ID] Can be a folder UUID, GeoSeeq Resource Number (GRN), or an
    names for an org, project, sample, folder separated by a slash. Can exclude
    the sample name if the folder is for a project.

    [FILE_PATH] A path to a file on your local machine.

    ---
    """
    # NOTE: the docstring above is the command's --help text, so it is kept verbatim.
    knex = state.get_knex()
    result_folder = handle_folder_id(knex, folder_id, yes=yes, private=private)

    # Without -n, the remote file takes the base name of the local file.
    remote_name = geoseeq_file_name or basename(file_path)

    # Refuse to clobber an existing remote file unless --overwrite was given.
    if not overwrite:
        if result_folder.result_file(remote_name).exists():
            raise click.UsageError(f'{remote_name} already exists in {result_folder}. Use --overwrite to overwrite it.')

    # View the target result file as a smart table and load the CSV into it.
    table = result_folder.result_file(remote_name).as_smart_table()
    table.import_csv(file_path)
320
+
321
+
322
@click.command('smart-tree')
@use_common_state
@click.option('-m/-nm', '--make-name-map/--no-name-map', default=True, help="Create a sample name map with all samples currently in the project.")
@overwrite_option
@yes_option
@private_option
@click.option('-n', '--geoseeq-file-name', default=None,
              help='Specify a different name for the file on GeoSeeq than the local file name.',
              show_default=True)
@folder_id_arg
@click.argument('newick_file_path', type=click.Path(exists=True), nargs=1)
def cli_upload_smart_tree(state, make_name_map, overwrite, yes, private, folder_id, geoseeq_file_name, newick_file_path):
    """Upload a smart tree to GeoSeeq.

    This command uploads a smart tree to a project or sample on GeoSeeq. It can be used to upload
    a single file to a folder at once.

    ---

    Example Usage:

    \b
    # Upload a smart tree from a file
    $ geoseeq upload smart-tree "My Org/My Project/My Sample/My Folder" /path/to/my_tree.nwk

    \b
    # Upload a smart tree from a file but name it "My Smart Tree" on GeoSeeq
    $ geoseeq upload smart-tree "My Org/My Project/My Sample/My Folder" /path/to/my_tree.nwk -n "My Smart Tree"

    ---

    Command Arguments:

    [FOLDER_ID] Can be a folder UUID, GeoSeeq Resource Number (GRN), or an
    names for an org, project, sample, folder separated by a slash. Can exclude
    the sample name if the folder is for a project.

    [NEWICK_FILE_PATH] A path to a newick file on your local machine.

    ---
    """
    # NOTE: the docstring above is the command's --help text, so it is kept verbatim.
    knex = state.get_knex()
    result_folder = handle_folder_id(knex, folder_id, yes=yes, private=private)

    # Without -n, the remote file takes the base name of the local newick file.
    remote_name = geoseeq_file_name or basename(newick_file_path)

    # Refuse to clobber an existing remote file unless --overwrite was given.
    if not overwrite:
        if result_folder.result_file(remote_name).exists():
            raise click.UsageError(f'{remote_name} already exists in {result_folder}. Use --overwrite to overwrite it.')

    tree = result_folder.result_file(remote_name).as_smart_tree()
    with open(newick_file_path) as handle:
        newick_text = handle.read()
    tree.create_from_newick(newick_text)

    # Optionally register every sample of the folder's project in the tree's name map.
    if make_name_map:
        tree.add_all_samples_to_map(result_folder.project)
    tree.idem()
@@ -0,0 +1,322 @@
1
+ import logging
2
+ import click
3
+ import requests
4
+ from os.path import basename, getsize
5
+ from .upload_reads import (
6
+ _make_in_process_logger,
7
+ _get_regex,
8
+ _group_files,
9
+ flatten_list_of_fastxs,
10
+ )
11
+
12
+ from multiprocessing import Pool, current_process
13
+
14
+ from geoseeq.cli.constants import *
15
+ from geoseeq.cli.shared_params import (
16
+ handle_project_id,
17
+ private_option,
18
+ module_option,
19
+ project_id_arg,
20
+ overwrite_option,
21
+ yes_option,
22
+ use_common_state,
23
+ )
24
+
25
+ from geoseeq.constants import FASTQ_MODULE_NAMES
26
+ from geoseeq.cli.progress_bar import PBarManager
27
+ import pandas as pd
28
+ from typing import Dict, Optional
29
+ from geoseeq.id_constructors.from_ids import (
30
+ org_from_id,
31
+ project_from_id,
32
+ sample_from_id,
33
+ result_folder_from_id,
34
+ result_file_from_id,
35
+ )
36
+ from geoseeq.upload_download_manager import GeoSeeqUploadManager
37
+
38
+ logger = logging.getLogger('geoseeq_api')
39
+
40
+
41
+ def _keep_only_authentication_url_args(url):
42
+ """Return a url with only the S3 authentication args"""
43
+ root, args = url.split('?')
44
+ args = args.split('&')
45
+ args = [arg for arg in args if arg.startswith('AWSAccessKeyId=') or arg.startswith('Signature=')]
46
+ return root + '?' + '&'.join(args)
47
+
48
+
49
def _get_url_for_one_file(args):
    """Return `(filepath, url)` for one file, or None when the file is skipped.

    `args` is a single tuple `(result_file, filepath, overwrite, log_level)`
    so the function can be mapped over a multiprocessing pool.

    The file is skipped (None is returned) when it already exists remotely
    and `overwrite` is false.
    """
    result_file, filepath, overwrite, log_level = args
    _make_in_process_logger(log_level)

    already_present = result_file.exists()
    if already_present and not overwrite:
        return None

    remote_file = result_file.idem()
    n_bytes = getsize(filepath)
    # NOTE(review): the third argument is file size + 1, presumably a chunk size
    # that forces a single upload part -- confirm against _prep_multipart_upload.
    _, part_urls = remote_file._prep_multipart_upload(filepath, n_bytes, n_bytes + 1, {})
    return filepath, _keep_only_authentication_url_args(part_urls['1'])
60
+
61
+
62
def _find_target_urls(groups, module_name, lib, filepaths, overwrite, cores, state):
    """Yield `(file_path, target_url)` pairs for every file in `groups`.

    For each group the sample and its read folder are created if needed
    (`idem`), then one job per read file is handed to a pool of `cores`
    worker processes running `_get_url_for_one_file`.

    Files the worker skips (it returns None when the file already exists and
    `overwrite` is false) are silently omitted instead of crashing the
    tuple unpacking. Results arrive in completion order, not input order.
    """
    jobs = []
    for group in groups:
        sample = lib.sample(group['sample_name']).idem()
        read_folder = sample.result_folder(module_name).idem()
        for field_name, path in group['fields'].items():
            jobs.append((
                read_folder.read_file(field_name),
                filepaths[path],
                overwrite,
                state.log_level,
            ))

    with Pool(cores) as pool:
        for outcome in pool.imap_unordered(_get_url_for_one_file, jobs):
            if outcome is None:
                # Worker skipped this file; there is no URL to report.
                continue
            file_name, target_url = outcome
            yield file_name, target_url
80
+
81
+
82
@click.command('read-links')
@use_common_state
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
@overwrite_option
@yes_option
@click.option('--regex', default=None, help='An optional regex to use to extract sample names from the file names')
@private_option
@module_option(FASTQ_MODULE_NAMES)
@project_id_arg
@click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
def cli_find_urls_for_reads(state, cores, overwrite, yes, regex, private, module_name, project_id, fastq_files):
    """Print a two column list with filenames and a target storage URL
    """
    knex = state.get_knex()
    proj = handle_project_id(knex, project_id, yes, private)
    # Key each file by its base name; grouping works on names, not paths.
    filepaths = {basename(line): line for line in flatten_list_of_fastxs(fastq_files)}
    click.echo(f'Found {len(filepaths)} files to upload.', err=True)
    regex = _get_regex(knex, filepaths, module_name, proj, regex)
    # _group_files takes a required name_map argument; this command offers no
    # sample renaming option, so pass None explicitly.
    groups = _group_files(knex, filepaths, module_name, regex, yes, name_map=None)
    for file_name, target_url in _find_target_urls(groups, module_name, proj, filepaths, overwrite, cores, state):
        print(f'{file_name}\t{target_url}', file=state.outfile)
103
+
104
+
105
def _get_result_file_from_record_with_ids(knex, record: Dict) -> Dict:
    """Resolve as much of a config record as possible from self-contained ids.

    The record's fields are tried from most to least specific (filename,
    folder, sample, project). The first field that resolves on its own
    determines its parent objects and the search stops there. A lookup that
    raises ValueError is treated as "not resolvable by id" and the next
    field is tried. If nothing resolves, the organization is looked up from
    `record['organization']`.

    Returns a dict with 'org', 'project', 'sample', 'folder', and
    'result_file' keys; entries that were not resolved stay None.
    """
    objects = dict.fromkeys(('org', 'project', 'sample', 'folder', 'result_file'))

    def _fill_parents_of_folder():
        # A folder exposing `sample` is walked via that sample; otherwise it
        # hangs directly off a project.
        if hasattr(objects['folder'], 'sample'):
            objects['sample'] = objects['folder'].sample
            objects['project'] = objects['sample'].project
        else:
            objects['project'] = objects['folder'].project
        objects['org'] = objects['project'].org

    # 1. The file itself.
    try:
        objects['result_file'] = result_file_from_id(knex, record['filename'])
        objects['folder'] = objects['result_file'].folder
        _fill_parents_of_folder()
    except ValueError:
        pass  # fall through to the folder
    else:
        return objects

    # 2. The folder.
    try:
        objects['folder'] = result_folder_from_id(knex, record['folder'])
        _fill_parents_of_folder()
    except ValueError:
        pass  # fall through to the sample
    else:
        return objects

    # 3. The sample, only when the record names one.
    if pd.notna(record['sample']):
        try:
            objects['sample'] = sample_from_id(knex, record['sample'])
            objects['project'] = objects['sample'].project
            objects['org'] = objects['project'].org
        except ValueError:
            pass  # fall through to the project
        else:
            return objects

    # 4. The project.
    try:
        objects['project'] = project_from_id(knex, record['project'])
        objects['org'] = objects['project'].org
    except ValueError:
        pass  # fall through to the organization
    else:
        return objects

    # 5. Last resort: the organization.
    if objects['org'] is None:
        objects['org'] = org_from_id(knex, record['organization'])

    return objects
171
+
172
+
173
def _get_result_file_from_record(knex, record: Dict) -> Dict:
    """Resolve every object a config record refers to and create the result file.

    Objects that `_get_result_file_from_record_with_ids` could resolve from
    ids are reused; the remaining ones are built by name from their parent
    (project from org, sample from project, folder from sample or project,
    file from folder). The result file is then created remotely if needed
    via `idem()`.

    Returns a dict with 'org', 'project', 'sample', 'folder', and
    'result_file' keys. 'sample' stays None for project-level folders.
    """
    objects = _get_result_file_from_record_with_ids(knex, record)

    if objects['project'] is None:
        objects['project'] = objects['org'].project(record['project'])

    if objects['sample'] is None and pd.notna(record['sample']):
        objects['sample'] = objects['project'].sample(record['sample'])

    if objects['folder'] is None:
        # Pick the folder's parent from what is resolved *now*, so a sample
        # that was already found by GRN/UUID is used too (it previously left
        # `parent` unassigned and crashed here).
        parent = objects['sample'] if objects['sample'] is not None else objects['project']
        objects['folder'] = parent.result_folder(record['folder'])

    if objects['result_file'] is None:
        objects['result_file'] = objects['folder'].result_file(record['filename'])

    objects['result_file'].idem()
    # Diagnostic output goes to the module logger rather than stdout.
    logger.debug('Resolved upload target objects: %s', objects)
    return objects
200
+
201
+
202
+ def _add_record_to_upload_manager_local_file(record: Dict, result_file, upload_manager: GeoSeeqUploadManager) -> None:
203
+ """Add a local file upload to the upload manager."""
204
+ upload_manager.add_result_file(result_file, record['path'], link_type='upload')
205
+
206
+
207
+ def _add_record_to_upload_manager_s3_file(record: Dict, result_file, upload_manager: GeoSeeqUploadManager) -> None:
208
+ """Add an S3 file link to the upload manager.
209
+
210
+ Handles two types of S3 URLs:
211
+ 1. https://endpoint/bucket/key - Full URL with endpoint included
212
+ 2. s3://bucket/key - S3 protocol URL that needs endpoint added
213
+ """
214
+ path = record['path']
215
+
216
+ if path.startswith('s3://'):
217
+ # Convert s3:// URL to https:// URL
218
+ if not record['endpoint_url']:
219
+ raise ValueError("endpoint_url is required for s3:// URLs")
220
+
221
+ # Remove s3:// prefix and combine with endpoint
222
+ bucket_and_key = path[5:] # len('s3://') == 5
223
+ path = f"{record['endpoint_url'].rstrip('/')}/{bucket_and_key}"
224
+ elif not path.startswith('https://'):
225
+ raise ValueError("S3 URLs must start with either 's3://' or 'https://'")
226
+
227
+ upload_manager.add_result_file(result_file, path, link_type='s3')
228
+
229
+
230
def _upload_one_record(knex, record: Dict, overwrite: bool, upload_manager: GeoSeeqUploadManager) -> Dict:
    """Resolve one config record and queue its file on the upload manager.

    The record's `type` column (case-insensitive) selects how the file is
    queued: "local" for a file on disk, "s3" for an S3 link. `overwrite`
    is accepted but not used by this function.

    Returns the dict of resolved objects for the record.

    Raises:
        ValueError: if no result file could be resolved, or the type is
            neither "local" nor "s3".
    """
    objects = _get_result_file_from_record(knex, record)
    target_file = objects['result_file']
    if not target_file:
        raise ValueError(f"Could not find or create result_file from record: {record}")

    # Dispatch on the record type.
    adders = {
        'local': _add_record_to_upload_manager_local_file,
        's3': _add_record_to_upload_manager_s3_file,
    }
    add_to_manager = adders.get(record['type'].lower())
    if add_to_manager is None:
        raise ValueError(f"Unknown file type: {record['type']}")
    add_to_manager(record, target_file, upload_manager)

    return objects
245
+
246
+
247
# Column names a from-config CSV must contain.
REQUIRED_COLUMNS = [
    'organization',
    'project',
    'sample',
    'folder',
    'filename',
    'path',
    'type',
    'endpoint_url',
]
251
+
252
+
253
@click.command('from-config')
@use_common_state
@click.option('--cores', default=1, help='Number of uploads to run in parallel')
@click.option('--sep', default=',', help='Separator character for the CSV file')
@overwrite_option
@yes_option
@click.argument('config_file', type=click.Path(exists=True))
def cli_upload_from_config(state, cores, sep, overwrite, yes, config_file):
    """Upload files to GeoSeeq based on a configuration CSV file.

    \b
    The CSV file must have the following columns:
    - organization: Organization name, GRN, or UUID (optional if project/sample/folder specified by GRN/UUID)
    - project: Project name, GRN, or UUID (optional if sample/folder specified by GRN/UUID)
    - sample: Sample name, GRN, or UUID (optional, also optional if folder specified by GRN/UUID)
    - folder: Folder name, GRN, or UUID
    - filename: Name to give the file on GeoSeeq
    - path: Path to local file or S3 URL
    - type: Either "local" or "s3"
    - endpoint_url: S3 endpoint URL (required for S3 files)

    \b
    When using GRNs or UUIDs, you can omit the parent object IDs. For example:
    - If folder is a GRN/UUID, organization/project/sample can be blank
    - If sample is a GRN/UUID, organization/project can be blank
    - If project is a GRN/UUID, organization can be blank

    \b
    Example config.csv:
    organization,project,sample,folder,filename,path,type,endpoint_url
    MyOrg,MyProject,Sample1,reads,file1.fastq,/path/to/file1.fastq,local,
    ,grn:project:uuid,Sample2,reads,file2.fastq,/path/to/file2.fastq,local,
    ,,grn:sample:uuid,reads,file3.fastq,/path/to/file3.fastq,local,
    ,,,grn:folder:uuid,file4.fastq,s3://bucket/file4.fastq,s3,https://s3.amazonaws.com

    \b
    Example with tab separator:
    $ geoseeq upload advanced from-config --sep $'\\t' config.tsv
    """
    # NOTE: this docstring is not a raw string because click needs the real
    # backspace character from "\b"; the tab example is therefore written as
    # "\\t" so --help shows the two characters backslash-t, not a literal tab.
    knex = state.get_knex()

    # Read the config and make sure every required column is present.
    df = pd.read_csv(config_file, sep=sep)
    missing_cols = set(REQUIRED_COLUMNS) - set(df.columns)
    if missing_cols:
        raise click.UsageError(f"Config file missing required columns: {missing_cols}")

    upload_manager = GeoSeeqUploadManager(
        n_parallel_uploads=cores,
        progress_tracker_factory=PBarManager().get_new_bar,
        log_level=state.log_level,
        overwrite=overwrite,
        use_cache=state.use_cache,
    )

    # Resolve each row and queue it; nothing is uploaded yet.
    for _, record in df.iterrows():
        _upload_one_record(knex, record, overwrite, upload_manager)

    # Show what is queued (on stderr) and ask before uploading unless --yes.
    click.echo(upload_manager.get_preview_string(), err=True)

    if not yes:
        click.confirm('Do you want to proceed with these uploads?', abort=True)

    upload_manager.upload_files()
@@ -2,7 +2,7 @@ import logging
2
2
  import click
3
3
  import requests
4
4
  from os.path import basename
5
-
5
+ import pandas as pd
6
6
  from multiprocessing import Pool, current_process
7
7
 
8
8
  from geoseeq.cli.constants import *
@@ -67,8 +67,12 @@ def _get_regex(knex, filepaths, module_name, lib, regex):
67
67
  return regex
68
68
 
69
69
 
70
- def _group_files(knex, filepaths, module_name, regex, yes):
70
+ def _group_files(knex, filepaths, module_name, regex, yes, name_map):
71
71
  """Group the files into samples, confirm, and return the groups."""
72
+ if name_map is not None:
73
+ name_map_filename, cur_col, new_col = name_map
74
+ name_map = pd.read_csv(name_map_filename)[[cur_col, new_col]]
75
+ name_map = name_map.set_index(cur_col).to_dict()
72
76
  seq_length, seq_type = module_name.split('::')[:2]
73
77
  groups = knex.post('bulk_upload/group_files', json={
74
78
  'filenames': list(filepaths.keys()),
@@ -76,7 +80,11 @@ def _group_files(knex, filepaths, module_name, regex, yes):
76
80
  'regex': regex
77
81
  })
78
82
  for group in groups:
79
- click.echo(f'sample_name: {group["sample_name"]}', err=True)
83
+ sample_name = group["sample_name"]
84
+ if name_map:
85
+ sample_name = name_map.get(sample_name, sample_name)
86
+ group["sample_name"] = sample_name
87
+ click.echo(f'sample_name: {sample_name}', err=True)
80
88
  click.echo(f' module_name: {module_name}', err=True)
81
89
  for field_name, filename in group['fields'].items():
82
90
  path = filepaths[filename]
@@ -173,10 +181,11 @@ def flatten_list_of_bams(filepaths):
173
181
  @private_option
174
182
  @link_option
175
183
  @no_new_versions_option
184
+ @click.option('--name-map', default=None, nargs=3, help="A file to use for converting names. Takes three arguments: a file name, a column name for current names, and a column name for new names.")
176
185
  @module_option(FASTQ_MODULE_NAMES)
177
186
  @project_id_arg
178
187
  @click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
179
- def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, no_new_versions, module_name, project_id, fastq_files):
188
+ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_type, no_new_versions, name_map, module_name, project_id, fastq_files):
180
189
  """Upload fastq read files to GeoSeeq.
181
190
 
182
191
  This command automatically groups files by their sample name, lane number
@@ -1,5 +1,6 @@
1
1
  from os import environ
2
2
  from os.path import join
3
+ from typing import Literal
3
4
 
4
5
  FIVE_MB = 5 * (1024 ** 2)
5
6
  FASTQ_MODULE_NAMES = [
@@ -13,4 +14,6 @@ DEFAULT_ENDPOINT = "https://backend.geoseeq.com"
13
14
 
14
15
  CONFIG_FOLDER = environ.get("XDG_CONFIG_HOME", join(environ["HOME"], ".config"))
15
16
  CONFIG_DIR = environ.get("GEOSEEQ_CONFIG_DIR", join(CONFIG_FOLDER, "geoseeq"))
16
- PROFILES_PATH = join(CONFIG_DIR, "profiles.json")
17
+ PROFILES_PATH = join(CONFIG_DIR, "profiles.json")
18
+
19
+ OBJECT_TYPE_STR = Literal['org', 'project', 'sample', 'sample_result_folder', 'project_result_folder', 'sample_result_file', 'project_result_file']
@@ -22,51 +22,51 @@ from .from_names import (
22
22
  project_result_file_from_name,
23
23
  result_file_from_name,
24
24
  )
25
- from .utils import is_grn_or_uuid, is_name
25
+ from .utils import is_grn_or_uuid, is_name, is_abs_name
26
26
  from geoseeq.knex import with_knex
27
-
27
+ from geoseeq.constants import OBJECT_TYPE_STR
28
28
  logger = logging.getLogger("geoseeq_api") # Same name as calling module
29
29
 
30
30
 
31
- def _generic_from_id(knex, id, from_uuid_func, from_name_func):
31
+ def _generic_from_id(knex, id, from_uuid_func, from_name_func, object_type_str: OBJECT_TYPE_STR):
32
32
  """Return the object which the id points to."""
33
33
  logger.debug(f'Getting object from id: {id}, knex: {knex}, from_uuid_func: {from_uuid_func}, from_name_func: {from_name_func}')
34
34
  if is_grn_or_uuid(id):
35
35
  id = id.split(':')[-1] # if this is a GRN, get the UUID. Won't hurt if it's already a UUID.
36
36
  return from_uuid_func(knex, id)
37
- if is_name(id):
37
+ if is_abs_name(id, object_type_str):
38
38
  return from_name_func(knex, id)
39
- raise ValueError(f'"{id}" is not a GRN, UUID, or name')
39
+ raise ValueError(f'"{id}" is not a GRN, UUID, or absolute name for {object_type_str}')
40
40
 
41
41
 
42
42
  @with_knex
43
43
  def org_from_id(knex, id):
44
44
  """Return the organization object which the id points to."""
45
- return _generic_from_id(knex, id, org_from_uuid, org_from_name)
45
+ return _generic_from_id(knex, id, org_from_uuid, org_from_name, 'org')
46
46
 
47
47
 
48
48
  @with_knex
49
49
  def project_from_id(knex, id):
50
50
  """Return the project object which the id points to."""
51
- return _generic_from_id(knex, id, project_from_uuid, project_from_name)
51
+ return _generic_from_id(knex, id, project_from_uuid, project_from_name, 'project')
52
52
 
53
53
 
54
54
  @with_knex
55
55
  def sample_from_id(knex, id):
56
56
  """Return the sample object which the id points to."""
57
- return _generic_from_id(knex, id, sample_from_uuid, sample_from_name)
57
+ return _generic_from_id(knex, id, sample_from_uuid, sample_from_name, 'sample')
58
58
 
59
59
 
60
60
  @with_knex
61
61
  def sample_result_folder_from_id(knex, id):
62
62
  """Return the sample result folder object which the id points to."""
63
- return _generic_from_id(knex, id, sample_result_folder_from_uuid, sample_result_folder_from_name)
63
+ return _generic_from_id(knex, id, sample_result_folder_from_uuid, sample_result_folder_from_name, 'sample_result_folder')
64
64
 
65
65
 
66
66
  @with_knex
67
67
  def project_result_folder_from_id(knex, id):
68
68
  """Return the project result folder object which the id points to."""
69
- return _generic_from_id(knex, id, project_result_folder_from_uuid, project_result_folder_from_name)
69
+ return _generic_from_id(knex, id, project_result_folder_from_uuid, project_result_folder_from_name, 'project_result_folder')
70
70
 
71
71
 
72
72
  @with_knex
@@ -75,19 +75,22 @@ def result_folder_from_id(knex, id):
75
75
 
76
76
  Guess the result folder is a sample result folder. If not, try a project result folder.
77
77
  """
78
- return _generic_from_id(knex, id, result_folder_from_uuid, result_folder_from_name)
78
+ try:
79
+ return _generic_from_id(knex, id, result_folder_from_uuid, result_folder_from_name, 'sample_result_folder')
80
+ except ValueError:
81
+ return _generic_from_id(knex, id, result_folder_from_uuid, result_folder_from_name, 'project_result_folder')
79
82
 
80
83
 
81
84
  @with_knex
82
85
  def sample_result_file_from_id(knex, id):
83
86
  """Return the sample result file object which the id points to."""
84
- return _generic_from_id(knex, id, sample_result_file_from_uuid, sample_result_file_from_name)
87
+ return _generic_from_id(knex, id, sample_result_file_from_uuid, sample_result_file_from_name, 'sample_result_file')
85
88
 
86
89
 
87
90
  @with_knex
88
91
  def project_result_file_from_id(knex, id):
89
92
  """Return the project result file object which the id points to."""
90
- return _generic_from_id(knex, id, project_result_file_from_uuid, project_result_file_from_name)
93
+ return _generic_from_id(knex, id, project_result_file_from_uuid, project_result_file_from_name, 'project_result_file')
91
94
 
92
95
 
93
96
  @with_knex
@@ -96,7 +99,10 @@ def result_file_from_id(knex, id):
96
99
 
97
100
  Guess the result file is a sample result file. If not, try a project result file.
98
101
  """
99
- return _generic_from_id(knex, id, result_file_from_uuid, result_file_from_name)
102
+ try:
103
+ return _generic_from_id(knex, id, result_file_from_uuid, result_file_from_name, 'sample_result_file')
104
+ except ValueError:
105
+ return _generic_from_id(knex, id, result_file_from_uuid, result_file_from_name, 'project_result_file')
100
106
 
101
107
 
102
108