geoseeq 0.7.3.dev4__tar.gz → 0.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/PKG-INFO +8 -2
  2. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/README.md +7 -1
  3. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/uploading_data_examples.md +11 -4
  4. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/constants.py +2 -2
  5. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/download.py +95 -15
  6. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/main.py +1 -1
  7. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/upload/upload_reads.py +34 -9
  8. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/constants.py +10 -1
  9. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/result_file.py +12 -0
  10. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/sample.py +3 -1
  11. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/pyproject.toml +1 -1
  12. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/setup.py +2 -1
  13. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/.devcontainer/devcontainer.json +0 -0
  14. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/.github/workflows/python-publish.yml +0 -0
  15. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/.github/workflows/run_unit_tests.yml +0 -0
  16. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/.gitignore +0 -0
  17. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/.pre-commit-config.yaml +0 -0
  18. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/AGENTS.md +0 -0
  19. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/LICENSE +0 -0
  20. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/Makefile +0 -0
  21. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/commit_pylintrc +0 -0
  22. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/about_geoseeq.md +0 -0
  23. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/contributing.md +0 -0
  24. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/downloading_data_examples.md +0 -0
  25. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_python_example/README.md +0 -0
  26. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_python_example/project_dashboard_example.py +0 -0
  27. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_python_example/sample_dashboard_example.py +0 -0
  28. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_python_example/simple_python_example.py +0 -0
  29. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_python_example/smart_table_example.py +0 -0
  30. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_snakemake_example/README.md +0 -0
  31. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_snakemake_example/Snakefile +0 -0
  32. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/docs/examples/simple_snakemake_example/config.yaml +0 -0
  33. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/__init__.py +0 -0
  34. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/app.py +0 -0
  35. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/blob_constructors.py +0 -0
  36. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/bulk_creators.py +0 -0
  37. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/__init__.py +0 -0
  38. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/copy.py +0 -0
  39. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/detail.py +0 -0
  40. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/fastq_utils.py +0 -0
  41. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/find_grn.py +0 -0
  42. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/get_eula.py +0 -0
  43. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/manage.py +0 -0
  44. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/progress_bar.py +0 -0
  45. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/project.py +0 -0
  46. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/raw.py +0 -0
  47. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/run.py +0 -0
  48. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/search.py +0 -0
  49. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/__init__.py +0 -0
  50. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/common_state.py +0 -0
  51. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/config.py +0 -0
  52. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/id_handlers.py +0 -0
  53. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/obj_getters.py +0 -0
  54. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/shared_params/opts_and_args.py +0 -0
  55. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/upload/__init__.py +0 -0
  56. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/upload/upload.py +0 -0
  57. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/upload/upload_advanced.py +0 -0
  58. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/user.py +0 -0
  59. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/utils.py +0 -0
  60. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/cli/view.py +0 -0
  61. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/__init__.py +0 -0
  62. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/README.md +0 -0
  63. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/__init__.py +0 -0
  64. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/api.py +0 -0
  65. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/bioproject.py +0 -0
  66. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/cli.py +0 -0
  67. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/contrib/ncbi/setup_logging.py +0 -0
  68. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/dashboard/dashboard.py +0 -0
  69. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/file_system/filesystem_download.py +0 -0
  70. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/file_system/main.py +0 -0
  71. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/file_system_cache.py +0 -0
  72. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/__init__.py +0 -0
  73. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/from_blobs.py +0 -0
  74. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/from_ids.py +0 -0
  75. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/from_names.py +0 -0
  76. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/from_uuids.py +0 -0
  77. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/resolvers.py +0 -0
  78. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/id_constructors/utils.py +0 -0
  79. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/knex.py +0 -0
  80. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/organization.py +0 -0
  81. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/pipeline.py +0 -0
  82. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/README.md +0 -0
  83. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/__init__.py +0 -0
  84. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/constants.py +0 -0
  85. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/highcharts.py +0 -0
  86. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/map/__init__.py +0 -0
  87. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/map/base_layer.py +0 -0
  88. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/map/map.py +0 -0
  89. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/map/overlay.py +0 -0
  90. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/plotting/selectable.py +0 -0
  91. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/project.py +0 -0
  92. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/remote_object.py +0 -0
  93. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/__init__.py +0 -0
  94. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/bioinfo.py +0 -0
  95. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/file_chunker.py +0 -0
  96. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/file_download.py +0 -0
  97. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/file_upload.py +0 -0
  98. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/result_folder.py +0 -0
  99. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/resumable_download_tracker.py +0 -0
  100. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/resumable_upload_tracker.py +0 -0
  101. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/smart_objects.py +0 -0
  102. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/result/utils.py +0 -0
  103. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/search.py +0 -0
  104. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/smart_table.py +0 -0
  105. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/smart_tree.py +0 -0
  106. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/upload_download_manager.py +0 -0
  107. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/user.py +0 -0
  108. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/utils.py +0 -0
  109. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/README.md +0 -0
  110. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/__init__.py +0 -0
  111. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/checksum.py +0 -0
  112. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/cli.py +0 -0
  113. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/clone.py +0 -0
  114. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/constants.py +0 -0
  115. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/vc_cache.py +0 -0
  116. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/vc_dir.py +0 -0
  117. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/vc_sample.py +0 -0
  118. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/vc/vc_stub.py +0 -0
  119. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/geoseeq/work_orders.py +0 -0
  120. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/__init__.py +0 -0
  121. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/conftest.py +0 -0
  122. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_api_client.py +0 -0
  123. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_download.py +0 -0
  124. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_download_cli.py +0 -0
  125. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_file_chunker.py +0 -0
  126. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_file_system_cache.py +0 -0
  127. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/files_path.txt +0 -0
  128. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/sampleclit.R1.fastq.gz +0 -0
  129. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/sampleclit.R2.fastq.gz +0 -0
  130. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/single-end.fastq.gz +0 -0
  131. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/test_metadata.csv +0 -0
  132. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/test_small.R1.fastq.gz +0 -0
  133. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_files/test_small.R2.fastq.gz +0 -0
  134. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_plotting.py +0 -0
  135. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_remote_object.py +0 -0
  136. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_upload.py +0 -0
  137. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_upload_cli.py +0 -0
  138. {geoseeq-0.7.3.dev4 → geoseeq-0.7.5}/tests/test_utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: geoseeq
3
- Version: 0.7.3.dev4
3
+ Version: 0.7.5
4
4
  Summary: GeoSeeq command line tools and python API
5
5
  Project-URL: Homepage, https://github.com/biotia/geoseeq_api_client
6
6
  Project-URL: Issues, https://github.com/biotia/geoseeq_api_client/issues
@@ -84,7 +84,7 @@ $ geoseeq download files --extension fastq.gz "GeoSeeq/Example CLI Project"
84
84
 
85
85
  GeoSeeq can automatically group fastq files into samples according to their
86
86
  sample name, read number, and lane number. It supports paired end, single end,
87
- and nanopore reads.
87
+ nanopore, and pacbio reads.
88
88
 
89
89
  Assume you have data from a single ended sequencing run stored as fastq files:
90
90
  - Sample1_L1_R1.fastq.gz
@@ -115,6 +115,12 @@ GeoSeeq will automatically create a new sample named `Sample1` if it does not al
115
115
 
116
116
  This command would upload data [to this project.](https://portal.geoseeq.com/sample-groups/ed59b913-91ec-489b-a1b9-4ea137a6e5cf/samples). Since only organization members can upload data, you will need to replace `GeoSeeq` with your organization name.
117
117
 
118
+ To rename samples on the fly, provide a CSV file with current and new names using the `--name-map` option:
119
+
120
+ ```
121
+ $ geoseeq upload reads --name-map sample_map.csv current_name new_name "GeoSeeq/Example CLI Project" fastq_files.txt
122
+ ```
123
+
118
124
  Note: You will need to have an API token set to use this command (see above)
119
125
 
120
126
  ## Using the Python API in a program
@@ -60,7 +60,7 @@ $ geoseeq download files --extension fastq.gz "GeoSeeq/Example CLI Project"
60
60
 
61
61
  GeoSeeq can automatically group fastq files into samples according to their
62
62
  sample name, read number, and lane number. It supports paired end, single end,
63
- and nanopore reads.
63
+ nanopore, and pacbio reads.
64
64
 
65
65
  Assume you have data from a single ended sequencing run stored as fastq files:
66
66
  - Sample1_L1_R1.fastq.gz
@@ -91,6 +91,12 @@ GeoSeeq will automatically create a new sample named `Sample1` if it does not al
91
91
 
92
92
  This command would upload data [to this project.](https://portal.geoseeq.com/sample-groups/ed59b913-91ec-489b-a1b9-4ea137a6e5cf/samples). Since only organization members can upload data, you will need to replace `GeoSeeq` with your organization name.
93
93
 
94
+ To rename samples on the fly, provide a CSV file with current and new names using the `--name-map` option:
95
+
96
+ ```
97
+ $ geoseeq upload reads --name-map sample_map.csv current_name new_name "GeoSeeq/Example CLI Project" fastq_files.txt
98
+ ```
99
+
94
100
  Note: You will need to have an API token set to use this command (see above)
95
101
 
96
102
  ## Using the Python API in a program
@@ -16,11 +16,11 @@ $ export GEOSEEQ_API_TOKEN=<your token from the geoseeq app>
16
16
 
17
17
  ## Uploading sequencing data
18
18
 
19
- GeoSeeq can automatically group fastq files into samples according to their
19
+ GeoSeeq can automatically group fastq files into samples according to their
20
20
  sample name, read number, and lane number. It supports paired end, single end,
21
- and nanopore reads.
21
+ nanopore, and pacbio reads.
22
22
 
23
- Assume you have data from a single ended sequencing run stored as fastq files:
23
+ Assume you have data from a single ended sequencing run stored as fastq files:
24
24
  - Sample1_L1_R1.fastq.gz
25
25
  - Sample1_L1_R2.fastq.gz
26
26
  - Sample1_L2_R1.fastq.gz
@@ -47,13 +47,20 @@ Uploading Sample: Sample1
47
47
 
48
48
  GeoSeeq will automatically create a new sample named `Sample1` if it does not already exist.
49
49
 
50
+ To rename samples during upload, provide a CSV with current and new names
51
+ using `--name-map`:
52
+
53
+ ```
54
+ $ geoseeq upload reads --name-map sample_map.csv current_name new_name "Example GeoSeeq Org/Example CLI Project" fastq_files.txt
55
+ ```
56
+
50
57
  Note: You will need to have an API token set to use this command (see above)
51
58
 
52
59
  ### Linking reads from S3, Wasabi, FTP, Azure, and other cloud storage services
53
60
 
54
61
  GeoSeeq allows you to link files stored on other cloud storage services without moving the files.
55
62
 
56
- Assume you have data from a single ended sequencing run stored as fastq files on an s3 bucket:
63
+ Assume you have data from a single ended sequencing run stored as fastq files on an s3 bucket:
57
64
  - `https://s3.wasabisys.com/mybucketname/Sample1_L1_R1.fastq.gz`
58
65
  - `https://s3.wasabisys.com/mybucketname/Sample1_L1_R2.fastq.gz`
59
66
  - `https://s3.wasabisys.com/mybucketname/Sample1_L2_R1.fastq.gz`
@@ -1,11 +1,11 @@
1
1
  SINGLE_END="short_read::single_end"
2
2
  PAIRED_END="short_read::paired_end"
3
3
  NANOPORE="long_read::nanopore"
4
+ PACBIO="long_read::pacbio"
4
5
 
5
6
  READ_MODULE_NAMES = [
6
7
  SINGLE_END,
7
8
  PAIRED_END,
8
9
  NANOPORE,
10
+ PACBIO,
9
11
  ]
10
-
11
-
@@ -3,6 +3,7 @@ import logging
3
3
  from os import makedirs
4
4
  from os.path import dirname, join
5
5
 
6
+ import gzip
6
7
  import click
7
8
  import pandas as pd
8
9
  from multiprocessing import Pool
@@ -32,6 +33,7 @@ from .utils import convert_size
32
33
  from geoseeq.constants import FASTQ_MODULE_NAMES
33
34
  from geoseeq.result import ResultFile
34
35
  from geoseeq.upload_download_manager import GeoSeeqDownloadManager
36
+ import os
35
37
 
36
38
  logger = logging.getLogger('geoseeq_api')
37
39
 
@@ -378,7 +380,27 @@ def cli_download_ids(state, cores, target_dir, file_name, yes, download, head, i
378
380
  download_manager.download_files()
379
381
 
380
382
 
381
- def _get_sample_result_files_with_names(sample, module_name=None, first=False):
383
+ def _get_local_filename_for_fastq(sample, result_file, read_type, read_num, lane_num, file_name_mode):
384
+ """Return a local filename for a fastq file based on the specified naming mode."""
385
+ if file_name_mode == "original":
386
+ return result_file.get_stored_data_filename()
387
+ elif file_name_mode == "geoseeq":
388
+ sname = sample.name.replace(".", "-").replace(" ", "_").lower()
389
+ rtype = read_type.replace("::", "__").replace(".", "-").replace(" ", "_").lower()
390
+ filename = f"{sname}.{rtype}.R{read_num}.L{lane_num}.fastq.gz"
391
+ return filename
392
+ elif file_name_mode == "sample-uuid":
393
+ filename = f"{sample.uuid}.R{read_num}.L{lane_num}.fastq.gz"
394
+ return filename
395
+ elif file_name_mode == "file-uuid":
396
+ filename = f"{result_file.uuid}.fastq.gz"
397
+ return filename
398
+ else:
399
+ raise ValueError(f"Unknown file name mode: {file_name_mode}")
400
+
401
+
402
+ def _get_sample_result_files_with_names(sample, module_name=None, which_fastqs_mode='all', file_name_mode='original'):
403
+ """Return list of (result_file, filename, key) tuples for all fastq files in a sample."""
382
404
  result_files_with_names = []
383
405
  for read_type, folder in sample.get_all_fastqs().items():
384
406
  if module_name and module_name != read_type:
@@ -388,19 +410,18 @@ def _get_sample_result_files_with_names(sample, module_name=None, first=False):
388
410
  lane_num = lane_num + 1 # 1 indexed
389
411
  if read_type in ["short_read::paired_end"]:
390
412
  key = (sample, read_type, 1, lane_num) # sample name, read type, read number, lane number
391
- result_files_with_names.append(
392
- (result_file[0], result_file[0].get_referenced_filename(), key)
393
- )
413
+ fname = _get_local_filename_for_fastq(sample, result_file[0], read_type, 1, lane_num, file_name_mode)
414
+ result_files_with_names.append((result_file[0], fname, key))
415
+ if which_fastqs_mode == "first-r1":
416
+ break
394
417
  key = (sample, read_type, 2, lane_num)
395
- result_files_with_names.append(
396
- (result_file[1], result_file[1].get_referenced_filename(), key)
397
- )
418
+ fname = _get_local_filename_for_fastq(sample, result_file[1], read_type, 2, lane_num, file_name_mode)
419
+ result_files_with_names.append((result_file[1], fname, key))
398
420
  else:
399
421
  key = (sample, read_type, 1, lane_num)
400
- result_files_with_names.append(
401
- (result_file, result_file.get_referenced_filename(), key)
402
- )
403
- if first:
422
+ fname = _get_local_filename_for_fastq(sample, result_file, read_type, 1, lane_num, file_name_mode)
423
+ result_files_with_names.append((result_file, fname, key))
424
+ if which_fastqs_mode in ["first-all", "first-r1"]:
404
425
  break
405
426
 
406
427
  return result_files_with_names
@@ -442,14 +463,52 @@ def _make_read_configs(download_results, config_dir="."):
442
463
  with open(config_path, "w") as f:
443
464
  json.dump(config_blob, f, indent=4)
444
465
 
466
+ def _open_maybe_gzip(local_path):
467
+ """Open a file that may be gzipped. Do not rely on file extension."""
468
+ with open(local_path, "rb") as f:
469
+ magic_number = f.read(2)
470
+ if magic_number == b'\x1f\x8b':
471
+ return gzip.open(local_path, "rt")
472
+ else:
473
+ return open(local_path, "r")
474
+
475
+
476
+ def _trim_fastq_to_complete_reads(key, local_path):
477
+ """Trim a fastq file to the nearest complete read boundary under head_bytes.
478
+
479
+ Write the output as a gzipped file regardless of input compression.
480
+ """
481
+ temp_path = local_path + ".tmp"
482
+ with _open_maybe_gzip(local_path) as infile, gzip.open(temp_path, "wt") as outfile:
483
+ lines_written = 0
484
+ while True:
485
+ read_lines = []
486
+ for _ in range(4):
487
+ line = infile.readline()
488
+ if not line:
489
+ break
490
+ read_lines.append(line)
491
+ if len(read_lines) < 4:
492
+ break # end of file
493
+ if infile.tell() > key[4]: # key[4] is head_bytes
494
+ break # reached head limit
495
+ for line in read_lines:
496
+ outfile.write(line)
497
+ lines_written += 4
498
+ # Replace original file with trimmed file
499
+
500
+ os.replace(temp_path, local_path)
501
+
445
502
 
446
503
  @cli_download.command("fastqs")
447
504
  @use_common_state
448
505
  @cores_option
449
506
  @click.option("--target-dir", default=".")
450
507
  @yes_option
451
- @click.option("--first/--all", default=False, help="Download only the first folder of fastq files for each sample.")
508
+ @click.option('--file-name-mode', type=click.Choice(['original', 'geoseeq', 'sample-uuid', 'file-uuid']), help="Choose how the downloaded fastq files are named.", default='original')
509
+ @click.option("--which-fastqs-mode", type=click.Choice(["first-all", "first-r1", "all"]), default="all", help="Choose which fastq files to download per sample. ")
452
510
  @click.option("--download/--urls-only", default=True, help="Download files or just print urls")
511
+ @head_option
453
512
  @click.option("--config-dir", default=None, help="Directory to write read config files. If unset do not write config files.")
454
513
  @module_option(FASTQ_MODULE_NAMES, use_default=False)
455
514
  @ignore_errors_option
@@ -460,8 +519,10 @@ def cli_download_fastqs(state,
460
519
  cores,
461
520
  target_dir,
462
521
  yes,
463
- first,
522
+ file_name_mode,
523
+ which_fastqs_mode,
464
524
  download,
525
+ head,
465
526
  config_dir,
466
527
  module_name,
467
528
  ignore_errors,
@@ -474,6 +535,20 @@ def cli_download_fastqs(state,
474
535
  This command will download fastq files from a GeoSeeq project. You can filter
475
536
  files by sample name and by specific fastq read types.
476
537
 
538
+ The filenames of the downloaded fastq files can be controlled using the --file-name-mode option:
539
+ - original: Use the original filename as uploaded to GeoSeeq (default)
540
+ - geoseeq: Use a normalized GeoSeeq generated filename that includes the sample name, read type, read number, and lane number.
541
+ - sample-uuid: Use the GeoSeeq UUID of the sample along with lane number and read number.
542
+ - file-uuid: Use the GeoSeeq UUID of the result file only.
543
+
544
+ If the --head option is used to only download the first N bytes of each fastq file, this command
545
+ will automatically clip the fastq files at the nearest complete read boundary to avoid incomplete reads.
546
+
547
+ The --which-fastqs-mode option controls which fastq files are downloaded per sample:
548
+ - first-all: Download all fastq files but from the first fastq folder only.
549
+ - first-r1: Download only the first read (R1) fastq file from the first fastq folder.
550
+ - all: Download all fastq files from all folders.
551
+
477
552
  ---
478
553
 
479
554
  Example Usage:
@@ -523,7 +598,7 @@ def cli_download_fastqs(state,
523
598
  result_files_with_names = []
524
599
  for sample in samples:
525
600
  try:
526
- result_files_with_names += _get_sample_result_files_with_names(sample, module_name, first)
601
+ result_files_with_names += _get_sample_result_files_with_names(sample, module_name, which_fastqs_mode, file_name_mode)
527
602
  except Exception as e:
528
603
  logger.error(f"Error fetching fastq files for sample {sample.name}: {e}")
529
604
  if not ignore_errors:
@@ -538,9 +613,14 @@ def cli_download_fastqs(state,
538
613
  ignore_errors=ignore_errors,
539
614
  log_level=state.log_level,
540
615
  progress_tracker_factory=PBarManager().get_new_bar,
616
+ head=head,
541
617
  )
542
618
  for result_file, filename, key in result_files_with_names:
543
- download_manager.add_download(result_file, join(target_dir, filename), key=key)
619
+ callback = None
620
+ if head:
621
+ callback = _trim_fastq_to_complete_reads
622
+ key = key + (head,) # append head bytes to key
623
+ download_manager.add_download(result_file, join(target_dir, filename), key=key, callback=callback)
544
624
  if not download:
545
625
  print(download_manager.get_url_string(), file=state.outfile)
546
626
  else:
@@ -55,7 +55,7 @@ def version():
55
55
  Use of this tool implies acceptance of the GeoSeeq End User License Agreement.
56
56
  Run `geoseeq eula show` to view the EULA.
57
57
  """
58
- click.echo("0.7.3dev0") # remember to update pyproject.toml
58
+ click.echo("0.7.5") # remember to update pyproject.toml
59
59
 
60
60
 
61
61
  @main.group("advanced")
@@ -1,9 +1,10 @@
1
+ # pylint: disable=line-too-long
1
2
  import logging
2
3
  import click
3
4
  import requests
4
5
  from os.path import basename
5
6
  import pandas as pd
6
- from multiprocessing import Pool, current_process
7
+ from multiprocessing import current_process
7
8
 
8
9
  from geoseeq.cli.constants import *
9
10
  from geoseeq.cli.shared_params import (
@@ -46,10 +47,10 @@ def _upload_one_file(args):
46
47
 
47
48
  def _get_regex(knex, filepaths, module_name, lib, regex):
48
49
  """Return a regex that will group the files into samples
49
-
50
+
50
51
  Tell the user how many files could not be matched using the regex.
51
52
  """
52
- seq_length, seq_type = module_name.split('::')[:2]
53
+ _, seq_type = module_name.split('::')[:2]
53
54
  args = {
54
55
  'filenames': list(filepaths.keys()),
55
56
  'sequence_type': seq_type,
@@ -181,7 +182,13 @@ def flatten_list_of_bams(filepaths):
181
182
  @private_option
182
183
  @link_option
183
184
  @no_new_versions_option
184
- @click.option('--name-map', default=None, nargs=3, help="A file to use for converting names. Takes three arguments: a file name, a column name for current names, and a column name for new names.")
185
+ @click.option(
186
+ '--name-map',
187
+ default=None,
188
+ nargs=3,
189
+ required=False,
190
+ help='Optional CSV and column names used to map existing names to new ones. Provide: <file> <current_name_col> <new_name_col>.'
191
+ )
185
192
  @module_option(FASTQ_MODULE_NAMES)
186
193
  @project_id_arg
187
194
  @click.argument('fastq_files', type=click.Path(exists=True), nargs=-1)
@@ -214,15 +221,23 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
214
221
  $ ls -1 path/to/fastq/files/*.fastq.gz > file_list.txt
215
222
  $ geoseeq upload reads --yes --overwrite "GeoSeeq/Example CLI Project" file_list.txt
216
223
 
224
+ \b
225
+ # Remap sample names using a CSV file with current and new names
226
+ $ geoseeq upload reads --name-map sample_map.csv current_name new_name "GeoSeeq/Example CLI Project" fastq_files.txt
227
+
217
228
  ---
218
229
 
230
+ The optional ``--name-map`` flag takes three values: a CSV filename,
231
+ the column containing current names and the column containing new names.
232
+ When provided, sample names will be translated during upload.
233
+
219
234
  Command Arguments:
220
-
235
+
221
236
  [PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
222
237
  organization name and project name separated by a slash.
223
238
 
224
239
  \b
225
- Examples:
240
+ Examples:
226
241
  - Name pair: "GeoSeeq/Example CLI Project"
227
242
  - UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
228
243
  - GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
@@ -284,12 +299,12 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
284
299
  ---
285
300
 
286
301
  Command Arguments:
287
-
302
+
288
303
  [PROJECT_ID] Can be a project UUID, GeoSeeq Resource Number (GRN), or an
289
304
  organization name and project name separated by a slash.
290
305
 
291
306
  \b
292
- Examples:
307
+ Examples:
293
308
  - Name pair: "GeoSeeq/Example CLI Project"
294
309
  - UUID: "ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
295
310
  - GRN: "grn:gs1:project:ed59b913-91ec-489b-a1b9-4ea137a6e5cf"
@@ -303,4 +318,14 @@ def cli_upload_reads_wizard(state, cores, overwrite, yes, regex, private, link_t
303
318
  # filepaths = {basename(line): line for line in flatten_list_of_bams(files)}
304
319
  # click.echo(f'Found {len(filepaths)} files to upload.', err=True)
305
320
  # groups = _group_files(knex, filepaths, 'bam::bam', regex, yes)
306
- # _do_upload(groups, 'bam::bam', link_type, proj, filepaths, overwrite, no_new_versions, cores, state)
321
+ # _do_upload(
322
+ # groups,
323
+ # 'bam::bam',
324
+ # link_type,
325
+ # proj,
326
+ # filepaths,
327
+ # overwrite,
328
+ # no_new_versions,
329
+ # cores,
330
+ # state,
331
+ # )
@@ -7,6 +7,7 @@ FASTQ_MODULE_NAMES = [
7
7
  'short_read::paired_end',
8
8
  'short_read::single_end',
9
9
  'long_read::nanopore',
10
+ 'long_read::pacbio',
10
11
  'raw::raw_reads',
11
12
  'genome::fasta',
12
13
  ]
@@ -16,4 +17,12 @@ CONFIG_FOLDER = environ.get("XDG_CONFIG_HOME", join(environ["HOME"], ".config"))
16
17
  CONFIG_DIR = environ.get("GEOSEEQ_CONFIG_DIR", join(CONFIG_FOLDER, "geoseeq"))
17
18
  PROFILES_PATH = join(CONFIG_DIR, "profiles.json")
18
19
 
19
- OBJECT_TYPE_STR = Literal['org', 'project', 'sample', 'sample_result_folder', 'project_result_folder', 'sample_result_file', 'project_result_file']
20
+ OBJECT_TYPE_STR = Literal[
21
+ 'org',
22
+ 'project',
23
+ 'sample',
24
+ 'sample_result_folder',
25
+ 'project_result_folder',
26
+ 'sample_result_file',
27
+ 'project_result_file',
28
+ ]
@@ -103,6 +103,18 @@ class ResultFile(RemoteObject, ResultFileUpload, ResultFileDownload, ResultFileS
103
103
  # except TypeError:
104
104
  # return basename(self.get_blob_filename())
105
105
 
106
+ def get_stored_data_filename(self):
107
+ """Return the filename that is stored in the stored_data field.
108
+
109
+ This is typically the filename that was originally uploaded to create this result file.
110
+ """
111
+ try:
112
+ key = [k for k in ["filename", "uri", "url"] if k in self.stored_data][0]
113
+ except IndexError:
114
+ raise TypeError("Cannot make a reference filename for a BLOB type result field.")
115
+ filepath = self.stored_data[key]
116
+ return basename(filepath)
117
+
106
118
  def _save(self):
107
119
  data = {field: getattr(self, field) for field in self.remote_fields if hasattr(self, field)}
108
120
  data["analysis_result"] = self.parent.uuid
@@ -1,6 +1,6 @@
1
1
  import urllib
2
2
  from .remote_object import RemoteObject
3
- from .result import SampleResultFile, SampleResultFolder
3
+ from .result import SampleResultFolder
4
4
 
5
5
 
6
6
  class Sample(RemoteObject):
@@ -166,12 +166,14 @@ class Sample(RemoteObject):
166
166
  "short_read::paired_end"
167
167
  "short_read::single_end"
168
168
  "long_read::nanopore"
169
+ "long_read::pacbio"
169
170
  """
170
171
  if preference_order is None:
171
172
  preference_order = [
172
173
  "short_read::paired_end",
173
174
  "short_read::single_end",
174
175
  "long_read::nanopore",
176
+ "long_read::pacbio",
175
177
  ]
176
178
  all_fastqs = self.get_all_fastqs()
177
179
  for read_type in preference_order:
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "geoseeq"
7
- version = "0.7.3dev4"
7
+ version = "0.7.5"
8
8
  authors = [
9
9
  { name="David C. Danko", email="dcdanko@biotia.io" },
10
10
  ]
@@ -5,7 +5,8 @@ import setuptools
5
5
 
6
6
  setuptools.setup(
7
7
  name='geoseeq',
8
- version='0.5.6a7', # DEPRECATED see pyproject.toml remember to update version string in CLI as well
8
+ version='0.7.4', # DEPRECATED see pyproject.toml
9
+ # remember to update version string in CLI as well
9
10
  author="David C. Danko",
10
11
  author_email='dcdanko@biotia.io',
11
12
  description=open('README.md').read(),
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes