mutts 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mutts/cli.py +25 -17
- mutts/spreadsheet.py +16 -2
- mutts/static-excel-tabs/JGI.Metagenome.NA.v15.xlsx +0 -0
- mutts-1.0.3.dist-info/METADATA +219 -0
- mutts-1.0.3.dist-info/RECORD +9 -0
- {mutts-1.0.0.dist-info → mutts-1.0.3.dist-info}/WHEEL +1 -1
- mutts-1.0.0.dist-info/METADATA +0 -18
- mutts-1.0.0.dist-info/RECORD +0 -8
- {mutts-1.0.0.dist-info → mutts-1.0.3.dist-info}/entry_points.txt +0 -0
mutts/cli.py
CHANGED
|
@@ -43,7 +43,11 @@ def format_worksheet(worksheet):
|
|
|
43
43
|
@click.command()
|
|
44
44
|
@click.option("--submission", "-s", required=True, help="Metadata submission id.")
|
|
45
45
|
@click.option(
|
|
46
|
-
"--user-facility",
|
|
46
|
+
"--user-facility",
|
|
47
|
+
"-u",
|
|
48
|
+
required=True,
|
|
49
|
+
type=click.Choice(list(MetadataRetriever.USER_FACILITY_DICT.keys()), case_sensitive=False),
|
|
50
|
+
help="User facility to send data to."
|
|
47
51
|
)
|
|
48
52
|
@click.option("--header/--no-header", "-h", default=False, show_default=True)
|
|
49
53
|
@click.option(
|
|
@@ -103,22 +107,26 @@ def cli(
|
|
|
103
107
|
# Write the generated data to 'DATA SHEET'
|
|
104
108
|
user_facility_spreadsheet.to_excel(writer, index=False, sheet_name='DATA SHEET')
|
|
105
109
|
|
|
106
|
-
#
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
110
|
+
# Check if mapper is one of the v15 JGI templates
|
|
111
|
+
mapper_basename = os.path.basename(mapper)
|
|
112
|
+
jgi_v15_mappers = ['jgi_mg_header_v15.json', 'jgi_mt_header_v15.json']
|
|
113
|
+
|
|
114
|
+
if mapper_basename in jgi_v15_mappers:
|
|
115
|
+
# Path to static JGI v15 Excel template
|
|
116
|
+
static_excel_path = os.path.join(
|
|
117
|
+
os.path.dirname(__file__), 'static-excel-tabs', 'JGI.Metagenome.NA.v15.xlsx'
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Copy INSTRUCTIONS and PLATE LOCATIONS sheets from JGI v15 template
|
|
121
|
+
# static file if it exists
|
|
122
|
+
if os.path.exists(static_excel_path):
|
|
123
|
+
static_excel = pd.ExcelFile(static_excel_path)
|
|
124
|
+
if 'INSTRUCTIONS' in static_excel.sheet_names:
|
|
125
|
+
instructions_df = pd.read_excel(static_excel, 'INSTRUCTIONS')
|
|
126
|
+
instructions_df.to_excel(writer, index=False, sheet_name='INSTRUCTIONS')
|
|
127
|
+
if 'PLATE LOCATIONS' in static_excel.sheet_names:
|
|
128
|
+
plate_locations_df = pd.read_excel(static_excel, 'PLATE LOCATIONS')
|
|
129
|
+
plate_locations_df.to_excel(writer, index=False, sheet_name='PLATE LOCATIONS')
|
|
122
130
|
|
|
123
131
|
# Apply formatting to all sheets
|
|
124
132
|
for sheet_name in writer.book.sheetnames:
|
mutts/spreadsheet.py
CHANGED
|
@@ -7,6 +7,9 @@ class SpreadsheetCreator:
|
|
|
7
7
|
Creates a spreadsheet based on a JSON mapper and metadata DataFrame.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
+
# List of JGI-specific user facilities
|
|
11
|
+
JGI_FACILITIES = ['jgi_mg', 'jgi_mt', 'jgi_mg_lr']
|
|
12
|
+
|
|
10
13
|
def __init__(
|
|
11
14
|
self,
|
|
12
15
|
user_facility: str,
|
|
@@ -16,6 +19,7 @@ class SpreadsheetCreator:
|
|
|
16
19
|
"""
|
|
17
20
|
Initialize the SpreadsheetCreator.
|
|
18
21
|
|
|
22
|
+
:param user_facility: The user facility identifier.
|
|
19
23
|
:param json_mapper: The JSON mapper specifying column mappings.
|
|
20
24
|
:param metadata_df: The metadata DataFrame to create the spreadsheet from.
|
|
21
25
|
"""
|
|
@@ -66,10 +70,20 @@ class SpreadsheetCreator:
|
|
|
66
70
|
"sub_port_mapping" in v
|
|
67
71
|
and v["sub_port_mapping"] in self.metadata_df.columns.to_list()
|
|
68
72
|
):
|
|
73
|
+
# Get the column data
|
|
74
|
+
column_data = self.metadata_df[v["sub_port_mapping"]]
|
|
75
|
+
|
|
76
|
+
# For JGI facilities, remove "_data" suffix from `sample_isolated_from` values
|
|
77
|
+
if (
|
|
78
|
+
self.user_facility in self.JGI_FACILITIES
|
|
79
|
+
and v["sub_port_mapping"] == "sample_isolated_from"
|
|
80
|
+
):
|
|
81
|
+
column_data = column_data.str.replace("_data", "", regex=False)
|
|
82
|
+
|
|
69
83
|
if "header" in v:
|
|
70
|
-
rows_df[v["header"]] =
|
|
84
|
+
rows_df[v["header"]] = column_data
|
|
71
85
|
else:
|
|
72
|
-
rows_df[k] =
|
|
86
|
+
rows_df[k] = column_data
|
|
73
87
|
|
|
74
88
|
return rows_df
|
|
75
89
|
|
|
Binary file
|
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mutts
|
|
3
|
+
Version: 1.0.3
|
|
4
|
+
Summary: Metadata for User facility Template Transformations
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: NMDC,US DOE user facilities,metadata translation
|
|
7
|
+
Author: Sujay Patil
|
|
8
|
+
Author-email: spatil@lbl.gov
|
|
9
|
+
Requires-Python: >=3.12,<4.0
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: click (>=8.1.7,<9.0.0)
|
|
17
|
+
Requires-Dist: openpyxl (>=3.1.0,<4.0.0)
|
|
18
|
+
Requires-Dist: pandas (>=2.2.0,<3.0.0)
|
|
19
|
+
Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
|
|
20
|
+
Requires-Dist: requests (>=2.32.0,<3.0.0)
|
|
21
|
+
Project-URL: Homepage, https://github.com/microbiomedata/metadata-for-user-facility-template-transformations
|
|
22
|
+
Project-URL: Repository, https://github.com/microbiomedata/metadata-for-user-facility-template-transformations
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Metadata for User facility Template Transformations (MUTTs)
|
|
26
|
+
|
|
27
|
+
## Table of Contents
|
|
28
|
+
- [Metadata for User facility Template Transformations (MUTTs)](#metadata-for-user-facility-template-transformations-mutts)
|
|
29
|
+
- [Table of Contents](#table-of-contents)
|
|
30
|
+
- [Introduction](#introduction)
|
|
31
|
+
- [MUTTs User Documentation](#mutts-user-documentation)
|
|
32
|
+
- [Prerequisites](#prerequisites)
|
|
33
|
+
- [Installation](#installation)
|
|
34
|
+
- [Usage](#usage)
|
|
35
|
+
- [Example 1: Generate a JGI Metagenome spreadsheet](#example-1-generate-a-jgi-metagenome-spreadsheet)
|
|
36
|
+
- [Example 2: Generate a JGI Metagenome v15 spreadsheet](#example-2-generate-a-jgi-metagenome-v15-spreadsheet)
|
|
37
|
+
- [Example 3: Generate an EMSL spreadsheet](#example-3-generate-an-emsl-spreadsheet)
|
|
38
|
+
- [Command Options](#command-options)
|
|
39
|
+
- [MUTTs Developer Documentation](#mutts-developer-documentation)
|
|
40
|
+
- [Software Requirements](#software-requirements)
|
|
41
|
+
- [Development Installation](#development-installation)
|
|
42
|
+
- [Creating Custom Mapper Files](#creating-custom-mapper-files)
|
|
43
|
+
|
|
44
|
+
## Introduction
|
|
45
|
+
|
|
46
|
+
The programs bundled in this repository automatically retrieve Biosample metadata records for studies submitted to NMDC through the [NMDC Submission Portal](https://data.microbiomedata.org/submission/home), and convert the metadata into Excel spreadsheets that are accepted by [DOE user facilities](https://www.energy.gov/science/office-science-user-facilities).
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## MUTTs User Documentation
|
|
51
|
+
|
|
52
|
+
The documentation and setup instructions in this section are meant for any user who would like to install the MUTTs Python package and use its transformation capabilities to convert data from the NMDC Submission Portal into an Excel spreadsheet that follows a template, based on the MUTTs JSON mapper file that is used.
|
|
53
|
+
|
|
54
|
+
### Prerequisites
|
|
55
|
+
- [Python](https://www.python.org/downloads/) 3.12 or higher
|
|
56
|
+
- An [NMDC user account](https://data.microbiomedata.org/) with an API access token
|
|
57
|
+
|
|
58
|
+
> To create an NMDC user account, sign up at the above link by clicking the 'ORCID LOGIN' button/link at the top right corner of the NMDC site and signing in with your ORCID credentials.
|
|
59
|
+
|
|
60
|
+
**Setting up your API access token**
|
|
61
|
+
|
|
62
|
+
This is required for running the examples in the [Usage](#usage) section below (after going through all the [Installation](#installation) steps).
|
|
63
|
+
|
|
64
|
+
Create a `.env` file in your working directory:
|
|
65
|
+
```bash
|
|
66
|
+
echo "DATA_PORTAL_REFRESH_TOKEN=your_token_here" > .env
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
To get your access token:
|
|
70
|
+
1. Visit https://data.microbiomedata.org/user
|
|
71
|
+
2. Copy your Refresh Token
|
|
72
|
+
3. Replace `your_token_here` in the `.env` file with your token
|
|
73
|
+
|
|
74
|
+
### Installation
|
|
75
|
+
|
|
76
|
+
1. **Create a virtual environment** (recommended)
|
|
77
|
+
```bash
|
|
78
|
+
python -m venv mutts-env
|
|
79
|
+
source mutts-env/bin/activate # On Windows: mutts-env\Scripts\activate
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
2. **Install the MUTTs package from PyPI**
|
|
83
|
+
```bash
|
|
84
|
+
pip install mutts
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
3. **Download any of the MUTTs JSON mapper configuration files**
|
|
88
|
+
|
|
89
|
+
*Note*: You are not required to download or use any of the predefined JSON mapper files that are present in this repository. You can always define your own custom JSON mapper files that follow a format similar to the ones defined in this repo.
|
|
90
|
+
|
|
91
|
+
Create a directory for your mapper files and download them from this repository:
|
|
92
|
+
```bash
|
|
93
|
+
mkdir input-files
|
|
94
|
+
cd input-files
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Download the mapper files you need from the [input-files directory](https://github.com/microbiomedata/metadata-for-user-facility-template-transformations/tree/main/input-files):
|
|
98
|
+
- For EMSL: `emsl_header.json`
|
|
99
|
+
- For JGI Metagenome: `jgi_mg_header.json` or `jgi_mg_header_v15.json`
|
|
100
|
+
- For JGI Metatranscriptome: `jgi_mt_header.json` or `jgi_mt_header_v15.json`
|
|
101
|
+
|
|
102
|
+
### Usage
|
|
103
|
+
|
|
104
|
+
Run the `mutts` command with the required options:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
mutts --help
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Note: The examples below use the `--submission` option, which is required and takes an NMDC Submission UUID as its value. You can find the UUID in the URL of the Submission page when you open it in the Submission Portal.
|
|
111
|
+
|
|
112
|
+
An example would look like below:
|
|
113
|
+
|
|
114
|
+
```
|
|
115
|
+
https://data.microbiomedata.org/submission/<submission-uuid>/samples
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### Example 1: Generate a JGI Metagenome spreadsheet
|
|
119
|
+
```bash
|
|
120
|
+
mutts --submission <submission-uuid> \
|
|
121
|
+
--unique-field samp_name \
|
|
122
|
+
--user-facility jgi_mg \
|
|
123
|
+
--mapper input-files/jgi_mg_header.json \
|
|
124
|
+
--output my-samples_jgi.xlsx
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
#### Example 2: Generate a JGI Metagenome v15 spreadsheet
|
|
128
|
+
```bash
|
|
129
|
+
mutts --submission <submission-uuid> \
|
|
130
|
+
--unique-field samp_name \
|
|
131
|
+
--user-facility jgi_mg \
|
|
132
|
+
--mapper input-files/jgi_mg_header_v15.json \
|
|
133
|
+
--output my-samples_jgi_v15.xlsx
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
#### Example 3: Generate an EMSL spreadsheet
|
|
137
|
+
```bash
|
|
138
|
+
mutts --submission <submission-uuid> \
|
|
139
|
+
--user-facility emsl \
|
|
140
|
+
--mapper input-files/emsl_header.json \
|
|
141
|
+
--header \
|
|
142
|
+
--unique-field samp_name \
|
|
143
|
+
--output my-samples_emsl.xlsx
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
#### Command Options
|
|
147
|
+
|
|
148
|
+
- `-s, --submission`: Your NMDC metadata submission UUID (required)
|
|
149
|
+
- `-u, --user-facility`: Target facility (required): `emsl`, `jgi_mg`, `jgi_mg_lr`, or `jgi_mt`
|
|
150
|
+
- `-m, --mapper`: Path to the JSON mapper file (required)
|
|
151
|
+
- `-uf, --unique-field`: Field to uniquely identify records (required, typically `samp_name`)
|
|
152
|
+
- `-o, --output`: Output Excel file path (required)
|
|
153
|
+
- `-h, --header`: Include headers in output (use for EMSL, omit for JGI)
|
|
154
|
+
|
|
155
|
+
---
|
|
156
|
+
|
|
157
|
+
## MUTTs Developer Documentation
|
|
158
|
+
|
|
159
|
+
The documentation and setup instructions in this section are largely meant for any developer/programmer whose primary use case is to extend/improve/build upon the current capabilities of the MUTTs software.
|
|
160
|
+
|
|
161
|
+
The software consists of two main components:
|
|
162
|
+
|
|
163
|
+
1. **JSON Mapper Configuration Files**
|
|
164
|
+
- Controls/specifies the mapping between columns from the NMDC Submission Portal and column names used in the output spreadsheets
|
|
165
|
+
- Top-level keys indicate main headers in the output
|
|
166
|
+
- Numbered keys add clarifying header information
|
|
167
|
+
- The `header` keyword allows custom column names
|
|
168
|
+
- The `sub_port_mapping` keyword specifies mappings between Submission Portal columns/slots (as dictated by the [NMDC submission schema](https://microbiomedata.github.io/submission-schema/)) and user facility template columns
|
|
169
|
+
- Examples available in [input-files/](input-files/)
|
|
170
|
+
|
|
171
|
+
2. **`mutts` CLI**
|
|
172
|
+
- Command-line application that performs the metadata conversion
|
|
173
|
+
- Consumes mapper files and submission data as inputs
|
|
174
|
+
|
|
175
|
+
### Software Requirements
|
|
176
|
+
- [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer)
|
|
177
|
+
- [Python](https://www.python.org/downloads/) 3.12 or higher
|
|
178
|
+
|
|
179
|
+
### Development Installation
|
|
180
|
+
|
|
181
|
+
1. Clone this repository
|
|
182
|
+
```bash
|
|
183
|
+
git clone https://github.com/microbiomedata/metadata-for-user-facility-template-transformations.git
|
|
184
|
+
cd metadata-for-user-facility-template-transformations
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
2. Install dependencies with Poetry
|
|
188
|
+
```bash
|
|
189
|
+
poetry install
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
This installs the `mutts` package in development mode and creates the `mutts` command-line tool.
|
|
193
|
+
|
|
194
|
+
3. Set up your `.env` file
|
|
195
|
+
```bash
|
|
196
|
+
cp .env.example .env # if available, or create a new .env file
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
Add your NMDC API token:
|
|
200
|
+
```
|
|
201
|
+
DATA_PORTAL_REFRESH_TOKEN=your_token_here
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Get your token from: https://data.microbiomedata.org/user
|
|
205
|
+
|
|
206
|
+
4. Run the CLI in development mode
|
|
207
|
+
```bash
|
|
208
|
+
poetry run mutts --help
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### Creating Custom Mapper Files
|
|
212
|
+
|
|
213
|
+
To create a custom mapper for a new user facility, refer to the existing examples:
|
|
214
|
+
- [emsl_header.json](input-files/emsl_header.json) - EMSL configuration
|
|
215
|
+
- [jgi_mg_header.json](input-files/jgi_mg_header.json) - JGI Metagenome configuration
|
|
216
|
+
- [jgi_mt_header.json](input-files/jgi_mt_header.json) - JGI Metatranscriptome configuration
|
|
217
|
+
- [jgi_mg_header_v15.json](input-files/jgi_mg_header_v15.json) - JGI Metagenome v15 configuration
|
|
218
|
+
- [jgi_mt_header_v15.json](input-files/jgi_mt_header_v15.json) - JGI Metatranscriptome v15 configuration
|
|
219
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
mutts/__init__.py,sha256=iTcKOq5Id9qudNWyTGPJyJGuS19yG-FhPlmSfuJM7jU,217
|
|
2
|
+
mutts/cli.py,sha256=YzRnMLEtbFc2yuAYqWoljvk0Z9IEHDw3XefjTZE8Rbg,4912
|
|
3
|
+
mutts/retriever.py,sha256=B9cShEaeOY4BN1nWaML_UzMTR971X4rn1XfqyvZhAno,8333
|
|
4
|
+
mutts/spreadsheet.py,sha256=hSEEkTcTmkEFsktGG1_s4gAPuDys9Dnzbdk9cfJKwcY,5053
|
|
5
|
+
mutts/static-excel-tabs/JGI.Metagenome.NA.v15.xlsx,sha256=tYlYX5QZVjiiCrsCsxfL73O0BdvlqttBjUyqXsv8s7s,22099
|
|
6
|
+
mutts-1.0.3.dist-info/METADATA,sha256=JU9879qBfZ0iLTHWHCR1J6XUyUSXPq1sSOm7wW0b0Zg,8928
|
|
7
|
+
mutts-1.0.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
8
|
+
mutts-1.0.3.dist-info/entry_points.txt,sha256=rDp08H4MnNWGYHFE6ZAqOocRyTp68IPJsjXTcQppi8s,39
|
|
9
|
+
mutts-1.0.3.dist-info/RECORD,,
|
mutts-1.0.0.dist-info/METADATA
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.3
|
|
2
|
-
Name: mutts
|
|
3
|
-
Version: 1.0.0
|
|
4
|
-
Summary: Metadata for User facility Template Transformations
|
|
5
|
-
Author: Sujay Patil
|
|
6
|
-
Author-email: spatil@lbl.gov
|
|
7
|
-
Requires-Python: >=3.9,<4.0
|
|
8
|
-
Classifier: Programming Language :: Python :: 3
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
-
Requires-Dist: click (>=8.1.3,<9.0.0)
|
|
15
|
-
Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
|
|
16
|
-
Requires-Dist: pandas (>=1.5.2,<2.0.0)
|
|
17
|
-
Requires-Dist: python-dotenv (>=0.21.1,<0.22.0)
|
|
18
|
-
Requires-Dist: requests (>=2.28.2,<3.0.0)
|
mutts-1.0.0.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
mutts/__init__.py,sha256=iTcKOq5Id9qudNWyTGPJyJGuS19yG-FhPlmSfuJM7jU,217
|
|
2
|
-
mutts/cli.py,sha256=Zqhv9Yeu7LOtRh38izhekSATb0yveqjLGH-qO9tc9Jk,4551
|
|
3
|
-
mutts/retriever.py,sha256=B9cShEaeOY4BN1nWaML_UzMTR971X4rn1XfqyvZhAno,8333
|
|
4
|
-
mutts/spreadsheet.py,sha256=5iTWcBBWm06avW5Xt7YvcgqbuOkgEmPJKqd2xcb63M8,4487
|
|
5
|
-
mutts-1.0.0.dist-info/METADATA,sha256=w4iXydlpLKo16Fefj1E7GdBGbbL1Mc_X8NZqO07NQtY,698
|
|
6
|
-
mutts-1.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
7
|
-
mutts-1.0.0.dist-info/entry_points.txt,sha256=rDp08H4MnNWGYHFE6ZAqOocRyTp68IPJsjXTcQppi8s,39
|
|
8
|
-
mutts-1.0.0.dist-info/RECORD,,
|
|
File without changes
|