mutts 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mutts/cli.py CHANGED
@@ -43,7 +43,11 @@ def format_worksheet(worksheet):
43
43
  @click.command()
44
44
  @click.option("--submission", "-s", required=True, help="Metadata submission id.")
45
45
  @click.option(
46
- "--user-facility", "-u", required=True, help="User facility to send data to."
46
+ "--user-facility",
47
+ "-u",
48
+ required=True,
49
+ type=click.Choice(list(MetadataRetriever.USER_FACILITY_DICT.keys()), case_sensitive=False),
50
+ help="User facility to send data to."
47
51
  )
48
52
  @click.option("--header/--no-header", "-h", default=False, show_default=True)
49
53
  @click.option(
@@ -103,22 +107,26 @@ def cli(
103
107
  # Write the generated data to 'DATA SHEET'
104
108
  user_facility_spreadsheet.to_excel(writer, index=False, sheet_name='DATA SHEET')
105
109
 
106
- # Path to static JGI v15 Excel template
107
- static_excel_path = os.path.join(
108
- os.path.dirname(__file__), '..', '..',
109
- 'input-files', 'static-excel-tabs', 'JGI.Metagenome.NA.v15.xlsx'
110
- )
111
-
112
- # Copy INSTRUCTIONS and PLATE LOCATIONS sheets from JGI v15 template
113
- # static file if it exists
114
- if os.path.exists(static_excel_path):
115
- static_excel = pd.ExcelFile(static_excel_path)
116
- if 'INSTRUCTIONS' in static_excel.sheet_names:
117
- instructions_df = pd.read_excel(static_excel, 'INSTRUCTIONS')
118
- instructions_df.to_excel(writer, index=False, sheet_name='INSTRUCTIONS')
119
- if 'PLATE LOCATIONS' in static_excel.sheet_names:
120
- plate_locations_df = pd.read_excel(static_excel, 'PLATE LOCATIONS')
121
- plate_locations_df.to_excel(writer, index=False, sheet_name='PLATE LOCATIONS')
110
+ # Check if mapper is one of the v15 JGI templates
111
+ mapper_basename = os.path.basename(mapper)
112
+ jgi_v15_mappers = ['jgi_mg_header_v15.json', 'jgi_mt_header_v15.json']
113
+
114
+ if mapper_basename in jgi_v15_mappers:
115
+ # Path to static JGI v15 Excel template
116
+ static_excel_path = os.path.join(
117
+ os.path.dirname(__file__), 'static-excel-tabs', 'JGI.Metagenome.NA.v15.xlsx'
118
+ )
119
+
120
+ # Copy INSTRUCTIONS and PLATE LOCATIONS sheets from JGI v15 template
121
+ # static file if it exists
122
+ if os.path.exists(static_excel_path):
123
+ static_excel = pd.ExcelFile(static_excel_path)
124
+ if 'INSTRUCTIONS' in static_excel.sheet_names:
125
+ instructions_df = pd.read_excel(static_excel, 'INSTRUCTIONS')
126
+ instructions_df.to_excel(writer, index=False, sheet_name='INSTRUCTIONS')
127
+ if 'PLATE LOCATIONS' in static_excel.sheet_names:
128
+ plate_locations_df = pd.read_excel(static_excel, 'PLATE LOCATIONS')
129
+ plate_locations_df.to_excel(writer, index=False, sheet_name='PLATE LOCATIONS')
122
130
 
123
131
  # Apply formatting to all sheets
124
132
  for sheet_name in writer.book.sheetnames:
mutts/spreadsheet.py CHANGED
@@ -7,6 +7,9 @@ class SpreadsheetCreator:
7
7
  Creates a spreadsheet based on a JSON mapper and metadata DataFrame.
8
8
  """
9
9
 
10
+ # List of JGI-specific user facilities
11
+ JGI_FACILITIES = ['jgi_mg', 'jgi_mt', 'jgi_mg_lr']
12
+
10
13
  def __init__(
11
14
  self,
12
15
  user_facility: str,
@@ -16,6 +19,7 @@ class SpreadsheetCreator:
16
19
  """
17
20
  Initialize the SpreadsheetCreator.
18
21
 
22
+ :param user_facility: The user facility identifier.
19
23
  :param json_mapper: The JSON mapper specifying column mappings.
20
24
  :param metadata_df: The metadata DataFrame to create the spreadsheet from.
21
25
  """
@@ -66,10 +70,20 @@ class SpreadsheetCreator:
66
70
  "sub_port_mapping" in v
67
71
  and v["sub_port_mapping"] in self.metadata_df.columns.to_list()
68
72
  ):
73
+ # Get the column data
74
+ column_data = self.metadata_df[v["sub_port_mapping"]]
75
+
76
+ # For JGI facilities, remove "_data" suffix from `sample_isolated_from` values
77
+ if (
78
+ self.user_facility in self.JGI_FACILITIES
79
+ and v["sub_port_mapping"] == "sample_isolated_from"
80
+ ):
81
+ column_data = column_data.str.replace("_data", "", regex=False)
82
+
69
83
  if "header" in v:
70
- rows_df[v["header"]] = self.metadata_df[v["sub_port_mapping"]]
84
+ rows_df[v["header"]] = column_data
71
85
  else:
72
- rows_df[k] = self.metadata_df[v["sub_port_mapping"]]
86
+ rows_df[k] = column_data
73
87
 
74
88
  return rows_df
75
89
 
@@ -0,0 +1,219 @@
1
+ Metadata-Version: 2.4
2
+ Name: mutts
3
+ Version: 1.0.3
4
+ Summary: Metadata for User facility Template Transformations
5
+ License: MIT
6
+ Keywords: NMDC,US DOE user facilities,metadata translation
7
+ Author: Sujay Patil
8
+ Author-email: spatil@lbl.gov
9
+ Requires-Python: >=3.12,<4.0
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Classifier: Programming Language :: Python :: 3.14
16
+ Requires-Dist: click (>=8.1.7,<9.0.0)
17
+ Requires-Dist: openpyxl (>=3.1.0,<4.0.0)
18
+ Requires-Dist: pandas (>=2.2.0,<3.0.0)
19
+ Requires-Dist: python-dotenv (>=1.0.0,<2.0.0)
20
+ Requires-Dist: requests (>=2.32.0,<3.0.0)
21
+ Project-URL: Homepage, https://github.com/microbiomedata/metadata-for-user-facility-template-transformations
22
+ Project-URL: Repository, https://github.com/microbiomedata/metadata-for-user-facility-template-transformations
23
+ Description-Content-Type: text/markdown
24
+
25
+ # Metadata for User facility Template Transformations (MUTTs)
26
+
27
+ ## Table of Contents
28
+ - [Metadata for User facility Template Transformations (MUTTs)](#metadata-for-user-facility-template-transformations-mutts)
29
+ - [Table of Contents](#table-of-contents)
30
+ - [Introduction](#introduction)
31
+ - [MUTTs User Documentation](#mutts-user-documentation)
32
+ - [Prerequisites](#prerequisites)
33
+ - [Installation](#installation)
34
+ - [Usage](#usage)
35
+ - [Example 1: Generate a JGI Metagenome spreadsheet](#example-1-generate-a-jgi-metagenome-spreadsheet)
36
+ - [Example 2: Generate a JGI Metagenome v15 spreadsheet](#example-2-generate-a-jgi-metagenome-v15-spreadsheet)
37
+ - [Example 3: Generate an EMSL spreadsheet](#example-3-generate-an-emsl-spreadsheet)
38
+ - [Command Options](#command-options)
39
+ - [MUTTs Developer Documentation](#mutts-developer-documentation)
40
+ - [Software Requirements](#software-requirements)
41
+ - [Development Installation](#development-installation)
42
+ - [Creating Custom Mapper Files](#creating-custom-mapper-files)
43
+
44
+ ## Introduction
45
+
46
+ The programs bundled in this repository automatically retrieve Biosample metadata records for studies submitted to NMDC through the [NMDC Submission Portal](https://data.microbiomedata.org/submission/home), and convert the metadata into Excel spreadsheets that are accepted by [DOE user facilities](https://www.energy.gov/science/office-science-user-facilities).
47
+
48
+ ---
49
+
50
+ ## MUTTs User Documentation
51
+
52
+ The documentation and setup instructions in this section are meant for any user who would like to install the MUTTs Python package and use its transformation capabilities to convert data from the NMDC Submission Portal into an Excel spreadsheet that follows a template, based on the MUTTs JSON mapper file that is used.
53
+
54
+ ### Prerequisites
55
+ - [Python](https://www.python.org/downloads/) 3.12 or higher
56
+ - An [NMDC user account](https://data.microbiomedata.org/) with an API access token
57
+
58
+ > To create an NMDC user account, sign up at the above link by clicking the 'ORCID LOGIN' button at the top right corner of the NMDC site and signing in with your ORCID credentials.
59
+
60
+ **Setting up your API access token**
61
+
62
+ This is required for running the examples in the [Usage](#usage) section below (after going through all the [Installation](#installation) steps).
63
+
64
+ Create a `.env` file in your working directory:
65
+ ```bash
66
+ echo "DATA_PORTAL_REFRESH_TOKEN=your_token_here" > .env
67
+ ```
68
+
69
+ To get your access token:
70
+ 1. Visit https://data.microbiomedata.org/user
71
+ 2. Copy your Refresh Token
72
+ 3. Replace `your_token_here` in the `.env` file with your token
73
+
74
+ ### Installation
75
+
76
+ 1. **Create a virtual environment** (recommended)
77
+ ```bash
78
+ python -m venv mutts-env
79
+ source mutts-env/bin/activate # On Windows: mutts-env\Scripts\activate
80
+ ```
81
+
82
+ 2. **Install the MUTTs package from PyPI**
83
+ ```bash
84
+ pip install mutts
85
+ ```
86
+
87
+ 3. **Download any of the MUTTs JSON mapper configuration files**
88
+
89
+ *Note*: You are not required to download or use any of the predefined JSON mapper files in this repository. You can always define your own custom JSON mapper files that follow a format similar to the ones defined in this repo.
90
+
91
+ Create a directory for your mapper files and download them from this repository:
92
+ ```bash
93
+ mkdir input-files
94
+ cd input-files
95
+ ```
96
+
97
+ Download the mapper files you need from the [input-files directory](https://github.com/microbiomedata/metadata-for-user-facility-template-transformations/tree/main/input-files):
98
+ - For EMSL: `emsl_header.json`
99
+ - For JGI Metagenome: `jgi_mg_header.json` or `jgi_mg_header_v15.json`
100
+ - For JGI Metatranscriptome: `jgi_mt_header.json` or `jgi_mt_header_v15.json`
101
+
102
+ ### Usage
103
+
104
+ Run the `mutts` command with the required options:
105
+
106
+ ```bash
107
+ mutts --help
108
+ ```
109
+
110
+ Note: In the examples below, the required `--submission` option takes an NMDC Submission UUID as its value. You can find the UUID in the URL of the Submission page when you open it in the Submission Portal.
111
+
112
+ An example URL looks like this:
113
+
114
+ ```
115
+ https://data.microbiomedata.org/submission/<submission-uuid>/samples
116
+ ```
117
+
118
+ #### Example 1: Generate a JGI Metagenome spreadsheet
119
+ ```bash
120
+ mutts --submission <submission-uuid> \
121
+ --unique-field samp_name \
122
+ --user-facility jgi_mg \
123
+ --mapper input-files/jgi_mg_header.json \
124
+ --output my-samples_jgi.xlsx
125
+ ```
126
+
127
+ #### Example 2: Generate a JGI Metagenome v15 spreadsheet
128
+ ```bash
129
+ mutts --submission <submission-uuid> \
130
+ --unique-field samp_name \
131
+ --user-facility jgi_mg \
132
+ --mapper input-files/jgi_mg_header_v15.json \
133
+ --output my-samples_jgi_v15.xlsx
134
+ ```
135
+
136
+ #### Example 3: Generate an EMSL spreadsheet
137
+ ```bash
138
+ mutts --submission <submission-uuid> \
139
+ --user-facility emsl \
140
+ --mapper input-files/emsl_header.json \
141
+ --header \
142
+ --unique-field samp_name \
143
+ --output my-samples_emsl.xlsx
144
+ ```
145
+
146
+ #### Command Options
147
+
148
+ - `-s, --submission`: Your NMDC metadata submission UUID (required)
149
+ - `-u, --user-facility`: Target facility (required): `emsl`, `jgi_mg`, `jgi_mg_lr`, or `jgi_mt`
150
+ - `-m, --mapper`: Path to the JSON mapper file (required)
151
+ - `-uf, --unique-field`: Field to uniquely identify records (required, typically `samp_name`)
152
+ - `-o, --output`: Output Excel file path (required)
153
+ - `-h, --header`: Include headers in output (use for EMSL, omit for JGI)
154
+
155
+ ---
156
+
157
+ ## MUTTs Developer Documentation
158
+
159
+ The documentation and setup instructions in this section are largely meant for any developer/programmer whose primary use case is to extend/improve/build upon the current capabilities of the MUTTs software.
160
+
161
+ The software consists of two main components:
162
+
163
+ 1. **JSON Mapper Configuration Files**
164
+ - Controls/specifies the mapping between columns from the NMDC Submission Portal and column names used in the output spreadsheets
165
+ - Top-level keys indicate main headers in the output
166
+ - Numbered keys add clarifying header information
167
+ - The `header` keyword allows custom column names
168
+ - The `sub_port_mapping` keyword specifies mappings between Submission Portal columns/slots (as dictated by the [NMDC submission schema](https://microbiomedata.github.io/submission-schema/)) and user facility template columns
169
+ - Examples available in [input-files/](input-files/)
170
+
171
+ 2. **`mutts` CLI**
172
+ - Command-line application that performs the metadata conversion
173
+ - Consumes mapper files and submission data as inputs
174
+
175
+ ### Software Requirements
176
+ - [Poetry](https://python-poetry.org/docs/#installing-with-the-official-installer)
177
+ - [Python](https://www.python.org/downloads/) 3.12 or higher
178
+
179
+ ### Development Installation
180
+
181
+ 1. Clone this repository
182
+ ```bash
183
+ git clone https://github.com/microbiomedata/metadata-for-user-facility-template-transformations.git
184
+ cd metadata-for-user-facility-template-transformations
185
+ ```
186
+
187
+ 2. Install dependencies with Poetry
188
+ ```bash
189
+ poetry install
190
+ ```
191
+
192
+ This installs the `mutts` package in development mode and creates the `mutts` command-line tool.
193
+
194
+ 3. Set up your `.env` file
195
+ ```bash
196
+ cp .env.example .env # if available, or create a new .env file
197
+ ```
198
+
199
+ Add your NMDC API token:
200
+ ```
201
+ DATA_PORTAL_REFRESH_TOKEN=your_token_here
202
+ ```
203
+
204
+ Get your token from: https://data.microbiomedata.org/user
205
+
206
+ 4. Run the CLI in development mode
207
+ ```bash
208
+ poetry run mutts --help
209
+ ```
210
+
211
+ ### Creating Custom Mapper Files
212
+
213
+ To create a custom mapper for a new user facility, refer to the existing examples:
214
+ - [emsl_header.json](input-files/emsl_header.json) - EMSL configuration
215
+ - [jgi_mg_header.json](input-files/jgi_mg_header.json) - JGI Metagenome configuration
216
+ - [jgi_mt_header.json](input-files/jgi_mt_header.json) - JGI Metatranscriptome configuration
217
+ - [jgi_mg_header_v15.json](input-files/jgi_mg_header_v15.json) - JGI Metagenome v15 configuration
218
+ - [jgi_mt_header_v15.json](input-files/jgi_mt_header_v15.json) - JGI Metatranscriptome v15 configuration
219
+
@@ -0,0 +1,9 @@
1
+ mutts/__init__.py,sha256=iTcKOq5Id9qudNWyTGPJyJGuS19yG-FhPlmSfuJM7jU,217
2
+ mutts/cli.py,sha256=YzRnMLEtbFc2yuAYqWoljvk0Z9IEHDw3XefjTZE8Rbg,4912
3
+ mutts/retriever.py,sha256=B9cShEaeOY4BN1nWaML_UzMTR971X4rn1XfqyvZhAno,8333
4
+ mutts/spreadsheet.py,sha256=hSEEkTcTmkEFsktGG1_s4gAPuDys9Dnzbdk9cfJKwcY,5053
5
+ mutts/static-excel-tabs/JGI.Metagenome.NA.v15.xlsx,sha256=tYlYX5QZVjiiCrsCsxfL73O0BdvlqttBjUyqXsv8s7s,22099
6
+ mutts-1.0.3.dist-info/METADATA,sha256=JU9879qBfZ0iLTHWHCR1J6XUyUSXPq1sSOm7wW0b0Zg,8928
7
+ mutts-1.0.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
8
+ mutts-1.0.3.dist-info/entry_points.txt,sha256=rDp08H4MnNWGYHFE6ZAqOocRyTp68IPJsjXTcQppi8s,39
9
+ mutts-1.0.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.3
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,18 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: mutts
3
- Version: 1.0.0
4
- Summary: Metadata for User facility Template Transformations
5
- Author: Sujay Patil
6
- Author-email: spatil@lbl.gov
7
- Requires-Python: >=3.9,<4.0
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.9
10
- Classifier: Programming Language :: Python :: 3.10
11
- Classifier: Programming Language :: Python :: 3.11
12
- Classifier: Programming Language :: Python :: 3.12
13
- Classifier: Programming Language :: Python :: 3.13
14
- Requires-Dist: click (>=8.1.3,<9.0.0)
15
- Requires-Dist: openpyxl (>=3.0.10,<4.0.0)
16
- Requires-Dist: pandas (>=1.5.2,<2.0.0)
17
- Requires-Dist: python-dotenv (>=0.21.1,<0.22.0)
18
- Requires-Dist: requests (>=2.28.2,<3.0.0)
@@ -1,8 +0,0 @@
1
- mutts/__init__.py,sha256=iTcKOq5Id9qudNWyTGPJyJGuS19yG-FhPlmSfuJM7jU,217
2
- mutts/cli.py,sha256=Zqhv9Yeu7LOtRh38izhekSATb0yveqjLGH-qO9tc9Jk,4551
3
- mutts/retriever.py,sha256=B9cShEaeOY4BN1nWaML_UzMTR971X4rn1XfqyvZhAno,8333
4
- mutts/spreadsheet.py,sha256=5iTWcBBWm06avW5Xt7YvcgqbuOkgEmPJKqd2xcb63M8,4487
5
- mutts-1.0.0.dist-info/METADATA,sha256=w4iXydlpLKo16Fefj1E7GdBGbbL1Mc_X8NZqO07NQtY,698
6
- mutts-1.0.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
7
- mutts-1.0.0.dist-info/entry_points.txt,sha256=rDp08H4MnNWGYHFE6ZAqOocRyTp68IPJsjXTcQppi8s,39
8
- mutts-1.0.0.dist-info/RECORD,,