check-my-sample-sheet 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2024, Sequana Development Team (https://sequana.readthedocs.io)
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ * Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ * Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ * Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.4
2
+ Name: check-my-sample-sheet
3
+ Version: 1.0.0
4
+ Summary: Streamlit web application to validate Illumina sample sheets (bcl2fastq v1 and BCL Convert v2).
5
+ License-Expression: BSD-3-Clause
6
+ License-File: LICENSE
7
+ Keywords: illumina,samplesheet,bcl2fastq,bcl-convert,sequana,ngs
8
+ Author: Thomas Cokelaer
9
+ Author-email: cokelaer@gmail.com
10
+ Requires-Python: >=3.9,<4.0
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
+ Requires-Dist: requests
16
+ Requires-Dist: sequana (>=0.23.0)
17
+ Requires-Dist: streamlit (>=1.28)
18
+ Requires-Dist: streamlit-option-menu
19
+ Project-URL: Documentation, https://github.com/sequana/webapp_samplesheet
20
+ Project-URL: Homepage, https://github.com/sequana/webapp_samplesheet
21
+ Project-URL: Repository, https://github.com/sequana/webapp_samplesheet
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Check My Sample Sheet
25
+
26
+ [![Tests](https://github.com/sequana/webapp_samplesheet/actions/workflows/tests.yml/badge.svg)](https://github.com/sequana/webapp_samplesheet/actions/workflows/tests.yml)
27
+ [![Release](https://img.shields.io/github/v/release/sequana/webapp_samplesheet)](https://github.com/sequana/webapp_samplesheet/releases)
28
+ [![License](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](LICENSE)
29
+ ![Python](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)
30
+ [![Streamlit App](https://img.shields.io/badge/Streamlit-Live%20Demo-FF4B4B?logo=streamlit&logoColor=white)](https://check-my-sample-sheet.streamlit.app/)
31
+ ![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fcheck-my-sample-sheet.streamlit.app%2F&countColor=%23263759)
32
+
33
+ This is a Streamlit application that uses the **iem** module of Sequana (github.com/sequana/sequana) to check sample sheets from Illumina sequencers. Both formats are supported and detected automatically:
34
+
35
+ - **v1** (bcl2fastq): `[Data]` / `[Settings]` sections
36
+ - **v2** (BCL Convert): `[BCLConvert_Data]` / `[BCLConvert_Settings]` sections
37
+
38
+ A running demo is available here: https://check-my-sample-sheet.streamlit.app/
39
+
40
+
41
+ # General Information
42
+
43
+ If you want to contribute to this web application, please open a pull request on this repository. Note, however, that the core of the application lives in the Sequana project (https://github.com/sequana/sequana/), more specifically in the iem.py module.
44
+
45
+ The sanity checks implemented are based on experience, the bcl2fastq documentation (v2.20) and the BCL Convert specification.
46
+
47
+ # Installation
48
+
49
+ From PyPI:
50
+
51
+ pip install check-my-sample-sheet
52
+
53
+ Then launch the app (opens in your browser); extra arguments are forwarded to
54
+ `streamlit run` (e.g. `--server.port 8502`):
55
+
56
+ check-my-sample-sheet
57
+
58
+ # Local instance (from source)
59
+
60
+ git clone https://github.com/sequana/webapp_samplesheet
61
+ cd webapp_samplesheet
62
+
63
+ # install the dependencies (sequana, streamlit, ...)
64
+ pip install -r requirements.txt
65
+
66
+ # run the application locally in your browser
67
+ streamlit run check_my_sample_sheet/app.py
68
+
69
+ # Running the tests
70
+
71
+ pip install -r requirements-dev.txt
72
+ pytest
73
+
74
+
75
+
@@ -0,0 +1,51 @@
1
+ # Check My Sample Sheet
2
+
3
+ [![Tests](https://github.com/sequana/webapp_samplesheet/actions/workflows/tests.yml/badge.svg)](https://github.com/sequana/webapp_samplesheet/actions/workflows/tests.yml)
4
+ [![Release](https://img.shields.io/github/v/release/sequana/webapp_samplesheet)](https://github.com/sequana/webapp_samplesheet/releases)
5
+ [![License](https://img.shields.io/badge/license-BSD--3--Clause-blue.svg)](LICENSE)
6
+ ![Python](https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12-blue.svg)
7
+ [![Streamlit App](https://img.shields.io/badge/Streamlit-Live%20Demo-FF4B4B?logo=streamlit&logoColor=white)](https://check-my-sample-sheet.streamlit.app/)
8
+ ![Visitors](https://api.visitorbadge.io/api/visitors?path=https%3A%2F%2Fcheck-my-sample-sheet.streamlit.app%2F&countColor=%23263759)
9
+
10
+ This is a Streamlit application that uses the **iem** module of Sequana (github.com/sequana/sequana) to check sample sheets from Illumina sequencers. Both formats are supported and detected automatically:
11
+
12
+ - **v1** (bcl2fastq): `[Data]` / `[Settings]` sections
13
+ - **v2** (BCL Convert): `[BCLConvert_Data]` / `[BCLConvert_Settings]` sections
14
+
15
+ A running demo is available here: https://check-my-sample-sheet.streamlit.app/
16
+
17
+
18
+ # General Information
19
+
20
+ If you want to contribute to this web application, please open a pull request on this repository. Note, however, that the core of the application lives in the Sequana project (https://github.com/sequana/sequana/), more specifically in the iem.py module.
21
+
22
+ The sanity checks implemented are based on experience, the bcl2fastq documentation (v2.20) and the BCL Convert specification.
23
+
24
+ # Installation
25
+
26
+ From PyPI:
27
+
28
+ pip install check-my-sample-sheet
29
+
30
+ Then launch the app (opens in your browser); extra arguments are forwarded to
31
+ `streamlit run` (e.g. `--server.port 8502`):
32
+
33
+ check-my-sample-sheet
34
+
35
+ # Local instance (from source)
36
+
37
+ git clone https://github.com/sequana/webapp_samplesheet
38
+ cd webapp_samplesheet
39
+
40
+ # install the dependencies (sequana, streamlit, ...)
41
+ pip install -r requirements.txt
42
+
43
+ # run the application locally in your browser
44
+ streamlit run check_my_sample_sheet/app.py
45
+
46
+ # Running the tests
47
+
48
+ pip install -r requirements-dev.txt
49
+ pytest
50
+
51
+
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0"
@@ -0,0 +1,19 @@
1
+ """Console entry point: launch the Streamlit sample sheet validator.
2
+
3
+ Installed as the ``check-my-sample-sheet`` command (see pyproject.toml). Any
4
+ extra arguments are forwarded to ``streamlit run`` (e.g. ``--server.port``).
5
+ """
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ from streamlit.web import cli as stcli
10
+
11
+
12
def main():
    """Launch the packaged Streamlit app, forwarding extra CLI arguments.

    ``sys.argv`` is rewritten to ``streamlit run <path to app.py> [args...]``
    and control is handed to Streamlit's command-line entry point. The process
    then exits with the value returned by ``stcli.main()``.
    """
    # app.py sits next to this module; resolve it from __file__ so the command
    # works whatever the current working directory is.
    app_script = Path(__file__).resolve().parent.joinpath("app.py")
    forwarded_args = sys.argv[1:]
    sys.argv = ["streamlit", "run", str(app_script)] + forwarded_args
    exit_code = stcli.main()
    sys.exit(exit_code)
16
+
17
+
18
+ if __name__ == "__main__":
19
+ main()
@@ -0,0 +1,409 @@
1
+ #
2
+ # This file is part of Sequana software
3
+ #
4
+ # Copyright (c) 2023-2024 - Sequana Development Team
5
+ #
6
+ # Distributed under the terms of the 3-clause BSD license.
7
+ # The full license is in the LICENSE file, distributed with this software.
8
+ #
9
+ # website: https://github.com/sequana/webapp_samplesheet
10
+ # documentation: http://github.com/sequana/webapp_samplesheet/
11
+ #
12
+ ##############################################################################
13
+
14
+ import tempfile
15
+ import time
16
+ from collections import defaultdict
17
+ from pathlib import Path
18
+
19
+ import requests
20
+ import streamlit as st
21
+ from sequana.iem import SampleSheetFactory, get_sample_sheet_version
22
+ from streamlit_option_menu import option_menu
23
+
24
# Packaged assets (imgs, examples) are resolved from the directory containing
# this module, so they are found whatever the current working directory is.
HERE = Path(__file__).resolve().parent
LOGO = str(HERE.joinpath("imgs", "logo_256x256.png"))

# Version string displayed in the page title by main().
version = "1.0.0"

st.set_page_config(
    layout="wide",
    page_title="Check My Sample Sheet",
    page_icon=LOGO,
    menu_items={"Report a bug": "https://github.com/sequana/webapp_samplesheet/issues/new/choose"},
)
37
+
38
+
39
def print_checks(checks):
    """Display the outcome of every check and return the messages by status.

    A three-colour bar (green/yellow/red) is redrawn after each check: its
    total width is the share of checks processed so far, and each colour is the
    share of Success/Warning/Error among them. Once all checks are processed, a
    legend with the final counts is shown, followed by all the messages: errors
    first, then warnings, then successes.

    Parameters:
        checks (list): Dictionaries with a 'status' key ('Error', 'Warning' or
            'Success') and a 'msg' key.

    Returns:
        dict: Maps each status to its list of formatted messages. The keys
            'Error', 'Warning' and 'Success' are always present, possibly with
            an empty list.
    """

    def bar_html(success, warning, error, completed=0):
        # HTML of the bar: `completed` is the overall width (in percent); the
        # three other arguments are the widths of the coloured segments.
        return f"""
<div style="display: flex; width: {completed}%; height: 30px; border: 1px solid black;">
<div style="width: {success}%; background-color: green;"></div>
<div style="width: {warning}%; background-color: yellow;"></div>
<div style="width: {error}%; background-color: red;"></div>
</div>
"""

    emoji = {"Error": ":x:", "Success": ":white_check_mark:", "Warning": ":warning:"}
    msgs = defaultdict(list)
    counter = {"Error": 0, "Warning": 0, "Success": 0}

    # Placeholder that is overwritten in place each time the bar is redrawn
    bar_placeholder = st.empty()
    bar_placeholder.markdown(bar_html(0, 0, 0, 0), unsafe_allow_html=True)

    total = len(checks)
    for check in checks:
        status = check["status"]
        msg = check["msg"]

        msgs[status].append(f"{status} {emoji[status]}. {msg}\n\n")
        # short pause so that the bar is seen growing
        time.sleep(0.15)

        counter[status] += 1

        processed = sum(counter.values())
        share = {name: count / processed * 100 for name, count in counter.items()}
        completed = min(round(100 * (processed / float(total))), 100)
        bar_placeholder.markdown(
            bar_html(share["Success"], share["Warning"], share["Error"], completed),
            unsafe_allow_html=True,
        )

    # Legend with the final counts, centred below the bar
    n_success = counter["Success"]
    n_warning = counter["Warning"]
    n_error = counter["Error"]
    _, middle, _ = st.columns([1, 4, 1])
    with middle:
        st.markdown(
            f"""
<div style="display: flex; justify-content: space-between; width: 50%;">
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: green; margin-right: 5px;"></div>
<span>Success ({n_success})</span>
</div>
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: yellow; margin-right: 5px;"></div>
<span>Warning ({n_warning})</span>
</div>
<div style="display: flex; align-items: center;">
<div style="width: 20px; height: 20px; background-color: red; margin-right: 5px;"></div>
<span>Error ({n_error})</span>
</div>
</div>
""",
            unsafe_allow_html=True,
        )

    # Print every message, most severe first. Indexing the defaultdict here
    # also creates the entry of any status that never occurred, so the returned
    # dictionary always holds the three keys.
    for render, status in ((st.error, "Error"), (st.warning, "Warning"), (st.success, "Success")):
        for text in msgs[status]:
            render(text)
    return dict(msgs)
128
+
129
+
130
# Content of the text area; empty on the first run of a session.
if "code_input" not in st.session_state:
    st.session_state["code_input"] = ""

# Counter used to build the file_uploader widget key. Bumping it changes the
# key, which forces Streamlit to drop any previously uploaded file; this is how
# the uploader is reset when an example is loaded.
if "uploader_key" not in st.session_state:
    st.session_state["uploader_key"] = 0
137
+
138
+
139
def load_example(filename):
    """Return the text of an example sample sheet shipped with the application.

    Parameters:
        filename (str): Name of a file located in the ``examples`` directory
            that sits next to this module.

    Returns:
        str: The full content of the file.

    Raises:
        OSError: If the file cannot be opened (for instance, it does not exist).
    """
    examples_dir = Path(__file__).parent / "examples"
    # Decode explicitly as UTF-8 so that the result does not depend on the
    # locale encoding of the machine running the application.
    return (examples_dir / filename).read_text(encoding="utf-8")
144
+
145
+
146
def set_example(filename):
    """Button callback: copy a packaged example into the text area.

    The example content is stored under the ``code_input`` session key, which
    is the key of the text area. The ``uploader_key`` counter is then
    incremented: this changes the key of the file_uploader widget, so that a
    previously uploaded file does not silently take precedence over the
    example that was just loaded.
    """
    content = load_example(filename)
    state = st.session_state
    state["code_input"] = content
    state["uploader_key"] = state["uploader_key"] + 1
154
+
155
+
156
def _show_remote_example(url, timeout=10):
    """Download an example sample sheet and display it as a code block.

    A warning is shown instead of the code block when the download fails or
    the server answers with an HTTP error status.
    """
    try:
        response = requests.get(url, allow_redirects=True, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        st.warning(f"Could not download the example from {url} ({err}).")
        return
    st.code(response.content.decode(), language="bash")


def main():
    """Build the page: sidebar menu plus the view selected in that menu.

    Four views are available: the validation form (upload or paste a sample
    sheet, load an example, process it), a list of examples, the "About" text
    and the citation information.

    Raises:
        Exception: Any unexpected error raised while processing a sample sheet
            is re-raised after a pre-filled bug-report link has been displayed.
    """
    st.sidebar.write("Provided by the [Sequana team](https://github.com/sequana/sequana)")
    st.sidebar.image(LOGO)
    st.title(f"Check My Sample Sheet (v{version})")

    menu = ["Sample Sheet Validation (Illumina)", "Examples", "About", "How to cite"]

    # 1. as sidebar menu
    with st.sidebar:
        choice = option_menu(
            "Main Menu", menu, icons=["gear", "gear", "cloud-upload", ""], menu_icon="cast", default_index=0
        )
        # NOTE(review): assumed to belong to the sidebar block; confirm against
        # the original indentation.
        st.markdown(
            "**Resources:**\n\n"
            "- [Source code](https://github.com/sequana/webapp_samplesheet)\n"
            "- [Report a bug](https://github.com/sequana/webapp_samplesheet/issues/new/choose)\n"
            "- [Sequana documentation](https://sequana.readthedocs.io)"
        )

    if choice == "Sample Sheet Validation (Illumina)":

        st.markdown(
            "This tool validates Illumina sample sheets. Both the **v1** format (bcl2fastq v2.20) and the "
            "**v2** format (BCL Convert) are supported; the version is detected automatically. "
            "It checks the structure, mandatory sections, sample identifiers, indexes, and more. "
            "Provide a sample sheet below for validation, or load one of the examples to try the tool. "
            "More examples are available in the **Examples** section of the menu."
        )
        st.subheader("Input Sample Sheet", divider="blue")

        # create a 3-column layout
        col1, col2, col3 = st.columns([4, 1, 4])
        with col1:
            # the key embeds a counter so that set_example() can reset the widget
            data_file = st.file_uploader(
                "Drop a sample sheet below and press the **Process** button. ",
                type=["csv", "txt"],
                key=f"uploader_{st.session_state.uploader_key}",
            )
        with col2:
            # Centered "OR" text
            st.markdown("<div style='text-align: center;'><br><br>OR</div>", unsafe_allow_html=True)

        with col3:
            code = st.text_area(
                "Paste your sample sheet content here and press the **Process** button.", key="code_input"
            )

        st.subheader("Load an Example", divider="blue")
        st.caption(
            "Click a button to load a sample sheet into the text area above. "
            "Examples 1, 2 and 4 are valid sheets; Example 3 is invalid and demonstrates how errors are reported. "
            "Example 4 is a v2 (BCL Convert) sheet, the others are v1 (bcl2fastq)."
        )
        example_col1, example_col2, example_col3, example_col4 = st.columns(4)
        with example_col1:
            st.button("Example 1: Dual indexing (v1)", on_click=set_example, args=("sample_sheet.csv",))
        with example_col2:
            st.button("Example 2: Single index + Settings (v1)", on_click=set_example, args=("sample_sheet_settings_index.csv",))
        with example_col3:
            st.button("Example 3: Invalid (bad sample ID)", on_click=set_example, args=("Bad_SampleSheet_alphanum.csv",))
        with example_col4:
            st.button("Example 4: BCL Convert (v2)", on_click=set_example, args=("sample_sheet_v2_bclconvert.csv",))

        if st.button(":gear: Process :gear:"):

            if data_file is None:
                # no drag/drop data: use the pasted code (if any)
                samplesheet = code
            else:
                try:
                    samplesheet = data_file.read().decode()
                except Exception as err:
                    # Keep the best-effort fallback to the pasted content, but
                    # tell the user that the upload was ignored.
                    st.warning(
                        f"The uploaded file could not be read as text ({err}); using the pasted content instead."
                    )
                    samplesheet = code
                    data_file = None

            try:
                process_sample_sheet(data_file, samplesheet)
            except Exception as err:
                import urllib.parse

                base_url = "https://github.com/sequana/webapp_samplesheet/issues/new"

                # prefix every line of the sheet before embedding it in the issue body
                indented_sheet = "\n".join([" " + x for x in samplesheet.split("\n")])
                params = {
                    "title": "Automatic error from the check-my-sample-sheet website",
                    "body": f"Dear developer(s),\n\nI encountered an unexpected error using the following sample sheet:\n\n{indented_sheet}\n\nHere is the full error message:\n\n {err}\n\nPlease let us know what you think might be the reason for the error.",
                }
                url = f"{base_url}?{urllib.parse.urlencode(params)}"
                st.markdown(
                    f'<div style="background-color: #ffcccc; padding: 10px; border-radius: 5px;"> Sorry, an unknown error occurred. Please create an issue <a href="{url}">here</a> to report it. A page will open; you will need to click on the "Submit new issue" button. </div>',
                    unsafe_allow_html=True,
                )

                # re-raise the original exception so its type and traceback are kept
                raise

    elif choice == "Examples":
        st.write("Below are several sample sheet examples, both valid and invalid, to illustrate the expected format.")
        st.subheader("1 - Minimalist Example (only [Data] section)")
        st.write(
            "In this example, the sample sheet is simplified to keep only the [Data] section and the mandatory columns "
            "(index and Sample_ID). Note that 'Sample_ID' is not strictly mandatory in bcl2fastq, but we make it "
            "mandatory in this application as a design choice for better traceability."
        )
        st.code("[Data]\nSample_ID,index\nID1,TGACCA\nID2,CATTTT")

        st.subheader("2 - [Data] section with dual indexing and no [Settings] section")
        st.write(
            "A more common example is shown below. The Illumina sample sheet uses sections enclosed in square brackets. "
            "Up to four sections may appear: [Header], [Reads], [Settings] and [Data]. This example shows the [Header], "
            "[Reads] and [Data] sections (the [Settings] section is missing). According to the Illumina specification, "
            "all sections are optional, including [Data]. However, when [Data] is missing, all reads are stored as "
            "undetermined, which is rarely useful. For this reason, we require the [Data] section in this application."
        )
        _show_remote_example(
            "https://raw.githubusercontent.com/sequana/webapp_samplesheet/main/examples/sample_sheet.csv"
        )

        st.subheader("3 - [Data] section with single index and a [Settings] section")
        _show_remote_example(
            "https://raw.githubusercontent.com/sequana/webapp_samplesheet/main/examples/sample_sheet_settings_index.csv"
        )

        st.subheader("4 - Example of an erroneous sample sheet (invalid sample ID name)")
        _show_remote_example(
            "https://raw.githubusercontent.com/sequana/webapp_samplesheet/main/examples/Bad_SampleSheet_alphanum.csv"
        )

        st.subheader("5 - Example of an erroneous sample sheet (extra trailing semicolons)")
        _show_remote_example(
            "https://raw.githubusercontent.com/sequana/webapp_samplesheet/main/examples/Bad_SampleSheet_extra_semicolons.csv"
        )

    elif choice == "About":
        st.subheader("About")
        st.markdown(
            "This application is part of the [Sequana Project](https://github.com/sequana), which is dedicated to NGS analysis. "
            "Please see the [online documentation](https://sequana.readthedocs.io) as well as https://sequana.github.io for more information. "
            "The code used in this application is based on the [IEM module](https://github.com/sequana/sequana) of the Sequana Python library. "
            "It was developed based on the bcl2fastq documentation (v2.20) and is intended for users who want to demultiplex their data properly. "
            "The source code for this web application is available on [GitHub](https://github.com/sequana/webapp_samplesheet)."
            "\n\nThe different checks performed are described in this preprint: [Research Square](https://www.researchsquare.com/article/rs-5268893/v1)."
        )
        st.info(
            "Application Author: Thomas Cokelaer\n\nIEM module provided by The Sequana Team\n\nOriginal beta testing: Laure Lemée, Etienne Kornobis, Rania Ouazahrou"
        )
    else:
        st.subheader("How to cite?")
        st.info("Check My Sample Sheet application (this website):\n\nLemée L. et al, [Research Square](https://www.researchsquare.com/article/rs-5268893/v1)")

        st.info(
            "The Sequana framework used to check the sample sheet:\n\nCokelaer T. et al, (2017), 'Sequana': a Set of Snakemake NGS pipelines, Journal of Open Source Software, 2(16), 352, JOSS DOI [doi:10.21105/joss.00352](https://joss.theoj.org/papers/10.21105/joss.00352)"
        )
316
+
317
+
318
def process_sample_sheet(data_file, samplesheet):
    """Validate a sample sheet and display the results in the page.

    The content is written to a temporary file, from which the sample sheet
    version is detected and the Sequana sample sheet object is built. The
    function then displays, in order: the detected version, the overall
    validation result, the detail of every check, the original content, a
    download button for a quick-fixed file (only when at least one check
    failed) and the parsed data section as a table. The temporary files are
    removed before returning, whether or not an error occurred.

    Parameters:
        data_file: The uploaded file, or None when the content was pasted.
            Not used by the validation itself; kept so that existing callers
            keep working.
        samplesheet (str): The content of the sample sheet.

    Returns:
        None
    """
    # Every temporary file created below is recorded here and removed in the
    # ``finally`` clause, so that repeated validations do not pile up files.
    temp_paths = []
    try:
        # save locally: the Sequana helpers below take a filename
        with tempfile.NamedTemporaryFile(delete=False, mode="w") as fout:
            temp_paths.append(Path(fout.name))
            fout.write(samplesheet)
        # the file is flushed and closed once the ``with`` block is left

        sheet_version = get_sample_sheet_version(fout.name)
        iem = SampleSheetFactory(fout.name)

        if sheet_version == "v2":
            st.info(":information_source: Detected an Illumina **v2** sample sheet: validating against the **BCL Convert** specification.")
        else:
            st.info(":information_source: Detected an Illumina **v1** sample sheet: validating against the **bcl2fastq v2.20** specification.")

        try:
            iem.validate()
        except SystemExit as err:
            # a failed validation surfaces here as SystemExit; report it
            # instead of letting it end the script run
            st.header("Validation Results", divider="blue")
            msg = "Error(s) found. :sob: See the message below from Sequana for details."
            st.error(msg)
            st.info(err)
        else:
            st.header("Validation Results", divider="blue")
            # emoji within div do not seem to work
            msg = ":champagne: Your sample sheet looks correct. :champagne:"
            st.success(msg)

        # =============================================================== validation
        st.subheader("Details about the checks", divider="blue")
        checks = iem.checker()

        msgs = print_checks(checks)

        # =============================================================== original file
        st.subheader("Original file", divider="blue")
        st.code(samplesheet)

        # =============================================================== corrected file
        if msgs.get("Error"):
            st.subheader("Corrected file", divider="blue")
            st.caption(
                "The quick fix below removes extra trailing semicolons. Other types of errors are difficult to correct "
                "automatically, so you will need to fix the file manually. We strongly recommend using the IEM software "
                "from Illumina for that purpose. If you need to edit the file quickly, do not use Excel: although the "
                "extension is usually .csv, an Illumina sample sheet is not a standard CSV file and Excel may corrupt it."
            )
            # reserve a file name, close the handle, then let quick_fix write to it
            with tempfile.NamedTemporaryFile(delete=False, mode="w") as fixed:
                temp_paths.append(Path(fixed.name))
            iem.quick_fix(fixed.name)
            with open(fixed.name, "r") as fin:
                # the content is read into memory here, so the file can be
                # deleted once the function returns
                st.download_button(
                    label="Download data as CSV",
                    data=fin.read(),
                    file_name="sample.csv",
                    mime="text/csv",
                )

        # =============================================================== data section
        st.subheader("Data section", divider="blue")
        st.caption(
            "For convenience, the [Data] section is shown below as a parsed table. "
            "Check that the values are consistent with your expectations (for example, that each index appears in a single column)."
        )
        df = iem.df.copy()
        st.write(df)
    finally:
        # The input file is kept until this point because the sample sheet
        # object may read it again while checking or fixing.
        for path in temp_paths:
            path.unlink(missing_ok=True)
406
+
407
+
408
+ if __name__ == "__main__":
409
+ main()
@@ -0,0 +1,36 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Investigator Name,tartampion
4
+ Experiment Name,Daudet
5
+ Date,21/06/2024
6
+ Workflow,GenerateFASTQ
7
+ Application,NextSeq FASTQ Only
8
+ Instrument Type,NextSeq/MiniSeq
9
+ Assay,TruSeq DNA PCR-Free
10
+ Index Adapters,"IDT-ILMN TruSeq DNA UD Indexes (96 Indexes)"
11
+ Description,TEST_FILE
12
+ Chemistry,Amplicon
13
+
14
+ [Reads]
15
+ 151
16
+ 151
17
+
18
+ [Settings]
19
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
20
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
21
+
22
+ [Data]
23
+ Sample_ID,Sample_Name,Sample_Plate,Sample_Well,Index_Plate_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
24
+ TagV1,,,,E01,UDI0005,ATCCACTG,UDI0005,ACGCACCT,B18249,
25
+ TagV3,,,,F01,UDI0006,GCTTGTCA,UDI0006,GTATGTTC,B18249,
26
+ TagV4,,,,G01,UDI0007,CAAGCTAG,UDI0007,CGCTATGT,B18249,
27
+ TagV5,,,,H01,UDI0008,TGGATCGA,UDI0008,TATCGCAC,B18249,
28
+ 3D+LmeA14,,,,A02,UDI0009,AGTTCAGG,UDI0009,TCTGTTGG,B18249,
29
+ 3D+LmeA30,,,,B02,UDI0010,GACCTGAA,UDI0010,CTCACCAA,B18249,
30
+ 3D+LmeA56,,,,C02,UDI0011,TCTCTACT,UDI0011,GAACCGCG,B18249,
31
+ 3D+LmeA9,,,,D02,UDI0012,CTCTCGTC,UDI0012,AGGTTATA,B18249,
32
+ 3D+LmeA19,,,,E02,UDI0013,CCAAGTCT,UDI0013,TCATCCTT,B18249,
33
+ 3D+LmeA26,,,,F02,UDI0014,TTGGACTC,UDI0014,CTGCTTCC,B18249,
34
+ 3D+LmeA28,,,,G02,UDI0015,GGCTTAAG,UDI0015,GGTCACGA,B18249,
35
+ 3D+LmeA42,,,,H02,UDI0016,AATCCGGA,UDI0016,AACTGTAG,B18249,
36
+ 3D+LmeA55,,,,A03,UDI0017,TAATACAG,UDI0017,GTGAATAT,B18249,
@@ -0,0 +1,17 @@
1
+ [Header];;;;;;
2
+ IEMFileVersion;4;;;
3
+ Investigator Name;;;;
4
+ Experiment Name;160104-SR-310v2-std;;
5
+ Chemistry;Default;;;;
6
+ ;;;;;;;;;;
7
+ [Reads];;;;;;;;
8
+ 315;;;;
9
+ ;;;
10
+ [Settings];;;;
11
+ Adapter;AGATCGGAAGAGCACACGTCTGAACTCCAGTCA;;;
12
+ AdapterRead2;AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT;;;
13
+ ;;;
14
+ [Data];;;
15
+ Sample_ID;Sample_Name;I7_Index_ID;index;Sample_Project
16
+ A;;NF01;CGATGT;
17
+ B;;NF03;ACAGTG;
@@ -0,0 +1,4 @@
1
+ [Data]
2
+ Sample_ID,index
3
+ ID1,TGACCA
4
+ ID2,CATTTT
@@ -0,0 +1,7 @@
1
+ [Settings]
2
+ Adapter, ACGTACGTN
3
+
4
+ [Data]
5
+ Sample_ID,index
6
+ ID1,TGACCa
7
+ ID1,CATTTT
@@ -0,0 +1,8 @@
1
+ [Settings];;;;
2
+ Adapter;AGATCGGAAGAGCACACGTCTGAACTCCAGTCA;;;
3
+ AdapterRead2;AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT;;;
4
+ ;;;
5
+ [Data];;;
6
+ Sample_ID;Sample_Name;I7_Index_ID;index;Sample_Project
7
+ A;;NF01;CGATGT;
8
+ B;;NF03;ACAGTG;
@@ -0,0 +1,21 @@
1
+ [Header]
2
+ Local Run Manager Analysis Id,43043
3
+ Experiment Name,MyProject
4
+ Date,2020-01-24
5
+ Module,GenerateFASTQ - 2.0.0
6
+ Workflow,GenerateFASTQ
7
+ Library Prep Kit,Custom
8
+ Description,200123-PE-MP-150
9
+ Chemistry,Default
10
+
11
+ [Reads]
12
+ 151
13
+ 151
14
+
15
+ [Data]
16
+ Sample_ID,Sample_Name,Description,index,I7_Index_ID,Sample_Project
17
+ 412,412,,TGACCA,TGACCA,B3202
18
+ LMD666,LMD666,,CATTTT,CATTTT,B3202
19
+ TaoP3,TaoP3,,ACAGTG,ACAGTG,B3202
20
+ TaoP10,TaoP10,,GTGAAA,GTGAAA,B3202
21
+
@@ -0,0 +1,24 @@
1
+ [Header]
2
+ IEMFileVersion,4
3
+ Investigator Name,
4
+ Experiment Name,
5
+ Date,8/23/2016
6
+ Workflow,GenerateFASTQ
7
+ Application,FASTQ Only
8
+ Assay,NEXTFlex-PCRfree
9
+ Description,
10
+ Chemistry,Default
11
+
12
+ [Reads]
13
+ 300
14
+ 300
15
+
16
+ [Settings]
17
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
18
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
19
+
20
+ [Data]
21
+ Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
22
+ ID1,,TEST,A01,NF02,TGACCA,,
23
+ ID2,,TEST,A02,NF03,ACAGTG,,
24
+
@@ -0,0 +1,44 @@
1
+ [Header],
2
+ FileFormatVersion,2
3
+ RunName,20260616_B21184-18S-PE-MPdbl-151-10-5M
4
+ InstrumentPlatform,MiSeqi100Series
5
+ IndexOrientation,Forward
6
+ AnalysisLocation,Local
7
+
8
+ [Reads]
9
+ Read1Cycles,151
10
+ Read2Cycles,151
11
+ Index1Cycles,10
12
+ Index2Cycles,10
13
+
14
+ [Sequencing_Settings]
15
+ LibraryPrepKits,NexteraXT
16
+
17
+ [BCLConvert_Settings]
18
+ SoftwareVersion,4.4.6
19
+ AdapterRead1,CTGTCTCTTATACACATCT
20
+ AdapterRead2,CTGTCTCTTATACACATCT
21
+ OverrideCycles,R1:Y151;I1:I10;I2:I10;R2:Y151
22
+ FastqCompressionFormat,dragen
23
+ NoLaneSplitting,true
24
+ GenerateFastqcMetrics,true
25
+
26
+ [BCLConvert_Data]
27
+ Sample_ID,Index,Index2
28
+ 1,AGGTCAGATA,CTACAAGATA
29
+ 2,CGACATCCGA,TACGTTCATT
30
+ 3,ATTCCATAAG,TGCCTGGTGG
31
+ 4,CACAATAGGA,TCCATCCGAG
32
+ 5,AACATCGCGC,GTCCACTTGT
33
+
34
+
35
+ [Cloud_Settings]
36
+ GeneratedVersion,1.25.0.202605080250
37
+
38
+ [Cloud_Data]
39
+ Sample_ID,ProjectName,LibraryName,LibraryPrepKitName,IndexAdapterKitName
40
+ 1,BXXXX,1_AGGTCAGATA_CTACAAGATA,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
41
+ 2,BXXXX,2_CGACATCCGA_TACGTTCATT,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
42
+ 3,BXXXX,3_ATTCCATAAG_TGCCTGGTGG,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
43
+ 4,BXXXX,4_CACAATAGGA_TCCATCCGAG,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
44
+ 5,BXXXX,5_AACATCGCGC_GTCCACTTGT,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
@@ -0,0 +1,46 @@
1
[build-system]
requires = ["poetry-core>=2.0.0"]
build-backend = "poetry.core.masonry.api"

[project]
name = "check-my-sample-sheet"
version = "1.0.0"
description = "Streamlit web application to validate Illumina sample sheets (bcl2fastq v1 and BCL Convert v2)."
readme = "README.md"
license = "BSD-3-Clause"
requires-python = ">=3.9,<4.0"
authors = [{ name = "Thomas Cokelaer", email = "cokelaer@gmail.com" }]
keywords = ["illumina", "samplesheet", "bcl2fastq", "bcl-convert", "sequana", "ngs"]
classifiers = [
    "Development Status :: 5 - Production/Stable",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
]
dependencies = [
    "requests",
    # sequana >= 0.23.0 ships BCLConvert / SampleSheetFactory / get_sample_sheet_version
    "sequana>=0.23.0",
    "streamlit>=1.28",
    "streamlit-option-menu",
]

[project.urls]
documentation = "https://github.com/sequana/webapp_samplesheet"
homepage = "https://github.com/sequana/webapp_samplesheet"
repository = "https://github.com/sequana/webapp_samplesheet"

# Console command that launches the Streamlit application.
[project.scripts]
check-my-sample-sheet = "check_my_sample_sheet.__main__:main"

[tool.poetry]
packages = [{ include = "check_my_sample_sheet" }]
# Example sample sheets and images are loaded at runtime from the package directory.
include = ["check_my_sample_sheet/examples/*", "check_my_sample_sheet/imgs/*"]

[tool.poetry.group.dev.dependencies]
pytest = "*"
pytest-cov = "*"

[tool.pytest.ini_options]
addopts = "--cov=check_my_sample_sheet --cov-report=term-missing"
testpaths = ["tests"]