check-my-sample-sheet 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- check_my_sample_sheet/__init__.py +1 -0
- check_my_sample_sheet/__main__.py +19 -0
- check_my_sample_sheet/app.py +409 -0
- check_my_sample_sheet/examples/Bad_SampleSheet_alphanum.csv +36 -0
- check_my_sample_sheet/examples/Bad_SampleSheet_extra_semicolons.csv +17 -0
- check_my_sample_sheet/examples/case1.csv +4 -0
- check_my_sample_sheet/examples/case2.csv +7 -0
- check_my_sample_sheet/examples/case3.csv +8 -0
- check_my_sample_sheet/examples/sample_sheet.csv +21 -0
- check_my_sample_sheet/examples/sample_sheet_settings_index.csv +24 -0
- check_my_sample_sheet/examples/sample_sheet_v2_bclconvert.csv +44 -0
- check_my_sample_sheet/imgs/logo_256x256.png +0 -0
- check_my_sample_sheet-1.0.0.dist-info/METADATA +75 -0
- check_my_sample_sheet-1.0.0.dist-info/RECORD +17 -0
- check_my_sample_sheet-1.0.0.dist-info/WHEEL +4 -0
- check_my_sample_sheet-1.0.0.dist-info/entry_points.txt +3 -0
- check_my_sample_sheet-1.0.0.dist-info/licenses/LICENSE +29 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Console entry point: launch the Streamlit sample sheet validator.
|
|
2
|
+
|
|
3
|
+
Installed as the ``check-my-sample-sheet`` command (see pyproject.toml). Any
|
|
4
|
+
extra arguments are forwarded to ``streamlit run`` (e.g. ``--server.port``).
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
from streamlit.web import cli as stcli
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def main():
    """Run the packaged Streamlit app through the Streamlit command line.

    ``sys.argv`` is rewritten to ``streamlit run <app.py> [extra args...]``,
    where ``app.py`` is the file located next to this module and the extra
    arguments are the ones given to this command. The process then exits with
    the value returned by ``stcli.main()``.
    """
    forwarded_args = sys.argv[1:]
    app_path = Path(__file__).resolve().parent.joinpath("app.py")
    sys.argv = ["streamlit", "run", str(app_path)] + forwarded_args
    sys.exit(stcli.main())
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Allow ``python -m check_my_sample_sheet`` / direct execution of this file.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
#
|
|
2
|
+
# This file is part of Sequana software
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2023-2024 - Sequana Development Team
|
|
5
|
+
#
|
|
6
|
+
# Distributed under the terms of the 3-clause BSD license.
|
|
7
|
+
# The full license is in the LICENSE file, distributed with this software.
|
|
8
|
+
#
|
|
9
|
+
# website: https://github.com/sequana/webapp_samplesheet
|
|
10
|
+
# documentation: http://github.com/sequana/webapp_samplesheet/
|
|
11
|
+
#
|
|
12
|
+
##############################################################################
|
|
13
|
+
|
|
14
|
+
import tempfile
|
|
15
|
+
import time
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
import requests
|
|
20
|
+
import streamlit as st
|
|
21
|
+
from sequana.iem import SampleSheetFactory, get_sample_sheet_version
|
|
22
|
+
from streamlit_option_menu import option_menu
|
|
23
|
+
|
|
24
|
+
# Absolute directory of this module. Packaged assets (imgs, examples) are
# resolved from here so that they are found whatever the current working
# directory is.
HERE = Path(__file__).resolve().parent
LOGO = str(HERE.joinpath("imgs", "logo_256x256.png"))

# Streamlit page configuration (title, icon, layout and the bug-report menu entry).
st.set_page_config(
    layout="wide",
    page_icon=LOGO,
    page_title="Check My Sample Sheet",
    menu_items={"Report a bug": "https://github.com/sequana/webapp_samplesheet/issues/new/choose"},
)

# Version string displayed in the page title.
version = "1.0.0"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def print_checks(checks):
    """Display a list of checks in the Streamlit page and return their messages.

    Each check is a dictionary with a 'status' key ("Error", "Warning" or
    "Success") and a 'msg' key. While the checks are consumed one by one, a
    colour-coded bar (green / yellow / red) is refreshed in place; its width
    follows the fraction of checks processed so far. Once all checks are
    processed, a legend with the three counts is drawn, then every message is
    printed with st.error, st.warning and st.success (in that order).

    Parameters:
    checks (list): A list of dictionaries, each with 'status' and 'msg' keys.

    Returns:
    dict: Maps each status (Error, Warning, Success) to the list of formatted
    messages collected for it. The three keys are always present.
    """

    def colored_bar(success, warning, error, completed=0):
        # HTML of the bar: the outer width is the completion percentage, the
        # three inner widths are the share of each status.
        return f"""
        <div style="display: flex; width: {completed}%; height: 30px; border: 1px solid black;">
            <div style="width: {success}%; background-color: green;"></div>
            <div style="width: {warning}%; background-color: yellow;"></div>
            <div style="width: {error}%; background-color: red;"></div>
        </div>
        """

    def add_legend(success, warning, error):
        # Legend with one coloured square and one count per status.
        legend_html = f"""
            <div style="display: flex; justify-content: space-between; width: 50%;">
                <div style="display: flex; align-items: center;">
                    <div style="width: 20px; height: 20px; background-color: green; margin-right: 5px;"></div>
                    <span>Success ({success})</span>
                </div>
                <div style="display: flex; align-items: center;">
                    <div style="width: 20px; height: 20px; background-color: yellow; margin-right: 5px;"></div>
                    <span>Warning ({warning})</span>
                </div>
                <div style="display: flex; align-items: center;">
                    <div style="width: 20px; height: 20px; background-color: red; margin-right: 5px;"></div>
                    <span>Error ({error})</span>
                </div>
            </div>
            """
        st.markdown(legend_html, unsafe_allow_html=True)

    emoji = {"Error": ":x:", "Success": ":white_check_mark:", "Warning": ":warning:"}
    msgs = defaultdict(list)

    # Placeholder that is overwritten each time the bar is refreshed.
    bar_placeholder = st.empty()
    bar_placeholder.markdown(colored_bar(0, 0, 0, 0), unsafe_allow_html=True)

    tally = {"Error": 0, "Warning": 0, "Success": 0}
    total = len(checks)

    for check in checks:
        status = check["status"]
        msg = check["msg"]

        msgs[status].append(f"{status} {emoji[status]}. {msg}\n\n")
        # short pause between two refreshes of the bar
        time.sleep(0.15)

        tally[status] += 1

        processed = sum(tally.values())
        success_pct = tally["Success"] / processed * 100
        warning_pct = tally["Warning"] / processed * 100
        error_pct = tally["Error"] / processed * 100

        completed = min(round(100 * (processed / float(total))), 100)
        bar_html = colored_bar(success_pct, warning_pct, error_pct, completed)
        bar_placeholder.markdown(bar_html, unsafe_allow_html=True)

    # Legend drawn in the middle column of a 1-4-1 layout.
    middle = st.columns([1, 4, 1])[1]
    with middle:
        add_legend(tally["Success"], tally["Warning"], tally["Error"])

    # Print all messages, errors first. Looking up the three statuses on the
    # defaultdict also guarantees that the three keys exist in the result.
    renderers = (("Error", st.error), ("Warning", st.warning), ("Success", st.success))
    for status, render in renderers:
        for text in msgs[status]:
            render(text)

    return dict(msgs)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# Content of the text area (this session-state entry is also the widget key).
if "code_input" not in st.session_state:
    st.session_state["code_input"] = ""

# Counter embedded in the file_uploader key. Bumping it when an example is loaded
# changes the widget key, which forces Streamlit to drop any previously uploaded file.
if "uploader_key" not in st.session_state:
    st.session_state["uploader_key"] = 0
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def load_example(filename):
    """Return the content of an example file shipped in the ``examples`` directory.

    The directory is resolved relative to this module, not to the current
    working directory. The file is decoded as UTF-8 explicitly so that the
    result does not depend on the platform's default encoding.

    Parameters:
    filename (str): Name of a file inside the ``examples`` directory.

    Returns:
    str: The text content of the file.

    Raises:
    OSError: If the file cannot be opened (e.g. it does not exist).
    """
    examples_dir = Path(__file__).parent / "examples"
    return (examples_dir / filename).read_text(encoding="utf-8")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def set_example(filename):
    """Button callback: put the content of an example file into the text area.

    The text is stored under the ``code_input`` session-state entry. The
    ``uploader_key`` counter is then incremented, which changes the key of the
    file_uploader widget so that a previously uploaded file does not silently
    take precedence over the loaded example.
    """
    example_text = load_example(filename)
    st.session_state["code_input"] = example_text
    st.session_state["uploader_key"] += 1
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def main():
    """Build the page: sidebar, main menu and the section selected in the menu.

    Four sections are available:

    - "Sample Sheet Validation (Illumina)": the user uploads a file or pastes a
      sample sheet (or loads a packaged example) and presses the Process button;
      the content is then handed to ``process_sample_sheet``. An uploaded file
      takes precedence over the pasted text.
    - "Examples": shows example sample sheets. They are read from the packaged
      ``examples`` directory with ``load_example``.
    - "About" and "How to cite": static information.

    If ``process_sample_sheet`` raises, a box with a link to a pre-filled GitHub
    issue is displayed and the original exception is re-raised.
    """
    st.sidebar.write("Provided by the [Sequana team](https://github.com/sequana/sequana)")
    st.sidebar.image(LOGO)
    st.title(f"Check My Sample Sheet (v{version})")

    menu = ["Sample Sheet Validation (Illumina)", "Examples", "About", "How to cite"]

    # 1. as sidebar menu
    with st.sidebar:
        choice = option_menu(
            "Main Menu", menu, icons=["gear", "gear", "cloud-upload", ""], menu_icon="cast", default_index=0
        )
        st.markdown(
            "**Resources:**\n\n"
            "- [Source code](https://github.com/sequana/webapp_samplesheet)\n"
            "- [Report a bug](https://github.com/sequana/webapp_samplesheet/issues/new/choose)\n"
            "- [Sequana documentation](https://sequana.readthedocs.io)"
        )

    if choice == "Sample Sheet Validation (Illumina)":

        st.markdown(
            "This tool validates Illumina sample sheets. Both the **v1** format (bcl2fastq v2.20) and the "
            "**v2** format (BCL Convert) are supported; the version is detected automatically. "
            "It checks the structure, mandatory sections, sample identifiers, indexes, and more. "
            "Provide a sample sheet below for validation, or load one of the examples to try the tool. "
            "More examples are available in the **Examples** section of the menu."
        )
        st.subheader("Input Sample Sheet", divider="blue")

        # create a 3-column layout
        col1, col2, col3 = st.columns([4, 1, 4])
        with col1:
            data_file = st.file_uploader(
                "Drop a sample sheet below and press the **Process** button. ",
                type=["csv", "txt"],
                key=f"uploader_{st.session_state.uploader_key}",
            )
        with col2:
            # Centered "OR" text
            st.markdown("<div style='text-align: center;'><br><br>OR</div>", unsafe_allow_html=True)

        with col3:
            code = st.text_area(
                "Paste your sample sheet content here and press the **Process** button.", key="code_input"
            )

        st.subheader("Load an Example", divider="blue")
        st.caption(
            "Click a button to load a sample sheet into the text area above. "
            "Examples 1, 2 and 4 are valid sheets; Example 3 is invalid and demonstrates how errors are reported. "
            "Example 4 is a v2 (BCL Convert) sheet, the others are v1 (bcl2fastq)."
        )
        example_col1, example_col2, example_col3, example_col4 = st.columns(4)
        with example_col1:
            st.button("Example 1: Dual indexing (v1)", on_click=set_example, args=("sample_sheet.csv",))
        with example_col2:
            st.button("Example 2: Single index + Settings (v1)", on_click=set_example, args=("sample_sheet_settings_index.csv",))
        with example_col3:
            st.button("Example 3: Invalid (bad sample ID)", on_click=set_example, args=("Bad_SampleSheet_alphanum.csv",))
        with example_col4:
            st.button("Example 4: BCL Convert (v2)", on_click=set_example, args=("sample_sheet_v2_bclconvert.csv",))

        if st.button(":gear: Process :gear:"):

            if data_file is not None:
                try:
                    samplesheet = data_file.read().decode()
                except UnicodeDecodeError:
                    # Do not silently fall back to the pasted text: the user
                    # would get a report about something else than their file.
                    st.error("The uploaded file could not be decoded as UTF-8 text. Please check its encoding.")
                    return
            else:
                # if there is no drag/drop data, we use the pasted code (if any)
                samplesheet = code

            try:
                process_sample_sheet(data_file, samplesheet)
            except Exception as err:
                import urllib.parse

                base_url = "https://github.com/sequana/webapp_samplesheet/issues/new"

                # indent the sheet so that it is rendered as a code block in the issue
                samplesheet = "\n".join(["    " + x for x in samplesheet.split("\n")])
                params = {
                    "title": "Automatic error from the check-my-sample-sheet website",
                    "body": f"Dear developer(s),\n\nI encountered an unexpected error using the following sample sheet:\n\n{samplesheet}\n\nHere is the full error message:\n\n    {err}\n\nPlease let us know what you think might be the reason for the error.",
                }
                url = f"{base_url}?{urllib.parse.urlencode(params)}"
                st.markdown(
                    f'<div style="background-color: #ffcccc; padding: 10px; border-radius: 5px;"> Sorry, an unknown error occurred. Please create an issue <a href="{url}">here</a> to report it. A page will open; you will need to click on the "Submit new issue" button. </div>',
                    unsafe_allow_html=True,
                )

                # re-raise the original exception, keeping its type and traceback
                raise

    elif choice == "Examples":
        st.write("Below are several sample sheet examples, both valid and invalid, to illustrate the expected format.")
        st.subheader("1 - Minimalist Example (only [Data] section)")
        st.write(
            "In this example, the sample sheet is simplified to keep only the [Data] section and the mandatory columns "
            "(index and Sample_ID). Note that 'Sample_ID' is not strictly mandatory in bcl2fastq, but we make it "
            "mandatory in this application as a design choice for better traceability."
        )
        st.code(
            """[Data]
Sample_ID,index
ID1,TGACCA
ID2,CATTTT"""
        )

        # The examples below are read from the packaged ``examples`` directory,
        # so this page needs no network access.
        st.subheader("2 - [Data] section with dual indexing and no [Settings] section")
        data = load_example("sample_sheet.csv")
        st.write(
            "A more common example is shown below. The Illumina sample sheet uses sections enclosed in square brackets. "
            "Up to four sections may appear: [Header], [Reads], [Settings] and [Data]. This example shows the [Header], "
            "[Reads] and [Data] sections (the [Settings] section is missing). According to the Illumina specification, "
            "all sections are optional, including [Data]. However, when [Data] is missing, all reads are stored as "
            "undetermined, which is rarely useful. For this reason, we require the [Data] section in this application."
        )
        st.code(data, language="bash")

        st.subheader("3 - [Data] section with single index and a [Settings] section")
        data = load_example("sample_sheet_settings_index.csv")
        st.code(data, language="bash")

        st.subheader("4 - Example of an erroneous sample sheet (invalid sample ID name)")
        data = load_example("Bad_SampleSheet_alphanum.csv")
        st.code(data, language="bash")

        st.subheader("5 - Example of an erroneous sample sheet (extra trailing semicolons)")
        data = load_example("Bad_SampleSheet_extra_semicolons.csv")
        st.code(data, language="bash")

    elif choice == "About":
        st.subheader("About")
        st.markdown(
            "This application is part of the [Sequana Project](https://github.com/sequana), which is dedicated to NGS analysis. "
            "Please see the [online documentation](https://sequana.readthedocs.io) as well as https://sequana.github.io for more information. "
            "The code used in this application is based on the [IEM module](https://github.com/sequana/sequana) of the Sequana Python library. "
            "It was developed based on the bcl2fastq documentation (v2.20) and is intended for users who want to demultiplex their data properly. "
            "The source code for this web application is available on [GitHub](https://github.com/sequana/webapp_samplesheet)."
            "\n\nThe different checks performed are described in this preprint: [Research Square](https://www.researchsquare.com/article/rs-5268893/v1)."
        )
        st.info(
            "Application Author: Thomas Cokelaer\n\nIEM module provided by The Sequana Team\n\nOriginal beta testing: Laure Lemée, Etienne Kornobis, Rania Ouazahrou"
        )
    else:
        st.subheader("How to cite?")
        st.info("Check My Sample Sheet application (this website):\n\nLemée L. et al, [Research Square](https://www.researchsquare.com/article/rs-5268893/v1)")

        st.info(
            "The Sequana framework used to check the sample sheet:\n\nCokelaer T. et al, (2017), 'Sequana': a Set of Snakemake NGS pipelines, Journal of Open Source Software, 2(16), 352, JOSS DOI [doi:10.21105/joss.00352](https://joss.theoj.org/papers/10.21105/joss.00352)"
        )
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def process_sample_sheet(data_file, samplesheet):
    """Validate a sample sheet and display the results in the Streamlit page.

    The content is written to a temporary file, from which the sample sheet
    version is detected and a sample sheet object is built with the Sequana
    library. The page then shows, in order: the detected version, the overall
    validation result, the details of each check, the original content, a
    download button for a quick-fixed file (only when at least one check is in
    error) and the data section as a table.

    The temporary files created here are removed before the function returns,
    including when an exception is raised.

    Parameters:
    data_file: The uploaded file, or None when the content was pasted. It is
        currently not used by this function and is kept for interface
        compatibility.
    samplesheet (str): The content of the sample sheet.

    Returns:
    None
    """
    # The Sequana functions used below take a file name, so save the content locally.
    with tempfile.NamedTemporaryFile(delete=False, mode="w") as fout:
        fout.write(samplesheet)
    sheet_path = Path(fout.name)

    try:
        sheet_version = get_sample_sheet_version(fout.name)
        iem = SampleSheetFactory(fout.name)

        if sheet_version == "v2":
            st.info(":information_source: Detected an Illumina **v2** sample sheet: validating against the **BCL Convert** specification.")
        else:
            st.info(":information_source: Detected an Illumina **v1** sample sheet: validating against the **bcl2fastq v2.20** specification.")

        try:
            iem.validate()
        except SystemExit as err:
            # a SystemExit raised by validate() is reported as a validation failure
            st.header("Validation Results", divider="blue")
            msg = "Error(s) found. :sob: See the message below from Sequana for details."
            st.error(msg)
            st.info(err)
        else:
            st.header("Validation Results", divider="blue")
            # emoji within div do not seem to work
            msg = ":champagne: Your sample sheet looks correct. :champagne:"
            st.success(msg)

        # =============================================================== validation
        st.subheader("Details about the checks", divider="blue")
        checks = iem.checker()

        msgs = print_checks(checks)

        # =============================================================== original file
        st.subheader("Original file", divider="blue")
        st.code(samplesheet)

        # =============================================================== corrected file
        if msgs["Error"]:
            st.subheader("Corrected file", divider="blue")
            st.caption(
                "The quick fix below removes extra trailing semicolons. Other types of errors are difficult to correct "
                "automatically, so you will need to fix the file manually. We strongly recommend using the IEM software "
                "from Illumina for that purpose. If you need to edit the file quickly, do not use Excel: although the "
                "extension is usually .csv, an Illumina sample sheet is not a standard CSV file and Excel may corrupt it."
            )
            # Only a file name is needed: close the handle before quick_fix
            # writes to that path.
            with tempfile.NamedTemporaryFile(delete=False, mode="w") as fixed:
                pass
            fixed_path = Path(fixed.name)
            try:
                iem.quick_fix(fixed.name)
                corrected = fixed_path.read_text()
            finally:
                fixed_path.unlink(missing_ok=True)

            st.download_button(
                label="Download data as CSV",
                data=corrected,
                file_name="sample.csv",
                mime="text/csv",
            )

        # =============================================================== data section
        st.subheader("Data section", divider="blue")
        st.caption(
            "For convenience, the [Data] section is shown below as a parsed table. "
            "Check that the values are consistent with your expectations (for example, that each index appears in a single column)."
        )
        df = iem.df.copy()
        st.write(df)
    finally:
        # the temporary copy is no longer needed once everything is rendered
        sheet_path.unlink(missing_ok=True)
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
# Build the page when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
IEMFileVersion,5
|
|
3
|
+
Investigator Name,tartampion
|
|
4
|
+
Experiment Name,Daudet
|
|
5
|
+
Date,21/06/2024
|
|
6
|
+
Workflow,GenerateFASTQ
|
|
7
|
+
Application,NextSeq FASTQ Only
|
|
8
|
+
Instrument Type,NextSeq/MiniSeq
|
|
9
|
+
Assay,TruSeq DNA PCR-Free
|
|
10
|
+
Index Adapters,"IDT-ILMN TruSeq DNA UD Indexes (96 Indexes)"
|
|
11
|
+
Description,TEST_FILE
|
|
12
|
+
Chemistry,Amplicon
|
|
13
|
+
|
|
14
|
+
[Reads]
|
|
15
|
+
151
|
|
16
|
+
151
|
|
17
|
+
|
|
18
|
+
[Settings]
|
|
19
|
+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
20
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
21
|
+
|
|
22
|
+
[Data]
|
|
23
|
+
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,Index_Plate_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
|
|
24
|
+
TagV1,,,,E01,UDI0005,ATCCACTG,UDI0005,ACGCACCT,B18249,
|
|
25
|
+
TagV3,,,,F01,UDI0006,GCTTGTCA,UDI0006,GTATGTTC,B18249,
|
|
26
|
+
TagV4,,,,G01,UDI0007,CAAGCTAG,UDI0007,CGCTATGT,B18249,
|
|
27
|
+
TagV5,,,,H01,UDI0008,TGGATCGA,UDI0008,TATCGCAC,B18249,
|
|
28
|
+
3D+LmeA14,,,,A02,UDI0009,AGTTCAGG,UDI0009,TCTGTTGG,B18249,
|
|
29
|
+
3D+LmeA30,,,,B02,UDI0010,GACCTGAA,UDI0010,CTCACCAA,B18249,
|
|
30
|
+
3D+LmeA56,,,,C02,UDI0011,TCTCTACT,UDI0011,GAACCGCG,B18249,
|
|
31
|
+
3D+LmeA9,,,,D02,UDI0012,CTCTCGTC,UDI0012,AGGTTATA,B18249,
|
|
32
|
+
3D+LmeA19,,,,E02,UDI0013,CCAAGTCT,UDI0013,TCATCCTT,B18249,
|
|
33
|
+
3D+LmeA26,,,,F02,UDI0014,TTGGACTC,UDI0014,CTGCTTCC,B18249,
|
|
34
|
+
3D+LmeA28,,,,G02,UDI0015,GGCTTAAG,UDI0015,GGTCACGA,B18249,
|
|
35
|
+
3D+LmeA42,,,,H02,UDI0016,AATCCGGA,UDI0016,AACTGTAG,B18249,
|
|
36
|
+
3D+LmeA55,,,,A03,UDI0017,TAATACAG,UDI0017,GTGAATAT,B18249,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
[Header];;;;;;
|
|
2
|
+
IEMFileVersion;4;;;
|
|
3
|
+
Investigator Name;;;;
|
|
4
|
+
Experiment Name;160104-SR-310v2-std;;
|
|
5
|
+
Chemistry;Default;;;;
|
|
6
|
+
;;;;;;;;;;
|
|
7
|
+
[Reads];;;;;;;;
|
|
8
|
+
315;;;;
|
|
9
|
+
;;;
|
|
10
|
+
[Settings];;;;
|
|
11
|
+
Adapter;AGATCGGAAGAGCACACGTCTGAACTCCAGTCA;;;
|
|
12
|
+
AdapterRead2;AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT;;;
|
|
13
|
+
;;;
|
|
14
|
+
[Data];;;
|
|
15
|
+
Sample_ID;Sample_Name;I7_Index_ID;index;Sample_Project
|
|
16
|
+
A;;NF01;CGATGT;
|
|
17
|
+
B;;NF03;ACAGTG;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
Local Run Manager Analysis Id,43043
|
|
3
|
+
Experiment Name,MyProject
|
|
4
|
+
Date,2020-01-24
|
|
5
|
+
Module,GenerateFASTQ - 2.0.0
|
|
6
|
+
Workflow,GenerateFASTQ
|
|
7
|
+
Library Prep Kit,Custom
|
|
8
|
+
Description,200123-PE-MP-150
|
|
9
|
+
Chemistry,Default
|
|
10
|
+
|
|
11
|
+
[Reads]
|
|
12
|
+
151
|
|
13
|
+
151
|
|
14
|
+
|
|
15
|
+
[Data]
|
|
16
|
+
Sample_ID,Sample_Name,Description,index,I7_Index_ID,Sample_Project
|
|
17
|
+
412,412,,TGACCA,TGACCA,B3202
|
|
18
|
+
LMD666,LMD666,,CATTTT,CATTTT,B3202
|
|
19
|
+
TaoP3,TaoP3,,ACAGTG,ACAGTG,B3202
|
|
20
|
+
TaoP10,TaoP10,,GTGAAA,GTGAAA,B3202
|
|
21
|
+
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[Header]
|
|
2
|
+
IEMFileVersion,4
|
|
3
|
+
Investigator Name,
|
|
4
|
+
Experiment Name,
|
|
5
|
+
Date,8/23/2016
|
|
6
|
+
Workflow,GenerateFASTQ
|
|
7
|
+
Application,FASTQ Only
|
|
8
|
+
Assay,NEXTFlex-PCRfree
|
|
9
|
+
Description,
|
|
10
|
+
Chemistry,Default
|
|
11
|
+
|
|
12
|
+
[Reads]
|
|
13
|
+
300
|
|
14
|
+
300
|
|
15
|
+
|
|
16
|
+
[Settings]
|
|
17
|
+
Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
|
|
18
|
+
AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
|
|
19
|
+
|
|
20
|
+
[Data]
|
|
21
|
+
Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,Sample_Project,Description
|
|
22
|
+
ID1,,TEST,A01,NF02,TGACCA,,
|
|
23
|
+
ID2,,TEST,A02,NF03,ACAGTG,,
|
|
24
|
+
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[Header],
|
|
2
|
+
FileFormatVersion,2
|
|
3
|
+
RunName,20260616_B21184-18S-PE-MPdbl-151-10-5M
|
|
4
|
+
InstrumentPlatform,MiSeqi100Series
|
|
5
|
+
IndexOrientation,Forward
|
|
6
|
+
AnalysisLocation,Local
|
|
7
|
+
|
|
8
|
+
[Reads]
|
|
9
|
+
Read1Cycles,151
|
|
10
|
+
Read2Cycles,151
|
|
11
|
+
Index1Cycles,10
|
|
12
|
+
Index2Cycles,10
|
|
13
|
+
|
|
14
|
+
[Sequencing_Settings]
|
|
15
|
+
LibraryPrepKits,NexteraXT
|
|
16
|
+
|
|
17
|
+
[BCLConvert_Settings]
|
|
18
|
+
SoftwareVersion,4.4.6
|
|
19
|
+
AdapterRead1,CTGTCTCTTATACACATCT
|
|
20
|
+
AdapterRead2,CTGTCTCTTATACACATCT
|
|
21
|
+
OverrideCycles,R1:Y151;I1:I10;I2:I10;R2:Y151
|
|
22
|
+
FastqCompressionFormat,dragen
|
|
23
|
+
NoLaneSplitting,true
|
|
24
|
+
GenerateFastqcMetrics,true
|
|
25
|
+
|
|
26
|
+
[BCLConvert_Data]
|
|
27
|
+
Sample_ID,Index,Index2
|
|
28
|
+
1,AGGTCAGATA,CTACAAGATA
|
|
29
|
+
2,CGACATCCGA,TACGTTCATT
|
|
30
|
+
3,ATTCCATAAG,TGCCTGGTGG
|
|
31
|
+
4,CACAATAGGA,TCCATCCGAG
|
|
32
|
+
5,AACATCGCGC,GTCCACTTGT
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
[Cloud_Settings]
|
|
36
|
+
GeneratedVersion,1.25.0.202605080250
|
|
37
|
+
|
|
38
|
+
[Cloud_Data]
|
|
39
|
+
Sample_ID,ProjectName,LibraryName,LibraryPrepKitName,IndexAdapterKitName
|
|
40
|
+
1,BXXXX,1_AGGTCAGATA_CTACAAGATA,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
|
|
41
|
+
2,BXXXX,2_CGACATCCGA_TACGTTCATT,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
|
|
42
|
+
3,BXXXX,3_ATTCCATAAG_TGCCTGGTGG,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
|
|
43
|
+
4,BXXXX,4_CACAATAGGA_TCCATCCGAG,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
|
|
44
|
+
5,BXXXX,5_AACATCGCGC_GTCCACTTGT,NexteraXT,IlluminaDNARNAUDISetABCDTagmentation
|
|
Binary file
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: check-my-sample-sheet
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Streamlit web application to validate Illumina sample sheets (bcl2fastq v1 and BCL Convert v2).
|
|
5
|
+
License-Expression: BSD-3-Clause
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: illumina,samplesheet,bcl2fastq,bcl-convert,sequana,ngs
|
|
8
|
+
Author: Thomas Cokelaer
|
|
9
|
+
Author-email: cokelaer@gmail.com
|
|
10
|
+
Requires-Python: >=3.9,<4.0
|
|
11
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
|
+
Requires-Dist: requests
|
|
16
|
+
Requires-Dist: sequana (>=0.23.0)
|
|
17
|
+
Requires-Dist: streamlit (>=1.28)
|
|
18
|
+
Requires-Dist: streamlit-option-menu
|
|
19
|
+
Project-URL: Documentation, https://github.com/sequana/webapp_samplesheet
|
|
20
|
+
Project-URL: Homepage, https://github.com/sequana/webapp_samplesheet
|
|
21
|
+
Project-URL: Repository, https://github.com/sequana/webapp_samplesheet
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# Check My Sample Sheet
|
|
25
|
+
|
|
26
|
+
[](https://github.com/sequana/webapp_samplesheet/actions/workflows/tests.yml)
|
|
27
|
+
[](https://github.com/sequana/webapp_samplesheet/releases)
|
|
28
|
+
[](LICENSE)
|
|
29
|
+

|
|
30
|
+
[](https://check-my-sample-sheet.streamlit.app/)
|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
This is a Streamlit application that uses the **iem** module of Sequana (github.com/sequana/sequana) to check sample sheets from Illumina sequencers. Both formats are supported and detected automatically:
|
|
34
|
+
|
|
35
|
+
- **v1** (bcl2fastq): `[Data]` / `[Settings]` sections
|
|
36
|
+
- **v2** (BCL Convert): `[BCLConvert_Data]` / `[BCLConvert_Settings]` sections
|
|
37
|
+
|
|
38
|
+
A running demo is available here: https://check-my-sample-sheet.streamlit.app/
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# General Information
|
|
42
|
+
|
|
43
|
+
If you want to contribute to this web application, please open a pull request on this repository. Note, however, that the core of the application lives in the Sequana project at https://github.com/sequana/sequana/, more specifically in the iem.py module.
|
|
44
|
+
|
|
45
|
+
The sanity checks implemented are based on experience, the bcl2fastq documentation (v2.20) and the BCL Convert specification.
|
|
46
|
+
|
|
47
|
+
# Installation
|
|
48
|
+
|
|
49
|
+
From PyPI:
|
|
50
|
+
|
|
51
|
+
pip install check-my-sample-sheet
|
|
52
|
+
|
|
53
|
+
Then launch the app (opens in your browser); extra arguments are forwarded to
|
|
54
|
+
`streamlit run` (e.g. `--server.port 8502`):
|
|
55
|
+
|
|
56
|
+
check-my-sample-sheet
|
|
57
|
+
|
|
58
|
+
# Local instance (from source)
|
|
59
|
+
|
|
60
|
+
git clone https://github.com/sequana/webapp_samplesheet
|
|
61
|
+
cd webapp_samplesheet
|
|
62
|
+
|
|
63
|
+
# install the dependencies (sequana, streamlit, ...)
|
|
64
|
+
pip install -r requirements.txt
|
|
65
|
+
|
|
66
|
+
# run the application locally in your browser
|
|
67
|
+
streamlit run check_my_sample_sheet/app.py
|
|
68
|
+
|
|
69
|
+
# Running the tests
|
|
70
|
+
|
|
71
|
+
pip install -r requirements-dev.txt
|
|
72
|
+
pytest
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
check_my_sample_sheet/__init__.py,sha256=J-j-u0itpEFT6irdmWmixQqYMadNl1X91TxUmoiLHMI,22
|
|
2
|
+
check_my_sample_sheet/__main__.py,sha256=HyPwQhHdMySpflg7MSdwRcpiOR_mfnL883zaO0HrrLg,499
|
|
3
|
+
check_my_sample_sheet/app.py,sha256=PT-_9rxMqFOpBiO870n6H0Ulw-WTxI5CSz53p4EEZvs,18594
|
|
4
|
+
check_my_sample_sheet/examples/Bad_SampleSheet_alphanum.csv,sha256=Mfs8Tpz7yqfxRJIUHI-dmeTLX3gpHpyeWa29yTQETLs,1349
|
|
5
|
+
check_my_sample_sheet/examples/Bad_SampleSheet_extra_semicolons.csv,sha256=4cDQoAPDNNkpk4pQQUhh5EqjKi8EmFKvEG9l42VutmE,367
|
|
6
|
+
check_my_sample_sheet/examples/case1.csv,sha256=sHNlVbx3WWQ48GpWrMtJS3eCSQZ8senO_HRVMs2PmMM,45
|
|
7
|
+
check_my_sample_sheet/examples/case2.csv,sha256=-uE3hFxcGVcn7eBE2j7fHXxdatbGuW3w4fTbhl3WzFk,76
|
|
8
|
+
check_my_sample_sheet/examples/case3.csv,sha256=ps_mksg7x7BmYD4ZexhqlLc1ilHjgXdDxPj3hA-eETs,211
|
|
9
|
+
check_my_sample_sheet/examples/sample_sheet.csv,sha256=DmK9y12UX7sRKLqMW9fo8NrDqzamTP5A8cDa2FNkN3I,435
|
|
10
|
+
check_my_sample_sheet/examples/sample_sheet_settings_index.csv,sha256=gyYcpksdqUkFLdOzEk5tfAunI8izJK2sYRa_65HHXo0,452
|
|
11
|
+
check_my_sample_sheet/examples/sample_sheet_v2_bclconvert.csv,sha256=gYJAJ8In6NqJvhh6rhbdauOri-BXFbdCcwh1vfB4or8,1216
|
|
12
|
+
check_my_sample_sheet/imgs/logo_256x256.png,sha256=OqmDHAZRHPX0y7YJbsOK6mniWJM0juxvEQjr0iXqz1w,84205
|
|
13
|
+
check_my_sample_sheet-1.0.0.dist-info/METADATA,sha256=S0rK-q6bDI34jqKNvAlV_hwy7zIeiqPiJtcOVDCMNGQ,3173
|
|
14
|
+
check_my_sample_sheet-1.0.0.dist-info/WHEEL,sha256=Vz2fHgx6HFtSwhs8KvkHLqH5Ea4w1_rner5uNVGCeIE,88
|
|
15
|
+
check_my_sample_sheet-1.0.0.dist-info/entry_points.txt,sha256=Nc0qYx9_XYTwTdpHNqaxXZT7lBfwQEIh7ipxXSS954s,77
|
|
16
|
+
check_my_sample_sheet-1.0.0.dist-info/licenses/LICENSE,sha256=lBnvxmIoXrNnIDL1nx_Qjfh7r5LvnE_Mbv1-XvSkxWw,1557
|
|
17
|
+
check_my_sample_sheet-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024, Sequana Development Team (https://sequana.readthedocs.io)
|
|
4
|
+
All rights reserved.
|
|
5
|
+
|
|
6
|
+
Redistribution and use in source and binary forms, with or without
|
|
7
|
+
modification, are permitted provided that the following conditions are met:
|
|
8
|
+
|
|
9
|
+
* Redistributions of source code must retain the above copyright notice, this
|
|
10
|
+
list of conditions and the following disclaimer.
|
|
11
|
+
|
|
12
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
|
13
|
+
this list of conditions and the following disclaimer in the documentation
|
|
14
|
+
and/or other materials provided with the distribution.
|
|
15
|
+
|
|
16
|
+
* Neither the name of the copyright holder nor the names of its
|
|
17
|
+
contributors may be used to endorse or promote products derived from
|
|
18
|
+
this software without specific prior written permission.
|
|
19
|
+
|
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
21
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
22
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
23
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
24
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
25
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
26
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
27
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
28
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
29
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|