bfabric-scripts 1.13.40__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bfabric_scripts/__init__.py +0 -0
- bfabric_scripts/bfabric_executable_submitter_functionalTest.py +60 -0
- bfabric_scripts/bfabric_executable_submitter_gridengine.py +65 -0
- bfabric_scripts/bfabric_executable_submitter_slurm.py +54 -0
- bfabric_scripts/bfabric_executable_wrappercreator.py +39 -0
- bfabric_scripts/bfabric_feeder_mascot.py +295 -0
- bfabric_scripts/bfabric_flask.py +238 -0
- bfabric_scripts/bfabric_list_not_existing_storage_directories.py +54 -0
- bfabric_scripts/bfabric_list_workunit_parameters.py +124 -0
- bfabric_scripts/bfabric_logthis.py +38 -0
- bfabric_scripts/bfabric_read.py +145 -0
- bfabric_scripts/bfabric_read_samples_from_dataset.py +75 -0
- bfabric_scripts/bfabric_read_samples_of_workunit.py +101 -0
- bfabric_scripts/bfabric_save_csv2dataset.py +85 -0
- bfabric_scripts/bfabric_save_dataset2csv.py +74 -0
- bfabric_scripts/bfabric_save_fasta.py +81 -0
- bfabric_scripts/bfabric_save_importresource_sample.py +133 -0
- bfabric_scripts/bfabric_save_link_to_workunit.py +40 -0
- bfabric_scripts/bfabric_save_resource_description.py +27 -0
- bfabric_scripts/bfabric_save_workflowstep.py +107 -0
- bfabric_scripts/bfabric_save_workunit_attribute.py +38 -0
- bfabric_scripts/bfabric_setExternalJobStatus_done.py +44 -0
- bfabric_scripts/bfabric_setResourceStatus_available.py +50 -0
- bfabric_scripts/bfabric_setWorkunitStatus.py +40 -0
- bfabric_scripts/bfabric_slurm_queue_status.py +97 -0
- bfabric_scripts/bfabric_upload_resource.py +43 -0
- bfabric_scripts/bfabric_upload_submitter_executable.py +70 -0
- bfabric_scripts/bfabric_wrapper_creator_yaml.py +38 -0
- bfabric_scripts/cli/__init__.py +0 -0
- bfabric_scripts/cli/__main__.py +25 -0
- bfabric_scripts/cli/api/__init__.py +0 -0
- bfabric_scripts/cli/api/create.py +38 -0
- bfabric_scripts/cli/api/delete.py +52 -0
- bfabric_scripts/cli/api/inspect.py +127 -0
- bfabric_scripts/cli/api/namespaces.py +10 -0
- bfabric_scripts/cli/api/parser.py +170 -0
- bfabric_scripts/cli/api/query_repr.py +59 -0
- bfabric_scripts/cli/api/read.py +167 -0
- bfabric_scripts/cli/api/update.py +65 -0
- bfabric_scripts/cli/cli_api.py +14 -0
- bfabric_scripts/cli/cli_dataset.py +10 -0
- bfabric_scripts/cli/cli_executable.py +10 -0
- bfabric_scripts/cli/cli_external_job.py +53 -0
- bfabric_scripts/cli/cli_feeder.py +115 -0
- bfabric_scripts/cli/cli_workunit.py +10 -0
- bfabric_scripts/cli/dataset/__init__.py +0 -0
- bfabric_scripts/cli/dataset/download.py +49 -0
- bfabric_scripts/cli/dataset/show.py +54 -0
- bfabric_scripts/cli/dataset/upload.py +125 -0
- bfabric_scripts/cli/executable/__init__.py +0 -0
- bfabric_scripts/cli/executable/dump.py +79 -0
- bfabric_scripts/cli/executable/show.py +39 -0
- bfabric_scripts/cli/executable/upload.py +75 -0
- bfabric_scripts/cli/external_job/__init__.py +0 -0
- bfabric_scripts/cli/external_job/upload_submitter_executable.py +69 -0
- bfabric_scripts/cli/external_job/upload_wrapper_creator_executable.py +22 -0
- bfabric_scripts/cli/workunit/__init__.py +0 -0
- bfabric_scripts/cli/workunit/export_definition.py +21 -0
- bfabric_scripts/cli/workunit/not_available.py +120 -0
- bfabric_scripts/feeder/__init__.py +0 -0
- bfabric_scripts/feeder/file_attributes.py +52 -0
- bfabric_scripts/feeder/path_convention_compms.py +61 -0
- bfabric_scripts/feeder/report.py +39 -0
- bfabric_scripts/fgcz_maxquant_scaffold-wrapper.py +165 -0
- bfabric_scripts/fgcz_maxquant_wrapper.py +634 -0
- bfabric_scripts/fgcz_sge_maxquant_linux.bash +69 -0
- bfabric_scripts/optional_features.py +23 -0
- bfabric_scripts/py.typed +0 -0
- bfabric_scripts-1.13.40.dist-info/METADATA +53 -0
- bfabric_scripts-1.13.40.dist-info/RECORD +72 -0
- bfabric_scripts-1.13.40.dist-info/WHEEL +4 -0
- bfabric_scripts-1.13.40.dist-info/entry_points.txt +23 -0
|
File without changes
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Submitter for B-Fabric functional test
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Copyright (C) 2014,2015 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
|
|
8
|
+
# Modified to submit to the Slurm scheduler on 2020-09-28
|
|
9
|
+
#
|
|
10
|
+
# Authors:
|
|
11
|
+
# Christian Panse <cp@fgcz.ethz.ch>
|
|
12
|
+
# Maria d'Errico <maria.derrico@fgcz.ethz.ch>
|
|
13
|
+
#
|
|
14
|
+
# Licensed under GPL version 3
|
|
15
|
+
#
|
|
16
|
+
#
|
|
17
|
+
# @name: bfabric_executable_submitter_functionalTest.py
|
|
18
|
+
# @description: this script is a dummy submitter executable used by the bfabricPy functional test
|
|
19
|
+
# @context: SUBMITTER
|
|
20
|
+
# @oarameter: string, q, bfabric,true,true,queue,this is a queue
|
|
21
|
+
# @version: $Rev: 1232 $
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
|
+
this is a dummy submitter executable
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
example:
|
|
28
|
+
|
|
29
|
+
python bfabric_executable_submitter_functionalTest.py -j 45864
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# import os
|
|
34
|
+
# import sys
|
|
35
|
+
from optparse import OptionParser
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def main() -> None:
    """Parse the command line and print the dummy submitter message.

    The external job id (``-j`` / ``--externaljobid``) has to be supplied and
    must be non-zero; otherwise the option parser reports a usage error.
    """
    cli = OptionParser(usage="usage: %prog -j <externaljobid>", version="%prog 1.0")
    cli.add_option(
        "-j",
        "--externaljobid",
        dest="externaljobid",
        action="store",
        type="int",
        default=None,
        help="external job id is required.",
    )
    opts, _positional = cli.parse_args()

    # optparse has no built-in notion of a required option, so check by hand.
    if not opts.externaljobid:
        cli.error("option '-j' is required.")

    print("Dummy submitter xecutable defined for the bfabricPy functional test")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Submitter for B-Fabric
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Copyright (C) 2014,2015 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
|
|
8
|
+
#
|
|
9
|
+
# Authors:
|
|
10
|
+
# Marco Schmid <marco.schmidt@fgcz.ethz.ch>
|
|
11
|
+
# Christian Panse <cp@fgcz.ethz.ch>
|
|
12
|
+
# Maria d'Errico <maria.derrico@fgcz.ethz.ch>
|
|
13
|
+
#
|
|
14
|
+
# Licensed under GPL version 3
|
|
15
|
+
#
|
|
16
|
+
#
|
|
17
|
+
# @name: bfabric_executable_submitter_gridengine.py
|
|
18
|
+
# @description: this script submits to the Grid Engine scheduler
|
|
19
|
+
# @context: SUBMITTER
|
|
20
|
+
# @oarameter: string, q, bfabric,true,true,queue,this is a queue
|
|
21
|
+
# @version: $Rev: 1232 $
|
|
22
|
+
|
|
23
|
+
"""
|
|
24
|
+
this is the submitter executable
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
example:
|
|
28
|
+
|
|
29
|
+
python bfabric_executable_submitter_gridengine.py -j 45864
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# import os
|
|
34
|
+
# import sys
|
|
35
|
+
from optparse import OptionParser
|
|
36
|
+
from bfabric import BfabricSubmitter
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def main() -> None:
    """Parse the command line and run the submitter for one external job.

    The external job id (``-j`` / ``--externaljobid``) has to be supplied and
    must be non-zero; otherwise the option parser reports a usage error.
    A ``BfabricSubmitter`` is then created for that id and its
    ``submitter_yaml()`` method is invoked.
    """
    cli = OptionParser(usage="usage: %prog -j <externaljobid>", version="%prog 1.0")
    cli.add_option(
        "-j",
        "--externaljobid",
        dest="externaljobid",
        action="store",
        type="int",
        default=None,
        help="external job id is required.",
    )
    opts, _positional = cli.parse_args()

    # optparse has no built-in notion of a required option, so check by hand.
    if not opts.externaljobid:
        cli.error("option '-j' is required.")

    submitter = BfabricSubmitter(externaljobid=opts.externaljobid)
    submitter.submitter_yaml()
    # TODO(cp): fix that
    # print(submitter.query_counter)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Submitter for B-Fabric
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from argparse import ArgumentParser
|
|
8
|
+
|
|
9
|
+
from bfabric import Bfabric
|
|
10
|
+
from bfabric.wrapper_creator.bfabric_submitter import BfabricSubmitter
|
|
11
|
+
|
|
12
|
+
# Copyright (C) 2014,2015 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
|
|
13
|
+
# Modified to submit to the Slurm scheduler on 2020-09-28
|
|
14
|
+
#
|
|
15
|
+
# Authors:
|
|
16
|
+
# Marco Schmid <marco.schmidt@fgcz.ethz.ch>
|
|
17
|
+
# Christian Panse <cp@fgcz.ethz.ch>
|
|
18
|
+
# Maria d'Errico <maria.derrico@fgcz.ethz.ch>
|
|
19
|
+
#
|
|
20
|
+
# Licensed under GPL version 3
|
|
21
|
+
#
|
|
22
|
+
#
|
|
23
|
+
# @name: bfabric_executable_submitter_slurm.py
|
|
24
|
+
# @description: this script submits to the Slurm scheduler
|
|
25
|
+
# @context: SUBMITTER
|
|
26
|
+
# @oarameter: string, q, bfabric,true,true,queue,this is a queue
|
|
27
|
+
# @version: $Rev: 1232 $
|
|
28
|
+
|
|
29
|
+
"""
|
|
30
|
+
this is the submitter executable
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
example:
|
|
34
|
+
|
|
35
|
+
python bfabric_executable_submitter_slurm.py -j 45864
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def main() -> None:
    """Parse the command line and run the Slurm submitter for one external job.

    Connects to B-Fabric via ``Bfabric.connect()``, builds a
    ``BfabricSubmitter`` configured with ``scheduler="Slurm"`` and
    ``scheduleroot="/usr/"`` for the given external job id, and calls its
    ``submitter_yaml()`` method.
    """
    # ``ArgumentParser`` accepts ``description``; it has no ``help`` keyword,
    # so passing ``help=`` raised a TypeError before any argument was parsed.
    parser = ArgumentParser(description="Submitter for B-Fabric")
    parser.add_argument(
        "-j",
        "--externaljobid",
        type=int,
        required=True,  # the job id is mandatory, as in the sibling submitter scripts
        help="external job id is required.",
    )
    args = parser.parse_args()

    client = Bfabric.connect()
    bfapp = BfabricSubmitter(
        client=client,
        externaljobid=args.externaljobid,
        scheduleroot="/usr/",
        scheduler="Slurm",
    )
    bfapp.submitter_yaml()


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
A wrapper_creator for B-Fabric
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
this code generates a yaml configuration file
|
|
8
|
+
|
|
9
|
+
example
|
|
10
|
+
|
|
11
|
+
wrapper_creator_yaml.py -j 45631
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
# Copyright (C) 2014 Functional Genomics Center Zurich ETHZ|UZH. All rights reserved.
|
|
16
|
+
#
|
|
17
|
+
# Authors:
|
|
18
|
+
# Christian Panse <cp@fgcz.ethz.ch>
|
|
19
|
+
#
|
|
20
|
+
# Licensed under GPL version 3
|
|
21
|
+
#
|
|
22
|
+
# $HeadURL: http://fgcz-svn/repos/scripts/trunk/linux/bfabric/apps/python/wrapper_creator_yaml.py $
|
|
23
|
+
# $Id: wrapper_creator_yaml.py 2397 2016-09-06 07:04:35Z cpanse $
|
|
24
|
+
|
|
25
|
+
import sys
|
|
26
|
+
from bfabric import BfabricWrapperCreator
|
|
27
|
+
|
|
28
|
+
if __name__ == "__main__":
    # Expected invocation: <script> -j <positive integer external job id>.
    externaljobid = -1
    if len(sys.argv) == 3 and sys.argv[1] == "-j":
        try:
            externaljobid = int(sys.argv[2])
        except ValueError:
            # A non-numeric id is a usage error, not a reason for a traceback.
            externaljobid = -1

    if externaljobid <= 0:
        print("usage: " + sys.argv[0] + " -j <externaljobid>")
        sys.exit(1)

    bfapp = BfabricWrapperCreator(externaljobid=externaljobid)
    bfapp.write_yaml()
|
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
# Author
|
|
4
|
+
2012-10-08 Christian Panse <cp@fgcz.ethz.ch>
|
|
5
|
+
2012-10-10 Christian Panse <cp@fgcz.ethz.ch>
|
|
6
|
+
2012-10-11 Christian Panse <cp@fgcz.ethz.ch>
|
|
7
|
+
2021-01-06 Christian Panse <cp@fgcz.ethz.ch> - replace multiprocess by caching strategy
|
|
8
|
+
2023-10-20 Christian Panse <cp@fgcz.ethz.ch> - add timestamp
|
|
9
|
+
|
|
10
|
+
# Usage
|
|
11
|
+
|
|
12
|
+
find /usr/local/mascot/data/ -type f -mtime -1 -name "*dat" \
|
|
13
|
+
| /home/cpanse/__checkouts/bfabricPy/bfabric/scripts/bfabric_feeder_mascot.py --stdin
|
|
14
|
+
|
|
15
|
+
# Crontab
|
|
16
|
+
0 0 * * 7 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 365 2>&1 >/dev/null
|
|
17
|
+
3 */2 * * 1-6 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 7 2>&1 >/dev/null
|
|
18
|
+
*/7 5-22 * * 1-5 nice -19 /usr/local/fgcz-s-018/bfabric-feeder/run_fgcz_dataFeederMascot.bash 1 2>&1 >/dev/null
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import argparse
|
|
24
|
+
import hashlib
|
|
25
|
+
import itertools
|
|
26
|
+
import json
|
|
27
|
+
import os
|
|
28
|
+
import re
|
|
29
|
+
import sys
|
|
30
|
+
import urllib
|
|
31
|
+
from collections import Counter
|
|
32
|
+
from datetime import datetime
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Any
|
|
35
|
+
|
|
36
|
+
from suds.client import Client
|
|
37
|
+
|
|
38
|
+
# WSDL location of the workunit web service and the SOAP client created from it.
workuniturl = "http://fgcz-bfabric.uzh.ch/bfabric/workunit?wsdl"
clientWorkUnit = Client(workuniturl)

# NOTE(review): the credentials are hard-coded in the source; presumably they
# are placeholders - confirm and consider loading them from configuration.
BFLOGIN = "pfeeder"
BFPASSWORD = "!ForYourEyesOnly!"

# In-memory cache of parsed result files (keyed by file path), persisted as
# JSON in the user's home directory.
DB = {}
DBfilename = Path.home() / "mascot.json"

try:
    with DBfilename.open() as file:
        DB = json.load(file)
    print(
        f"Read {len(DB)} data items from {DBfilename} using "
        f"{sum(int(item['resource']['size']) for item in DB.values()) / (1024 * 1024 * 1024):.1f} GBytes."
    )
except OSError:
    # A missing or unreadable cache is not fatal; start with an empty one.
    print(f"loading '{DBfilename}' failed")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def query_mascot_result(file_path: str) -> bool:
    """Look up a Mascot result file in the cache and register it via the web service.

    If ``file_path`` is already cached in ``DB`` with a ``workunitid`` nothing
    is done. If it is not cached, the file is parsed with
    ``parse_mascot_result_file`` and the result is stored in ``DB``. When the
    workunit has at least one input resource and is not an autoQC run, it is
    sent to the ``checkandinsert`` service call; a returned ``_id`` is stored
    as ``workunitid`` in the cache entry.

    Args:
        file_path: path of the Mascot ``.dat`` file; also used as the cache key.

    Returns:
        True if the in-memory cache ``DB`` was modified by this call (so the
        caller should persist it), False otherwise.

    Raises:
        RuntimeError: if the service response contains neither ``_id`` nor
            ``errorreport``.
        ValueError: re-raised if raised while calling the service or reading
            its response.
    """
    db_written = False
    print(f"{datetime.now()} input>")
    print(f"\t{file_path}")

    if file_path in DB:
        print("\thit")
        wu = DB[file_path]
        if "workunitid" in wu:
            print(f"\tdat file {file_path} already registered as workunit id {wu['workunitid']}. continue ...")
            return db_written
        print("\tno workunitid found")
    else:
        print(f"\tparsing mascot result file '{file_path}'...")
        wu = parse_mascot_result_file(file_path)
        print(f"\tupdating cache '{DBfilename}' file ...")
        db_written = True
        DB[file_path] = wu

    # Without input resources there is nothing to register.
    if not wu["inputresource"]:
        return db_written

    if "autoQC4L" in wu["name"] or "autoQC01" in wu["name"]:
        print(f"WARNING This script ignores autoQC based mascot dat file {file_path}.")
        # Report a freshly parsed entry so that the cache still gets persisted.
        return db_written

    print("\tquerying bfabric ...")

    # A stale error report from a previous attempt must not be sent again.
    if "errorreport" in wu:
        del wu["errorreport"]

    try:
        resultClientWorkUnit = clientWorkUnit.service.checkandinsert(
            dict(login=BFLOGIN, password=BFPASSWORD, workunit=wu)
        )
    except ValueError as exc:
        print(f"Exception {exc}")
        raise

    try:
        rv = resultClientWorkUnit.workunit[0]
    except ValueError as exc:
        print(f"Exception {exc}")
        raise

    print(f"{datetime.now()} output>")
    if "errorreport" in rv:
        print(f"\tfound errorreport '{rv['errorreport']}'.")

    if "_id" in rv:
        wu["workunitid"] = rv["_id"]
        print(f"\tfound workunitid'{wu['workunitid']}'.")
        DB[file_path] = wu
        db_written = True

    if "_id" not in rv and "errorreport" not in rv:
        print("something went wrong.")
        # A bare ``raise`` without an active exception only yields an opaque
        # RuntimeError; raise the same type with a meaningful message.
        raise RuntimeError(f"unexpected checkandinsert response for '{file_path}': neither _id nor errorreport")

    return db_written
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
"""
|
|
125
|
+
parse the mascot dat file and extract meta data and title information for inputresource retrieval
|
|
126
|
+
it returns a 'workunit' dict for the following web api
|
|
127
|
+
|
|
128
|
+
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:end="http://endpoint.webservice.component.bfabric.org/">
|
|
129
|
+
<soapenv:Header/>
|
|
130
|
+
<soapenv:Body>
|
|
131
|
+
<end:checkandinsert>
|
|
132
|
+
<parameters>
|
|
133
|
+
<login>?</login>
|
|
134
|
+
<password>?</password>
|
|
135
|
+
<!--Zero or more repetitions:-->
|
|
136
|
+
<workunit>
|
|
137
|
+
<!--Optional:-->
|
|
138
|
+
<applicationid>?</applicationid>
|
|
139
|
+
<!--Optional:-->
|
|
140
|
+
<projectid>?</projectid>
|
|
141
|
+
<!--Optional:-->
|
|
142
|
+
<name>?</name>
|
|
143
|
+
<!--Optional:-->
|
|
144
|
+
<description>?</description>
|
|
145
|
+
<!--Zero or more repetitions:-->
|
|
146
|
+
<inputresource>
|
|
147
|
+
<!--Optional:-->
|
|
148
|
+
<storageid>?</storageid>
|
|
149
|
+
<!--Optional:-->
|
|
150
|
+
<relativepath>?</relativepath>
|
|
151
|
+
</inputresource>
|
|
152
|
+
<!--Zero or more repetitions:-->
|
|
153
|
+
<resource>
|
|
154
|
+
<!--Optional:-->
|
|
155
|
+
<name>?</name>
|
|
156
|
+
<!--Optional:-->
|
|
157
|
+
<storageid>?</storageid>
|
|
158
|
+
<!--Optional:-->
|
|
159
|
+
<relativepath>?</relativepath>
|
|
160
|
+
<!--Optional:-->
|
|
161
|
+
<weburl>?</weburl>
|
|
162
|
+
<!--Optional:-->
|
|
163
|
+
<size>?</size>
|
|
164
|
+
<!--Optional:-->
|
|
165
|
+
<filechecksum>?</filechecksum>
|
|
166
|
+
</resource>
|
|
167
|
+
</workunit>
|
|
168
|
+
</parameters>
|
|
169
|
+
</end:checkandinsert>
|
|
170
|
+
</soapenv:Body>
|
|
171
|
+
</soapenv:Envelope>
|
|
172
|
+
"""
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def parse_mascot_result_file(file_path: str) -> dict[str, Any]:
    """Parse a Mascot ``.dat`` file into a workunit dictionary.

    Every line is fed into an MD5 digest. Lines starting with ``t`` are
    URL-unquoted, normalised to forward slashes and matched against the
    ``title=`` pattern; each distinct matching path becomes one input
    resource, and the project number of the most recent new match is used as
    ``containerid``. Among the first 599 lines, other lines of the form
    ``KEY=value`` for a fixed set of keys are collected into the description.

    Args:
        file_path: path of the Mascot ``.dat`` file to read.

    Returns:
        A dict with the keys ``applicationid``, ``containerid``, ``name``,
        ``description``, ``inputresource`` (list of dicts) and ``resource``
        (dict with name, storage id, status, relative path, size, checksum).
        ``containerid`` is ``-1`` if no title line matched.

    Raises:
        OSError: if the file cannot be opened or its size cannot be read.
    """
    # Imported explicitly: a plain ``import urllib`` does not guarantee that
    # the ``urllib.parse`` submodule has been loaded.
    from urllib.parse import unquote

    print(f"{datetime.now()} DEBUG parse_mascot_result_file")

    title_re = re.compile(r"^title=.*(p([0-9]+).+Proteomics.*(raw|RAW|wiff)).*")
    meta_re = re.compile(
        r"^(FILE|COM|release|USERNAME|USERID|TOL|TOLU|ITOL|ITOLU|MODS|IT_MODS|CHARGE|INSTRUMENT|QUANTITATION|DECOY)=(.+)$"
    )

    # C0 and C1 control characters are stripped from name and description.
    control_chars = "".join(map(chr, itertools.chain(range(0x00, 0x20), range(0x7F, 0xA0))))
    control_char_re = re.compile(f"[{re.escape(control_chars)}]")

    relative_path = file_path.replace("/usr/local/mascot/", "")
    meta_data = {"COM": "", "FILE": "", "release": "", "relativepath": relative_path}
    seen_inputs = set()
    input_resources = []
    md5 = hashlib.md5()
    project = -1
    desc_parts = []

    with Path(file_path).open() as dat:
        for line_count, line in enumerate(dat, start=1):
            # The checksum is computed over the decoded text lines, re-encoded.
            md5.update(line.encode())
            # Cheap first-character test avoids running the title regex on every line.
            if line.startswith("t"):
                match = title_re.match(unquote(line.strip()).replace("\\", "/").replace("//", "/"))
                if match and match.group(1) not in seen_inputs:
                    seen_inputs.add(match.group(1))
                    input_resources.append(dict(storageid=2, relativepath=match.group(1)))
                    project = match.group(2)
            elif line_count < 600:
                # None of the metadata keys starts with a lowercase 't'.
                match = meta_re.match(unquote(line.strip()))
                if match:
                    key, value = match.groups()
                    desc_parts.append(f"{key}={value}; ")
                    meta_data[key] = value

    # Drop all non-ASCII characters from the description.
    desc = "".join(desc_parts).encode("ascii", errors="ignore").decode()
    name = f"{meta_data['COM']}; {os.path.basename(meta_data['relativepath'])}"[:255]
    rv = dict(
        applicationid=19,
        containerid=project,
        name=control_char_re.sub("", name),
        description=control_char_re.sub("", desc),
        inputresource=input_resources,
        resource=dict(
            name=meta_data["relativepath"],
            storageid=4,
            status="available",
            relativepath=meta_data["relativepath"],
            size=os.path.getsize(file_path),
            filechecksum=md5.hexdigest(),
        ),
    )

    print(f"{datetime.now()}")
    print(rv)
    print("DEBUG END")

    return rv
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def print_project_frequency(project_numbers: list[int | str]) -> None:
|
|
248
|
+
"""Prints the frequency of the project numbers in the list, assuming they are either integers or strings of
|
|
249
|
+
individual integers."""
|
|
250
|
+
count = Counter(project_numbers)
|
|
251
|
+
for key in sorted(count.keys(), key=int):
|
|
252
|
+
print(f"p{key}\t{count[key]}")
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def print_statistics() -> None:
    """Print the number of cached entries, the per-project frequency and the total file size.

    All figures are computed from the module-level cache ``DB``.
    """
    print("Statistics ...")
    print(f"len(DB)\t=\t{len(DB)}")
    print_project_frequency(entry["containerid"] for entry in DB.values())
    total_bytes = sum(int(entry["resource"]["size"]) for entry in DB.values())
    print(f"file size\t=\t{total_bytes / (1024 * 1024 * 1024)} GBytes")
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def main() -> None:
    """Command line entry point: process result files and persist the cache if it changed.

    Exactly one of ``--stdin`` (file names read from standard input, one per
    line) or ``--file`` (a single file) must be given. ``--statistics``
    additionally prints statistics about the cache.
    """
    cli = argparse.ArgumentParser()
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument("--stdin", action="store_true", help="read file names from stdin")
    source.add_argument("--file", type=str, help="processes the provided file")
    cli.add_argument("--statistics", action="store_true", help="print statistics")
    args = cli.parse_args()

    db_written = False
    if args.stdin:
        print("reading file names from stdin ...")
        for raw_name in sys.stdin.readlines():
            # Remember a cache modification reported by any of the files.
            db_written = query_mascot_result(raw_name.strip()) or db_written
    elif args.file:
        print("processesing", args.file, "...")
        db_written = query_mascot_result(args.file)

    if args.statistics:
        print_statistics()

    if db_written:
        print(f"dumping json file '{DBfilename}' ...")
        with DBfilename.open("w") as cache_file:
            json.dump(DB, cache_file, sort_keys=True, indent=4)


if __name__ == "__main__":
    main()
|