qlever 0.5.11__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff shows the content of the publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two package versions.
Potentially problematic release.
This version of qlever might be problematic.
- qlever/Qleverfiles/Qleverfile.dblp +1 -1
- qlever/Qleverfiles/Qleverfile.pubchem +102 -26
- qlever/Qleverfiles/Qleverfile.uniprot +48 -16
- qlever/Qleverfiles/Qleverfile.wikidata +1 -3
- qlever/commands/add_text_index.py +2 -1
- qlever/commands/cache_stats.py +1 -1
- qlever/commands/clear_cache.py +4 -2
- qlever/commands/example_queries.py +120 -50
- qlever/commands/get_data.py +1 -1
- qlever/commands/index.py +148 -77
- qlever/commands/index_stats.py +90 -59
- qlever/commands/log.py +12 -2
- qlever/commands/query.py +66 -27
- qlever/commands/setup_config.py +1 -1
- qlever/commands/start.py +9 -3
- qlever/commands/status.py +2 -1
- qlever/commands/stop.py +4 -6
- qlever/commands/system_info.py +1 -1
- qlever/commands/ui.py +3 -1
- qlever/commands/warmup.py +1 -1
- qlever/qlever_main.py +16 -9
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/METADATA +1 -1
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/RECORD +27 -27
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/LICENSE +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/WHEEL +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/top_level.txt +0 -0
qlever/Qleverfiles/Qleverfile.dblp
CHANGED
@@ -17,7 +17,7 @@ FORMAT = ttl
 
 [index]
 INPUT_FILES = *.gz
-MULTI_INPUT_JSON =
+MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
 SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }
 
 [server]
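In Qleverfile.dblp, the previously empty `MULTI_INPUT_JSON` now carries an explicit input spec. As a plain-Python reading of the spec format used throughout this diff (a sketch, not code from qlever-control), a `"for-each"` glob expands the `"cmd"` template into one command per matching file:

# Sketch only: the function name and the exact "for-each" semantics are
# assumptions inferred from the specs shown in this diff.
import glob

def expand_input_spec(spec: dict) -> list[str]:
    pattern = spec.get("for-each")
    if pattern is None:
        # No "for-each": the command is used once, exactly as written.
        return [spec["cmd"]]
    # With "for-each": substitute each matching file for the {} placeholder.
    return [spec["cmd"].replace("{}", path) for path in sorted(glob.glob(pattern))]

# For the DBLP spec above, this yields one "zcat <file>" per *.gz file.
print(expand_input_spec({"cmd": "zcat {}", "for-each": "*.gz"}))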
qlever/Qleverfiles/Qleverfile.pubchem
CHANGED
@@ -1,51 +1,127 @@
-# Qleverfile for PubChem, use with
+# Qleverfile for PubChem, use with the QLever CLI (`pip install qlever`)
 #
-#
-#
-# qlever get-data  # ~2 hours, ~150 GB, ~19 billion triples
-# qlever index     # ~7 hours, ~20 GB RAM, ~400 GB disk space
+# qlever get-data  # ~2 hours, ~120 GB, ~19 billion triples
+# qlever index     # ~6 hours, ~20 GB RAM, ~350 GB disk space (for the index)
 # qlever start     # a few seconds
 #
-#
+# Measured on an AMD Ryzen 9 7950X with 128 GB RAM, and NVMe SSD (17.12.2024)
+#
+# NOTE 1: `qlever get-data` does not only download the PubChem RDF data, but also
 # a number of ontologies. These are very useful to obtain names for IRIs like
 # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
-# The ontologies BAO and NDF-RT are
+# The ontologies BAO and NDF-RT are occasionally updated; for latest versions,
 # see the download links at https://bioportal.bioontology.org/ontologies/BAO
 # and https://bioportal.bioontology.org/ontologies/NDF-RT .
-#
-# NOTE 2: Many of the TTL files have generic prefix definitions in the middle
-# of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
-# See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
-# This is allowed by the standard, but unusual. For use with QLever, we
-# therefore convert the TTL files to NT when downloading them.
 #
-# NOTE
-#
-#
-# about such IRIs while indexing, but accept them anyway.
+# NOTE 2: The `MULTI_INPUT_JSON` zcats selected files together in one input
+# stream because there are too many files and the command line triggered by
+# `qlever index` would be too long otherwise.
 
 [data]
 NAME = pubchem
 GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+ONTOLOGIES_DIR = RDF.ontologies
+PUBCHEM_DIR = RDF.pubchem
+ONTOLOGIES_CSV = ontologies.csv
 CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
-
-
-GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${
+GET_DATA_CMD_1 = mkdir -p ${ONTOLOGIES_DIR} && cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi'
+GET_DATA_CMD_2 = mkdir -p ${PUBCHEM_DIR} && wget -r -nv -nH --cut-dirs=2 --no-parent -P ${PUBCHEM_DIR} ${GET_DATA_URL}
+GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${GET_DATA_CMD_1} 2>&1 | tee pubchem.get-data-log.txt; ${GET_DATA_CMD_2} 2>&1 | tee -a pubchem.get-data-log.txt
 VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
 DESCRIPTION = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
-MAKE_ONTOLOGIES_CSV = $$(mkdir -p
+MAKE_ONTOLOGIES_CSV = $$(mkdir -p ${ONTOLOGIES_DIR} && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\nBioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\nCHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\nChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\nCiTO,cito.nt,http://purl.org/spar/cito.nt\nDCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\nFaBiO,fabio.nt,http://purl.org/spar/fabio.nt\nGO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\nIAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\nNCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\nNDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nOBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\nOWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\nPDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\nPR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\nRDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\nRDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\nRO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\nSIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\nSKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\nSO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\nUO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > ${ONTOLOGIES_DIR}/${ONTOLOGIES_CSV})
 
 [index]
-INPUT_FILES
-
-
-
+INPUT_FILES = ${data:ONTOLOGIES_DIR}/*.nt.gz ${data:PUBCHEM_DIR}/*/*.ttl.gz ${data:PUBCHEM_DIR}/*/*/*.ttl.gz
+BASE_URL = http://rdf.ncbi.nlm.nih.gov/pubchem
+MULTI_INPUT_JSON = [{ "cmd": "zcat ${data:ONTOLOGIES_DIR}/*.nt.gz", "graph": "${BASE_URL}/ruleset"},
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/anatomy", "for-each": "${data:PUBCHEM_DIR}/anatomy/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/author", "for-each": "${data:PUBCHEM_DIR}/author/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/bioassay", "for-each": "${data:PUBCHEM_DIR}/bioassay/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/book", "for-each": "${data:PUBCHEM_DIR}/book/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/cell", "for-each": "${data:PUBCHEM_DIR}/cell/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*0.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*1.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*2.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*3.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*4.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*5.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*6.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*7.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*8.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*9.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/compound", "for-each": "${data:PUBCHEM_DIR}/compound/general/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/concept", "for-each": "${data:PUBCHEM_DIR}/concept/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/conserveddomain", "for-each": "${data:PUBCHEM_DIR}/conserveddomain/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/cooccurrence", "for-each": "${data:PUBCHEM_DIR}/cooccurrence/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*0.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*1.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*2.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*3.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*4.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*5.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*6.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*7.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*8.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*9.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/compound/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/substance/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/disease", "for-each": "${data:PUBCHEM_DIR}/disease/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/endpoint", "for-each": "${data:PUBCHEM_DIR}/endpoint/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/gene", "for-each": "${data:PUBCHEM_DIR}/gene/*.ttl.gz"},
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/grant", "for-each": "${data:PUBCHEM_DIR}/grant/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/inchikey", "for-each": "${data:PUBCHEM_DIR}/inchikey/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/journal", "for-each": "${data:PUBCHEM_DIR}/journal/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/measuregroup", "for-each": "${data:PUBCHEM_DIR}/measuregroup/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/organization", "for-each": "${data:PUBCHEM_DIR}/organization/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*0.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*1.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*2.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*3.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*4.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*5.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*6.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*7.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*8.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*9.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/cpc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/ipc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/patent", "for-each": "${data:PUBCHEM_DIR}/patent/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/pathway", "for-each": "${data:PUBCHEM_DIR}/pathway/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/protein", "for-each": "${data:PUBCHEM_DIR}/protein/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*0.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*1.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*2.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*3.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*4.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*5.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*6.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*7.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*8.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*9.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/reference", "for-each": "${data:PUBCHEM_DIR}/reference/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/source", "for-each": "${data:PUBCHEM_DIR}/source/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*0.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*1.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*2.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*3.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*4.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*5.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*6.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*7.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*8.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*9.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/substance", "for-each": "${data:PUBCHEM_DIR}/substance/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/synonym", "for-each": "${data:PUBCHEM_DIR}/synonym/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/taxonomy", "for-each": "${data:PUBCHEM_DIR}/taxonomy/*.ttl.gz" }]
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "num-triples-per-batch": 10000000 }
+STXXL_MEMORY = 20G
 
 [server]
 PORT = 7023
 ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 20G
-TIMEOUT =
+TIMEOUT = 600s
 
 [runtime]
 SYSTEM = docker
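NOTE 2 explains the shape of the `MULTI_INPUT_JSON` above: the compound, descriptor, patent, reference, and substance directories contain so many files that a single per-file expansion would produce an overlong `qlever index` command line, so the files are partitioned into ten digit buckets (`*0.ttl.gz` through `*9.ttl.gz`) plus a `*[!0-9].ttl.gz` catch-all, each read by one short `zcat` call with a glob. A minimal sketch of that partition, with made-up file names:

# Illustrative only: file names are invented, but the bucketing mirrors the
# *0 ... *9 and *[!0-9] globs above. Every file lands in exactly one bucket.
from collections import defaultdict
from pathlib import Path

def bucket_by_last_digit(paths: list[str]) -> dict[str, list[str]]:
    buckets: dict[str, list[str]] = defaultdict(list)
    for p in paths:
        stem = Path(p).name.removesuffix(".ttl.gz")
        key = stem[-1] if stem[-1].isdigit() else "[!0-9]"
        buckets[key].append(p)
    return buckets

files = [f"pc_compound2descriptor_{i:06d}.ttl.gz" for i in range(1, 24)]
files.append("pc_compound_type.ttl.gz")  # caught by the *[!0-9] glob
for key, group in sorted(bucket_by_last_digit(files).items()):
    print(f"*{key}.ttl.gz matches {len(group)} files")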
qlever/Qleverfiles/Qleverfile.uniprot
CHANGED
@@ -1,30 +1,62 @@
 # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data  # takes ~ 30 hours and ~
-# qlever index     # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9
+# qlever get-data  # takes ~ 30 hours and ~ 1.6 TB of disk (for the TTL files)
+# qlever index     # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
 # qlever start     # starts the server (takes a few seconds)
 #
-# Install packages: sudo apt install -y libxml2-utils parallel xz-utils
+# Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
 # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
 #
 # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
-# during build, ~ 3 TB after build).
-# HDD without significant performance loss (when running the server).
+# during build, ~ 3 TB after build).
 
 [data]
-NAME
-DATE
-
-
-
-
-
+NAME = uniprot
+DATE = 2024-11-27
+RDFXML_DIR = rdf.${DATE}
+TTL_DIR = ttl.${DATE}
+UNIPROT_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
+RHEA_URL = https://ftp.expasy.org/databases/rhea/rdf
+EXAMPLES_URL = https://github.com/sib-swiss/sparql-examples
+GET_EXAMPLES_CMD = mkdir -p ${TTL_DIR} && git clone ${EXAMPLES_URL} && (cd sparql-examples && ./convertToOneTurtle.sh -p uniprot && gzip examples_uniprot.ttl && mv -f examples_uniprot.ttl.gz ../${TTL_DIR} && cd .. && rm -rf sparql-examples)
+GET_RDFXML_CMD = mkdir -p ${RDFXML_DIR} && (echo "${RHEA_URL}/chebi.owl.gz"; echo "${RHEA_URL}/rhea.rdf.gz"; curl -s ${UNIPROT_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" -) | while read URL; do wget --no-verbose -P ${RDFXML_DIR} $$URL 2>&1 | tee -a uniprot.download-log; done
+RDFXML2TTL_CMD = mkdir -p ${TTL_DIR} && for RDFXML in ${RDFXML_DIR}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=ttl -q 2> ${TTL_DIR}/$$(basename $$RDFXML).stderr | gzip -c > ${TTL_DIR}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/ttl.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
+GET_DATA_CMD = date > ${NAME}.get-data.begin-date && ${GET_EXAMPLES_CMD} && ${GET_RDFXML_CMD} && ${RDFXML2TTL_CMD} && date > ${NAME}.get-data.end-date
+DESCRIPTION = Complete UniProt data from ${UNIPROT_URL}, with additional data from ${RHEA_URL} and ${EXAMPLES_URL}
 
 [index]
-INPUT_FILES
-
-
-
+INPUT_FILES = ${data:TTL_DIR}/*.ttl.gz
+MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_reviewed_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_unreviewed_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniparc", "for-each": "${data:TTL_DIR}/uniparc_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniref", "for-each": "${data:TTL_DIR}/uniref*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/obsolete", "for-each": "${data:TTL_DIR}/uniprotkb_obsolete_*.ttl.gz" },
+    { "cmd": "zcat ${data:TTL_DIR}/chebi.ttl.gz", "graph": "http://sparql.uniprot.org/chebi" },
+    { "cmd": "zcat ${data:TTL_DIR}/citation_mapping.ttl.gz", "graph": "http://sparql.uniprot.org/citationmapping" },
+    { "cmd": "zcat ${data:TTL_DIR}/citations.ttl.gz", "graph": "http://sparql.uniprot.org/citations" },
+    { "cmd": "zcat ${data:TTL_DIR}/databases.ttl.gz", "graph": "http://sparql.uniprot.org/databases" },
+    { "cmd": "zcat ${data:TTL_DIR}/diseases.ttl.gz", "graph": "http://sparql.uniprot.org/diseases" },
+    { "cmd": "zcat ${data:TTL_DIR}/enzyme-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+    { "cmd": "zcat ${data:TTL_DIR}/enzyme.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+    { "cmd": "zcat ${data:TTL_DIR}/go-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+    { "cmd": "zcat ${data:TTL_DIR}/go.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+    { "cmd": "zcat ${data:TTL_DIR}/journals.ttl.gz", "graph": "http://sparql.uniprot.org/journal" },
+    { "cmd": "zcat ${data:TTL_DIR}/keywords-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+    { "cmd": "zcat ${data:TTL_DIR}/keywords.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+    { "cmd": "zcat ${data:TTL_DIR}/locations-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+    { "cmd": "zcat ${data:TTL_DIR}/locations.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+    { "cmd": "zcat ${data:TTL_DIR}/pathways-hierarchy*.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+    { "cmd": "zcat ${data:TTL_DIR}/pathways.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+    { "cmd": "zcat ${data:TTL_DIR}/proteomes.ttl.gz", "graph": "http://sparql.uniprot.org/proteomes" },
+    { "cmd": "zcat ${data:TTL_DIR}/taxonomy-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+    { "cmd": "zcat ${data:TTL_DIR}/taxonomy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+    { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
+    { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
+    { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
+    { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" },
+    { "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
+STXXL_MEMORY = 60G
 
 [server]
 PORT = 7018
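The new `GET_RDFXML_CMD` builds its download list from UniProt's RELEASE.meta4 metalink file: the `sed` call blanks the attributes of the `<metalink>` root element so that the namespace-free `xmllint --xpath` expression can select the Swiss mirror URLs (`location="ch"`). A rough Python equivalent of just that extraction step (function name and namespace handling are assumptions for illustration, not qlever code):

import urllib.request
import xml.etree.ElementTree as ET

META4_URL = ("https://ftp.uniprot.org/pub/databases/uniprot/"
             "current_release/rdf/RELEASE.meta4")

def swiss_mirror_urls(meta4_xml: bytes) -> list[str]:
    root = ET.fromstring(meta4_xml)
    # Instead of stripping the metalink namespace (the sed step), accept
    # <url location="ch"> elements regardless of their namespace.
    return [
        elem.text
        for elem in root.iter()
        if elem.tag.rsplit("}", 1)[-1] == "url" and elem.get("location") == "ch"
    ]

with urllib.request.urlopen(META4_URL) as response:
    for url in swiss_mirror_urls(response.read()):
        print(url)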
qlever/Qleverfiles/Qleverfile.wikidata
CHANGED
@@ -16,8 +16,7 @@ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
 GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
 DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
 DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
-TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
 
 [index]
 INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
@@ -26,7 +25,6 @@ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl",
 { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G
-TEXT_INDEX = from_text_records
 
 [server]
 PORT = 7001
qlever/commands/add_text_index.py
CHANGED
@@ -64,7 +64,7 @@ class AddTextIndexCommand(QleverCommand):
         # Show the command line.
         self.show(add_text_index_cmd, only_show=args.show)
         if args.show:
-            return
+            return True
 
         # When running natively, check if the binary exists and works.
         if args.system == "native":
@@ -74,6 +74,7 @@ class AddTextIndexCommand(QleverCommand):
                 log.error(f"Running \"{args.index_binary}\" failed ({e}), "
                           f"set `--index-binary` to a different binary or "
                           f"use `--container_system`")
+                return False
 
         # Check if text index files already exist.
         existing_text_index_files = get_existing_index_files(
qlever/commands/cache_stats.py
CHANGED
qlever/commands/clear_cache.py
CHANGED
@@ -48,7 +48,7 @@ class ClearCacheCommand(QleverCommand):
                   f"\"{args.access_token}\"")
         self.show(clear_cache_cmd, only_show=args.show)
         if args.show:
-            return
+            return True
 
         # Execute the command.
         try:
@@ -76,5 +76,7 @@ class ClearCacheCommand(QleverCommand):
         # Show cache stats.
         log.info("")
         args.detailed = False
-        CacheStatsCommand().execute(args)
+        if not CacheStatsCommand().execute(args):
+            log.error("Clearing the cache was successful, but showing the "
+                      "cache stats failed {e}")
         return True
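The Python changes in this release follow one pattern: `execute()` now consistently returns a boolean, `--show` short-circuits with `True`, and failures return `False` instead of falling through, so commands can call each other and react to the outcome (as `clear_cache.py` does with `CacheStatsCommand` above, and as `example_queries.py` does with `ClearCacheCommand` below). A stripped-down sketch of that convention; class names and bodies are illustrative, not the qlever-control API:

class CacheStatsSketch:
    def execute(self, args) -> bool:
        try:
            print("fetching cache stats ...")  # stand-in for the real work
            return True
        except Exception:
            return False

class ClearCacheSketch:
    def execute(self, args) -> bool:
        if getattr(args, "show", False):
            return True  # only showing the command line counts as success
        print("clearing the cache ...")  # stand-in for the real work
        # Chain another command and react to its result instead of ignoring it.
        if not CacheStatsSketch().execute(args):
            print("clearing succeeded, but showing the cache stats failed")
        return True

With this convention a caller can write `if not SomeCommand().execute(args): log.warn(...)` rather than silently discarding failures.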
qlever/commands/example_queries.py
CHANGED
@@ -21,10 +21,7 @@ class ExampleQueriesCommand(QleverCommand):
     """
 
     def __init__(self):
-
-            "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
-            "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
-        }
+        pass
 
     def description(self) -> str:
         return "Show how much of the cache is currently being used"
@@ -41,8 +38,15 @@ class ExampleQueriesCommand(QleverCommand):
         )
         subparser.add_argument(
             "--sparql-endpoint-preset",
-            choices=
-
+            choices=[
+                "https://qlever.dev/api/wikidata",
+                "https://qlever.dev/api/uniprot",
+                "https://qlever.dev/api/pubchem",
+                "https://qlever.dev/api/osm-planet",
+                "https://wikidata.demo.openlinksw.com/sparql",
+                "https://sparql.uniprot.org/sparql",
+            ],
+            help="SPARQL endpoint from fixed list (to save typing)",
         )
         subparser.add_argument(
             "--get-queries-cmd",
@@ -86,7 +90,7 @@ class ExampleQueriesCommand(QleverCommand):
                 "application/sparql-results+json",
                 "text/turtle",
             ],
-            default="
+            default="application/sparql-results+json",
             help="Accept header for the SPARQL query",
         )
         subparser.add_argument(
@@ -98,7 +102,7 @@ class ExampleQueriesCommand(QleverCommand):
         subparser.add_argument(
             "--width-query-description",
             type=int,
-            default=
+            default=70,
             help="Width for printing the query description",
         )
         subparser.add_argument(
@@ -113,6 +117,32 @@ class ExampleQueriesCommand(QleverCommand):
             default=14,
             help="Width for printing the result size",
         )
+        subparser.add_argument(
+            "--show-query",
+            choices=["always", "never", "on-error"],
+            default="never",
+            help="Show the queries that will be executed (always, never, on error)",
+        )
+        subparser.add_argument(
+            "--show-prefixes",
+            action="store_true",
+            default=False,
+            help="When showing the query, also show the prefixes",
+        )
+
+    def pretty_print_query(self, query: str, show_prefixes: bool) -> None:
+        remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+        pretty_print_query_cmd = (
+            f"echo {shlex.quote(query)}"
+            f" | docker run -i --rm sparqling/sparql-formatter"
+            f"{remove_prefixes_cmd} | grep -v '^$'"
+        )
+        try:
+            query_pp = run_command(pretty_print_query_cmd, return_output=True)
+            log.info(colored(query_pp.rstrip(), "cyan"))
+        except Exception as e:
+            log.error(f"Failed to pretty-print query: {e}")
+            log.info(colored(query.rstrip(), "cyan"))
 
     def execute(self, args) -> bool:
         # We can't have both `--remove-offset-and-limit` and `--limit`.
@@ -135,9 +165,8 @@ class ExampleQueriesCommand(QleverCommand):
             return False
 
         # Handle shortcuts for SPARQL endpoint.
-        if args.sparql_endpoint_preset
-            args.sparql_endpoint =
-            args.ui_config = args.sparql_endpoint_preset.split("-")[1]
+        if args.sparql_endpoint_preset:
+            args.sparql_endpoint = args.sparql_endpoint_preset
 
         # Limit only works with full result.
         if args.limit and args.download_or_count == "count":
@@ -178,7 +207,7 @@ class ExampleQueriesCommand(QleverCommand):
             only_show=args.show,
         )
         if args.show:
-            return
+            return True
 
         # Get the example queries.
         try:
@@ -210,8 +239,11 @@ class ExampleQueriesCommand(QleverCommand):
             if args.clear_cache == "yes":
                 args.server_url = sparql_endpoint
                 args.complete = False
+                clear_cache_successful = False
                 with mute_log():
-                    ClearCacheCommand().execute(args)
+                    clear_cache_successful = ClearCacheCommand().execute(args)
+                if not clear_cache_successful:
+                    log.warn("Failed to clear the cache")
 
             # Remove OFFSET and LIMIT (after the last closing bracket).
             if args.remove_offset_and_limit or args.limit:
@@ -262,6 +294,9 @@ class ExampleQueriesCommand(QleverCommand):
             # A bit of pretty-printing.
             query = re.sub(r"\s+", " ", query)
             query = re.sub(r"\s*\.\s*\}", " }", query)
+            if args.show_query == "always":
+                log.info("")
+                self.pretty_print_query(query, args.show_prefixes)
 
             # Launch query.
             try:
@@ -282,55 +317,81 @@ class ExampleQueriesCommand(QleverCommand):
                     params={"query": query},
                     result_file=result_file,
                 ).strip()
-                if http_code
-
-
-
-
-
+                if http_code == "200":
+                    time_seconds = time.time() - start_time
+                    error_msg = None
+                else:
+                    error_msg = {
+                        "short": f"HTTP code: {http_code}",
+                        "long": re.sub(r"\s+", " ", Path(result_file).read_text()),
+                    }
             except Exception as e:
                 if args.log_level == "DEBUG":
                     traceback.print_exc()
-                error_msg =
+                error_msg = {
+                    "short": "Exception",
+                    "long": re.sub(r"\s+", " ", str(e)),
+                }
 
             # Get result size (via the command line, in order to avoid loading
             # a potentially large JSON file into Python, which is slow).
             if error_msg is None:
-
-
-
-
-
-
-
+                # CASE 0: The result is empty despite a 200 HTTP code.
+                if Path(result_file).stat().st_size == 0:
+                    result_size = 0
+                    error_msg = {
+                        "short": "Empty result",
+                        "long": "curl returned with code 200, "
+                        "but the result is empty",
+                    }
+
+                # CASE 1: Just counting the size of the result (TSV or JSON).
+                elif args.download_or_count == "count":
+                    if args.accept == "text/tab-separated-values":
+                        result_size = run_command(
+                            f"sed 1d {result_file}", return_output=True
+                        )
+                    else:
+                        try:
                             result_size = run_command(
                                 f'jq -r ".results.bindings[0]'
                                 f" | to_entries[0].value.value"
                                 f' | tonumber" {result_file}',
                                 return_output=True,
                             )
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": "curl returned with code 200, "
+                                "but the JSON is malformed: "
+                                + re.sub(r"\s+", " ", str(e)),
+                            }
+
+                # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
+                else:
+                    if (
+                        args.accept == "text/tab-separated-values"
+                        or args.accept == "text/csv"
+                    ):
+                        result_size = run_command(
+                            f"sed 1d {result_file} | wc -l", return_output=True
+                        )
+                    elif args.accept == "text/turtle":
+                        result_size = run_command(
+                            f"sed '1d;/^@prefix/d;/^\\s*$/d' " f"{result_file} | wc -l",
+                            return_output=True,
+                        )
                     else:
-
-                        args.accept == "text/tab-separated-values"
-                        or args.accept == "text/csv"
-                    ):
-                        result_size = run_command(
-                            f"sed 1d {result_file} | wc -l", return_output=True
-                        )
-                    elif args.accept == "text/turtle":
-                        result_size = run_command(
-                            f"sed '1d;/^@prefix/d;/^\\s*$/d' "
-                            f"{result_file} | wc -l",
-                            return_output=True,
-                        )
-                    else:
+                        try:
                             result_size = run_command(
                                 f'jq -r ".results.bindings | length"' f" {result_file}",
                                 return_output=True,
                             )
-
-
-
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": re.sub(r"\s+", " ", str(e)),
+                            }
 
             # Remove the result file (unless in debug mode).
             if args.log_level != "DEBUG":
@@ -341,6 +402,7 @@ class ExampleQueriesCommand(QleverCommand):
                 description = description[: args.width_query_description - 3]
                 description += "..."
             if error_msg is None:
+                result_size = int(result_size)
                 log.info(
                     f"{description:<{args.width_query_description}} "
                     f"{time_seconds:6.2f} s "
@@ -352,16 +414,24 @@ class ExampleQueriesCommand(QleverCommand):
                 num_failed += 1
                 if (
                     args.width_error_message > 0
-                    and len(error_msg) > args.width_error_message
+                    and len(error_msg["long"]) > args.width_error_message
                    and args.log_level != "DEBUG"
+                    and args.show_query != "on-error"
                 ):
-                    error_msg =
-
-
+                    error_msg["long"] = (
+                        error_msg["long"][: args.width_error_message - 3] + "..."
+                    )
+                seperator_short_long = "\n" if args.show_query == "on-error" else " "
+                log.info(
                     f"{description:<{args.width_query_description}} "
-                    f"
-                    f"{colored(error_msg, 'red')}"
+                    f"{colored('FAILED ', 'red')}"
+                    f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
+                    f"{seperator_short_long}"
+                    f"{colored(error_msg['long'], 'red')}"
                 )
+                if args.show_query == "on-error":
+                    self.pretty_print_query(query, args.show_prefixes)
+                    log.info("")
 
         # Check that each query has a time and a result size, or it failed.
         assert len(result_sizes) == len(query_times)