qlever 0.5.12__py3-none-any.whl → 0.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic.

@@ -17,7 +17,7 @@ FORMAT = ttl
 
  [index]
  INPUT_FILES = *.gz
- MULTI_INPUT_JSON = $$(ls *.gz | xargs -I {} echo '{ "cmd": "zcat {}" }')
+ MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
  SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }
 
  [server]
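
The change above replaces a shell pipeline that printed one JSON input description per file with QLever's declarative `for-each` form. The following is a minimal sketch of the expansion semantics, for illustration only (it is not the qlever-control implementation, and the file names are hypothetical):

import glob
import json

def expand_multi_input(spec: dict) -> list[dict]:
    # Expand a {"cmd": ..., "for-each": <glob>} spec into one concrete
    # input description per matching file; specs without "for-each" are
    # already concrete and pass through unchanged.
    pattern = spec.get("for-each")
    if pattern is None:
        return [spec]
    return [
        {**{k: v for k, v in spec.items() if k != "for-each"},
         "cmd": spec["cmd"].replace("{}", path)}
        for path in sorted(glob.glob(pattern))
    ]

# For files a.gz and b.gz this yields the same two commands that the old
# `ls *.gz | xargs ...` pipeline printed:
print(json.dumps(expand_multi_input({"cmd": "zcat {}", "for-each": "*.gz"}), indent=2))
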
@@ -1,51 +1,127 @@
- # Qleverfile for PubChem, use with https://github.com/ad-freiburg/qlever-control
+ # Qleverfile for PubChem, use with the QLever CLI (`pip install qlever`)
  #
- # Resource requirements (as of 18.08.2024, on an AMD Ryzen 9 5900X):
- #
- # qlever get-data # ~2 hours, ~150 GB, ~19 billion triples
- # qlever index # ~7 hours, ~20 GB RAM, ~400 GB disk space
+ # qlever get-data # ~2 hours, ~120 GB, ~19 billion triples
+ # qlever index # ~6 hours, ~20 GB RAM, ~350 GB disk space (for the index)
  # qlever start # a few seconds
  #
- # NOTE 1: `get-data` does not only download the PubChem RDF data, but also
+ # Measured on an AMD Ryzen 9 7950X with 128 GB RAM and an NVMe SSD (17.12.2024)
+ #
+ # NOTE 1: `qlever get-data` not only downloads the PubChem RDF data, but also
  # a number of ontologies. These are very useful to obtain names for IRIs like
  # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
- # The ontologies BAO and NDF-RT are infrequently updated, for latest versions,
+ # The ontologies BAO and NDF-RT are occasionally updated; for latest versions,
  # see the download links at https://bioportal.bioontology.org/ontologies/BAO
  # and https://bioportal.bioontology.org/ontologies/NDF-RT .
- #
- # NOTE 2: Many of the TTL files have generic prefix definitions in the middle
- # of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
- # See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
- # This is allowed by the standard, but unusual. For use with QLever, we
- # therefore convert the TTL files to NT when downloading them.
  #
- # NOTE 3: The PubChem data contains several invalid IRIs, in particular,
- # containing spaces. The previous version of this Qleverfile used a combination
- # of `sed` and `awk` to fix this. In the meantime, QLever's default is to warn
- # about such IRIs while indexing, but accept them anyway.
+ # NOTE 2: The `MULTI_INPUT_JSON` zcats selected files together into one input
+ # stream because there are too many files, and the command line triggered by
+ # `qlever index` would otherwise be too long.
 
  [data]
  NAME = pubchem
  GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+ ONTOLOGIES_DIR = RDF.ontologies
+ PUBCHEM_DIR = RDF.pubchem
+ ONTOLOGIES_CSV = ontologies.csv
  CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
- MAKE_GET_DATA_CMD_1 = DIR=DATA.ontologies && mkdir -p $$DIR && cat $$DIR/ontologies.csv | while IFS=',' read -r DESC FILE URL; do ERRFILE=$${FILE%.*}.jena-stderr; echo "echo \"Processing $$URL ($$FILE) ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$DIR/$$FILE 2> $$DIR/$$ERRFILE | gzip -c > $$DIR/$${FILE%.*}.nt.gz && rm -f $$DIR/$$FILE && if [ ! -s $$DIR/$$ERRFILE ]; then rm -f $$DIR/$$ERRFILE; fi || echo \"ERROR processing $$URL ($$FILE)\""; done > pubchem.get-data-cmds.txt
- MAKE_GET_DATA_CMD_2 = DIR=DATA.pubchem && mkdir -p $$DIR && curl -LRO ${GET_DATA_URL}/void.ttl && grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' void.ttl | while read URL; do FILE=$$(basename $$URL); echo "echo \"Processing $$URL ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run -i --rm -v $$(pwd):/data stain/jena turtle --output=NT /data/$$DIR/$$FILE | gzip -c > $$DIR/$${FILE%%.*}.nt.gz && rm -f $$DIR/$$FILE || echo \"ERROR processing $$URL\""; done >> pubchem.get-data-cmds.txt
- GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${MAKE_GET_DATA_CMD_1} && ${MAKE_GET_DATA_CMD_2} && cat pubchem.get-data-cmds.txt | parallel --line-buffer 2>&1 | tee pubchem.get-data-log.txt
+ GET_DATA_CMD_1 = mkdir -p ${ONTOLOGIES_DIR} && cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi'
+ GET_DATA_CMD_2 = mkdir -p ${PUBCHEM_DIR} && wget -r -nv -nH --cut-dirs=2 --no-parent -P ${PUBCHEM_DIR} ${GET_DATA_URL}
+ GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${GET_DATA_CMD_1} 2>&1 | tee pubchem.get-data-log.txt; ${GET_DATA_CMD_2} 2>&1 | tee -a pubchem.get-data-log.txt
  VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
  DESCRIPTION = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
- MAKE_ONTOLOGIES_CSV = $$(mkdir -p DATA.ontologies && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\n BioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\n CHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\n ChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\n CiTO,cito.nt,http://purl.org/spar/cito.nt\n DCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\n FaBiO,fabio.nt,http://purl.org/spar/fabio.nt\n GO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\n IAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\n NCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\n NDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\n OBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\n OWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\n PDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\n PR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\n RDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\n RDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\n RO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\n SIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\n SKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\n SO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\n UO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > DATA.ontologies/ontologies.csv)
+ MAKE_ONTOLOGIES_CSV = $$(mkdir -p ${ONTOLOGIES_DIR} && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\nBioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\nCHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\nChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\nCiTO,cito.nt,http://purl.org/spar/cito.nt\nDCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\nFaBiO,fabio.nt,http://purl.org/spar/fabio.nt\nGO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\nIAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\nNCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\nNDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nOBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\nOWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\nPDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\nPR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\nRDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\nRDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\nRO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\nSIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\nSKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\nSO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\nUO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > ${ONTOLOGIES_DIR}/${ONTOLOGIES_CSV})
 
  [index]
- INPUT_FILES = DATA.ontologies/*.nt.gz DATA.pubchem/*.nt.gz
- CAT_INPUT_FILES = zcat ${INPUT_FILES}
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
- STXXL_MEMORY = 10G
+ INPUT_FILES = ${data:ONTOLOGIES_DIR}/*.nt.gz ${data:PUBCHEM_DIR}/*/*.ttl.gz ${data:PUBCHEM_DIR}/*/*/*.ttl.gz
+ BASE_URL = http://rdf.ncbi.nlm.nih.gov/pubchem
+ MULTI_INPUT_JSON = [{ "cmd": "zcat ${data:ONTOLOGIES_DIR}/*.nt.gz", "graph": "${BASE_URL}/ruleset"},
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/anatomy", "for-each": "${data:PUBCHEM_DIR}/anatomy/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/author", "for-each": "${data:PUBCHEM_DIR}/author/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/bioassay", "for-each": "${data:PUBCHEM_DIR}/bioassay/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/book", "for-each": "${data:PUBCHEM_DIR}/book/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/cell", "for-each": "${data:PUBCHEM_DIR}/cell/*.ttl.gz" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*0.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*1.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*2.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*3.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*4.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*5.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*6.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*7.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*8.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*9.ttl.gz", "graph": "${BASE_URL}/compound" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/compound", "for-each": "${data:PUBCHEM_DIR}/compound/general/*[!0-9].ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/concept", "for-each": "${data:PUBCHEM_DIR}/concept/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/conserveddomain", "for-each": "${data:PUBCHEM_DIR}/conserveddomain/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/cooccurrence", "for-each": "${data:PUBCHEM_DIR}/cooccurrence/*.ttl.gz" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*0.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*1.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*2.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*3.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*4.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*5.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*6.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*7.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*8.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*9.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/compound/*[!0-9].ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/substance/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/disease", "for-each": "${data:PUBCHEM_DIR}/disease/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/endpoint", "for-each": "${data:PUBCHEM_DIR}/endpoint/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/gene", "for-each": "${data:PUBCHEM_DIR}/gene/*.ttl.gz"},
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/grant", "for-each": "${data:PUBCHEM_DIR}/grant/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/inchikey", "for-each": "${data:PUBCHEM_DIR}/inchikey/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/journal", "for-each": "${data:PUBCHEM_DIR}/journal/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/measuregroup", "for-each": "${data:PUBCHEM_DIR}/measuregroup/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/organization", "for-each": "${data:PUBCHEM_DIR}/organization/*.ttl.gz" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*0.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*1.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*2.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*3.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*4.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*5.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*6.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*7.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*8.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*9.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/cpc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/ipc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/patent", "for-each": "${data:PUBCHEM_DIR}/patent/*[!0-9].ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/pathway", "for-each": "${data:PUBCHEM_DIR}/pathway/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/protein", "for-each": "${data:PUBCHEM_DIR}/protein/*.ttl.gz" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*0.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*1.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*2.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*3.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*4.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*5.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*6.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*7.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*8.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*9.ttl.gz", "graph": "${BASE_URL}/reference" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/reference", "for-each": "${data:PUBCHEM_DIR}/reference/*[!0-9].ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/source", "for-each": "${data:PUBCHEM_DIR}/source/*.ttl.gz" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*0.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*1.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*2.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*3.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*4.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*5.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*6.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*7.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*8.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*9.ttl.gz", "graph": "${BASE_URL}/substance" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/substance", "for-each": "${data:PUBCHEM_DIR}/substance/*[!0-9].ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/synonym", "for-each": "${data:PUBCHEM_DIR}/synonym/*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/taxonomy", "for-each": "${data:PUBCHEM_DIR}/taxonomy/*.ttl.gz" }]
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "num-triples-per-batch": 10000000 }
+ STXXL_MEMORY = 20G
 
  [server]
  PORT = 7023
  ACCESS_TOKEN = ${data:NAME}
  MEMORY_FOR_QUERIES = 20G
- TIMEOUT = 120s
+ TIMEOUT = 600s
 
  [runtime]
  SYSTEM = docker
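
The repeated `zcat ...*0.ttl.gz` through `zcat ...*9.ttl.gz` entries for compound, descriptor, patent, reference, and substance bucket the numbered files by their last digit, so each expanded `zcat` command stays short (see NOTE 2 in the Qleverfile), while the `*[!0-9].ttl.gz` entry catches the remaining files via `for-each`. A sketch of a generator for this pattern, for illustration only (the Qleverfile lists the entries explicitly; `bucket_entries` is not part of the package):

import json

def bucket_entries(directory: str, graph: str) -> list[dict]:
    # One zcat per trailing digit: each bucket matches roughly a tenth of
    # the numbered files, keeping every command line short.
    entries = [
        {"cmd": f"zcat {directory}/*{d}.ttl.gz", "graph": graph}
        for d in range(10)
    ]
    # Catch-all for file names that do not end in a digit.
    entries.append(
        {"cmd": "zcat {}", "graph": graph,
         "for-each": f"{directory}/*[!0-9].ttl.gz"}
    )
    return entries

print(json.dumps(
    bucket_entries("RDF.pubchem/substance",
                   "http://rdf.ncbi.nlm.nih.gov/pubchem/substance"),
    indent=2))
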
@@ -1,30 +1,62 @@
  # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
  #
- # qlever get-data # takes ~ 30 hours and ~ 2 TB of disk (for the NT files)
- # qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever get-data # takes ~ 30 hours and ~ 1.6 TB of disk (for the TTL files)
+ # qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
  # qlever start # starts the server (takes a few seconds)
  #
- # Install packages: sudo apt install -y libxml2-utils parallel xz-utils pv
+ # Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
  # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
  #
  # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
- # during build, ~ 3 TB after build). The uniprot.index.???.meta files can be on
- # HDD without significant performance loss (when running the server).
+ # during build, ~ 3 TB after build).
 
  [data]
- NAME = uniprot
- DATE = 2024-05-29
- DOWNLOAD_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
- GET_RDFXML_CMD = mkdir -p rdf.${DATE} && curl -s ${DOWNLOAD_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" - | while read URL; do wget --no-verbose -P rdf.${DATE} $$URL 2>&1 | tee -a uniprot.download-log; done
- RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | gzip -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
- GET_DATA_CMD = rdfxml --help && date > ${NAME}.get-data.begin-date && ${GET_RDFXML_CMD} && ${RDFXML2NT_CMD} && date > ${NAME}.get-data.end-date
- DESCRIPTION = Complete UniProt data from ${DOWNLOAD_URL}, version ${DATE}
+ NAME = uniprot
+ DATE = 2024-11-27
+ RDFXML_DIR = rdf.${DATE}
+ TTL_DIR = ttl.${DATE}
+ UNIPROT_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
+ RHEA_URL = https://ftp.expasy.org/databases/rhea/rdf
+ EXAMPLES_URL = https://github.com/sib-swiss/sparql-examples
+ GET_EXAMPLES_CMD = mkdir -p ${TTL_DIR} && git clone ${EXAMPLES_URL} && (cd sparql-examples && ./convertToOneTurtle.sh -p uniprot && gzip examples_uniprot.ttl && mv -f examples_uniprot.ttl.gz ../${TTL_DIR} && cd .. && rm -rf sparql-examples)
+ GET_RDFXML_CMD = mkdir -p ${RDFXML_DIR} && (echo "${RHEA_URL}/chebi.owl.gz"; echo "${RHEA_URL}/rhea.rdf.gz"; curl -s ${UNIPROT_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" -) | while read URL; do wget --no-verbose -P ${RDFXML_DIR} $$URL 2>&1 | tee -a uniprot.download-log; done
+ RDFXML2TTL_CMD = mkdir -p ${TTL_DIR} && for RDFXML in ${RDFXML_DIR}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=ttl -q 2> ${TTL_DIR}/$$(basename $$RDFXML).stderr | gzip -c > ${TTL_DIR}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/ttl.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
+ GET_DATA_CMD = date > ${NAME}.get-data.begin-date && ${GET_EXAMPLES_CMD} && ${GET_RDFXML_CMD} && ${RDFXML2TTL_CMD} && date > ${NAME}.get-data.end-date
+ DESCRIPTION = Complete UniProt data from ${UNIPROT_URL}, with additional data from ${RHEA_URL} and ${EXAMPLES_URL}
 
  [index]
- INPUT_FILES = nt.${data:DATE}/*.nt.gz
- CAT_INPUT_FILES = parallel --tmpdir . -j 4 'zcat -f {}' ::: ${INPUT_FILES} | pv -q -B 5G
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
- STXXL_MEMORY = 60G
+ INPUT_FILES = ${data:TTL_DIR}/*.ttl.gz
+ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_reviewed_*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_unreviewed_*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniparc", "for-each": "${data:TTL_DIR}/uniparc_*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniref", "for-each": "${data:TTL_DIR}/uniref*.ttl.gz" },
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/obsolete", "for-each": "${data:TTL_DIR}/uniprotkb_obsolete_*.ttl.gz" },
+ { "cmd": "zcat ${data:TTL_DIR}/chebi.ttl.gz", "graph": "http://sparql.uniprot.org/chebi" },
+ { "cmd": "zcat ${data:TTL_DIR}/citation_mapping.ttl.gz", "graph": "http://sparql.uniprot.org/citationmapping" },
+ { "cmd": "zcat ${data:TTL_DIR}/citations.ttl.gz", "graph": "http://sparql.uniprot.org/citations" },
+ { "cmd": "zcat ${data:TTL_DIR}/databases.ttl.gz", "graph": "http://sparql.uniprot.org/databases" },
+ { "cmd": "zcat ${data:TTL_DIR}/diseases.ttl.gz", "graph": "http://sparql.uniprot.org/diseases" },
+ { "cmd": "zcat ${data:TTL_DIR}/enzyme-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+ { "cmd": "zcat ${data:TTL_DIR}/enzyme.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+ { "cmd": "zcat ${data:TTL_DIR}/go-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+ { "cmd": "zcat ${data:TTL_DIR}/go.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+ { "cmd": "zcat ${data:TTL_DIR}/journals.ttl.gz", "graph": "http://sparql.uniprot.org/journal" },
+ { "cmd": "zcat ${data:TTL_DIR}/keywords-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+ { "cmd": "zcat ${data:TTL_DIR}/keywords.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+ { "cmd": "zcat ${data:TTL_DIR}/locations-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+ { "cmd": "zcat ${data:TTL_DIR}/locations.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+ { "cmd": "zcat ${data:TTL_DIR}/pathways-hierarchy*.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+ { "cmd": "zcat ${data:TTL_DIR}/pathways.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+ { "cmd": "zcat ${data:TTL_DIR}/proteomes.ttl.gz", "graph": "http://sparql.uniprot.org/proteomes" },
+ { "cmd": "zcat ${data:TTL_DIR}/taxonomy-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+ { "cmd": "zcat ${data:TTL_DIR}/taxonomy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+ { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
+ { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
+ { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
+ { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" },
+ { "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
+ STXXL_MEMORY = 60G
 
  [server]
  PORT = 7018
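
With a hand-maintained graph mapping like the one above, it is easy to forget a file or to match one twice. A small standalone sanity check one could run next to the Qleverfile (illustrative only; `specs` is a hand-copied excerpt, not parsed from the file, and `TTL_DIR` is assumed to match the DATE setting):

import glob
from collections import Counter

TTL_DIR = "ttl.2024-11-27"  # assumption: matches DATE in the Qleverfile
specs = [
    {"for-each": f"{TTL_DIR}/uniprotkb_reviewed_*.ttl.gz"},
    {"for-each": f"{TTL_DIR}/uniparc_*.ttl.gz"},
    {"cmd": f"zcat {TTL_DIR}/rhea.ttl.gz"},
    # ... remaining entries as in the Qleverfile ...
]

counts = Counter()
for spec in specs:
    if "for-each" in spec:
        for path in glob.glob(spec["for-each"]):
            counts[path] += 1
    else:  # concrete "cmd": count the files matched by the glob(s) after zcat
        for pattern in spec["cmd"].split()[1:]:
            for path in glob.glob(pattern):
                counts[path] += 1

all_inputs = set(glob.glob(f"{TTL_DIR}/*.ttl.gz"))
missed = sorted(all_inputs - set(counts))
doubled = sorted(p for p, n in counts.items() if n > 1)
print(f"missed: {missed}\ndoubled: {doubled}")
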
@@ -64,7 +64,7 @@ class AddTextIndexCommand(QleverCommand):
          # Show the command line.
          self.show(add_text_index_cmd, only_show=args.show)
          if args.show:
-             return False
+             return True
 
          # When running natively, check if the binary exists and works.
          if args.system == "native":
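
This hunk, and the analogous hunks below for the `cache-stats`, `clear-cache`, `example-queries`, and `get-data` commands, flips the return value in `--show` mode from False to True: printing the command line is now treated as success, not failure. Assuming the CLI maps `execute()`'s boolean to the process exit status, a dry run now exits with 0. A self-contained toy illustration (all names hypothetical, not the package's code):

import sys

def execute(show: bool) -> bool:
    # Print the command line first, like the self.show(...) calls above.
    print("some-index-binary --some-flags ...")  # hypothetical command line
    if show:
        return True  # was False, i.e. dry runs were reported as failures
    # ... actually run the command here ...
    return True

if __name__ == "__main__":
    ok = execute(show=True)
    sys.exit(0 if ok else 1)  # chaining `qlever ... --show && ...` now works
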
@@ -74,6 +74,7 @@ class AddTextIndexCommand(QleverCommand):
                  log.error(f"Running \"{args.index_binary}\" failed ({e}), "
                            f"set `--index-binary` to a different binary or "
                            f"use `--container_system`")
+                 return False
 
          # Check if text index files already exist.
          existing_text_index_files = get_existing_index_files(
@@ -47,7 +47,7 @@ class CacheStatsCommand(QleverCommand):
          self.show("\n".join([cache_stats_cmd, cache_settings_cmd]),
                    only_show=args.show)
          if args.show:
-             return False
+             return True
 
          # Execute them.
          try:
@@ -48,7 +48,7 @@ class ClearCacheCommand(QleverCommand):
                          f"\"{args.access_token}\"")
          self.show(clear_cache_cmd, only_show=args.show)
          if args.show:
-             return False
+             return True
 
          # Execute the command.
          try:
@@ -76,5 +76,7 @@ class ClearCacheCommand(QleverCommand):
          # Show cache stats.
          log.info("")
          args.detailed = False
-         CacheStatsCommand().execute(args)
+         if not CacheStatsCommand().execute(args):
+             log.error("Clearing the cache was successful, but showing the "
+                       "cache stats failed")
          return True
@@ -21,10 +21,7 @@ class ExampleQueriesCommand(QleverCommand):
      """
 
      def __init__(self):
-         self.presets = {
-             "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
-             "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
-         }
+         pass
 
      def description(self) -> str:
          return "Show how much of the cache is currently being used"
@@ -41,8 +38,15 @@ class ExampleQueriesCommand(QleverCommand):
          )
          subparser.add_argument(
              "--sparql-endpoint-preset",
-             choices=self.presets.keys(),
-             help="Shortcut for setting the SPARQL endpoint",
+             choices=[
+                 "https://qlever.dev/api/wikidata",
+                 "https://qlever.dev/api/uniprot",
+                 "https://qlever.dev/api/pubchem",
+                 "https://qlever.dev/api/osm-planet",
+                 "https://wikidata.demo.openlinksw.com/sparql",
+                 "https://sparql.uniprot.org/sparql",
+             ],
+             help="SPARQL endpoint from fixed list (to save typing)",
          )
          subparser.add_argument(
              "--get-queries-cmd",
@@ -86,7 +90,7 @@ class ExampleQueriesCommand(QleverCommand):
                  "application/sparql-results+json",
                  "text/turtle",
              ],
-             default="text/tab-separated-values",
+             default="application/sparql-results+json",
              help="Accept header for the SPARQL query",
          )
          subparser.add_argument(
@@ -98,7 +102,7 @@ class ExampleQueriesCommand(QleverCommand):
          subparser.add_argument(
              "--width-query-description",
              type=int,
-             default=40,
+             default=70,
              help="Width for printing the query description",
          )
          subparser.add_argument(
@@ -113,6 +117,32 @@ class ExampleQueriesCommand(QleverCommand):
              default=14,
              help="Width for printing the result size",
          )
+         subparser.add_argument(
+             "--show-query",
+             choices=["always", "never", "on-error"],
+             default="never",
+             help="Show the queries that will be executed (always, never, on error)",
+         )
+         subparser.add_argument(
+             "--show-prefixes",
+             action="store_true",
+             default=False,
+             help="When showing the query, also show the prefixes",
+         )
+
+     def pretty_print_query(self, query: str, show_prefixes: bool) -> None:
+         remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+         pretty_print_query_cmd = (
+             f"echo {shlex.quote(query)}"
+             f" | docker run -i --rm sparqling/sparql-formatter"
+             f"{remove_prefixes_cmd} | grep -v '^$'"
+         )
+         try:
+             query_pp = run_command(pretty_print_query_cmd, return_output=True)
+             log.info(colored(query_pp.rstrip(), "cyan"))
+         except Exception as e:
+             log.error(f"Failed to pretty-print query: {e}")
+             log.info(colored(query.rstrip(), "cyan"))
 
      def execute(self, args) -> bool:
          # We can't have both `--remove-offset-and-limit` and `--limit`.
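
The new pretty-printer shells out to the `sparqling/sparql-formatter` Docker image and falls back to the raw query if that fails. A standalone equivalent of the pipeline it builds, for illustration (requires Docker with that image available; the query text is made up):

import shlex
import subprocess

query = ("PREFIX wd: <http://www.wikidata.org/entity/> "
         "SELECT * WHERE { ?s ?p wd:Q42 } LIMIT 5")
cmd = (
    f"echo {shlex.quote(query)}"
    " | docker run -i --rm sparqling/sparql-formatter"
    " | sed '/^PREFIX /Id' | grep -v '^$'"  # drop prefix lines and blank lines
)
print(subprocess.run(cmd, shell=True, capture_output=True, text=True).stdout)
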
@@ -135,9 +165,8 @@ class ExampleQueriesCommand(QleverCommand):
              return False
 
          # Handle shortcuts for the SPARQL endpoint.
-         if args.sparql_endpoint_preset in self.presets:
-             args.sparql_endpoint = self.presets[args.sparql_endpoint_preset]
-             args.ui_config = args.sparql_endpoint_preset.split("-")[1]
+         if args.sparql_endpoint_preset:
+             args.sparql_endpoint = args.sparql_endpoint_preset
 
          # Limit only works with full result.
          if args.limit and args.download_or_count == "count":
@@ -178,7 +207,7 @@ class ExampleQueriesCommand(QleverCommand):
              only_show=args.show,
          )
          if args.show:
-             return False
+             return True
 
          # Get the example queries.
          try:
@@ -210,8 +239,11 @@ class ExampleQueriesCommand(QleverCommand):
              if args.clear_cache == "yes":
                  args.server_url = sparql_endpoint
                  args.complete = False
+                 clear_cache_successful = False
                  with mute_log():
-                     ClearCacheCommand().execute(args)
+                     clear_cache_successful = ClearCacheCommand().execute(args)
+                 if not clear_cache_successful:
+                     log.warn("Failed to clear the cache")
 
              # Remove OFFSET and LIMIT (after the last closing bracket).
              if args.remove_offset_and_limit or args.limit:
@@ -262,6 +294,9 @@ class ExampleQueriesCommand(QleverCommand):
              # A bit of pretty-printing.
              query = re.sub(r"\s+", " ", query)
              query = re.sub(r"\s*\.\s*\}", " }", query)
+             if args.show_query == "always":
+                 log.info("")
+                 self.pretty_print_query(query, args.show_prefixes)
 
              # Launch query.
              try:
@@ -282,55 +317,81 @@ class ExampleQueriesCommand(QleverCommand):
                      params={"query": query},
                      result_file=result_file,
                  ).strip()
-                 if http_code != "200":
-                     raise Exception(
-                         f"HTTP code {http_code}" f" {Path(result_file).read_text()}"
-                     )
-                 time_seconds = time.time() - start_time
-                 error_msg = None
+                 if http_code == "200":
+                     time_seconds = time.time() - start_time
+                     error_msg = None
+                 else:
+                     error_msg = {
+                         "short": f"HTTP code: {http_code}",
+                         "long": re.sub(r"\s+", " ", Path(result_file).read_text()),
+                     }
              except Exception as e:
                  if args.log_level == "DEBUG":
                      traceback.print_exc()
-                 error_msg = re.sub(r"\s+", " ", str(e))
+                 error_msg = {
+                     "short": "Exception",
+                     "long": re.sub(r"\s+", " ", str(e)),
+                 }
 
              # Get result size (via the command line, in order to avoid loading
              # a potentially large JSON file into Python, which is slow).
              if error_msg is None:
-                 try:
-                     if args.download_or_count == "count":
-                         if args.accept == "text/tab-separated-values":
-                             result_size = run_command(
-                                 f"sed 1d {result_file}", return_output=True
-                             )
-                         else:
+                 # CASE 0: The result is empty despite a 200 HTTP code.
+                 if Path(result_file).stat().st_size == 0:
+                     result_size = 0
+                     error_msg = {
+                         "short": "Empty result",
+                         "long": "curl returned with code 200, "
+                         "but the result is empty",
+                     }
+
+                 # CASE 1: Just counting the size of the result (TSV or JSON).
+                 elif args.download_or_count == "count":
+                     if args.accept == "text/tab-separated-values":
+                         result_size = run_command(
+                             f"sed 1d {result_file}", return_output=True
+                         )
+                     else:
+                         try:
                              result_size = run_command(
                                  f'jq -r ".results.bindings[0]'
                                  f" | to_entries[0].value.value"
                                  f' | tonumber" {result_file}',
                                  return_output=True,
                              )
+                         except Exception as e:
+                             error_msg = {
+                                 "short": "Malformed JSON",
+                                 "long": "curl returned with code 200, "
+                                 "but the JSON is malformed: "
+                                 + re.sub(r"\s+", " ", str(e)),
+                             }
+
+                 # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
+                 else:
+                     if (
+                         args.accept == "text/tab-separated-values"
+                         or args.accept == "text/csv"
+                     ):
+                         result_size = run_command(
+                             f"sed 1d {result_file} | wc -l", return_output=True
+                         )
+                     elif args.accept == "text/turtle":
+                         result_size = run_command(
+                             f"sed '1d;/^@prefix/d;/^\\s*$/d' " f"{result_file} | wc -l",
+                             return_output=True,
+                         )
                      else:
-                         if (
-                             args.accept == "text/tab-separated-values"
-                             or args.accept == "text/csv"
-                         ):
-                             result_size = run_command(
-                                 f"sed 1d {result_file} | wc -l", return_output=True
-                             )
-                         elif args.accept == "text/turtle":
-                             result_size = run_command(
-                                 f"sed '1d;/^@prefix/d;/^\\s*$/d' "
-                                 f"{result_file} | wc -l",
-                                 return_output=True,
-                             )
-                         else:
+                         try:
                              result_size = run_command(
                                  f'jq -r ".results.bindings | length"' f" {result_file}",
                                  return_output=True,
                              )
-                     result_size = int(result_size)
-                 except Exception as e:
-                     error_msg = str(e)
+                         except Exception as e:
+                             error_msg = {
+                                 "short": "Malformed JSON",
+                                 "long": re.sub(r"\s+", " ", str(e)),
+                             }
 
              # Remove the result file (unless in debug mode).
              if args.log_level != "DEBUG":
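
For reference, the result-size logic after this refactor reduces to a small decision table over the Accept header. A condensed, standalone restatement (`result_size` here is a hypothetical helper, error handling is omitted, and jq/sed must be installed):

import subprocess

def result_size(result_file: str, accept: str, count_query: bool) -> str:
    if count_query:
        if accept == "text/tab-separated-values":
            cmd = f"sed 1d {result_file}"  # the single data row is the count
        else:
            cmd = ('jq -r ".results.bindings[0] | to_entries[0].value.value'
                   f' | tonumber" {result_file}')
    elif accept in ("text/tab-separated-values", "text/csv"):
        cmd = f"sed 1d {result_file} | wc -l"  # drop the header, count rows
    elif accept == "text/turtle":
        # Ignore the header line, @prefix declarations, and blank lines.
        cmd = f"sed '1d;/^@prefix/d;/^\\s*$/d' {result_file} | wc -l"
    else:  # application/sparql-results+json, the new default
        cmd = f'jq -r ".results.bindings | length" {result_file}'
    return subprocess.check_output(cmd, shell=True, text=True).strip()

The shell tools are used deliberately: counting rows with sed/wc or jq avoids loading a potentially huge result file into Python.
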
@@ -341,6 +402,7 @@ class ExampleQueriesCommand(QleverCommand):
                  description = description[: args.width_query_description - 3]
                  description += "..."
              if error_msg is None:
+                 result_size = int(result_size)
                  log.info(
                      f"{description:<{args.width_query_description}} "
                      f"{time_seconds:6.2f} s "
@@ -352,16 +414,24 @@ class ExampleQueriesCommand(QleverCommand):
                  num_failed += 1
                  if (
                      args.width_error_message > 0
-                     and len(error_msg) > args.width_error_message
+                     and len(error_msg["long"]) > args.width_error_message
                      and args.log_level != "DEBUG"
+                     and args.show_query != "on-error"
                  ):
-                     error_msg = error_msg[: args.width_error_message - 3]
-                     error_msg += "..."
-                 log.error(
+                     error_msg["long"] = (
+                         error_msg["long"][: args.width_error_message - 3] + "..."
+                     )
+                 seperator_short_long = "\n" if args.show_query == "on-error" else " "
+                 log.info(
                      f"{description:<{args.width_query_description}} "
-                     f"failed "
-                     f"{colored(error_msg, 'red')}"
+                     f"{colored('FAILED ', 'red')}"
+                     f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
+                     f"{seperator_short_long}"
+                     f"{colored(error_msg['long'], 'red')}"
                  )
+                 if args.show_query == "on-error":
+                     self.pretty_print_query(query, args.show_prefixes)
+                 log.info("")
 
          # Check that each query has a time and a result size, or it failed.
          assert len(result_sizes) == len(query_times)
@@ -31,7 +31,7 @@ class GetDataCommand(QleverCommand):
          # Construct the command line and show it.
          self.show(args.get_data_cmd, only_show=args.show)
          if args.show:
-             return False
+             return True
 
          # Execute the command line.
          try: