qlever 0.5.11__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff shows the content of the publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only and reflects the changes between the two package versions.
Potentially problematic release.
This version of qlever might be problematic.
- qlever/Qleverfiles/Qleverfile.dblp +1 -1
- qlever/Qleverfiles/Qleverfile.pubchem +102 -26
- qlever/Qleverfiles/Qleverfile.uniprot +48 -16
- qlever/Qleverfiles/Qleverfile.wikidata +1 -3
- qlever/commands/add_text_index.py +2 -1
- qlever/commands/cache_stats.py +1 -1
- qlever/commands/clear_cache.py +4 -2
- qlever/commands/example_queries.py +120 -50
- qlever/commands/get_data.py +1 -1
- qlever/commands/index.py +148 -77
- qlever/commands/index_stats.py +90 -59
- qlever/commands/log.py +12 -2
- qlever/commands/query.py +66 -27
- qlever/commands/setup_config.py +1 -1
- qlever/commands/start.py +9 -3
- qlever/commands/status.py +2 -1
- qlever/commands/stop.py +4 -6
- qlever/commands/system_info.py +1 -1
- qlever/commands/ui.py +3 -1
- qlever/commands/warmup.py +1 -1
- qlever/qlever_main.py +16 -9
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/METADATA +1 -1
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/RECORD +27 -27
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/LICENSE +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/WHEEL +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/top_level.txt +0 -0
qlever/Qleverfiles/Qleverfile.dblp
CHANGED
@@ -17,7 +17,7 @@ FORMAT = ttl
 
 [index]
 INPUT_FILES = *.gz
-MULTI_INPUT_JSON =
+MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
 SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }
 
 [server]
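In Qleverfile.dblp, the previously empty `MULTI_INPUT_JSON` now carries an explicit input spec. As a plain-Python reading of the spec format used throughout this diff (a sketch, not code from qlever-control), a `"for-each"` glob expands the `"cmd"` template into one command per matching file:

# Sketch only: the function name and the exact "for-each" semantics are
# assumptions inferred from the specs shown in this diff.
import glob

def expand_input_spec(spec: dict) -> list[str]:
    pattern = spec.get("for-each")
    if pattern is None:
        # No "for-each": the command is used once, exactly as written.
        return [spec["cmd"]]
    # With "for-each": substitute each matching file for the {} placeholder.
    return [spec["cmd"].replace("{}", path) for path in sorted(glob.glob(pattern))]

# For the DBLP spec above, this yields one "zcat <file>" per *.gz file.
print(expand_input_spec({"cmd": "zcat {}", "for-each": "*.gz"}))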
qlever/Qleverfiles/Qleverfile.pubchem
CHANGED
@@ -1,51 +1,127 @@
-# Qleverfile for PubChem, use with
+# Qleverfile for PubChem, use with the QLever CLI (`pip install qlever`)
 #
-#
-#
-# qlever get-data  # ~2 hours, ~150 GB, ~19 billion triples
-# qlever index     # ~7 hours, ~20 GB RAM, ~400 GB disk space
+# qlever get-data  # ~2 hours, ~120 GB, ~19 billion triples
+# qlever index     # ~6 hours, ~20 GB RAM, ~350 GB disk space (for the index)
 # qlever start     # a few seconds
 #
-#
+# Measured on an AMD Ryzen 9 7950X with 128 GB RAM, and NVMe SSD (17.12.2024)
+#
+# NOTE 1: `qlever get-data` does not only download the PubChem RDF data, but also
 # a number of ontologies. These are very useful to obtain names for IRIs like
 # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
-# The ontologies BAO and NDF-RT are
+# The ontologies BAO and NDF-RT are occasionally updated; for latest versions,
 # see the download links at https://bioportal.bioontology.org/ontologies/BAO
 # and https://bioportal.bioontology.org/ontologies/NDF-RT .
-#
-# NOTE 2: Many of the TTL files have generic prefix definitions in the middle
-# of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
-# See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
-# This is allowed by the standard, but unusual. For use with QLever, we
-# therefore convert the TTL files to NT when downloading them.
 #
-# NOTE
-#
-#
-# about such IRIs while indexing, but accept them anyway.
+# NOTE 2: The `MULTI_INPUT_JSON` zcats selected files together in one input
+# stream because there are too many files and the command line triggered by
+# `qlever index` would be too long otherwise.
 
 [data]
 NAME = pubchem
 GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+ONTOLOGIES_DIR = RDF.ontologies
+PUBCHEM_DIR = RDF.pubchem
+ONTOLOGIES_CSV = ontologies.csv
 CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
-
-
-GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${
+GET_DATA_CMD_1 = mkdir -p ${ONTOLOGIES_DIR} && cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi'
+GET_DATA_CMD_2 = mkdir -p ${PUBCHEM_DIR} && wget -r -nv -nH --cut-dirs=2 --no-parent -P ${PUBCHEM_DIR} ${GET_DATA_URL}
+GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${GET_DATA_CMD_1} 2>&1 | tee pubchem.get-data-log.txt; ${GET_DATA_CMD_2} 2>&1 | tee -a pubchem.get-data-log.txt
 VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
 DESCRIPTION = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
-MAKE_ONTOLOGIES_CSV = $$(mkdir -p
+MAKE_ONTOLOGIES_CSV = $$(mkdir -p ${ONTOLOGIES_DIR} && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\nBioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\nCHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\nChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\nCiTO,cito.nt,http://purl.org/spar/cito.nt\nDCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\nFaBiO,fabio.nt,http://purl.org/spar/fabio.nt\nGO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\nIAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\nNCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\nNDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nOBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\nOWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\nPDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\nPR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\nRDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\nRDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\nRO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\nSIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\nSKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\nSO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\nUO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > ${ONTOLOGIES_DIR}/${ONTOLOGIES_CSV})
 
 [index]
-INPUT_FILES
-
-
-
+INPUT_FILES = ${data:ONTOLOGIES_DIR}/*.nt.gz ${data:PUBCHEM_DIR}/*/*.ttl.gz ${data:PUBCHEM_DIR}/*/*/*.ttl.gz
+BASE_URL = http://rdf.ncbi.nlm.nih.gov/pubchem
+MULTI_INPUT_JSON = [{ "cmd": "zcat ${data:ONTOLOGIES_DIR}/*.nt.gz", "graph": "${BASE_URL}/ruleset"},
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/anatomy", "for-each": "${data:PUBCHEM_DIR}/anatomy/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/author", "for-each": "${data:PUBCHEM_DIR}/author/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/bioassay", "for-each": "${data:PUBCHEM_DIR}/bioassay/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/book", "for-each": "${data:PUBCHEM_DIR}/book/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/cell", "for-each": "${data:PUBCHEM_DIR}/cell/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*0.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*1.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*2.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*3.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*4.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*5.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*6.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*7.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*8.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*9.ttl.gz", "graph": "${BASE_URL}/compound" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/compound", "for-each": "${data:PUBCHEM_DIR}/compound/general/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/concept", "for-each": "${data:PUBCHEM_DIR}/concept/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/conserveddomain", "for-each": "${data:PUBCHEM_DIR}/conserveddomain/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/cooccurrence", "for-each": "${data:PUBCHEM_DIR}/cooccurrence/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*0.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*1.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*2.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*3.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*4.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*5.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*6.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*7.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*8.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*9.ttl.gz", "graph": "${BASE_URL}/descriptor" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/compound/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/substance/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/disease", "for-each": "${data:PUBCHEM_DIR}/disease/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/endpoint", "for-each": "${data:PUBCHEM_DIR}/endpoint/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/gene", "for-each": "${data:PUBCHEM_DIR}/gene/*.ttl.gz"},
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/grant", "for-each": "${data:PUBCHEM_DIR}/grant/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/inchikey", "for-each": "${data:PUBCHEM_DIR}/inchikey/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/journal", "for-each": "${data:PUBCHEM_DIR}/journal/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/measuregroup", "for-each": "${data:PUBCHEM_DIR}/measuregroup/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/organization", "for-each": "${data:PUBCHEM_DIR}/organization/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*0.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*1.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*2.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*3.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*4.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*5.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*6.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*7.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*8.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*9.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/cpc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/ipc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/patent", "for-each": "${data:PUBCHEM_DIR}/patent/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/pathway", "for-each": "${data:PUBCHEM_DIR}/pathway/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/protein", "for-each": "${data:PUBCHEM_DIR}/protein/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*0.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*1.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*2.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*3.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*4.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*5.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*6.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*7.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*8.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*9.ttl.gz", "graph": "${BASE_URL}/reference" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/reference", "for-each": "${data:PUBCHEM_DIR}/reference/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/source", "for-each": "${data:PUBCHEM_DIR}/source/*.ttl.gz" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*0.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*1.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*2.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*3.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*4.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*5.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*6.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*7.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*8.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*9.ttl.gz", "graph": "${BASE_URL}/substance" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/substance", "for-each": "${data:PUBCHEM_DIR}/substance/*[!0-9].ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/synonym", "for-each": "${data:PUBCHEM_DIR}/synonym/*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "${BASE_URL}/taxonomy", "for-each": "${data:PUBCHEM_DIR}/taxonomy/*.ttl.gz" }]
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "num-triples-per-batch": 10000000 }
+STXXL_MEMORY = 20G
 
 [server]
 PORT = 7023
 ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 20G
-TIMEOUT =
+TIMEOUT = 600s
 
 [runtime]
 SYSTEM = docker
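NOTE 2 explains the shape of the `MULTI_INPUT_JSON` above: the compound, descriptor, patent, reference, and substance directories contain so many files that a single per-file expansion would produce an overlong `qlever index` command line, so the files are partitioned into ten digit buckets (`*0.ttl.gz` through `*9.ttl.gz`) plus a `*[!0-9].ttl.gz` catch-all, each read by one short `zcat` call with a glob. A minimal sketch of that partition, with made-up file names:

# Illustrative only: file names are invented, but the bucketing mirrors the
# *0 ... *9 and *[!0-9] globs above. Every file lands in exactly one bucket.
from collections import defaultdict
from pathlib import Path

def bucket_by_last_digit(paths: list[str]) -> dict[str, list[str]]:
    buckets: dict[str, list[str]] = defaultdict(list)
    for p in paths:
        stem = Path(p).name.removesuffix(".ttl.gz")
        key = stem[-1] if stem[-1].isdigit() else "[!0-9]"
        buckets[key].append(p)
    return buckets

files = [f"pc_compound2descriptor_{i:06d}.ttl.gz" for i in range(1, 24)]
files.append("pc_compound_type.ttl.gz")  # caught by the *[!0-9] glob
for key, group in sorted(bucket_by_last_digit(files).items()):
    print(f"*{key}.ttl.gz matches {len(group)} files")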
qlever/Qleverfiles/Qleverfile.uniprot
CHANGED
@@ -1,30 +1,62 @@
 # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data  # takes ~ 30 hours and ~
-# qlever index     # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9
+# qlever get-data  # takes ~ 30 hours and ~ 1.6 TB of disk (for the TTL files)
+# qlever index     # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
 # qlever start     # starts the server (takes a few seconds)
 #
-# Install packages: sudo apt install -y libxml2-utils parallel xz-utils
+# Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
 # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
 #
 # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
-# during build, ~ 3 TB after build).
-# HDD without significant performance loss (when running the server).
+# during build, ~ 3 TB after build).
 
 [data]
-NAME
-DATE
-
-
-
-
-
+NAME = uniprot
+DATE = 2024-11-27
+RDFXML_DIR = rdf.${DATE}
+TTL_DIR = ttl.${DATE}
+UNIPROT_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
+RHEA_URL = https://ftp.expasy.org/databases/rhea/rdf
+EXAMPLES_URL = https://github.com/sib-swiss/sparql-examples
+GET_EXAMPLES_CMD = mkdir -p ${TTL_DIR} && git clone ${EXAMPLES_URL} && (cd sparql-examples && ./convertToOneTurtle.sh -p uniprot && gzip examples_uniprot.ttl && mv -f examples_uniprot.ttl.gz ../${TTL_DIR} && cd .. && rm -rf sparql-examples)
+GET_RDFXML_CMD = mkdir -p ${RDFXML_DIR} && (echo "${RHEA_URL}/chebi.owl.gz"; echo "${RHEA_URL}/rhea.rdf.gz"; curl -s ${UNIPROT_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" -) | while read URL; do wget --no-verbose -P ${RDFXML_DIR} $$URL 2>&1 | tee -a uniprot.download-log; done
+RDFXML2TTL_CMD = mkdir -p ${TTL_DIR} && for RDFXML in ${RDFXML_DIR}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=ttl -q 2> ${TTL_DIR}/$$(basename $$RDFXML).stderr | gzip -c > ${TTL_DIR}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/ttl.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
+GET_DATA_CMD = date > ${NAME}.get-data.begin-date && ${GET_EXAMPLES_CMD} && ${GET_RDFXML_CMD} && ${RDFXML2TTL_CMD} && date > ${NAME}.get-data.end-date
+DESCRIPTION = Complete UniProt data from ${UNIPROT_URL}, with additional data from ${RHEA_URL} and ${EXAMPLES_URL}
 
 [index]
-INPUT_FILES
-
-
-
+INPUT_FILES = ${data:TTL_DIR}/*.ttl.gz
+MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_reviewed_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_unreviewed_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniparc", "for-each": "${data:TTL_DIR}/uniparc_*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniref", "for-each": "${data:TTL_DIR}/uniref*.ttl.gz" },
+    { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/obsolete", "for-each": "${data:TTL_DIR}/uniprotkb_obsolete_*.ttl.gz" },
+    { "cmd": "zcat ${data:TTL_DIR}/chebi.ttl.gz", "graph": "http://sparql.uniprot.org/chebi" },
+    { "cmd": "zcat ${data:TTL_DIR}/citation_mapping.ttl.gz", "graph": "http://sparql.uniprot.org/citationmapping" },
+    { "cmd": "zcat ${data:TTL_DIR}/citations.ttl.gz", "graph": "http://sparql.uniprot.org/citations" },
+    { "cmd": "zcat ${data:TTL_DIR}/databases.ttl.gz", "graph": "http://sparql.uniprot.org/databases" },
+    { "cmd": "zcat ${data:TTL_DIR}/diseases.ttl.gz", "graph": "http://sparql.uniprot.org/diseases" },
+    { "cmd": "zcat ${data:TTL_DIR}/enzyme-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+    { "cmd": "zcat ${data:TTL_DIR}/enzyme.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
+    { "cmd": "zcat ${data:TTL_DIR}/go-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+    { "cmd": "zcat ${data:TTL_DIR}/go.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
+    { "cmd": "zcat ${data:TTL_DIR}/journals.ttl.gz", "graph": "http://sparql.uniprot.org/journal" },
+    { "cmd": "zcat ${data:TTL_DIR}/keywords-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+    { "cmd": "zcat ${data:TTL_DIR}/keywords.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
+    { "cmd": "zcat ${data:TTL_DIR}/locations-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+    { "cmd": "zcat ${data:TTL_DIR}/locations.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
+    { "cmd": "zcat ${data:TTL_DIR}/pathways-hierarchy*.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+    { "cmd": "zcat ${data:TTL_DIR}/pathways.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
+    { "cmd": "zcat ${data:TTL_DIR}/proteomes.ttl.gz", "graph": "http://sparql.uniprot.org/proteomes" },
+    { "cmd": "zcat ${data:TTL_DIR}/taxonomy-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+    { "cmd": "zcat ${data:TTL_DIR}/taxonomy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
+    { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
+    { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
+    { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
+    { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" },
+    { "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
+STXXL_MEMORY = 60G
 
 [server]
 PORT = 7018
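The new `GET_RDFXML_CMD` builds its download list from UniProt's RELEASE.meta4 metalink file: the `sed` call blanks the attributes of the `<metalink>` root element so that the namespace-free `xmllint --xpath` expression can select the Swiss mirror URLs (`location="ch"`). A rough Python equivalent of just that extraction step (function name and namespace handling are assumptions for illustration, not qlever code):

import urllib.request
import xml.etree.ElementTree as ET

META4_URL = ("https://ftp.uniprot.org/pub/databases/uniprot/"
             "current_release/rdf/RELEASE.meta4")

def swiss_mirror_urls(meta4_xml: bytes) -> list[str]:
    root = ET.fromstring(meta4_xml)
    # Instead of stripping the metalink namespace (the sed step), accept
    # <url location="ch"> elements regardless of their namespace.
    return [
        elem.text
        for elem in root.iter()
        if elem.tag.rsplit("}", 1)[-1] == "url" and elem.get("location") == "ch"
    ]

with urllib.request.urlopen(META4_URL) as response:
    for url in swiss_mirror_urls(response.read()):
        print(url)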
qlever/Qleverfiles/Qleverfile.wikidata
CHANGED
@@ -16,8 +16,7 @@ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
 GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
 DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
 DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
-DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
-TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA})
 
 [index]
 INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
@@ -26,7 +25,6 @@ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl",
 { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G
-TEXT_INDEX = from_text_records
 
 [server]
 PORT = 7001
qlever/commands/add_text_index.py
CHANGED
@@ -64,7 +64,7 @@ class AddTextIndexCommand(QleverCommand):
         # Show the command line.
         self.show(add_text_index_cmd, only_show=args.show)
         if args.show:
-            return
+            return True
 
         # When running natively, check if the binary exists and works.
         if args.system == "native":
@@ -74,6 +74,7 @@ class AddTextIndexCommand(QleverCommand):
                 log.error(f"Running \"{args.index_binary}\" failed ({e}), "
                           f"set `--index-binary` to a different binary or "
                           f"use `--container_system`")
+                return False
 
         # Check if text index files already exist.
         existing_text_index_files = get_existing_index_files(
qlever/commands/cache_stats.py
CHANGED
qlever/commands/clear_cache.py
CHANGED
@@ -48,7 +48,7 @@ class ClearCacheCommand(QleverCommand):
                   f"\"{args.access_token}\"")
         self.show(clear_cache_cmd, only_show=args.show)
         if args.show:
-            return
+            return True
 
         # Execute the command.
         try:
@@ -76,5 +76,7 @@ class ClearCacheCommand(QleverCommand):
         # Show cache stats.
         log.info("")
         args.detailed = False
-        CacheStatsCommand().execute(args)
+        if not CacheStatsCommand().execute(args):
+            log.error("Clearing the cache was successful, but showing the "
+                      "cache stats failed {e}")
         return True
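The Python changes in this release follow one pattern: `execute()` now consistently returns a boolean, `--show` short-circuits with `True`, and failures return `False` instead of falling through, so commands can call each other and react to the outcome (as `clear_cache.py` does with `CacheStatsCommand` above, and as `example_queries.py` does with `ClearCacheCommand` below). A stripped-down sketch of that convention; class names and bodies are illustrative, not the qlever-control API:

class CacheStatsSketch:
    def execute(self, args) -> bool:
        try:
            print("fetching cache stats ...")  # stand-in for the real work
            return True
        except Exception:
            return False

class ClearCacheSketch:
    def execute(self, args) -> bool:
        if getattr(args, "show", False):
            return True  # only showing the command line counts as success
        print("clearing the cache ...")  # stand-in for the real work
        # Chain another command and react to its result instead of ignoring it.
        if not CacheStatsSketch().execute(args):
            print("clearing succeeded, but showing the cache stats failed")
        return True

With this convention a caller can write `if not SomeCommand().execute(args): log.warn(...)` rather than silently discarding failures.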
qlever/commands/example_queries.py
CHANGED
@@ -21,10 +21,7 @@ class ExampleQueriesCommand(QleverCommand):
     """
 
     def __init__(self):
-
-            "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
-            "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
-        }
+        pass
 
     def description(self) -> str:
         return "Show how much of the cache is currently being used"
@@ -41,8 +38,15 @@ class ExampleQueriesCommand(QleverCommand):
         )
         subparser.add_argument(
             "--sparql-endpoint-preset",
-            choices=
-
+            choices=[
+                "https://qlever.dev/api/wikidata",
+                "https://qlever.dev/api/uniprot",
+                "https://qlever.dev/api/pubchem",
+                "https://qlever.dev/api/osm-planet",
+                "https://wikidata.demo.openlinksw.com/sparql",
+                "https://sparql.uniprot.org/sparql",
+            ],
+            help="SPARQL endpoint from fixed list (to save typing)",
         )
         subparser.add_argument(
             "--get-queries-cmd",
@@ -86,7 +90,7 @@ class ExampleQueriesCommand(QleverCommand):
                 "application/sparql-results+json",
                 "text/turtle",
             ],
-            default="
+            default="application/sparql-results+json",
             help="Accept header for the SPARQL query",
         )
         subparser.add_argument(
@@ -98,7 +102,7 @@ class ExampleQueriesCommand(QleverCommand):
         subparser.add_argument(
             "--width-query-description",
             type=int,
-            default=
+            default=70,
             help="Width for printing the query description",
         )
         subparser.add_argument(
@@ -113,6 +117,32 @@ class ExampleQueriesCommand(QleverCommand):
             default=14,
             help="Width for printing the result size",
         )
+        subparser.add_argument(
+            "--show-query",
+            choices=["always", "never", "on-error"],
+            default="never",
+            help="Show the queries that will be executed (always, never, on error)",
+        )
+        subparser.add_argument(
+            "--show-prefixes",
+            action="store_true",
+            default=False,
+            help="When showing the query, also show the prefixes",
+        )
+
+    def pretty_print_query(self, query: str, show_prefixes: bool) -> None:
+        remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+        pretty_print_query_cmd = (
+            f"echo {shlex.quote(query)}"
+            f" | docker run -i --rm sparqling/sparql-formatter"
+            f"{remove_prefixes_cmd} | grep -v '^$'"
+        )
+        try:
+            query_pp = run_command(pretty_print_query_cmd, return_output=True)
+            log.info(colored(query_pp.rstrip(), "cyan"))
+        except Exception as e:
+            log.error(f"Failed to pretty-print query: {e}")
+            log.info(colored(query.rstrip(), "cyan"))
 
     def execute(self, args) -> bool:
         # We can't have both `--remove-offset-and-limit` and `--limit`.
@@ -135,9 +165,8 @@ class ExampleQueriesCommand(QleverCommand):
             return False
 
         # Handle shortcuts for SPARQL endpoint.
-        if args.sparql_endpoint_preset
-            args.sparql_endpoint =
-            args.ui_config = args.sparql_endpoint_preset.split("-")[1]
+        if args.sparql_endpoint_preset:
+            args.sparql_endpoint = args.sparql_endpoint_preset
 
         # Limit only works with full result.
         if args.limit and args.download_or_count == "count":
@@ -178,7 +207,7 @@ class ExampleQueriesCommand(QleverCommand):
             only_show=args.show,
         )
         if args.show:
-            return
+            return True
 
         # Get the example queries.
         try:
@@ -210,8 +239,11 @@ class ExampleQueriesCommand(QleverCommand):
             if args.clear_cache == "yes":
                 args.server_url = sparql_endpoint
                 args.complete = False
+                clear_cache_successful = False
                 with mute_log():
-                    ClearCacheCommand().execute(args)
+                    clear_cache_successful = ClearCacheCommand().execute(args)
+                if not clear_cache_successful:
+                    log.warn("Failed to clear the cache")
 
             # Remove OFFSET and LIMIT (after the last closing bracket).
             if args.remove_offset_and_limit or args.limit:
@@ -262,6 +294,9 @@ class ExampleQueriesCommand(QleverCommand):
             # A bit of pretty-printing.
             query = re.sub(r"\s+", " ", query)
             query = re.sub(r"\s*\.\s*\}", " }", query)
+            if args.show_query == "always":
+                log.info("")
+                self.pretty_print_query(query, args.show_prefixes)
 
             # Launch query.
             try:
@@ -282,55 +317,81 @@ class ExampleQueriesCommand(QleverCommand):
                     params={"query": query},
                     result_file=result_file,
                 ).strip()
-                if http_code
-
-
-
-
-
+                if http_code == "200":
+                    time_seconds = time.time() - start_time
+                    error_msg = None
+                else:
+                    error_msg = {
+                        "short": f"HTTP code: {http_code}",
+                        "long": re.sub(r"\s+", " ", Path(result_file).read_text()),
+                    }
             except Exception as e:
                 if args.log_level == "DEBUG":
                     traceback.print_exc()
-                error_msg =
+                error_msg = {
+                    "short": "Exception",
+                    "long": re.sub(r"\s+", " ", str(e)),
+                }
 
             # Get result size (via the command line, in order to avoid loading
             # a potentially large JSON file into Python, which is slow).
             if error_msg is None:
-
-
-
-
-
-
-
+                # CASE 0: The result is empty despite a 200 HTTP code.
+                if Path(result_file).stat().st_size == 0:
+                    result_size = 0
+                    error_msg = {
+                        "short": "Empty result",
+                        "long": "curl returned with code 200, "
+                        "but the result is empty",
+                    }
+
+                # CASE 1: Just counting the size of the result (TSV or JSON).
+                elif args.download_or_count == "count":
+                    if args.accept == "text/tab-separated-values":
+                        result_size = run_command(
+                            f"sed 1d {result_file}", return_output=True
+                        )
+                    else:
+                        try:
                             result_size = run_command(
                                 f'jq -r ".results.bindings[0]'
                                 f" | to_entries[0].value.value"
                                 f' | tonumber" {result_file}',
                                 return_output=True,
                             )
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": "curl returned with code 200, "
+                                "but the JSON is malformed: "
+                                + re.sub(r"\s+", " ", str(e)),
+                            }
+
+                # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
+                else:
+                    if (
+                        args.accept == "text/tab-separated-values"
+                        or args.accept == "text/csv"
+                    ):
+                        result_size = run_command(
+                            f"sed 1d {result_file} | wc -l", return_output=True
+                        )
+                    elif args.accept == "text/turtle":
+                        result_size = run_command(
+                            f"sed '1d;/^@prefix/d;/^\\s*$/d' " f"{result_file} | wc -l",
+                            return_output=True,
+                        )
                     else:
-
-                        args.accept == "text/tab-separated-values"
-                        or args.accept == "text/csv"
-                    ):
-                        result_size = run_command(
-                            f"sed 1d {result_file} | wc -l", return_output=True
-                        )
-                    elif args.accept == "text/turtle":
-                        result_size = run_command(
-                            f"sed '1d;/^@prefix/d;/^\\s*$/d' "
-                            f"{result_file} | wc -l",
-                            return_output=True,
-                        )
-                    else:
+                        try:
                             result_size = run_command(
                                 f'jq -r ".results.bindings | length"' f" {result_file}",
                                 return_output=True,
                             )
-
-
-
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": re.sub(r"\s+", " ", str(e)),
+                            }
 
             # Remove the result file (unless in debug mode).
             if args.log_level != "DEBUG":
@@ -341,6 +402,7 @@ class ExampleQueriesCommand(QleverCommand):
                 description = description[: args.width_query_description - 3]
                 description += "..."
             if error_msg is None:
+                result_size = int(result_size)
                 log.info(
                     f"{description:<{args.width_query_description}} "
                     f"{time_seconds:6.2f} s "
@@ -352,16 +414,24 @@ class ExampleQueriesCommand(QleverCommand):
                 num_failed += 1
                 if (
                     args.width_error_message > 0
-                    and len(error_msg) > args.width_error_message
+                    and len(error_msg["long"]) > args.width_error_message
                    and args.log_level != "DEBUG"
+                    and args.show_query != "on-error"
                 ):
-                    error_msg =
-
-
+                    error_msg["long"] = (
+                        error_msg["long"][: args.width_error_message - 3] + "..."
+                    )
+                seperator_short_long = "\n" if args.show_query == "on-error" else " "
+                log.info(
                     f"{description:<{args.width_query_description}} "
-                    f"
-                    f"{colored(error_msg, 'red')}"
+                    f"{colored('FAILED ', 'red')}"
+                    f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
+                    f"{seperator_short_long}"
+                    f"{colored(error_msg['long'], 'red')}"
                 )
+                if args.show_query == "on-error":
+                    self.pretty_print_query(query, args.show_prefixes)
+                    log.info("")
 
         # Check that each query has a time and a result size, or it failed.
         assert len(result_sizes) == len(query_times)