qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. qlever/Qleverfiles/Qleverfile.dblp +36 -0
  2. qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
  3. qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
  4. qlever/Qleverfiles/Qleverfile.default +51 -0
  5. qlever/Qleverfiles/Qleverfile.dnb +40 -0
  6. qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
  7. qlever/Qleverfiles/Qleverfile.freebase +28 -0
  8. qlever/Qleverfiles/Qleverfile.imdb +36 -0
  9. qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
  10. qlever/Qleverfiles/Qleverfile.olympics +31 -0
  11. qlever/Qleverfiles/Qleverfile.orkg +30 -0
  12. qlever/Qleverfiles/Qleverfile.osm-country +39 -0
  13. qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
  14. qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
  15. qlever/Qleverfiles/Qleverfile.pubchem +131 -0
  16. qlever/Qleverfiles/Qleverfile.scientists +29 -0
  17. qlever/Qleverfiles/Qleverfile.uniprot +74 -0
  18. qlever/Qleverfiles/Qleverfile.vvz +31 -0
  19. qlever/Qleverfiles/Qleverfile.wikidata +42 -0
  20. qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
  21. qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
  22. qlever/__init__.py +44 -1380
  23. qlever/command.py +87 -0
  24. qlever/commands/__init__.py +0 -0
  25. qlever/commands/add_text_index.py +115 -0
  26. qlever/commands/benchmark_queries.py +1019 -0
  27. qlever/commands/cache_stats.py +125 -0
  28. qlever/commands/clear_cache.py +88 -0
  29. qlever/commands/extract_queries.py +120 -0
  30. qlever/commands/get_data.py +48 -0
  31. qlever/commands/index.py +333 -0
  32. qlever/commands/index_stats.py +306 -0
  33. qlever/commands/log.py +66 -0
  34. qlever/commands/materialized_view.py +110 -0
  35. qlever/commands/query.py +142 -0
  36. qlever/commands/rebuild_index.py +176 -0
  37. qlever/commands/reset_updates.py +59 -0
  38. qlever/commands/settings.py +115 -0
  39. qlever/commands/setup_config.py +97 -0
  40. qlever/commands/start.py +336 -0
  41. qlever/commands/status.py +50 -0
  42. qlever/commands/stop.py +90 -0
  43. qlever/commands/system_info.py +130 -0
  44. qlever/commands/ui.py +271 -0
  45. qlever/commands/update.py +90 -0
  46. qlever/commands/update_wikidata.py +1204 -0
  47. qlever/commands/warmup.py +41 -0
  48. qlever/config.py +223 -0
  49. qlever/containerize.py +167 -0
  50. qlever/log.py +55 -0
  51. qlever/qlever_main.py +79 -0
  52. qlever/qleverfile.py +530 -0
  53. qlever/util.py +330 -0
  54. qlever-0.5.41.dist-info/METADATA +127 -0
  55. qlever-0.5.41.dist-info/RECORD +59 -0
  56. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
  57. qlever-0.5.41.dist-info/entry_points.txt +2 -0
  58. qlever-0.5.41.dist-info/top_level.txt +1 -0
  59. build/lib/qlever/__init__.py +0 -1383
  60. build/lib/qlever/__main__.py +0 -4
  61. qlever/__main__.py +0 -4
  62. qlever-0.2.5.dist-info/METADATA +0 -277
  63. qlever-0.2.5.dist-info/RECORD +0 -12
  64. qlever-0.2.5.dist-info/entry_points.txt +0 -2
  65. qlever-0.2.5.dist-info/top_level.txt +0 -4
  66. src/qlever/__init__.py +0 -1383
  67. src/qlever/__main__.py +0 -4
  68. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,131 @@
1
+ # Qleverfile for PubChem, use with the QLever CLI (`pip install qlever`)
2
+ #
3
+ # qlever get-data # ~2 hours, ~120 GB, ~19 billion triples
4
+ # qlever index # ~6 hours, ~20 GB RAM, ~350 GB disk space (for the index)
5
+ # qlever start # a few seconds
6
+ #
7
+ # Measured on an AMD Ryzen 9 7950X with 128 GB RAM, and NVMe SSD (17.12.2024)
8
+ #
9
+ # NOTE 1: `qlever get-data` not only downloads the PubChem RDF data, but also
10
+ # a number of ontologies. These are very useful to obtain names for IRIs like
11
+ # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
12
+ # The ontologies BAO and NDF-RT are occasionally updated; for latest versions,
13
+ # see the download links at https://bioportal.bioontology.org/ontologies/BAO
14
+ # and https://bioportal.bioontology.org/ontologies/NDF-RT .
15
+ #
16
+ # NOTE 2: The `MULTI_INPUT_JSON` concatenates (via `zcat`) selected files into one input
17
+ # stream because there are too many files and the command line triggered by
18
+ # `qlever index` would be too long otherwise.
19
+
20
+ [data]
21
+ NAME = pubchem
22
+ GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
23
+ ONTOLOGIES_DIR = RDF.ontologies
24
+ PUBCHEM_DIR = RDF.pubchem
25
+ ONTOLOGIES_CSV = ontologies.csv
26
+ CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
27
+ GET_DATA_CMD_1 = mkdir -p ${ONTOLOGIES_DIR} && cd ${ONTOLOGIES_DIR} && cat ${ONTOLOGIES_CSV} | parallel --colsep "," 'FILE={2} && URL={3} && ERRFILE=$${FILE%.*}.jena-stderr; echo "Processing $$URL ($$FILE) ..." && curl -sLRo $$FILE $$URL && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$FILE 2> $$ERRFILE | gzip -c > $${FILE%.*}.nt.gz && rm -f $$FILE; if [ -s $$ERRFILE ]; then grep -q "ERROR *riot" $$ERRFILE && echo "riot ERRORs in $$FILE, check $$ERRFILE"; else rm $$ERRFILE; fi'
28
+ GET_DATA_CMD_2 = mkdir -p ${PUBCHEM_DIR} && wget -r -nv -nH --cut-dirs=2 --no-parent -P ${PUBCHEM_DIR} ${GET_DATA_URL}
29
+ GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${GET_DATA_CMD_1} 2>&1 | tee pubchem.get-data-log.txt; ${GET_DATA_CMD_2} 2>&1 | tee -a pubchem.get-data-log.txt
30
+ VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
31
+ DESCRIPTION = PubChem, RDF TTL from ${GET_DATA_URL} + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo), version ${data:VERSION}
32
+ MAKE_ONTOLOGIES_CSV = $$(mkdir -p ${ONTOLOGIES_DIR} && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\nBioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\nCHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\nChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\nCiTO,cito.nt,http://purl.org/spar/cito.nt\nDCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\nFaBiO,fabio.nt,http://purl.org/spar/fabio.nt\nGO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\nIAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\nNCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\nNDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nOBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\nOWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\nPDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\nPR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\nRDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\nRDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\nRO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\nSIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\nSKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\nSO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\nUO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > ${ONTOLOGIES_DIR}/${ONTOLOGIES_CSV})
33
+
34
+ [index]
35
+ INPUT_FILES = ${data:ONTOLOGIES_DIR}/*.nt.gz ${data:PUBCHEM_DIR}/*/*.ttl.gz ${data:PUBCHEM_DIR}/*/*/*.ttl.gz
36
+ BASE_URL = http://rdf.ncbi.nlm.nih.gov/pubchem
37
+ MULTI_INPUT_JSON = [{ "cmd": "zcat ${data:ONTOLOGIES_DIR}/*.nt.gz", "graph": "${BASE_URL}/ruleset"},
38
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/anatomy", "for-each": "${data:PUBCHEM_DIR}/anatomy/*.ttl.gz" },
39
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/author", "for-each": "${data:PUBCHEM_DIR}/author/*.ttl.gz" },
40
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/bioassay", "for-each": "${data:PUBCHEM_DIR}/bioassay/*.ttl.gz" },
41
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/book", "for-each": "${data:PUBCHEM_DIR}/book/*.ttl.gz" },
42
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/cell", "for-each": "${data:PUBCHEM_DIR}/cell/*.ttl.gz" },
43
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*0.ttl.gz", "graph": "${BASE_URL}/compound" },
44
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*1.ttl.gz", "graph": "${BASE_URL}/compound" },
45
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*2.ttl.gz", "graph": "${BASE_URL}/compound" },
46
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*3.ttl.gz", "graph": "${BASE_URL}/compound" },
47
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*4.ttl.gz", "graph": "${BASE_URL}/compound" },
48
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*5.ttl.gz", "graph": "${BASE_URL}/compound" },
49
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*6.ttl.gz", "graph": "${BASE_URL}/compound" },
50
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*7.ttl.gz", "graph": "${BASE_URL}/compound" },
51
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*8.ttl.gz", "graph": "${BASE_URL}/compound" },
52
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/compound/general/*9.ttl.gz", "graph": "${BASE_URL}/compound" },
53
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/compound", "for-each": "${data:PUBCHEM_DIR}/compound/general/*[!0-9].ttl.gz" },
54
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/concept", "for-each": "${data:PUBCHEM_DIR}/concept/*.ttl.gz" },
55
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/conserveddomain", "for-each": "${data:PUBCHEM_DIR}/conserveddomain/*.ttl.gz" },
56
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/cooccurrence", "for-each": "${data:PUBCHEM_DIR}/cooccurrence/*.ttl.gz" },
57
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*0.ttl.gz", "graph": "${BASE_URL}/descriptor" },
58
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*1.ttl.gz", "graph": "${BASE_URL}/descriptor" },
59
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*2.ttl.gz", "graph": "${BASE_URL}/descriptor" },
60
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*3.ttl.gz", "graph": "${BASE_URL}/descriptor" },
61
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*4.ttl.gz", "graph": "${BASE_URL}/descriptor" },
62
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*5.ttl.gz", "graph": "${BASE_URL}/descriptor" },
63
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*6.ttl.gz", "graph": "${BASE_URL}/descriptor" },
64
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*7.ttl.gz", "graph": "${BASE_URL}/descriptor" },
65
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*8.ttl.gz", "graph": "${BASE_URL}/descriptor" },
66
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/descriptor/compound/*9.ttl.gz", "graph": "${BASE_URL}/descriptor" },
67
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/compound/*[!0-9].ttl.gz" },
68
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/descriptor", "for-each": "${data:PUBCHEM_DIR}/descriptor/substance/*.ttl.gz" },
69
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/disease", "for-each": "${data:PUBCHEM_DIR}/disease/*.ttl.gz" },
70
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/endpoint", "for-each": "${data:PUBCHEM_DIR}/endpoint/*.ttl.gz" },
71
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/gene", "for-each": "${data:PUBCHEM_DIR}/gene/*.ttl.gz"},
72
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/grant", "for-each": "${data:PUBCHEM_DIR}/grant/*.ttl.gz" },
73
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/inchikey", "for-each": "${data:PUBCHEM_DIR}/inchikey/*.ttl.gz" },
74
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/journal", "for-each": "${data:PUBCHEM_DIR}/journal/*.ttl.gz" },
75
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/measuregroup", "for-each": "${data:PUBCHEM_DIR}/measuregroup/*.ttl.gz" },
76
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/organization", "for-each": "${data:PUBCHEM_DIR}/organization/*.ttl.gz" },
77
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*0.ttl.gz", "graph": "${BASE_URL}/patent" },
78
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*1.ttl.gz", "graph": "${BASE_URL}/patent" },
79
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*2.ttl.gz", "graph": "${BASE_URL}/patent" },
80
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*3.ttl.gz", "graph": "${BASE_URL}/patent" },
81
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*4.ttl.gz", "graph": "${BASE_URL}/patent" },
82
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*5.ttl.gz", "graph": "${BASE_URL}/patent" },
83
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*6.ttl.gz", "graph": "${BASE_URL}/patent" },
84
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*7.ttl.gz", "graph": "${BASE_URL}/patent" },
85
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*8.ttl.gz", "graph": "${BASE_URL}/patent" },
86
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/*9.ttl.gz", "graph": "${BASE_URL}/patent" },
87
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/cpc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
88
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/patent/ipc/*.ttl.gz", "graph": "${BASE_URL}/patent" },
89
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/patent", "for-each": "${data:PUBCHEM_DIR}/patent/*[!0-9].ttl.gz" },
90
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/pathway", "for-each": "${data:PUBCHEM_DIR}/pathway/*.ttl.gz" },
91
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/protein", "for-each": "${data:PUBCHEM_DIR}/protein/*.ttl.gz" },
92
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*0.ttl.gz", "graph": "${BASE_URL}/reference" },
93
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*1.ttl.gz", "graph": "${BASE_URL}/reference" },
94
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*2.ttl.gz", "graph": "${BASE_URL}/reference" },
95
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*3.ttl.gz", "graph": "${BASE_URL}/reference" },
96
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*4.ttl.gz", "graph": "${BASE_URL}/reference" },
97
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*5.ttl.gz", "graph": "${BASE_URL}/reference" },
98
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*6.ttl.gz", "graph": "${BASE_URL}/reference" },
99
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*7.ttl.gz", "graph": "${BASE_URL}/reference" },
100
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*8.ttl.gz", "graph": "${BASE_URL}/reference" },
101
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/reference/*9.ttl.gz", "graph": "${BASE_URL}/reference" },
102
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/reference", "for-each": "${data:PUBCHEM_DIR}/reference/*[!0-9].ttl.gz" },
103
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/source", "for-each": "${data:PUBCHEM_DIR}/source/*.ttl.gz" },
104
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*0.ttl.gz", "graph": "${BASE_URL}/substance" },
105
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*1.ttl.gz", "graph": "${BASE_URL}/substance" },
106
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*2.ttl.gz", "graph": "${BASE_URL}/substance" },
107
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*3.ttl.gz", "graph": "${BASE_URL}/substance" },
108
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*4.ttl.gz", "graph": "${BASE_URL}/substance" },
109
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*5.ttl.gz", "graph": "${BASE_URL}/substance" },
110
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*6.ttl.gz", "graph": "${BASE_URL}/substance" },
111
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*7.ttl.gz", "graph": "${BASE_URL}/substance" },
112
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*8.ttl.gz", "graph": "${BASE_URL}/substance" },
113
+ { "cmd": "zcat ${data:PUBCHEM_DIR}/substance/*9.ttl.gz", "graph": "${BASE_URL}/substance" },
114
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/substance", "for-each": "${data:PUBCHEM_DIR}/substance/*[!0-9].ttl.gz" },
115
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/synonym", "for-each": "${data:PUBCHEM_DIR}/synonym/*.ttl.gz" },
116
+ { "cmd": "zcat {}", "graph": "${BASE_URL}/taxonomy", "for-each": "${data:PUBCHEM_DIR}/taxonomy/*.ttl.gz" }]
117
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "num-triples-per-batch": 10000000 }
118
+ STXXL_MEMORY = 20G
119
+
120
+ [server]
121
+ PORT = 7023
122
+ ACCESS_TOKEN = ${data:NAME}
123
+ MEMORY_FOR_QUERIES = 20G
124
+ TIMEOUT = 600s
125
+
126
+ [runtime]
127
+ SYSTEM = docker
128
+ IMAGE = docker.io/adfreiburg/qlever:latest
129
+
130
+ [ui]
131
+ UI_CONFIG = pubchem
@@ -0,0 +1,29 @@
1
+ # Qleverfile for "scientists", use with qlever script (pip install qlever)
2
+ #
3
+ # qlever get-data # get "scientists" dataset (370M triples, 2.2M text records)
4
+ # qlever index # build index, including text index (takes ~20 seconds)
5
+ # qlever start # start the server (instant)
6
+
7
+ [data]
8
+ NAME = scientists
9
+ GET_DATA_CMD = curl -LRC - -O https://github.com/ad-freiburg/qlever/raw/master/e2e/scientist-collection.zip && unzip -j scientist-collection.zip && rm -f scientist-collection.zip
10
+ DESCRIPTION = Test collection from https://github.com/ad-freiburg/qlever/tree/master/e2e (triples and text about scientists)
11
+ TEXT_DESCRIPTION = Text from all literals and Wikipedia articles on scientists (use ql:contains-entity and ql:contains-word)
12
+
13
+ [index]
14
+ INPUT_FILES = ${data:NAME}.nt
15
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
16
+ SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 100000 }
17
+ TEXT_INDEX = from_text_records_and_literals
18
+
19
+ [server]
20
+ PORT = 7020
21
+ ACCESS_TOKEN = ${data:NAME}
22
+ MEMORY_FOR_QUERIES = 5G
23
+
24
+ [runtime]
25
+ SYSTEM = docker
26
+ IMAGE = docker.io/adfreiburg/qlever:latest
27
+
28
+ [ui]
29
+ UI_CONFIG = scientists
@@ -0,0 +1,74 @@
1
+ # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
2
+ #
3
+ # qlever get-data # takes ~ 30 hours and ~ 1.6 TB of disk (for the TTL files)
4
+ # qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
5
+ # qlever start # starts the server (takes a few seconds)
6
+ #
7
+ # Install packages: sudo apt install -y libxml2-utils raptor2-utils parallel xz-utils wget
8
+ # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
9
+ #
10
+ # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
11
+ # during build, ~ 3 TB after build).
12
+
13
+ [data]
14
+ NAME = uniprot
15
+ DATE = 2025-06-18
16
+ RDFXML_DIR = rdf.${DATE}
17
+ TTL_DIR = ttl.${DATE}
18
+ UNIPROT_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
19
+ RHEA_URL = https://ftp.expasy.org/databases/rhea/rdf
20
+ EXAMPLES_URL = https://github.com/sib-swiss/sparql-examples
21
+ GET_EXAMPLES_CMD = mkdir -p ${TTL_DIR} && git clone ${EXAMPLES_URL} && (cd sparql-examples && ./convertToOneTurtle.sh -p uniprot && gzip examples_uniprot.ttl && mv -f examples_uniprot.ttl.gz ../${TTL_DIR} && cd .. && rm -rf sparql-examples)
22
+ GET_RDFXML_CMD = mkdir -p ${RDFXML_DIR} && (echo "${RHEA_URL}/chebi.owl.gz"; echo "${RHEA_URL}/rhea.rdf.gz"; curl -s ${UNIPROT_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" -) | while read URL; do wget --no-verbose -P ${RDFXML_DIR} $$URL 2>&1 | tee -a uniprot.download-log; done
23
+ RDFXML2TTL_CMD = mkdir -p ${TTL_DIR} && for RDFXML in ${RDFXML_DIR}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=ttl -q 2> ${TTL_DIR}/$$(basename $$RDFXML).stderr | gzip -c > ${TTL_DIR}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/ttl.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
24
+ GET_DATA_CMD = date > ${NAME}.get-data.begin-date && ${GET_EXAMPLES_CMD} && ${GET_RDFXML_CMD} && ${RDFXML2TTL_CMD} && date > ${NAME}.get-data.end-date
25
+ DESCRIPTION = UniProt, RDF XML from ${UNIPROT_URL} + additional data from ${RHEA_URL} and ${EXAMPLES_URL}, version ${DATE}
26
+
27
+ [index]
28
+ INPUT_FILES = ${data:TTL_DIR}/*.ttl.gz
29
+ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_reviewed_*.ttl.gz" },
30
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniprot", "for-each": "${data:TTL_DIR}/uniprotkb_unreviewed_*.ttl.gz" },
31
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniparc", "for-each": "${data:TTL_DIR}/uniparc_*.ttl.gz" },
32
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/uniref", "for-each": "${data:TTL_DIR}/uniref*.ttl.gz" },
33
+ { "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/obsolete", "for-each": "${data:TTL_DIR}/uniprotkb_obsolete_*.ttl.gz" },
34
+ { "cmd": "zcat ${data:TTL_DIR}/chebi.ttl.gz", "graph": "http://sparql.uniprot.org/chebi" },
35
+ { "cmd": "zcat ${data:TTL_DIR}/citation_mapping.ttl.gz", "graph": "http://sparql.uniprot.org/citationmapping" },
36
+ { "cmd": "zcat ${data:TTL_DIR}/citations.ttl.gz", "graph": "http://sparql.uniprot.org/citations" },
37
+ { "cmd": "zcat ${data:TTL_DIR}/databases.ttl.gz", "graph": "http://sparql.uniprot.org/databases" },
38
+ { "cmd": "zcat ${data:TTL_DIR}/diseases.ttl.gz", "graph": "http://sparql.uniprot.org/diseases" },
39
+ { "cmd": "zcat ${data:TTL_DIR}/enzyme-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
40
+ { "cmd": "zcat ${data:TTL_DIR}/enzyme.ttl.gz", "graph": "http://sparql.uniprot.org/enzymes" },
41
+ { "cmd": "zcat ${data:TTL_DIR}/go-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
42
+ { "cmd": "zcat ${data:TTL_DIR}/go.ttl.gz", "graph": "http://sparql.uniprot.org/go" },
43
+ { "cmd": "zcat ${data:TTL_DIR}/journals.ttl.gz", "graph": "http://sparql.uniprot.org/journal" },
44
+ { "cmd": "zcat ${data:TTL_DIR}/keywords-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
45
+ { "cmd": "zcat ${data:TTL_DIR}/keywords.ttl.gz", "graph": "http://sparql.uniprot.org/keywords" },
46
+ { "cmd": "zcat ${data:TTL_DIR}/locations-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
47
+ { "cmd": "zcat ${data:TTL_DIR}/locations.ttl.gz", "graph": "http://sparql.uniprot.org/locations" },
48
+ { "cmd": "zcat ${data:TTL_DIR}/pathways-hierarchy*.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
49
+ { "cmd": "zcat ${data:TTL_DIR}/pathways.ttl.gz", "graph": "http://sparql.uniprot.org/pathways" },
50
+ { "cmd": "zcat ${data:TTL_DIR}/proteomes.ttl.gz", "graph": "http://sparql.uniprot.org/proteomes" },
51
+ { "cmd": "zcat ${data:TTL_DIR}/taxonomy-hierarchy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
52
+ { "cmd": "zcat ${data:TTL_DIR}/taxonomy.ttl.gz", "graph": "http://sparql.uniprot.org/taxonomy" },
53
+ { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
54
+ { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
55
+ { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
56
+ { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }]
57
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
58
+ STXXL_MEMORY = 80G
59
+ ULIMIT = 50000
60
+
61
+ [server]
62
+ PORT = 7018
63
+ ACCESS_TOKEN = ${data:NAME}
64
+ MEMORY_FOR_QUERIES = 20G
65
+ CACHE_MAX_SIZE = 10G
66
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
67
+ TIMEOUT = 300s
68
+
69
+ [runtime]
70
+ SYSTEM = docker
71
+ IMAGE = docker.io/adfreiburg/qlever:latest
72
+
73
+ [ui]
74
+ UI_CONFIG = uniprot
@@ -0,0 +1,31 @@
1
+ # Qleverfile for VVZ, use with https://github.com/ad-freiburg/qlever-control
2
+ #
3
+ # qlever get-data # this requires a separate internal tool
4
+ # qlever index # builds the index (takes a few seconds)
5
+ # qlever start # starts the server (takes a few seconds)
6
+ #
7
+ # Also builds a text index for fast keyword search in literals.
8
+
9
+ [data]
10
+ NAME = vvz
11
+ GET_DATA_CMD = echo "This requires a separate tool"
12
+ DESCRIPTION = VVZ Uni Freiburg, selected faculties
13
+ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
14
+
15
+ [index]
16
+ INPUT_FILES = vvz.ttl
17
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
18
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
19
+ TEXT_INDEX = from_literals
20
+
21
+ [server]
22
+ PORT = 7041
23
+ ACCESS_TOKEN = ${data:NAME}
24
+ MEMORY_FOR_QUERIES = 10G
25
+
26
+ [runtime]
27
+ SYSTEM = docker
28
+ IMAGE = docker.io/adfreiburg/qlever:latest
29
+
30
+ [ui]
31
+ UI_CONFIG = vvz
@@ -0,0 +1,42 @@
1
+ # Qleverfile for Wikidata, use with the QLever CLI (`pip install qlever`)
2
+ #
3
+ # qlever get-data # ~7 hours, ~110 GB (compressed), ~20 billion triples
4
+ # qlever index # ~5 hours, ~20 GB RAM, ~500 GB index size on disk
5
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
6
+ #
7
+ # Adding a text index takes an additional ~2 hours and ~50 GB of disk space
8
+ #
9
+ # Measured on an AMD Ryzen 9 5950X with 128 GB RAM, and NVMe SSD (18.10.2024)
10
+
11
+ [DEFAULT]
12
+ NAME = wikidata
13
+
14
+ [data]
15
+ GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
16
+ GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL}/latest-all.ttl.bz2 -O ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
17
+ DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
18
+ DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
19
+ DESCRIPTION = Complete Wikidata, from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2), version ${DATE_WIKIDATA}
20
+
21
+ [index]
22
+ INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 dcatap.nt
23
+ MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl", "parallel": "true" },
24
+ { "cmd": "lbzcat -n 1 latest-lexemes.ttl.bz2", "format": "ttl", "parallel": "false" },
25
+ { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
26
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
27
+ STXXL_MEMORY = 10G
28
+
29
+ [server]
30
+ PORT = 7001
31
+ ACCESS_TOKEN = ${data:NAME}
32
+ MEMORY_FOR_QUERIES = 20G
33
+ CACHE_MAX_SIZE = 15G
34
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
35
+ TIMEOUT = 600s
36
+
37
+ [runtime]
38
+ SYSTEM = docker
39
+ IMAGE = adfreiburg/qlever
40
+
41
+ [ui]
42
+ UI_CONFIG = wikidata
@@ -0,0 +1,40 @@
1
+ # Qleverfile for WikiPathways, use with https://github.com/ad-freiburg/qlever-control
2
+ #
3
+ # qlever get-data # takes ~3 seconds, generates TTL of size ~600 MB
4
+ # qlever index # takes ~20 seconds and little RAM (on an AMD Ryzen 9 5900X)
5
+ # qlever start # instant
6
+ #
7
+ # Limitations: does not include the ontologies (WP, GPML, ChEBI, PW, CLO, ...) yet
8
+
9
+ [data]
10
+ NAME = wikipathways
11
+ RELEASE = current
12
+ GET_DATA_URL = https://data.wikipathways.org/${RELEASE}/rdf
13
+ GET_DATA_CMD = wget -O wikipathways-rdf-void.ttl ${GET_DATA_URL}/wikipathways-rdf-void.ttl && \
14
+ wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-wp.zip && \
15
+ unzip -qq -c wikipathways-${RELEASE}-rdf-wp.zip -x wp/wpOntology.ttl > wikipathways-rdf-wp.ttl && \
16
+ wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-gpml.zip && \
17
+ unzip -qq -c wikipathways-${RELEASE}-rdf-gpml.zip -x gpml/gpmlOntology.ttl > wikipathways-rdf-gpml.ttl && \
18
+ wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-authors.zip && \
19
+ unzip -qq -c wikipathways-${RELEASE}-rdf-authors.zip > wikipathways-rdf-authors.ttl && \
20
+ cat wikipathways-rdf-*.ttl | grep ^@prefix | tr -s ' ' | sort -u > ${NAME}.prefix-definitions
21
+ DESCRIPTION = WikiPathways RDF, from ${GET_DATA_URL}
22
+ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
23
+
24
+ [index]
25
+ INPUT_FILES = ${data:NAME}.prefix-definitions wikipathways-rdf-wp.ttl wikipathways-rdf-gpml.ttl wikipathways-rdf-void.ttl wikipathways-rdf-authors.ttl
26
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
27
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
28
+ TEXT_INDEX = from_literals
29
+
30
+ [server]
31
+ PORT = 7040
32
+ ACCESS_TOKEN = ${data:NAME}
33
+ MEMORY_FOR_QUERIES = 5G
34
+
35
+ [runtime]
36
+ SYSTEM = docker
37
+ IMAGE = docker.io/adfreiburg/qlever:latest
38
+
39
+ [ui]
40
+ UI_CONFIG = wikipathways
@@ -0,0 +1,33 @@
1
+ # Qleverfile for YAGO 4, use with https://github.com/ad-freiburg/qlever-control
2
+ #
3
+ # qlever get-data # downloads 8 nt.gz files of total size ~60 GB (as of 12.03.2020)
4
+ # qlever index # takes ~4 hours and ~10 GB RAM (on an AMD Ryzen 9 5900X)
5
+ # qlever start # starts the server
6
+
7
+ # NOTE concerning GET_DATA_CMD: The triples from wd-annotated-facts are
8
+ # contained in wd-facts. The "full types" are the YAGO types, the "simple
9
+ # types" are the schema.org types. They don't interfere with each other because
10
+ # they have distinct prefixes.
11
+
12
+ [data]
13
+ NAME = yago-4
14
+ GET_DATA_CMD = curl --location --continue-at - --remote-name-all https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-class.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-facts.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-full-types.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-labels.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-sameAs.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-schema.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-shapes.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-simple-types.nt.gz
15
+ DESCRIPTION = "Full dump from https://yago-knowledge.org/downloads/yago-4, version 12.03.2020"
16
+
17
+ [index]
18
+ INPUT_FILES = yago-wd-*.nt.gz
19
+ CAT_INPUT_FILES = zcat ${INPUT_FILES}
20
+ SETTINGS_JSON = { "languages-internal": ["en"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
21
+ STXXL_MEMORY = 10G
22
+
23
+ [server]
24
+ PORT = 9004
25
+ ACCESS_TOKEN = ${data:NAME}
26
+ MEMORY_FOR_QUERIES = 30G
27
+
28
+ [runtime]
29
+ SYSTEM = docker
30
+ IMAGE = docker.io/adfreiburg/qlever:latest
31
+
32
+ [ui]
33
+ UI_CONFIG = yago-4