qlever 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


qlever/Qleverfiles/Qleverfile.pubchem CHANGED
@@ -1,60 +1,49 @@
  # Qleverfile for PubChem, use with https://github.com/ad-freiburg/qlever-control
  #
- # qlever get-data # downloads .gz files of total size 114 GB; see NOTES 2, 3, 4
- # qlever index # takes ~5 hours and ~20 GB RAM on an AMD Ryzen 9 5900X
- # qlever start # starts the server (a few seconds)
+ # Resource requirements (as of 18.08.2024, on an AMD Ryzen 9 5900X):
  #
- # IMPORTANT NOTES:
+ # qlever get-data # ~2 hours, ~150 GB, ~19 billion triples
+ # qlever index # ~7 hours, ~20 GB RAM, ~400 GB disk space
+ # qlever start # a few seconds
  #
- # NOTE 1: The SPARQL endpoint at https://qlever.cs.uni-freiburg.de/pubchem also
- # contains data from the following ontologies, which are very useful for
- # resolving names of IRIs like `sio:SIO_000008` or `obo:IAO_0000412`, but which
- # are not part of the PubChem RDF data. For the corresponding URLs, see
- # https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1200479401 .
+ # NOTE 1: `get-data` does not only download the PubChem RDF data, but also
+ # a number of ontologies. These are very useful to obtain names for IRIs like
+ # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
+ # The ontologies BAO and NDF-RT are infrequently updated, for latest versions,
+ # see the download links at https://bioportal.bioontology.org/ontologies/BAO
+ # and https://bioportal.bioontology.org/ontologies/NDF-RT .
  #
- # bao bfo biopax-level3 chebi cheminf cito dublin_core_terms fabio go iao ncit
- # obi pr ro sio skos so uo
- #
- # NOTE 2: The robots.txt file from https://ftp.ncbi.nlm.nih.gov currently
- # disallows downloading the PubChem RDF data using `wget --recursive` as in the
- # GET_DATA_CMD below. As a workaround, you can write a simple Python script
- # (using `BeautifulSoup` and `urllib.parse`) to scrape the URLs from the HTML
- # pages and download the files individually. This was done for the latest
- # version of https://qlever.cs.uni-freiburg.de/pubchem .
- #
- # NOTE 3: Many of the TTL files have generic prefix definitions in the middle
+ # NOTE 2: Many of the TTL files have generic prefix definitions in the middle
  # of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
  # See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
- # This is allowed by the standard, but VERY unusual. For use with QLever,
- # convert the TTL files to NT before indexing, see GET_DATA_CMD below.
- #
- # NOTE 4: Many of the files (TTL as well as NT) contain invalid IRIs because
- # spaces and braces are not properly escaped. Here is a simple awk-based script
- # to percent-encode spaces and braces in all IRIs in the NT files:
+ # This is allowed by the standard, but unusual. For use with QLever, we
+ # therefore convert the TTL files to NT when downloading them.
  #
- # for NTGZ in nt.${DATE}/*.nt.gz; do echo "zcat $NTGZ | sed 's/> />\t/1; s/> />\t/1; s/ \.\$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$i ~ /^<.*>\$/) { gsub(/ /, \"%20\", \$i); gsub(/\[/, \"%5B\", \$i); gsub(/\]/, \"%5D\", \$i); gsub(/{/, \"%7B\", \$i); gsub(/}/, \"%7D\", \$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}.FIXED/$(basename $NTGZ)"; done > fix-nt.commands.txt
- # cat fix-nt.commands.txt | parallel
-
-
- [DEFAULT]
- NAME = pubchem
- DATE = 2024-02-03
+ # NOTE 3: The PubChem data contains several invalid IRIs, in particular,
+ # containing spaces. The previous version of this Qleverfile used a combination
+ # of `sed` and `awk` to fix this. In the meantime, QLever's default is to warn
+ # about such IRIs while indexing, but accept them anyway.

  [data]
- GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
- MAKE_GET_DATA_CMD = curl -s ${GET_DATA_URL}/void.ttl | grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' | grep -v "nbr[23]d" | while read URL; do echo "echo \"Processing $$URL ...\"; curl --silent --remote-time --output ttl.${DATE}/$$(basename $$URL) $$URL && docker run --rm -v $$(pwd)/ttl.${DATE}:/data stain/jena turtle --output=NT /data/$$(basename $$URL) | sed 's/> />\t/1; s/> />\t/1; s/ \.\$$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$$i ~ /^<.*>\$$/) { gsub(/ /, \"%20\", \$$i); gsub(/\[/, \"%5B\", \$$i); gsub(/\]/, \"%5D\", \$$i); gsub(/{/, \"%7B\", \$$i); gsub(/}/, \"%7D\", \$$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}/$$(basename -s .ttl.gz $$URL).nt.gz"; done > pubchem.get-data-cmds.txt
- GET_DATA_CMD = mkdir -p ttl.${DATE} && mkdir -p nt.${DATE} && ${MAKE_GET_DATA_CMD} && cat pubchem.get-data-cmds.txt | parallel --line-buffer
- DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folders except nbr2d and nbr3d)
+ NAME = pubchem
+ GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+ CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
+ MAKE_GET_DATA_CMD_1 = DIR=DATA.ontologies && mkdir -p $$DIR && cat $$DIR/ontologies.csv | while IFS=',' read -r DESC FILE URL; do ERRFILE=$${FILE%.*}.jena-stderr; echo "echo \"Processing $$URL ($$FILE) ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$DIR/$$FILE 2> $$DIR/$$ERRFILE | gzip -c > $$DIR/$${FILE%.*}.nt.gz && rm -f $$DIR/$$FILE && if [ ! -s $$DIR/$$ERRFILE ]; then rm -f $$DIR/$$ERRFILE; fi || echo \"ERROR processing $$URL ($$FILE)\""; done > pubchem.get-data-cmds.txt
+ MAKE_GET_DATA_CMD_2 = DIR=DATA.pubchem && mkdir -p $$DIR && curl -LRO ${GET_DATA_URL}/void.ttl && grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' void.ttl | while read URL; do FILE=$$(basename $$URL); echo "echo \"Processing $$URL ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run -i --rm -v $$(pwd):/data stain/jena turtle --output=NT /data/$$DIR/$$FILE | gzip -c > $$DIR/$${FILE%%.*}.nt.gz && rm -f $$DIR/$$FILE || echo \"ERROR processing $$URL\""; done >> pubchem.get-data-cmds.txt
+ GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${MAKE_GET_DATA_CMD_1} && ${MAKE_GET_DATA_CMD_2} && cat pubchem.get-data-cmds.txt | parallel --line-buffer 2>&1 | tee pubchem.get-data-log.txt
+ VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
+ MAKE_ONTOLOGIES_CSV = $$(mkdir -p DATA.ontologies && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\n BioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\n CHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\n ChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\n CiTO,cito.nt,http://purl.org/spar/cito.nt\n DCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\n FaBiO,fabio.nt,http://purl.org/spar/fabio.nt\n GO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\n IAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\n NCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\n NDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\n OBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\n OWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\n PDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\n PR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\n RDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\n RDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\n RO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\n SIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\n SKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\n SO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\n UO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > DATA.ontologies/ontologies.csv)

  [index]
- INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
+ INPUT_FILES = DATA.ontologies/*.nt.gz DATA.pubchem/*.nt.gz
  CAT_INPUT_FILES = zcat ${INPUT_FILES}
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
  STXXL_MEMORY = 10G

  [server]
  PORT = 7023
- ACCESS_TOKEN = ${NAME}_310129823
+ ACCESS_TOKEN = ${data:NAME}
  MEMORY_FOR_QUERIES = 20G
  TIMEOUT = 120s

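For reference, the percent-encoding that the dropped sed/awk pipeline performed can be sketched in a few lines of Python (an illustrative stand-in, not part of the package; as NOTE 3 says, QLever's indexer now warns about such IRIs but accepts them):

    import re

    def fix_iris(nt_line: str) -> str:
        # Percent-encode spaces and brackets inside <...> IRIs, as the
        # old awk script did for the PubChem NT files (sketch only).
        def encode(match: re.Match) -> str:
            iri = match.group(0)
            for char, code in ((" ", "%20"), ("[", "%5B"), ("]", "%5D"),
                               ("{", "%7B"), ("}", "%7D")):
                iri = iri.replace(char, code)
            return iri
        return re.sub(r"<[^>]*>", encode, nt_line)

    print(fix_iris('<http://x.org/a b> <http://x.org/p> "o" .'))
    # <http://x.org/a%20b> <http://x.org/p> "o" .
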
qlever/Qleverfiles/Qleverfile.wikimedia-commons ADDED
@@ -0,0 +1,37 @@
+ # Qleverfile for Wikimedia Commons, TODO: add to https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # takes ~3 hours to download .bz2 file of size ~40 GB
+ # qlever index # takes ~2 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (takes around 15 seconds)
+
+ [data]
+ NAME = wikimedia-commons
+ MAIN_RDF_FILE = latest-mediainfo.ttl.gz
+ DATA_URL_BASE = https://dumps.wikimedia.org/other/wikibase/commonswiki
+ GET_TTL_CMD = wget -nc ${DATA_URL_BASE}/${MAIN_RDF_FILE}
+ GET_PROPS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX wikibase: <http://wikiba.se/ontology#> CONSTRUCT { ?s ?p ?o } WHERE { VALUES ?p { wikibase:claim wikibase:directClaim wikibase:novalue wikibase:propertyType wikibase:qualifier wikibase:qualifierValue wikibase:reference wikibase:referenceValue wikibase:statementProperty wikibase:statementValue } ?s ?p ?o }" > properties.nt
+ GET_LABELS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> CONSTRUCT { ?subject rdfs:label ?label } WHERE { ?subject @en@rdfs:label ?label }" > labels.nt
+ GET_DATA_CMD = ${GET_TTL_CMD} && ${GET_PROPS_CMD} && ${GET_LABELS_CMD}
+ INDEX_DESCRIPTION = Wikimedia Commons from ${DATA_URL_BASE}, version 09.11.2023 + Wikidata triples for rdfs:label and wikibase:claim etc.
+
+ [index]
+ INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt
+ CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+ WITH_TEXT_INDEX = from_literals
+ STXXL_MEMORY_GB = 5
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
+
+ [server]
+ PORT = 7033
+ ACCESS_TOKEN = ${data:NAME}_2511328747
+ MEMORY_FOR_QUERIES_GB = 20
+ CACHE_MAX_SIZE_GB = 10
+ CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
+
+ [runtime]
+ SYSTEM = native
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ PORT = 7000
+ CONFIG = wikimedia-commons
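
The `${data:NAME}` and `${data:MAIN_RDF_FILE}` references above are cross-section lookups in configparser's extended interpolation syntax (a literal `$` is escaped as `$$`, as in the PubChem Qleverfile), which is presumably how the qlever script resolves them. A minimal sketch:

    import textwrap
    from configparser import ConfigParser, ExtendedInterpolation

    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read_string(textwrap.dedent("""\
        [data]
        NAME = wikimedia-commons
        MAIN_RDF_FILE = latest-mediainfo.ttl.gz

        [index]
        INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt

        [server]
        ACCESS_TOKEN = ${data:NAME}_2511328747
        """))

    print(config["index"]["INPUT_FILES"])    # latest-mediainfo.ttl.gz labels.nt properties.nt
    print(config["server"]["ACCESS_TOKEN"])  # wikimedia-commons_2511328747
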
qlever/commands/example_queries.py CHANGED
@@ -59,17 +59,37 @@ class ExampleQueriesCommand(QleverCommand):
  "or just compute the size of the result")
  subparser.add_argument("--limit", type=int,
  help="Limit on the number of results")
+ subparser.add_argument("--remove-offset-and-limit",
+ action="store_true", default=False,
+ help="Remove OFFSET and LIMIT from the query")
  subparser.add_argument("--accept", type=str,
  choices=["text/tab-separated-values",
- "application/sparql-results+json"],
+ "text/csv",
+ "application/sparql-results+json",
+ "text/turtle"],
  default="text/tab-separated-values",
  help="Accept header for the SPARQL query")
  subparser.add_argument("--clear-cache",
  choices=["yes", "no"],
  default="yes",
  help="Clear the cache before each query")
+ subparser.add_argument("--width-query-description", type=int,
+ default=40,
+ help="Width for printing the query description")
+ subparser.add_argument("--width-error-message", type=int,
+ default=80,
+ help="Width for printing the error message "
+ "(0 = no limit)")
+ subparser.add_argument("--width-result-size", type=int,
+ default=14,
+ help="Width for printing the result size")

  def execute(self, args) -> bool:
+ # We can't have both `--remove-offset-and-limit` and `--limit`.
+ if args.remove_offset_and_limit and args.limit:
+ log.error("Cannot have both --remove-offset-and-limit and --limit")
+ return False
+
  # If `args.accept` is `application/sparql-results+json`, we need `jq`.
  if args.accept == "application/sparql-results+json":
  try:
@@ -153,26 +173,41 @@ class ExampleQueriesCommand(QleverCommand):
  with mute_log():
  ClearCacheCommand().execute(args)

- # Count query.
- if args.download_or_count == "count":
- # Find first string matching ?[a-zA-Z0-9_]+ in query.
- match = re.search(r"\?[a-zA-Z0-9_]+", query)
- if not match:
- log.error("Could not find a variable in this query:")
- log.info("")
- log.info(query)
- return False
- first_var = match.group(0)
- query = query.replace(
- "SELECT ",
- f"SELECT (COUNT({first_var}) AS {first_var}_count_) "
- f"WHERE {{ SELECT ", 1) + " }"
+ # Remove OFFSET and LIMIT (after the last closing bracket).
+ if args.remove_offset_and_limit or args.limit:
+ closing_bracket_idx = query.rfind("}")
+ regexes = [re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
+ re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)]
+ for regex in regexes:
+ match = re.search(regex, query[closing_bracket_idx:])
+ if match:
+ query = query[:closing_bracket_idx + match.start()] + \
+ query[closing_bracket_idx + match.end():]

  # Limit query.
  if args.limit:
- query = query.replace(
- "SELECT ", "SELECT * WHERE { SELECT ", 1) \
- + f" }} LIMIT {args.limit}"
+ query += f" LIMIT {args.limit}"
+
+ # Count query.
+ if args.download_or_count == "count":
+ # First find out if there is a FROM clause.
+ regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*",
+ re.IGNORECASE)
+ match_from_clause = re.search(regex_from_clause, query)
+ from_clause = " "
+ if match_from_clause:
+ from_clause = match_from_clause.group(0)
+ query = (query[:match_from_clause.start()] + " " +
+ query[match_from_clause.end():])
+ # Now we can add the outer SELECT COUNT(*).
+ query = re.sub(r"SELECT ",
+ "SELECT (COUNT(*) AS ?qlever_count_)"
+ + from_clause + "WHERE { SELECT ",
+ query, count=1, flags=re.IGNORECASE) + " }"
+
+ # A bit of pretty-printing.
+ query = re.sub(r"\s+", " ", query)
+ query = re.sub(r"\s*\.\s*\}", " }", query)

  # Launch query.
  try:
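
The new rewriting can be traced on a toy query. This standalone sketch (illustrative, not part of the package) mirrors the OFFSET/LIMIT stripping and the COUNT(*) wrapping from the hunk above:

    import re

    query = "SELECT ?s WHERE { ?s ?p ?o } LIMIT 100"

    # Remove OFFSET and LIMIT after the last closing bracket.
    closing = query.rfind("}")
    for regex in (re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
                  re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)):
        match = regex.search(query[closing:])
        if match:
            query = query[:closing + match.start()] + query[closing + match.end():]

    # Wrap in an outer COUNT(*), as done for `--download-or-count count`.
    query = re.sub(r"SELECT ",
                   "SELECT (COUNT(*) AS ?qlever_count_) WHERE { SELECT ",
                   query, count=1, flags=re.IGNORECASE) + " }"
    print(query)
    # SELECT (COUNT(*) AS ?qlever_count_) WHERE { SELECT ?s WHERE { ?s ?p ?o }  }
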
@@ -214,10 +249,16 @@ class ExampleQueriesCommand(QleverCommand):
  f" | tonumber\" {result_file}",
  return_output=True)
  else:
- if args.accept == "text/tab-separated-values":
+ if (args.accept == "text/tab-separated-values"
+ or args.accept == "text/csv"):
  result_size = run_command(
  f"sed 1d {result_file} | wc -l",
  return_output=True)
+ elif args.accept == "text/turtle":
+ result_size = run_command(
+ f"sed '1d;/^@prefix/d;/^\\s*$/d' "
+ f"{result_file} | wc -l",
+ return_output=True)
  else:
  result_size = run_command(
  f"jq -r \".results.bindings | length\""
@@ -232,19 +273,25 @@ class ExampleQueriesCommand(QleverCommand):
  Path(result_file).unlink(missing_ok=True)

  # Print description, time, result in tabular form.
- if (len(description) > 60):
- description = description[:57] + "..."
+ if len(description) > args.width_query_description:
+ description = description[:args.width_query_description - 3]
+ description += "..."
  if error_msg is None:
- log.info(f"{description:<60} {time_seconds:6.2f} s "
- f"{result_size:14,}")
+ log.info(f"{description:<{args.width_query_description}} "
+ f"{time_seconds:6.2f} s "
+ f"{result_size:>{args.width_result_size},}")
  count_succeeded += 1
  total_time_seconds += time_seconds
  total_result_size += result_size
  else:
  count_failed += 1
- if (len(error_msg) > 60) and args.log_level != "DEBUG":
- error_msg = error_msg[:57] + "..."
- log.error(f"{description:<60} failed "
+ if (args.width_error_message > 0
+ and len(error_msg) > args.width_error_message
+ and args.log_level != "DEBUG"):
+ error_msg = error_msg[:args.width_error_message - 3]
+ error_msg += "..."
+ log.error(f"{description:<{args.width_query_description}} "
+ f"failed "
  f"{colored(error_msg, 'red')}")

  # Print total time.
@@ -252,11 +299,11 @@ class ExampleQueriesCommand(QleverCommand):
  if count_succeeded > 0:
  query_or_queries = "query" if count_succeeded == 1 else "queries"
  description = (f"TOTAL for {count_succeeded} {query_or_queries}")
- log.info(f"{description:<60} "
+ log.info(f"{description:<{args.width_query_description}} "
  f"{total_time_seconds:6.2f} s "
  f"{total_result_size:>14,}")
  description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
- log.info(f"{description:<60} "
+ log.info(f"{description:<{args.width_query_description}} "
  f"{total_time_seconds / count_succeeded:6.2f} s "
  f"{round(total_result_size / count_succeeded):>14,}")
  else:
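
The new `--width-*` options work via nested f-string format specs: the inner `{...}` is evaluated first and its value becomes the field width. A tiny sketch with illustrative values:

    description, width = "TOTAL for 18 queries", 40
    result_size, size_width = 1234567, 14
    print(f"{description:<{width}} {result_size:>{size_width},}")
    # prints the description left-justified to 40 columns, then the result
    # size right-justified to 14 columns with thousands separators
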
qlever/commands/index.py CHANGED
@@ -25,7 +25,7 @@ class IndexCommand(QleverCommand):
  return True

  def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
- return {"data": ["name"],
+ return {"data": ["name", "format"],
  "index": ["input_files", "cat_input_files", "settings_json",
  "index_binary",
  "only_pso_and_pos_permutations", "use_patterns",
@@ -41,7 +41,7 @@
  def execute(self, args) -> bool:
  # Construct the command line.
  index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
- f" -F ttl -f -"
+ f" -F {args.format} -"
  f" -i {args.name}"
  f" -s {args.name}.settings.json")
  if args.only_pso_and_pos_permutations:
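
With the new `format` setting, the assembled indexer invocation for, say, N-Triples input looks as follows (a sketch with illustrative values; `IndexBuilderMain` is assumed here as the QLever indexer binary):

    cat_input_files = "zcat DATA.pubchem/*.nt.gz"
    index_binary, name, data_format = "IndexBuilderMain", "pubchem", "nt"
    index_cmd = (f"{cat_input_files} | {index_binary}"
                 f" -F {data_format} -"
                 f" -i {name}"
                 f" -s {name}.settings.json")
    print(index_cmd)
    # zcat DATA.pubchem/*.nt.gz | IndexBuilderMain -F nt - -i pubchem -s pubchem.settings.json
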
qlever/qleverfile.py CHANGED
@@ -53,6 +53,10 @@ class Qleverfile:
  "--text-description", type=str, default=None,
  help="A concise description of the additional text data"
  " if any")
+ data_args["format"] = arg(
+ "--format", type=str, default="ttl",
+ choices=["ttl", "nt", "nq"],
+ help="The format of the data")

  index_args["input_files"] = arg(
  "--input-files", type=str, required=True,
@@ -102,7 +106,7 @@ class Qleverfile:
  help="The binary for starting the server (this requires "
  "that you have compiled QLever on your machine)")
  server_args["host_name"] = arg(
- "--host-name", type=str, default=f"{socket.getfqdn()}",
+ "--host-name", type=str, default=f"localhost",
  help="The name of the host on which the server listens for "
  "requests")
  server_args["port"] = arg(
qlever/util.py CHANGED
@@ -186,8 +186,9 @@ def get_random_string(length: int) -> str:

  def is_port_used(port: int) -> bool:
  """
- Try to bind to the port on all interfaces to check if the port is already in use.
- If the port is already in use, `socket.bind` will raise an `OSError` with errno EADDRINUSE.
+ Try to bind to the port on all interfaces to check if the port is already
+ in use. If the port is already in use, `socket.bind` will raise an
+ `OSError` with errno EADDRINUSE.
  """
  try:
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -200,3 +201,16 @@ def is_port_used(port: int) -> bool:
  if err.errno != errno.EADDRINUSE:
  log.warning(f"Failed to determine if port is used: {err}")
  return True
+
+
+ def check_if_installed(name: str, check_cmd: str) -> bool:
+ """
+ Helper function that checks if a given program is installed by running
+ the given command.
+ """
+ try:
+ run_command(check_cmd)
+ return True
+ except Exception as e:
+ log.error(f"{name} is not installed: {e}")
+ return False
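
A minimal usage sketch for the new helper (the check commands are illustrative and mirror the CHECK_REQUIREMENTS line in the PubChem Qleverfile above):

    from qlever.util import check_if_installed

    for name, check_cmd in [("docker", "docker --version"),
                            ("parallel", "parallel --version")]:
        if not check_if_installed(name, check_cmd):
            raise RuntimeError(f"please install {name} first")
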
qlever-0.5.4.dist-info/METADATA → qlever-0.5.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: qlever
- Version: 0.5.4
+ Version: 0.5.6
  Summary: Script for using the QLever SPARQL engine.
  Author-email: Hannah Bast <bast@cs.uni-freiburg.de>
  License: Apache-2.0
@@ -77,6 +77,14 @@ There are many more commands and options, see `qlever --help` for general help,
  `qlever <command> --help` for help on a specific command, or just the
  autocompletion.

+ # Use with your own dataset
+
+ To use QLever with your own dataset, you should also write a `Qleverfile`, like
+ in the example above. The easiest way to write a `Qleverfile` is to get one of
+ the existing ones (using `qlever setup-config ...` as explained above) and then
+ change it according to your needs (the variable names should be self-explanatory).
+ Pick one for a dataset that is similar to yours and when in doubt, pick `olympics`.
+
  # For developers
  The (Python) code for the script is in the `*.py` files in `src/qlever`. The
  The (Python) code for the script is in the `*.py` files in `src/qlever`. The
qlever-0.5.4.dist-info/RECORD → qlever-0.5.6.dist-info/RECORD CHANGED
@@ -6,8 +6,8 @@ qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
  qlever/log.py,sha256=2O_RvFymnu_dB10ErBTAOsI8bgjORfdD0tE3USH-siM,1315
  qlever/qlever_main.py,sha256=tA_xqOs_FjvqlDIvKTprwuysfTwzsUjE7at26gRhCVA,2336
  qlever/qlever_old.py,sha256=X-JxmepFKYeFgSLLp0TRDNqXSxDwIbc8_0Xstiems8c,62026
- qlever/qleverfile.py,sha256=NjY3SFyRTm_igI8Rv87TOvZBiLwn1TgHmRh1jVA51DM,12935
- qlever/util.py,sha256=20NQJquSk_mSqvlK4k0OrSBqWrxKs5SgVshm5ucus5o,7847
+ qlever/qleverfile.py,sha256=D321zDnWi-ScCefbFGBydKKI7lzzr1CkohHW6KuwVw0,13106
+ qlever/util.py,sha256=xNXxXTDfoDqTV0DKo5rKQpkdIwvi7JwfW7ySelvJaZ0,8185
  qlever/Qleverfiles/Qleverfile.dblp,sha256=Y6BqAG1GZg-OmEs0HM00yAQuY2TGnSzsOO1LLmGVn2Y,994
  qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
  qlever/Qleverfiles/Qleverfile.dbpedia,sha256=aaNZZayE-zVePGSwPzXemkX__Ns8-kP_E7DNNKZPnqg,1160
@@ -20,20 +20,21 @@ qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=Y_yUxdpWpUOSDo_zmVKj3caa8X-Wv-1K
  qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
  qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
  qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
- qlever/Qleverfiles/Qleverfile.pubchem,sha256=a6EAP8mOfC0V6NnVCLarvRagyoQSQDItR7AnrZqL9iE,3899
+ qlever/Qleverfiles/Qleverfile.pubchem,sha256=YuDzWQmukSvL1opu7cf1KX9407_P21lmecYZ9cdbuvA,5611
  qlever/Qleverfiles/Qleverfile.scientists,sha256=9eZ2c6P9a3E3VHa3RR7LdOQbF4k3oyyrn56Z3u4LZYs,1164
  qlever/Qleverfiles/Qleverfile.uniprot,sha256=9kAKseomdUnIt7EAZge39g1MTuaLVaSW9JYLHzIMolM,2338
  qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
  qlever/Qleverfiles/Qleverfile.wikidata,sha256=vDkTY3mPSx2C8MvFWfB72zZoc4d-TMJSw3f_-FqnEqs,1275
+ qlever/Qleverfiles/Qleverfile.wikimedia-commons,sha256=5JJ1MIp6LoM-ROCDFFIRvLREepCF4i4PnjOT9AFihzQ,2247
  qlever/Qleverfiles/Qleverfile.wikipathways,sha256=UFEVLrtOBiSQfibBN9xc2wDXrnWcnx5f8PY9khcE6bc,1983
  qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
  qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  qlever/commands/add_text_index.py,sha256=dkqYtwgOhgnXiei_eyhBWYCtdAiQUEmjWoa3JMlMb4c,3641
  qlever/commands/cache_stats.py,sha256=6JjueQstAqc8dNfgY8TP2EitFMxdUvCwrcyd7KUEb2o,4157
  qlever/commands/clear_cache.py,sha256=AnE1MOoj1ZexxrRT8FGeBLlv8rtQIVV4DP8VBn5-X-s,2843
- qlever/commands/example_queries.py,sha256=L32gVKdvb7MwZNqphF1K_gK6WARiwno6oiNDBgZuj1Y,12396
+ qlever/commands/example_queries.py,sha256=5-0ln5EkuDcQYPqKKAOcLaTIStMzFhkAogaNedfRc_I,15271
  qlever/commands/get_data.py,sha256=f9kjZI3TKad6JHSuXWNkeoajmW8h0Sx8ShvjauDCtNo,1412
- qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
+ qlever/commands/index.py,sha256=iJ1wM7qtlAuRP_x0CupLWIndLRub1GqHvlCbB9ZlyPw,5680
  qlever/commands/index_stats.py,sha256=_BiUNBhmbYd9RPxrlm4HF0oENO6JmqnRiAkwkyOdN4U,11722
  qlever/commands/log.py,sha256=8Krt3MsTUDapYqVw1zUu5X15SF8mV97Uj0qKOWK8jXk,1861
  qlever/commands/query.py,sha256=_IDH-M8gKL_f1i5wzu0X452pZSUD0_qXl6bPXC85wX0,2750
@@ -43,9 +44,9 @@ qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,163
  qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
  qlever/commands/ui.py,sha256=b7g7Mp6ZWevn8f1kwFr-WR4ZWMq42KEV4cGl2QS7M1E,2828
  qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
- qlever-0.5.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- qlever-0.5.4.dist-info/METADATA,sha256=sL8oC3NhgnRmUMEMIqfqozI_RTcHkaFYUWeailPrB8g,4146
- qlever-0.5.4.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
- qlever-0.5.4.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
- qlever-0.5.4.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
- qlever-0.5.4.dist-info/RECORD,,
+ qlever-0.5.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ qlever-0.5.6.dist-info/METADATA,sha256=FRJKEH385p07cxSLLKRHSgiND-PFwFtPnIYWQjVBv3M,4582
+ qlever-0.5.6.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+ qlever-0.5.6.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
+ qlever-0.5.6.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
+ qlever-0.5.6.dist-info/RECORD,,
qlever-0.5.4.dist-info/WHEEL → qlever-0.5.6.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.2.0)
+ Generator: setuptools (74.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any