PyPI - qlever - Versions diffs - 0.5.15__py3-none-any.whl → 0.5.18__py3-none-any.whl - Mend

qlever 0.5.15__py3-none-any.whl → 0.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qlever might be problematic. Click here for more details.

Files changed (25) hide show

qlever/Qleverfiles/Qleverfile.ohm-planet +15 -12
qlever/Qleverfiles/Qleverfile.osm-planet +17 -15
qlever/Qleverfiles/Qleverfile.uniprot +2 -3
qlever/__init__.py +9 -4
qlever/command.py +6 -5
qlever/commands/add_text_index.py +47 -28
qlever/commands/example_queries.py +138 -46
qlever/commands/extract_queries.py +113 -0
qlever/commands/index.py +41 -14
qlever/commands/query.py +32 -3
qlever/commands/settings.py +110 -0
qlever/commands/start.py +215 -104
qlever/commands/stop.py +39 -26
qlever/commands/system_info.py +7 -3
qlever/commands/ui.py +16 -4
qlever/log.py +2 -1
qlever/qlever_old.py +607 -369
qlever/qleverfile.py +29 -6
qlever/util.py +34 -17
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/METADATA +2 -2
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/RECORD +25 -23
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/WHEEL +1 -1
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/LICENSE +0 -0
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/entry_points.txt +0 -0
{qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/top_level.txt +0 -0

qlever/Qleverfiles/Qleverfile.ohm-planet CHANGED Viewed

@@ -1,33 +1,36 @@
-# Qleverfile for OHM Planet, use with https://github.com/ad-freiburg/qlever-control
+# Qleverfile for OpenHistoricalMap, use with the QLever CLI (`pip install qlever`)
 #
-# qlever get-data  # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples)
-# qlever index     # ~20 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
-# qlever start     # ~1 sec
+# qlever get-data  # ~1 hour, ~14 GB (ttl.gz), ~3.4 B triples (with osm2rdf)
+# qlever index     # ~1 hour, ~10 GB RAM, ~60 GB index size on disk
+# qlever start     # a few seconds, adjust MEMORY_FOR_QUERIES as needed
 #
-# For `qlever get-data` to work, `osm2rdf` must be installed and in the `PATH`.
+# Measured on an AMD Ryzen 9 5900X with 128 GB RAM and 1 x 4 TB NVMe (04.01.2025)
 [data]
 NAME           = ohm-planet
 GET_DATA_URL   = https://planet.openhistoricalmap.org/planet
 CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
-GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
-GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --cache . --add-hascentroid 2>&1 | tee ${NAME}.osm2rdf-log.txt
-GET_DATA_CMD   = set -o pipefail && ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
+GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
+GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --add-way-node-order --no-untagged-nodes-geometric-relations 2>&1 | tee ${NAME}.osm2rdf-log.txt
+GET_DATA_CMD   = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
 VERSION        = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
 DESCRIPTION    = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
 [index]
-INPUT_FILES      = ${data:NAME}.ttl.bz2
-CAT_INPUT_FILES  = bzcat -f ${INPUT_FILES}
-SETTINGS_JSON    = { "prefixes-external": [""], "ascii-prefixes-only": false, "parallel-parsing": true, "num-triples-per-batch": 5000000 }
+INPUT_FILES        = ${data:NAME}.ttl.gz
+MULTI_INPUT_JSON   = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+STXXL_MEMORY       = 5G
+PARSER_BUFFER_SIZE = 50M
+SETTINGS_JSON      = { "num-triples-per-batch": 5000000 }
 [server]
 PORT                        = 7037
 ACCESS_TOKEN                = ${data:NAME}
 MEMORY_FOR_QUERIES          = 10G
 CACHE_MAX_SIZE              = 5G
+TIMEOUT                     = 600s
 CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
-WARMUP_CMD                  = curl -s https://qlever.cs.uni-freiburg.de/mapui-petri/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
+WARMUP_CMD                  = curl -s https://qlever.cs.uni-freiburg.de/petrimaps/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
 [runtime]
 SYSTEM = docker

qlever/Qleverfiles/Qleverfile.osm-planet CHANGED Viewed

@@ -1,32 +1,34 @@
-# Qleverfile for OSM Planet, use with the qlever script (pip install qlever)
+# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
 #
-# qlever get-data  # takes ~50 mins to download .ttl.bz2 file of ~ 300 GB
-# qlever index     # takes ~12 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
-# qlever start     # takes a few seconds
+# qlever get-data  # downloads ~400 GB (ttl.bz2), ~100 B triples
+# qlever index     # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
+# qlever start     # a few seconds, adjust MEMORY_FOR_QUERIES as needed
 #
-# For the OSM data of a single country, do `qlever setup-config osm-country`
-# and edit the Qleverfile to specify the country,
+# Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)
 [data]
 NAME         = osm-planet
 DATA_URL     = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
-GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
+GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${DATA_URL} | tee ${NAME}.download-log.txt
 VERSION      = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
 DESCRIPTION  = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
 [index]
-INPUT_FILES     = ${data:NAME}.ttl.bz2
-CAT_INPUT_FILES = lbzcat -f -n 2 ${INPUT_FILES}
-STXXL_MEMORY    = 20G
-SETTINGS_JSON   = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+INPUT_FILES        = ${data:NAME}.ttl.bz2
+CAT_INPUT_FILES    = lbzcat -n 2 ${INPUT_FILES}
+PARALLEL_PARSING   = true
+PARSER_BUFFER_SIZE = 100M
+STXXL_MEMORY       = 40G
+SETTINGS_JSON      = { "num-triples-per-batch": 10000000 }
+ULIMIT             = 10000
 [server]
 PORT                        = 7007
 ACCESS_TOKEN                = ${data:NAME}
-MEMORY_FOR_QUERIES          = 90G
-CACHE_MAX_SIZE              = 40G
-CACHE_MAX_SIZE_SINGLE_ENTRY = 30G
-TIMEOUT                     = 300s
+MEMORY_FOR_QUERIES          = 40G
+CACHE_MAX_SIZE              = 20G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
+TIMEOUT                     = 600s
 [runtime]
 SYSTEM = docker

qlever/Qleverfiles/Qleverfile.uniprot CHANGED Viewed

@@ -4,7 +4,7 @@
 # qlever index     # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
 # qlever start     # starts the server (takes a few seconds)
 #
-# Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
+# Install packages: sudo apt install -y libxml2-utils raptor2-utils parallel xz-utils wget
 # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
 #
 # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
@@ -53,8 +53,7 @@ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/unip
                     { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
                     { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
                     { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
-                    { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" },
-                    { "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
+                    { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }]
 SETTINGS_JSON    = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
 STXXL_MEMORY     = 60G

qlever/__init__.py CHANGED Viewed

@@ -13,8 +13,11 @@ def snake_to_camel(str):
 # Each module in `qlever/commands` corresponds to a command. The name
 # of the command is the base name of the module file.
 package_path = Path(__file__).parent
-command_names = [Path(p).stem for p in package_path.glob("commands/*.py")
-                 if p.name != "__init__.py"]
+command_names = [
+    Path(p).stem
+    for p in package_path.glob("commands/*.py")
+    if p.name != "__init__.py"
+]
 # Dynamically load all the command classes and create an object for each.
 command_objects = {}
@@ -24,8 +27,10 @@ for command_name in command_names:
     try:
         module = __import__(module_path, fromlist=[class_name])
     except ImportError as e:
-        raise Exception(f"Could not import class {class_name} from module "
-                        f"{module_path} for command {command_name}: {e}")
+        raise Exception(
+            f"Could not import class {class_name} from module "
+            f"{module_path} for command {command_name}: {e}"
+        )
     # Create an object of the class and store it in the dictionary. For the
     # commands, take - instead of _.
     command_class = getattr(module, class_name)

qlever/command.py CHANGED Viewed

@@ -33,7 +33,6 @@ class QleverCommand(ABC):
     @abstractmethod
     def should_have_qleverfile(self) -> bool:
         """
         Return `True` if the command should have a Qleverfile, `False`
         otherwise. If a command should have a Qleverfile, but none is
@@ -43,7 +42,7 @@ class QleverCommand(ABC):
         pass
     @abstractmethod
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
         """
         Return the arguments relevant for this command. This must be a subset of
         the names of `all_arguments` defined in `QleverConfig`. Only these
@@ -81,6 +80,8 @@ class QleverCommand(ABC):
         log.info(colored(command_description, "blue"))
         log.info("")
         if only_show:
-            log.info("You called \"qlever ... --show\", therefore the command "
-                     "is only shown, but not executed (omit the \"--show\" to "
-                     "execute it)")
+            log.info(
+                'You called "qlever ... --show", therefore the command '
+                'is only shown, but not executed (omit the "--show" to '
+                "execute it)"
+            )

qlever/commands/add_text_index.py CHANGED Viewed

@@ -17,22 +17,29 @@ class AddTextIndexCommand(QleverCommand):
         pass
     def description(self) -> str:
-        return ("Add text index to an index built with `qlever index`")
+        return "Add text index to an index built with `qlever index`"
     def should_have_qleverfile(self) -> bool:
         return True
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
-        return {"data": ["name"],
-                "index": ["index_binary", "text_index",
-                          "text_words_file", "text_docs_file"],
-                "runtime": ["system", "image", "index_container"]}
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {
+            "data": ["name"],
+            "index": [
+                "index_binary",
+                "text_index",
+                "text_words_file",
+                "text_docs_file",
+            ],
+            "runtime": ["system", "image", "index_container"],
+        }
     def additional_arguments(self, subparser) -> None:
         subparser.add_argument(
-                "--overwrite-existing",
-                action="store_true",
-                help="Overwrite existing text index files")
+            "--overwrite-existing",
+            action="store_true",
+            help="Overwrite existing text index files",
+        )
     def execute(self, args) -> bool:
         # Check that there is actually something to add.
@@ -42,24 +49,31 @@ class AddTextIndexCommand(QleverCommand):
         # Construct the command line.
         add_text_index_cmd = f"{args.index_binary} -A -i {args.name}"
-        if args.text_index in \
-                ["from_text_records", "from_text_records_and_literals"]:
-            add_text_index_cmd += (f" -w {args.text_words_file}"
-                                   f" -d {args.text_docs_file}")
-        if args.text_index in \
-                ["from_literals", "from_text_records_and_literals"]:
+        if args.text_index in [
+            "from_text_records",
+            "from_text_records_and_literals",
+        ]:
+            add_text_index_cmd += (
+                f" -w {args.text_words_file}" f" -d {args.text_docs_file}"
+            )
+        if args.text_index in [
+            "from_literals",
+            "from_text_records_and_literals",
+        ]:
             add_text_index_cmd += " --text-words-from-literals"
         add_text_index_cmd += f" | tee {args.name}.text-index-log.txt"
         # Run the command in a container (if so desired).
         if args.system in Containerize.supported_systems():
             add_text_index_cmd = Containerize().containerize_command(
-                    add_text_index_cmd,
-                    args.system, "run --rm",
-                    args.image,
-                    args.index_container,
-                    volumes=[("$(pwd)", "/index")],
-                    working_directory="/index")
+                add_text_index_cmd,
+                args.system,
+                "run --rm",
+                args.image,
+                args.index_container,
+                volumes=[("$(pwd)", "/index")],
+                working_directory="/index",
+            )
         # Show the command line.
         self.show(add_text_index_cmd, only_show=args.show)
@@ -71,17 +85,22 @@ class AddTextIndexCommand(QleverCommand):
             try:
                 run_command(f"{args.index_binary} --help")
             except Exception as e:
-                log.error(f"Running \"{args.index_binary}\" failed ({e}), "
-                          f"set `--index-binary` to a different binary or "
-                          f"use `--container_system`")
+                log.error(
+                    f'Running "{args.index_binary}" failed ({e}), '
+                    f"set `--index-binary` to a different binary or "
+                    f"use `--container_system`"
+                )
                 return False
         # Check if text index files already exist.
         existing_text_index_files = get_existing_index_files(
-                f"{args.name}.text.*")
+            f"{args.name}.text.*"
+        )
         if len(existing_text_index_files) > 0 and not args.overwrite_existing:
-            log.error("Text index files found, if you want to overwrite them, "
-                      "use --overwrite-existing")
+            log.error(
+                "Text index files found, if you want to overwrite them, "
+                "use --overwrite-existing"
+            )
             log.info("")
             log.info(f"Index files found: {existing_text_index_files}")
             return False
@@ -90,7 +109,7 @@ class AddTextIndexCommand(QleverCommand):
         try:
             subprocess.run(add_text_index_cmd, shell=True, check=True)
         except Exception as e:
-            log.error(f"Running \"{add_text_index_cmd}\" failed ({e})")
+            log.error(f'Running "{add_text_index_cmd}" failed ({e})')
             return False
         return True

qlever 0.5.15__py3-none-any.whl → 0.5.18__py3-none-any.whl

Potentially problematic release.

qlever 0.5.15__py3-none-any.whl → 0.5.18__py3-none-any.whl