qlever 0.5.17__py3-none-any.whl → 0.5.18__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release: this version of qlever might be problematic.

@@ -1,33 +1,36 @@
- # Qleverfile for OHM Planet, use with https://github.com/ad-freiburg/qlever-control
+ # Qleverfile for OpenHistoricalMap, use with the QLever CLI (`pip install qlever`)
  #
- # qlever get-data # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples)
- # qlever index # ~20 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
- # qlever start # ~1 sec
+ # qlever get-data # ~1 hour, ~14 GB (ttl.gz), ~3.4 B triples (with osm2rdf)
+ # qlever index # ~1 hour, ~10 GB RAM, ~60 GB index size on disk
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
  #
- # For `qlever get-data` to work, `osm2rdf` must be installed and in the `PATH`.
+ # Measured on an AMD Ryzen 9 5900X with 128 GB RAM and 1 x 4 TB NVMe (04.01.2025)

  [data]
  NAME = ohm-planet
  GET_DATA_URL = https://planet.openhistoricalmap.org/planet
  CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
- GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
- GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --cache . --add-hascentroid 2>&1 | tee ${NAME}.osm2rdf-log.txt
- GET_DATA_CMD = set -o pipefail && ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
+ GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
+ GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --add-way-node-order --no-untagged-nodes-geometric-relations 2>&1 | tee ${NAME}.osm2rdf-log.txt
+ GET_DATA_CMD = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
  VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
  DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

  [index]
- INPUT_FILES = ${data:NAME}.ttl.bz2
- CAT_INPUT_FILES = bzcat -f ${INPUT_FILES}
- SETTINGS_JSON = { "prefixes-external": [""], "ascii-prefixes-only": false, "parallel-parsing": true, "num-triples-per-batch": 5000000 }
+ INPUT_FILES = ${data:NAME}.ttl.gz
+ MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+ STXXL_MEMORY = 5G
+ PARSER_BUFFER_SIZE = 50M
+ SETTINGS_JSON = { "num-triples-per-batch": 5000000 }

  [server]
  PORT = 7037
  ACCESS_TOKEN = ${data:NAME}
  MEMORY_FOR_QUERIES = 10G
  CACHE_MAX_SIZE = 5G
+ TIMEOUT = 600s
  CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
- WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/mapui-petri/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
+ WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/petrimaps/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null

  [runtime]
  SYSTEM = docker
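
Note on the placeholder syntax used throughout these Qleverfiles: `${OPTION}` expands within the same section, `${section:OPTION}` reaches across sections, and `$$` escapes a literal `$` so that shell constructs like `$(curl ...)` survive the expansion. This matches Python's configparser with ExtendedInterpolation, which is presumably how the QLever CLI reads the file; the snippet below is an illustrative sketch under that assumption, not code from the package.

# Sketch only: how the ${...} placeholders above would expand, assuming
# configparser/ExtendedInterpolation semantics.
from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_dict({
    "data": {
        "NAME": "ohm-planet",
        "GET_DATA_URL": "https://planet.openhistoricalmap.org/planet",
        "GET_DATA_CMD_1": "unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt)",
    },
    "index": {"INPUT_FILES": "${data:NAME}.ttl.gz"},
})

# ${NAME} and ${GET_DATA_URL} resolve within [data], ${data:NAME} reaches
# across sections, and $$ becomes a single literal $ for the shell.
print(config["data"]["GET_DATA_CMD_1"])
# -> unbuffer wget -O ohm-planet.pbf $(curl -s https://planet.openhistoricalmap.org/planet/state.txt)
print(config["index"]["INPUT_FILES"])  # -> ohm-planet.ttl.gz
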
@@ -1,32 +1,34 @@
- # Qleverfile for OSM Planet, use with the qlever script (pip install qlever)
+ # Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
  #
- # qlever get-data # takes ~50 mins to download .ttl.bz2 file of ~ 300 GB
- # qlever index # takes ~12 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
- # qlever start # takes a few seconds
+ # qlever get-data # downloads ~400 GB (ttl.bz2), ~100 B triples
+ # qlever index # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
  #
- # For the OSM data of a single country, do `qlever setup-config osm-country`
- # and edit the Qleverfile to specify the country,
+ # Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)

  [data]
  NAME = osm-planet
  DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
- GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
+ GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${DATA_URL} | tee ${NAME}.download-log.txt
  VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
  DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

  [index]
- INPUT_FILES = ${data:NAME}.ttl.bz2
- CAT_INPUT_FILES = lbzcat -f -n 2 ${INPUT_FILES}
- STXXL_MEMORY = 20G
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+ INPUT_FILES = ${data:NAME}.ttl.bz2
+ CAT_INPUT_FILES = lbzcat -n 2 ${INPUT_FILES}
+ PARALLEL_PARSING = true
+ PARSER_BUFFER_SIZE = 100M
+ STXXL_MEMORY = 40G
+ SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ ULIMIT = 10000

  [server]
  PORT = 7007
  ACCESS_TOKEN = ${data:NAME}
- MEMORY_FOR_QUERIES = 90G
- CACHE_MAX_SIZE = 40G
- CACHE_MAX_SIZE_SINGLE_ENTRY = 30G
- TIMEOUT = 300s
+ MEMORY_FOR_QUERIES = 40G
+ CACHE_MAX_SIZE = 20G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
+ TIMEOUT = 600s

  [runtime]
  SYSTEM = docker
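
The size-valued settings above (PARSER_BUFFER_SIZE = 100M, STXXL_MEMORY = 40G, MEMORY_FOR_QUERIES = 40G, ...) use single-letter suffixes. A tiny, hypothetical helper to make the units concrete; `parse_size` is not part of the qlever package, and the 1024-based interpretation is an assumption.

# Hypothetical helper, not from the package: turn "100M" / "40G" into bytes,
# assuming 1024-based units.
def parse_size(size: str) -> int:
    units = {"K": 1024, "M": 1024**2, "G": 1024**3, "T": 1024**4}
    size = size.strip().upper()
    if size and size[-1] in units:
        return int(float(size[:-1]) * units[size[-1]])
    return int(size)

assert parse_size("100M") == 100 * 1024**2
assert parse_size("40G") == 40 * 1024**3
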
@@ -4,7 +4,7 @@
  # qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
  # qlever start # starts the server (takes a few seconds)
  #
- # Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
+ # Install packages: sudo apt install -y libxml2-utils raptor2-utils parallel xz-utils wget
  # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
  #
  # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
@@ -53,8 +53,7 @@ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/unip
  { "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
  { "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
  { "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
- { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" },
- { "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
+ { "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }]
  SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
  STXXL_MEMORY = 60G

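For reference, the shape `MULTI_INPUT_JSON` is expected to have follows from the validation in qlever/commands/index.py further down in this diff: a JSON array of objects, each with a `cmd` (and, as here, usually a `graph`). A short sketch of those checks, with illustrative file names:

# Sketch mirroring the MULTI_INPUT_JSON checks visible in
# qlever/commands/index.py below; the file names are illustrative.
import json

multi_input_json = """[
  {"cmd": "zcat tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues"},
  {"cmd": "zcat rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea"}
]"""

input_specs = json.loads(multi_input_json)
if not isinstance(input_specs, list) or len(input_specs) == 0:
    raise ValueError("`MULTI_INPUT_JSON` must be a non-empty JSON array")
for i, input_spec in enumerate(input_specs):
    if not isinstance(input_spec, dict):
        raise ValueError(f"Element {i} in `MULTI_INPUT_JSON` must be a JSON object")
    if "cmd" not in input_spec:
        raise ValueError(f"Element {i} in `MULTI_INPUT_JSON` must contain a key `cmd`")
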
qlever/__init__.py CHANGED
@@ -13,8 +13,11 @@ def snake_to_camel(str):
  # Each module in `qlever/commands` corresponds to a command. The name
  # of the command is the base name of the module file.
  package_path = Path(__file__).parent
- command_names = [Path(p).stem for p in package_path.glob("commands/*.py")
- if p.name != "__init__.py"]
+ command_names = [
+ Path(p).stem
+ for p in package_path.glob("commands/*.py")
+ if p.name != "__init__.py"
+ ]

  # Dynamically load all the command classes and create an object for each.
  command_objects = {}
@@ -24,8 +27,10 @@ for command_name in command_names:
  try:
  module = __import__(module_path, fromlist=[class_name])
  except ImportError as e:
- raise Exception(f"Could not import class {class_name} from module "
- f"{module_path} for command {command_name}: {e}")
+ raise Exception(
+ f"Could not import class {class_name} from module "
+ f"{module_path} for command {command_name}: {e}"
+ )
  # Create an object of the class and store it in the dictionary. For the
  # commands, take - instead of _.
  command_class = getattr(module, class_name)
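
The list comprehension reformatted above is part of the CLI's command discovery: every module in `qlever/commands` becomes a command, and the class it must define is the CamelCase form of the module name plus `Command` (e.g. `add_text_index` -> `AddTextIndexCommand`). A self-contained sketch of that pattern; the `snake_to_camel` body here is an assumption, only the overall mechanism is taken from the code above.

# Sketch of the command-discovery pattern shown above; requires the qlever
# package to be installed. The snake_to_camel body is assumed, not copied.
import importlib
from pathlib import Path

import qlever


def snake_to_camel(name: str) -> str:
    return "".join(part.capitalize() for part in name.split("_"))


package_path = Path(qlever.__file__).parent
command_names = [
    p.stem
    for p in package_path.glob("commands/*.py")
    if p.name != "__init__.py"
]
command_objects = {}
for command_name in command_names:
    module = importlib.import_module(f"qlever.commands.{command_name}")
    command_class = getattr(module, snake_to_camel(command_name) + "Command")
    # The CLI exposes commands with "-" instead of "_" (see the comment above).
    command_objects[command_name.replace("_", "-")] = command_class()

print(sorted(command_objects))
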
qlever/command.py CHANGED
@@ -33,7 +33,6 @@ class QleverCommand(ABC):

  @abstractmethod
  def should_have_qleverfile(self) -> bool:
-
  """
  Return `True` if the command should have a Qleverfile, `False`
  otherwise. If a command should have a Qleverfile, but none is
@@ -43,7 +42,7 @@ class QleverCommand(ABC):
  pass

  @abstractmethod
- def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
  """
  Retun the arguments relevant for this command. This must be a subset of
  the names of `all_arguments` defined in `QleverConfig`. Only these
@@ -81,6 +80,8 @@ class QleverCommand(ABC):
  log.info(colored(command_description, "blue"))
  log.info("")
  if only_show:
- log.info("You called \"qlever ... --show\", therefore the command "
- "is only shown, but not executed (omit the \"--show\" to "
- "execute it)")
+ log.info(
+ 'You called "qlever ... --show", therefore the command '
+ 'is only shown, but not executed (omit the "--show" to '
+ "execute it)"
+ )
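
Taken together, the base class above defines the interface every command implements. A hedged sketch of a minimal command, based only on the methods visible in this diff; it is not a module that exists in the package.

# Illustrative only: a minimal command against the QleverCommand interface
# shown above; "hello" is not a real qlever command.
from qlever.command import QleverCommand
from qlever.log import log


class HelloCommand(QleverCommand):
    def __init__(self):
        pass

    def description(self) -> str:
        return "Print a greeting (example only)"

    def should_have_qleverfile(self) -> bool:
        return False

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--shout", action="store_true", help="Greet in upper case"
        )

    def execute(self, args) -> bool:
        self.show("echo hello", only_show=args.show)
        if args.show:
            return True
        log.info("HELLO" if args.shout else "hello")
        return True
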
@@ -17,22 +17,29 @@ class AddTextIndexCommand(QleverCommand):
  pass

  def description(self) -> str:
- return ("Add text index to an index built with `qlever index`")
+ return "Add text index to an index built with `qlever index`"

  def should_have_qleverfile(self) -> bool:
  return True

- def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
- return {"data": ["name"],
- "index": ["index_binary", "text_index",
- "text_words_file", "text_docs_file"],
- "runtime": ["system", "image", "index_container"]}
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+ return {
+ "data": ["name"],
+ "index": [
+ "index_binary",
+ "text_index",
+ "text_words_file",
+ "text_docs_file",
+ ],
+ "runtime": ["system", "image", "index_container"],
+ }

  def additional_arguments(self, subparser) -> None:
  subparser.add_argument(
- "--overwrite-existing",
- action="store_true",
- help="Overwrite existing text index files")
+ "--overwrite-existing",
+ action="store_true",
+ help="Overwrite existing text index files",
+ )

  def execute(self, args) -> bool:
  # Check that there is actually something to add.
@@ -42,24 +49,31 @@ class AddTextIndexCommand(QleverCommand):

  # Construct the command line.
  add_text_index_cmd = f"{args.index_binary} -A -i {args.name}"
- if args.text_index in \
- ["from_text_records", "from_text_records_and_literals"]:
- add_text_index_cmd += (f" -w {args.text_words_file}"
- f" -d {args.text_docs_file}")
- if args.text_index in \
- ["from_literals", "from_text_records_and_literals"]:
+ if args.text_index in [
+ "from_text_records",
+ "from_text_records_and_literals",
+ ]:
+ add_text_index_cmd += (
+ f" -w {args.text_words_file}" f" -d {args.text_docs_file}"
+ )
+ if args.text_index in [
+ "from_literals",
+ "from_text_records_and_literals",
+ ]:
  add_text_index_cmd += " --text-words-from-literals"
  add_text_index_cmd += f" | tee {args.name}.text-index-log.txt"

  # Run the command in a container (if so desired).
  if args.system in Containerize.supported_systems():
  add_text_index_cmd = Containerize().containerize_command(
- add_text_index_cmd,
- args.system, "run --rm",
- args.image,
- args.index_container,
- volumes=[("$(pwd)", "/index")],
- working_directory="/index")
+ add_text_index_cmd,
+ args.system,
+ "run --rm",
+ args.image,
+ args.index_container,
+ volumes=[("$(pwd)", "/index")],
+ working_directory="/index",
+ )

  # Show the command line.
  self.show(add_text_index_cmd, only_show=args.show)
@@ -71,17 +85,22 @@ class AddTextIndexCommand(QleverCommand):
  try:
  run_command(f"{args.index_binary} --help")
  except Exception as e:
- log.error(f"Running \"{args.index_binary}\" failed ({e}), "
- f"set `--index-binary` to a different binary or "
- f"use `--container_system`")
+ log.error(
+ f'Running "{args.index_binary}" failed ({e}), '
+ f"set `--index-binary` to a different binary or "
+ f"use `--container_system`"
+ )
  return False

  # Check if text index files already exist.
  existing_text_index_files = get_existing_index_files(
- f"{args.name}.text.*")
+ f"{args.name}.text.*"
+ )
  if len(existing_text_index_files) > 0 and not args.overwrite_existing:
- log.error("Text index files found, if you want to overwrite them, "
- "use --overwrite-existing")
+ log.error(
+ "Text index files found, if you want to overwrite them, "
+ "use --overwrite-existing"
+ )
  log.info("")
  log.info(f"Index files found: {existing_text_index_files}")
  return False
@@ -90,7 +109,7 @@ class AddTextIndexCommand(QleverCommand):
  try:
  subprocess.run(add_text_index_cmd, shell=True, check=True)
  except Exception as e:
- log.error(f"Running \"{add_text_index_cmd}\" failed ({e})")
+ log.error(f'Running "{add_text_index_cmd}" failed ({e})')
  return False

  return True
@@ -17,14 +17,15 @@ from qlever.util import run_command, run_curl_command

  class ExampleQueriesCommand(QleverCommand):
  """
- Class for executing the `warmup` command.
+ Class for running a given sequence of example queries and showing
+ their processing times and result sizes.
  """

  def __init__(self):
  pass

  def description(self) -> str:
- return "Show how much of the cache is currently being used"
+ return "Run the given queries and show their processing times and result sizes"

  def should_have_qleverfile(self) -> bool:
  return False
qlever/commands/index.py CHANGED
@@ -2,13 +2,17 @@ from __future__ import annotations

  import glob
  import json
- import shlex
  import re
+ import shlex

  from qlever.command import QleverCommand
  from qlever.containerize import Containerize
  from qlever.log import log
- from qlever.util import get_existing_index_files, get_total_file_size, run_command
+ from qlever.util import (
+ get_existing_index_files,
+ get_total_file_size,
+ run_command,
+ )


  class IndexCommand(QleverCommand):
@@ -36,9 +40,11 @@ class IndexCommand(QleverCommand):
  "settings_json",
  "index_binary",
  "only_pso_and_pos_permutations",
+ "ulimit",
  "use_patterns",
  "text_index",
  "stxxl_memory",
+ "parser_buffer_size",
  ],
  "runtime": ["system", "image", "index_container"],
  }
@@ -48,7 +54,7 @@ class IndexCommand(QleverCommand):
  "--overwrite-existing",
  action="store_true",
  default=False,
- help="Overwrite an existing index, think twice before using.",
+ help="Overwrite an existing index, think twice before using this",
  )

  # Exception for invalid JSON.
@@ -76,7 +82,8 @@ class IndexCommand(QleverCommand):
  # Check that it is an array of length at least one.
  if not isinstance(input_specs, list):
  raise self.InvalidInputJson(
- "`MULTI_INPUT_JSON` must be a JSON array", args.multi_input_json
+ "`MULTI_INPUT_JSON` must be a JSON array",
+ args.multi_input_json,
  )
  if len(input_specs) == 0:
  raise self.InvalidInputJson(
@@ -90,13 +97,15 @@ class IndexCommand(QleverCommand):
  # Check that `input_spec` is a dictionary.
  if not isinstance(input_spec, dict):
  raise self.InvalidInputJson(
- f"Element {i} in `MULTI_INPUT_JSON` must be a JSON " "object",
+ f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
+ "object",
  input_spec,
  )
  # For each `input_spec`, we must have a command.
  if "cmd" not in input_spec:
  raise self.InvalidInputJson(
- f"Element {i} in `MULTI_INPUT_JSON` must contain a " "key `cmd`",
+ f"Element {i} in `MULTI_INPUT_JSON` must contain a "
+ "key `cmd`",
  input_spec,
  )
  # If the command contains a `{}` placeholder, we need a `for-each`
@@ -204,20 +213,31 @@ class IndexCommand(QleverCommand):
  index_cmd += " --only-pso-and-pos-permutations --no-patterns"
  if not args.use_patterns:
  index_cmd += " --no-patterns"
- if args.text_index in ["from_text_records", "from_text_records_and_literals"]:
+ if args.text_index in [
+ "from_text_records",
+ "from_text_records_and_literals",
+ ]:
  index_cmd += (
- f" -w {args.name}.wordsfile.tsv" f" -d {args.name}.docsfile.tsv"
+ f" -w {args.name}.wordsfile.tsv"
+ f" -d {args.name}.docsfile.tsv"
  )
- if args.text_index in ["from_literals", "from_text_records_and_literals"]:
+ if args.text_index in [
+ "from_literals",
+ "from_text_records_and_literals",
+ ]:
  index_cmd += " --text-words-from-literals"
  if args.stxxl_memory:
  index_cmd += f" --stxxl-memory {args.stxxl_memory}"
+ if args.parser_buffer_size:
+ index_cmd += f" --parser-buffer-size {args.parser_buffer_size}"
  index_cmd += f" | tee {args.name}.index-log.txt"

  # If the total file size is larger than 10 GB, set ulimit (such that a
  # large number of open files is allowed).
  total_file_size = get_total_file_size(shlex.split(args.input_files))
- if total_file_size > 1e10:
+ if args.ulimit is not None:
+ index_cmd = f"ulimit -Sn {args.ulimit}; {index_cmd}"
+ elif total_file_size > 1e10:
  index_cmd = f"ulimit -Sn 1048576; {index_cmd}"

  # Run the command in a container (if so desired).
@@ -234,7 +254,8 @@ class IndexCommand(QleverCommand):

  # Command for writing the settings JSON to a file.
  settings_json_cmd = (
- f"echo {shlex.quote(args.settings_json)} " f"> {args.name}.settings.json"
+ f"echo {shlex.quote(args.settings_json)} "
+ f"> {args.name}.settings.json"
  )

  # Show the command line.
@@ -279,9 +300,15 @@ class IndexCommand(QleverCommand):
  return False

  # Remove already existing container.
- if args.system in Containerize.supported_systems() and args.overwrite_existing:
+ if (
+ args.system in Containerize.supported_systems()
+ and args.overwrite_existing
+ ):
  if Containerize.is_running(args.system, args.index_container):
- log.info("Another index process is running, trying to stop " "it ...")
+ log.info(
+ "Another index process is running, trying to stop "
+ "it ..."
+ )
  log.info("")
  try:
  run_command(f"{args.system} rm -f {args.index_container}")
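
The new `ulimit` handling above falls back to a file-size heuristic when `ULIMIT` is not set. `get_total_file_size` is imported from `qlever.util` and its body is not part of this diff; the sketch below is an assumption about what it does, included only to make the 1e10 (10 GB) threshold concrete.

# Assumed sketch of qlever.util.get_total_file_size (the real implementation
# is not shown in this diff): sum the sizes of all files matching the given
# patterns, which is what the 1e10 (10 GB) threshold above compares against.
import glob
import shlex
from pathlib import Path


def get_total_file_size(patterns: list[str]) -> int:
    return sum(
        Path(path).stat().st_size
        for pattern in patterns
        for path in glob.glob(pattern)
    )


# INPUT_FILES is split with shlex, so several space-separated patterns work.
print(get_total_file_size(shlex.split("osm-planet.ttl.bz2 *.extra.ttl")))
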
qlever/commands/query.py CHANGED
@@ -15,7 +15,21 @@ class QueryCommand(QleverCommand):
  """

  def __init__(self):
- pass
+ self.predefined_queries = {
+ "all-predicates": (
+ "SELECT (?p AS ?predicate) (COUNT(?p) AS ?count) "
+ "WHERE { ?s ?p ?o } "
+ "GROUP BY ?p ORDER BY DESC(?count)"
+ ),
+ "all-graphs": (
+ "SELECT ?g (COUNT(?g) AS ?count) "
+ "WHERE { GRAPH ?g { ?s ?p ?o } } "
+ "GROUP BY ?g ORDER BY DESC(?count)"
+ ),
+ "ten-random-triples": (
+ "SELECT * WHERE { ?s ?p ?o } ORDER BY RAND() LIMIT 10"
+ ),
+ }

  def description(self) -> str:
  return "Send a query to a SPARQL endpoint"
@@ -34,6 +48,12 @@ class QueryCommand(QleverCommand):
  default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
  help="SPARQL query to send",
  )
+ subparser.add_argument(
+ "--predefined-query",
+ type=str,
+ choices=self.predefined_queries.keys(),
+ help="Use a predefined query",
+ )
  subparser.add_argument(
  "--pin-to-cache",
  action="store_true",
@@ -64,6 +84,10 @@ class QueryCommand(QleverCommand):
  )

  def execute(self, args) -> bool:
+ # Use a predefined query if requested.
+ if args.predefined_query:
+ args.query = self.predefined_queries[args.predefined_query]
+
  # When pinning to the cache, set `send=0` and request media type
  # `application/qlever-results+json` so that we get the result size.
  # Also, we need to provide the access token.
@@ -83,7 +107,9 @@

  # Show what the command will do.
  sparql_endpoint = (
- args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
+ args.sparql_endpoint
+ if args.sparql_endpoint
+ else f"localhost:{args.port}"
  )
  curl_cmd = (
  f"curl -s {sparql_endpoint}"
@@ -102,7 +128,10 @@
  time_msecs = round(1000 * (time.time() - start_time))
  if not args.no_time and args.log_level != "NO_LOG":
  log.info("")
- log.info(f"Query processing time (end-to-end):" f" {time_msecs:,d} ms")
+ log.info(
+ f"Query processing time (end-to-end):"
+ f" {time_msecs:,d} ms"
+ )
  except Exception as e:
  if args.log_level == "DEBUG":
  traceback.print_exc()
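
The new `--predefined-query` option resolves to one of the SPARQL strings defined in `__init__` above, so `qlever query --predefined-query all-graphs`, for example, sends the GROUP BY ?g query to the configured endpoint. An illustrative Python equivalent, assuming the server accepts a standard SPARQL-over-HTTP POST and is running on localhost:7007 as in the osm-planet Qleverfile above:

# Illustrative only: send the "all-graphs" predefined query to a local
# endpoint, roughly what the curl command built in query.py does.
import json
import urllib.parse
import urllib.request

query = (
    "SELECT ?g (COUNT(?g) AS ?count) "
    "WHERE { GRAPH ?g { ?s ?p ?o } } "
    "GROUP BY ?g ORDER BY DESC(?count)"
)
request = urllib.request.Request(
    "http://localhost:7007",
    data=urllib.parse.urlencode({"query": query}).encode(),
    headers={"Accept": "application/sparql-results+json"},
)
with urllib.request.urlopen(request) as response:
    results = json.load(response)
for binding in results["results"]["bindings"]:
    print(binding["g"]["value"], binding["count"]["value"])
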
@@ -56,7 +56,9 @@ class SystemInfoCommand(QleverCommand):

  def execute(self, args) -> bool:
  # Say what the command is doing.
- self.show("Show system information and Qleverfile", only_show=args.show)
+ self.show(
+ "Show system information and Qleverfile", only_show=args.show
+ )
  if args.show:
  return True

@@ -80,13 +82,15 @@
  memory_total = psutil.virtual_memory().total / (1024.0**3)
  memory_available = psutil.virtual_memory().available / (1024.0**3)
  log.info(
- f"RAM: {memory_total:.1f} GB total, " f"{memory_available:.1f} GB available"
+ f"RAM: {memory_total:.1f} GB total, "
+ f"{memory_available:.1f} GB available"
  )
  num_cores = psutil.cpu_count(logical=False)
  num_threads = psutil.cpu_count(logical=True)
  cpu_freq = psutil.cpu_freq().max / 1000
  log.info(
- f"CPU: {num_cores} Cores, " f"{num_threads} Threads @ {cpu_freq:.2f} GHz"
+ f"CPU: {num_cores} Cores, "
+ f"{num_threads} Threads @ {cpu_freq:.2f} GHz"
  )

  cwd = Path.cwd()
qlever/commands/ui.py CHANGED
@@ -27,7 +27,13 @@ class UiCommand(QleverCommand):
  return {
  "data": ["name"],
  "server": ["host_name", "port"],
- "ui": ["ui_port", "ui_config", "ui_system", "ui_image", "ui_container"],
+ "ui": [
+ "ui_port",
+ "ui_config",
+ "ui_system",
+ "ui_image",
+ "ui_container",
+ ],
  }

  def additional_arguments(self, subparser) -> None:
@@ -35,7 +41,9 @@ class UiCommand(QleverCommand):

  def execute(self, args) -> bool:
  # If QLEVER_OVERRIDE_DISABLE_UI is set, this command is disabled.
- qlever_is_running_in_container = environ.get("QLEVER_IS_RUNNING_IN_CONTAINER")
+ qlever_is_running_in_container = environ.get(
+ "QLEVER_IS_RUNNING_IN_CONTAINER"
+ )
  if qlever_is_running_in_container:
  log.error(
  "The environment variable `QLEVER_OVERRIDE_DISABLE_UI` is set, "
@@ -67,7 +75,9 @@ class UiCommand(QleverCommand):
  f'{args.ui_config} {server_url}"'
  )
  self.show(
- "\n".join(["Stop running containers", pull_cmd, run_cmd, exec_cmd]),
+ "\n".join(
+ ["Stop running containers", pull_cmd, run_cmd, exec_cmd]
+ ),
  only_show=args.show,
  )
  if qlever_is_running_in_container:
@@ -77,7 +87,9 @@ class UiCommand(QleverCommand):

  # Stop running containers.
  for container_system in Containerize.supported_systems():
- Containerize.stop_and_remove_container(container_system, args.ui_container)
+ Containerize.stop_and_remove_container(
+ container_system, args.ui_container
+ )

  # Check if the UI port is already being used.
  if is_port_used(args.ui_port):
qlever/log.py CHANGED
@@ -10,6 +10,7 @@ class QleverLogFormatter(logging.Formatter):
  """
  Custom formatter for logging.
  """
+
  def format(self, record):
  message = record.getMessage()
  if record.levelno == logging.DEBUG:
@@ -34,7 +35,7 @@ log_levels = {
  "WARNING": logging.WARNING,
  "ERROR": logging.ERROR,
  "CRITICAL": logging.CRITICAL,
- "NO_LOG": logging.CRITICAL + 1
+ "NO_LOG": logging.CRITICAL + 1,
  }
