qlever 0.5.17__py3-none-any.whl → 0.5.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.dblp +1 -0
- qlever/Qleverfiles/Qleverfile.ohm-planet +15 -12
- qlever/Qleverfiles/Qleverfile.osm-planet +17 -15
- qlever/Qleverfiles/Qleverfile.uniprot +2 -3
- qlever/__init__.py +9 -4
- qlever/command.py +6 -5
- qlever/commands/add_text_index.py +47 -28
- qlever/commands/example_queries.py +3 -2
- qlever/commands/index.py +41 -14
- qlever/commands/query.py +32 -3
- qlever/commands/settings.py +1 -0
- qlever/commands/system_info.py +7 -3
- qlever/commands/ui.py +16 -4
- qlever/log.py +2 -1
- qlever/qlever_old.py +607 -369
- qlever/qleverfile.py +29 -6
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/METADATA +1 -1
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/RECORD +22 -22
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/WHEEL +1 -1
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/LICENSE +0 -0
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.17.dist-info → qlever-0.5.19.dist-info}/top_level.txt +0 -0
|
@@ -1,33 +1,36 @@
|
|
|
1
|
-
# Qleverfile for
|
|
1
|
+
# Qleverfile for OpenHistoricalMap, use with the QLever CLI (`pip install qlever`)
|
|
2
2
|
#
|
|
3
|
-
# qlever get-data # ~
|
|
4
|
-
# qlever index # ~
|
|
5
|
-
# qlever start #
|
|
3
|
+
# qlever get-data # ~1 hour, ~14 GB (ttl.gz), ~3.4 B triples (with osm2rdf)
|
|
4
|
+
# qlever index # ~1 hour, ~10 GB RAM, ~60 GB index size on disk
|
|
5
|
+
# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
|
|
6
6
|
#
|
|
7
|
-
#
|
|
7
|
+
# Measured on an AMD Ryzen 9 5900X with 128 GB RAM and 1 x 4 TB NVMe (04.01.2025)
|
|
8
8
|
|
|
9
9
|
[data]
|
|
10
10
|
NAME = ohm-planet
|
|
11
11
|
GET_DATA_URL = https://planet.openhistoricalmap.org/planet
|
|
12
12
|
CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
|
|
13
|
-
GET_DATA_CMD_1 =
|
|
14
|
-
GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --cache . --add-
|
|
15
|
-
GET_DATA_CMD =
|
|
13
|
+
GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
|
|
14
|
+
GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --add-way-node-order --no-untagged-nodes-geometric-relations 2>&1 | tee ${NAME}.osm2rdf-log.txt
|
|
15
|
+
GET_DATA_CMD = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
|
|
16
16
|
VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
|
|
17
17
|
DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
|
|
18
18
|
|
|
19
19
|
[index]
|
|
20
|
-
INPUT_FILES
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
INPUT_FILES = ${data:NAME}.ttl.gz
|
|
21
|
+
MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
|
|
22
|
+
STXXL_MEMORY = 5G
|
|
23
|
+
PARSER_BUFFER_SIZE = 50M
|
|
24
|
+
SETTINGS_JSON = { "num-triples-per-batch": 5000000 }
|
|
23
25
|
|
|
24
26
|
[server]
|
|
25
27
|
PORT = 7037
|
|
26
28
|
ACCESS_TOKEN = ${data:NAME}
|
|
27
29
|
MEMORY_FOR_QUERIES = 10G
|
|
28
30
|
CACHE_MAX_SIZE = 5G
|
|
31
|
+
TIMEOUT = 600s
|
|
29
32
|
CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
|
|
30
|
-
WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/
|
|
33
|
+
WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/petrimaps/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
|
|
31
34
|
|
|
32
35
|
[runtime]
|
|
33
36
|
SYSTEM = docker
|
|
@@ -1,32 +1,34 @@
|
|
|
1
|
-
# Qleverfile for OSM Planet, use with the
|
|
1
|
+
# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
|
|
2
2
|
#
|
|
3
|
-
# qlever get-data #
|
|
4
|
-
# qlever index #
|
|
5
|
-
# qlever start #
|
|
3
|
+
# qlever get-data # downloads ~400 GB (ttl.bz2), ~100 B triples
|
|
4
|
+
# qlever index # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
|
|
5
|
+
# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
|
|
6
6
|
#
|
|
7
|
-
#
|
|
8
|
-
# and edit the Qleverfile to specify the country,
|
|
7
|
+
# Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)
|
|
9
8
|
|
|
10
9
|
[data]
|
|
11
10
|
NAME = osm-planet
|
|
12
11
|
DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
|
|
13
|
-
GET_DATA_CMD =
|
|
12
|
+
GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${DATA_URL} | tee ${NAME}.download-log.txt
|
|
14
13
|
VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
|
|
15
14
|
DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
|
|
16
15
|
|
|
17
16
|
[index]
|
|
18
|
-
INPUT_FILES
|
|
19
|
-
CAT_INPUT_FILES
|
|
20
|
-
|
|
21
|
-
|
|
17
|
+
INPUT_FILES = ${data:NAME}.ttl.bz2
|
|
18
|
+
CAT_INPUT_FILES = lbzcat -n 2 ${INPUT_FILES}
|
|
19
|
+
PARALLEL_PARSING = true
|
|
20
|
+
PARSER_BUFFER_SIZE = 100M
|
|
21
|
+
STXXL_MEMORY = 40G
|
|
22
|
+
SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
|
|
23
|
+
ULIMIT = 10000
|
|
22
24
|
|
|
23
25
|
[server]
|
|
24
26
|
PORT = 7007
|
|
25
27
|
ACCESS_TOKEN = ${data:NAME}
|
|
26
|
-
MEMORY_FOR_QUERIES =
|
|
27
|
-
CACHE_MAX_SIZE =
|
|
28
|
-
CACHE_MAX_SIZE_SINGLE_ENTRY =
|
|
29
|
-
TIMEOUT =
|
|
28
|
+
MEMORY_FOR_QUERIES = 40G
|
|
29
|
+
CACHE_MAX_SIZE = 20G
|
|
30
|
+
CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
|
|
31
|
+
TIMEOUT = 600s
|
|
30
32
|
|
|
31
33
|
[runtime]
|
|
32
34
|
SYSTEM = docker
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 9950X)
|
|
5
5
|
# qlever start # starts the server (takes a few seconds)
|
|
6
6
|
#
|
|
7
|
-
# Install packages: sudo apt install -y libxml2-utils parallel xz-utils wget
|
|
7
|
+
# Install packages: sudo apt install -y libxml2-utils raptor2-utils parallel xz-utils wget
|
|
8
8
|
# Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
|
|
9
9
|
#
|
|
10
10
|
# Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
|
|
@@ -53,8 +53,7 @@ MULTI_INPUT_JSON = [{ "cmd": "zcat {}", "graph": "http://sparql.uniprot.org/unip
|
|
|
53
53
|
{ "cmd": "zcat ${data:TTL_DIR}/tissues.ttl.gz", "graph": "http://sparql.uniprot.org/tissues" },
|
|
54
54
|
{ "cmd": "zcat ${data:TTL_DIR}/rhea.ttl.gz", "graph": "https://sparql.rhea-db.org/rhea" },
|
|
55
55
|
{ "cmd": "zcat ${data:TTL_DIR}/examples_uniprot.ttl.gz", "graph": "http://sparql.uniprot.org/.well-known/sparql-examples" },
|
|
56
|
-
{ "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }
|
|
57
|
-
{ "cmd": "zcat ${data:TTL_DIR}/void.ttl.gz", "graph": "http://rdfs.org/ns/void" }]
|
|
56
|
+
{ "cmd": "zcat ${data:TTL_DIR}/core.ttl.gz", "graph": "http://purl.uniprot.org/core" }]
|
|
58
57
|
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
|
|
59
58
|
STXXL_MEMORY = 60G
|
|
60
59
|
|
qlever/__init__.py
CHANGED
|
@@ -13,8 +13,11 @@ def snake_to_camel(str):
|
|
|
13
13
|
# Each module in `qlever/commands` corresponds to a command. The name
|
|
14
14
|
# of the command is the base name of the module file.
|
|
15
15
|
package_path = Path(__file__).parent
|
|
16
|
-
command_names = [
|
|
17
|
-
|
|
16
|
+
command_names = [
|
|
17
|
+
Path(p).stem
|
|
18
|
+
for p in package_path.glob("commands/*.py")
|
|
19
|
+
if p.name != "__init__.py"
|
|
20
|
+
]
|
|
18
21
|
|
|
19
22
|
# Dynamically load all the command classes and create an object for each.
|
|
20
23
|
command_objects = {}
|
|
@@ -24,8 +27,10 @@ for command_name in command_names:
|
|
|
24
27
|
try:
|
|
25
28
|
module = __import__(module_path, fromlist=[class_name])
|
|
26
29
|
except ImportError as e:
|
|
27
|
-
raise Exception(
|
|
28
|
-
|
|
30
|
+
raise Exception(
|
|
31
|
+
f"Could not import class {class_name} from module "
|
|
32
|
+
f"{module_path} for command {command_name}: {e}"
|
|
33
|
+
)
|
|
29
34
|
# Create an object of the class and store it in the dictionary. For the
|
|
30
35
|
# commands, take - instead of _.
|
|
31
36
|
command_class = getattr(module, class_name)
|
qlever/command.py
CHANGED
|
@@ -33,7 +33,6 @@ class QleverCommand(ABC):
|
|
|
33
33
|
|
|
34
34
|
@abstractmethod
|
|
35
35
|
def should_have_qleverfile(self) -> bool:
|
|
36
|
-
|
|
37
36
|
"""
|
|
38
37
|
Return `True` if the command should have a Qleverfile, `False`
|
|
39
38
|
otherwise. If a command should have a Qleverfile, but none is
|
|
@@ -43,7 +42,7 @@ class QleverCommand(ABC):
|
|
|
43
42
|
pass
|
|
44
43
|
|
|
45
44
|
@abstractmethod
|
|
46
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
45
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
47
46
|
"""
|
|
48
47
|
Return the arguments relevant for this command. This must be a subset of
|
|
49
48
|
the names of `all_arguments` defined in `QleverConfig`. Only these
|
|
@@ -81,6 +80,8 @@ class QleverCommand(ABC):
|
|
|
81
80
|
log.info(colored(command_description, "blue"))
|
|
82
81
|
log.info("")
|
|
83
82
|
if only_show:
|
|
84
|
-
log.info(
|
|
85
|
-
|
|
86
|
-
|
|
83
|
+
log.info(
|
|
84
|
+
'You called "qlever ... --show", therefore the command '
|
|
85
|
+
'is only shown, but not executed (omit the "--show" to '
|
|
86
|
+
"execute it)"
|
|
87
|
+
)
|
|
@@ -17,22 +17,29 @@ class AddTextIndexCommand(QleverCommand):
|
|
|
17
17
|
pass
|
|
18
18
|
|
|
19
19
|
def description(self) -> str:
|
|
20
|
-
return
|
|
20
|
+
return "Add text index to an index built with `qlever index`"
|
|
21
21
|
|
|
22
22
|
def should_have_qleverfile(self) -> bool:
|
|
23
23
|
return True
|
|
24
24
|
|
|
25
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
26
|
-
return {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
"
|
|
25
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
26
|
+
return {
|
|
27
|
+
"data": ["name"],
|
|
28
|
+
"index": [
|
|
29
|
+
"index_binary",
|
|
30
|
+
"text_index",
|
|
31
|
+
"text_words_file",
|
|
32
|
+
"text_docs_file",
|
|
33
|
+
],
|
|
34
|
+
"runtime": ["system", "image", "index_container"],
|
|
35
|
+
}
|
|
30
36
|
|
|
31
37
|
def additional_arguments(self, subparser) -> None:
|
|
32
38
|
subparser.add_argument(
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
39
|
+
"--overwrite-existing",
|
|
40
|
+
action="store_true",
|
|
41
|
+
help="Overwrite existing text index files",
|
|
42
|
+
)
|
|
36
43
|
|
|
37
44
|
def execute(self, args) -> bool:
|
|
38
45
|
# Check that there is actually something to add.
|
|
@@ -42,24 +49,31 @@ class AddTextIndexCommand(QleverCommand):
|
|
|
42
49
|
|
|
43
50
|
# Construct the command line.
|
|
44
51
|
add_text_index_cmd = f"{args.index_binary} -A -i {args.name}"
|
|
45
|
-
if args.text_index in
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
52
|
+
if args.text_index in [
|
|
53
|
+
"from_text_records",
|
|
54
|
+
"from_text_records_and_literals",
|
|
55
|
+
]:
|
|
56
|
+
add_text_index_cmd += (
|
|
57
|
+
f" -w {args.text_words_file}" f" -d {args.text_docs_file}"
|
|
58
|
+
)
|
|
59
|
+
if args.text_index in [
|
|
60
|
+
"from_literals",
|
|
61
|
+
"from_text_records_and_literals",
|
|
62
|
+
]:
|
|
51
63
|
add_text_index_cmd += " --text-words-from-literals"
|
|
52
64
|
add_text_index_cmd += f" | tee {args.name}.text-index-log.txt"
|
|
53
65
|
|
|
54
66
|
# Run the command in a container (if so desired).
|
|
55
67
|
if args.system in Containerize.supported_systems():
|
|
56
68
|
add_text_index_cmd = Containerize().containerize_command(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
69
|
+
add_text_index_cmd,
|
|
70
|
+
args.system,
|
|
71
|
+
"run --rm",
|
|
72
|
+
args.image,
|
|
73
|
+
args.index_container,
|
|
74
|
+
volumes=[("$(pwd)", "/index")],
|
|
75
|
+
working_directory="/index",
|
|
76
|
+
)
|
|
63
77
|
|
|
64
78
|
# Show the command line.
|
|
65
79
|
self.show(add_text_index_cmd, only_show=args.show)
|
|
@@ -71,17 +85,22 @@ class AddTextIndexCommand(QleverCommand):
|
|
|
71
85
|
try:
|
|
72
86
|
run_command(f"{args.index_binary} --help")
|
|
73
87
|
except Exception as e:
|
|
74
|
-
log.error(
|
|
75
|
-
|
|
76
|
-
|
|
88
|
+
log.error(
|
|
89
|
+
f'Running "{args.index_binary}" failed ({e}), '
|
|
90
|
+
f"set `--index-binary` to a different binary or "
|
|
91
|
+
f"use `--container_system`"
|
|
92
|
+
)
|
|
77
93
|
return False
|
|
78
94
|
|
|
79
95
|
# Check if text index files already exist.
|
|
80
96
|
existing_text_index_files = get_existing_index_files(
|
|
81
|
-
|
|
97
|
+
f"{args.name}.text.*"
|
|
98
|
+
)
|
|
82
99
|
if len(existing_text_index_files) > 0 and not args.overwrite_existing:
|
|
83
|
-
log.error(
|
|
84
|
-
|
|
100
|
+
log.error(
|
|
101
|
+
"Text index files found, if you want to overwrite them, "
|
|
102
|
+
"use --overwrite-existing"
|
|
103
|
+
)
|
|
85
104
|
log.info("")
|
|
86
105
|
log.info(f"Index files found: {existing_text_index_files}")
|
|
87
106
|
return False
|
|
@@ -90,7 +109,7 @@ class AddTextIndexCommand(QleverCommand):
|
|
|
90
109
|
try:
|
|
91
110
|
subprocess.run(add_text_index_cmd, shell=True, check=True)
|
|
92
111
|
except Exception as e:
|
|
93
|
-
log.error(f
|
|
112
|
+
log.error(f'Running "{add_text_index_cmd}" failed ({e})')
|
|
94
113
|
return False
|
|
95
114
|
|
|
96
115
|
return True
|
|
@@ -17,14 +17,15 @@ from qlever.util import run_command, run_curl_command
|
|
|
17
17
|
|
|
18
18
|
class ExampleQueriesCommand(QleverCommand):
|
|
19
19
|
"""
|
|
20
|
-
Class for
|
|
20
|
+
Class for running a given sequence of example queries and showing
|
|
21
|
+
their processing times and result sizes.
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
24
|
def __init__(self):
|
|
24
25
|
pass
|
|
25
26
|
|
|
26
27
|
def description(self) -> str:
|
|
27
|
-
return "
|
|
28
|
+
return "Run the given queries and show their processing times and result sizes"
|
|
28
29
|
|
|
29
30
|
def should_have_qleverfile(self) -> bool:
|
|
30
31
|
return False
|
qlever/commands/index.py
CHANGED
|
@@ -2,13 +2,17 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import glob
|
|
4
4
|
import json
|
|
5
|
-
import shlex
|
|
6
5
|
import re
|
|
6
|
+
import shlex
|
|
7
7
|
|
|
8
8
|
from qlever.command import QleverCommand
|
|
9
9
|
from qlever.containerize import Containerize
|
|
10
10
|
from qlever.log import log
|
|
11
|
-
from qlever.util import
|
|
11
|
+
from qlever.util import (
|
|
12
|
+
get_existing_index_files,
|
|
13
|
+
get_total_file_size,
|
|
14
|
+
run_command,
|
|
15
|
+
)
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
class IndexCommand(QleverCommand):
|
|
@@ -36,9 +40,11 @@ class IndexCommand(QleverCommand):
|
|
|
36
40
|
"settings_json",
|
|
37
41
|
"index_binary",
|
|
38
42
|
"only_pso_and_pos_permutations",
|
|
43
|
+
"ulimit",
|
|
39
44
|
"use_patterns",
|
|
40
45
|
"text_index",
|
|
41
46
|
"stxxl_memory",
|
|
47
|
+
"parser_buffer_size",
|
|
42
48
|
],
|
|
43
49
|
"runtime": ["system", "image", "index_container"],
|
|
44
50
|
}
|
|
@@ -48,7 +54,7 @@ class IndexCommand(QleverCommand):
|
|
|
48
54
|
"--overwrite-existing",
|
|
49
55
|
action="store_true",
|
|
50
56
|
default=False,
|
|
51
|
-
help="Overwrite an existing index, think twice before using
|
|
57
|
+
help="Overwrite an existing index, think twice before using this",
|
|
52
58
|
)
|
|
53
59
|
|
|
54
60
|
# Exception for invalid JSON.
|
|
@@ -76,7 +82,8 @@ class IndexCommand(QleverCommand):
|
|
|
76
82
|
# Check that it is an array of length at least one.
|
|
77
83
|
if not isinstance(input_specs, list):
|
|
78
84
|
raise self.InvalidInputJson(
|
|
79
|
-
"`MULTI_INPUT_JSON` must be a JSON array",
|
|
85
|
+
"`MULTI_INPUT_JSON` must be a JSON array",
|
|
86
|
+
args.multi_input_json,
|
|
80
87
|
)
|
|
81
88
|
if len(input_specs) == 0:
|
|
82
89
|
raise self.InvalidInputJson(
|
|
@@ -90,13 +97,15 @@ class IndexCommand(QleverCommand):
|
|
|
90
97
|
# Check that `input_spec` is a dictionary.
|
|
91
98
|
if not isinstance(input_spec, dict):
|
|
92
99
|
raise self.InvalidInputJson(
|
|
93
|
-
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
|
|
100
|
+
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
|
|
101
|
+
"object",
|
|
94
102
|
input_spec,
|
|
95
103
|
)
|
|
96
104
|
# For each `input_spec`, we must have a command.
|
|
97
105
|
if "cmd" not in input_spec:
|
|
98
106
|
raise self.InvalidInputJson(
|
|
99
|
-
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
107
|
+
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
108
|
+
"key `cmd`",
|
|
100
109
|
input_spec,
|
|
101
110
|
)
|
|
102
111
|
# If the command contains a `{}` placeholder, we need a `for-each`
|
|
@@ -204,21 +213,32 @@ class IndexCommand(QleverCommand):
|
|
|
204
213
|
index_cmd += " --only-pso-and-pos-permutations --no-patterns"
|
|
205
214
|
if not args.use_patterns:
|
|
206
215
|
index_cmd += " --no-patterns"
|
|
207
|
-
if args.text_index in [
|
|
216
|
+
if args.text_index in [
|
|
217
|
+
"from_text_records",
|
|
218
|
+
"from_text_records_and_literals",
|
|
219
|
+
]:
|
|
208
220
|
index_cmd += (
|
|
209
|
-
f" -w {args.name}.wordsfile.tsv"
|
|
221
|
+
f" -w {args.name}.wordsfile.tsv"
|
|
222
|
+
f" -d {args.name}.docsfile.tsv"
|
|
210
223
|
)
|
|
211
|
-
if args.text_index in [
|
|
224
|
+
if args.text_index in [
|
|
225
|
+
"from_literals",
|
|
226
|
+
"from_text_records_and_literals",
|
|
227
|
+
]:
|
|
212
228
|
index_cmd += " --text-words-from-literals"
|
|
213
229
|
if args.stxxl_memory:
|
|
214
230
|
index_cmd += f" --stxxl-memory {args.stxxl_memory}"
|
|
231
|
+
if args.parser_buffer_size:
|
|
232
|
+
index_cmd += f" --parser-buffer-size {args.parser_buffer_size}"
|
|
215
233
|
index_cmd += f" | tee {args.name}.index-log.txt"
|
|
216
234
|
|
|
217
235
|
# If `ulimit` is given, use it; otherwise, if the total file size is larger than 10 GB, set ulimit (such that a
|
|
218
236
|
# large number of open files is allowed).
|
|
219
237
|
total_file_size = get_total_file_size(shlex.split(args.input_files))
|
|
220
|
-
if
|
|
221
|
-
index_cmd = f"ulimit -Sn
|
|
238
|
+
if args.ulimit is not None:
|
|
239
|
+
index_cmd = f"ulimit -Sn {args.ulimit} && {index_cmd}"
|
|
240
|
+
elif total_file_size > 1e10:
|
|
241
|
+
index_cmd = f"ulimit -Sn 500000 && {index_cmd}"
|
|
222
242
|
|
|
223
243
|
# Run the command in a container (if so desired).
|
|
224
244
|
if args.system in Containerize.supported_systems():
|
|
@@ -234,7 +254,8 @@ class IndexCommand(QleverCommand):
|
|
|
234
254
|
|
|
235
255
|
# Command for writing the settings JSON to a file.
|
|
236
256
|
settings_json_cmd = (
|
|
237
|
-
f"echo {shlex.quote(args.settings_json)} "
|
|
257
|
+
f"echo {shlex.quote(args.settings_json)} "
|
|
258
|
+
f"> {args.name}.settings.json"
|
|
238
259
|
)
|
|
239
260
|
|
|
240
261
|
# Show the command line.
|
|
@@ -279,9 +300,15 @@ class IndexCommand(QleverCommand):
|
|
|
279
300
|
return False
|
|
280
301
|
|
|
281
302
|
# Remove already existing container.
|
|
282
|
-
if
|
|
303
|
+
if (
|
|
304
|
+
args.system in Containerize.supported_systems()
|
|
305
|
+
and args.overwrite_existing
|
|
306
|
+
):
|
|
283
307
|
if Containerize.is_running(args.system, args.index_container):
|
|
284
|
-
log.info(
|
|
308
|
+
log.info(
|
|
309
|
+
"Another index process is running, trying to stop "
|
|
310
|
+
"it ..."
|
|
311
|
+
)
|
|
285
312
|
log.info("")
|
|
286
313
|
try:
|
|
287
314
|
run_command(f"{args.system} rm -f {args.index_container}")
|
qlever/commands/query.py
CHANGED
|
@@ -15,7 +15,21 @@ class QueryCommand(QleverCommand):
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
def __init__(self):
|
|
18
|
-
|
|
18
|
+
self.predefined_queries = {
|
|
19
|
+
"all-predicates": (
|
|
20
|
+
"SELECT (?p AS ?predicate) (COUNT(?p) AS ?count) "
|
|
21
|
+
"WHERE { ?s ?p ?o } "
|
|
22
|
+
"GROUP BY ?p ORDER BY DESC(?count)"
|
|
23
|
+
),
|
|
24
|
+
"all-graphs": (
|
|
25
|
+
"SELECT ?g (COUNT(?g) AS ?count) "
|
|
26
|
+
"WHERE { GRAPH ?g { ?s ?p ?o } } "
|
|
27
|
+
"GROUP BY ?g ORDER BY DESC(?count)"
|
|
28
|
+
),
|
|
29
|
+
"ten-random-triples": (
|
|
30
|
+
"SELECT * WHERE { ?s ?p ?o } ORDER BY RAND() LIMIT 10"
|
|
31
|
+
),
|
|
32
|
+
}
|
|
19
33
|
|
|
20
34
|
def description(self) -> str:
|
|
21
35
|
return "Send a query to a SPARQL endpoint"
|
|
@@ -34,6 +48,12 @@ class QueryCommand(QleverCommand):
|
|
|
34
48
|
default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
|
|
35
49
|
help="SPARQL query to send",
|
|
36
50
|
)
|
|
51
|
+
subparser.add_argument(
|
|
52
|
+
"--predefined-query",
|
|
53
|
+
type=str,
|
|
54
|
+
choices=self.predefined_queries.keys(),
|
|
55
|
+
help="Use a predefined query",
|
|
56
|
+
)
|
|
37
57
|
subparser.add_argument(
|
|
38
58
|
"--pin-to-cache",
|
|
39
59
|
action="store_true",
|
|
@@ -64,6 +84,10 @@ class QueryCommand(QleverCommand):
|
|
|
64
84
|
)
|
|
65
85
|
|
|
66
86
|
def execute(self, args) -> bool:
|
|
87
|
+
# Use a predefined query if requested.
|
|
88
|
+
if args.predefined_query:
|
|
89
|
+
args.query = self.predefined_queries[args.predefined_query]
|
|
90
|
+
|
|
67
91
|
# When pinning to the cache, set `send=0` and request media type
|
|
68
92
|
# `application/qlever-results+json` so that we get the result size.
|
|
69
93
|
# Also, we need to provide the access token.
|
|
@@ -83,7 +107,9 @@ class QueryCommand(QleverCommand):
|
|
|
83
107
|
|
|
84
108
|
# Show what the command will do.
|
|
85
109
|
sparql_endpoint = (
|
|
86
|
-
args.sparql_endpoint
|
|
110
|
+
args.sparql_endpoint
|
|
111
|
+
if args.sparql_endpoint
|
|
112
|
+
else f"localhost:{args.port}"
|
|
87
113
|
)
|
|
88
114
|
curl_cmd = (
|
|
89
115
|
f"curl -s {sparql_endpoint}"
|
|
@@ -102,7 +128,10 @@ class QueryCommand(QleverCommand):
|
|
|
102
128
|
time_msecs = round(1000 * (time.time() - start_time))
|
|
103
129
|
if not args.no_time and args.log_level != "NO_LOG":
|
|
104
130
|
log.info("")
|
|
105
|
-
log.info(
|
|
131
|
+
log.info(
|
|
132
|
+
f"Query processing time (end-to-end):"
|
|
133
|
+
f" {time_msecs:,d} ms"
|
|
134
|
+
)
|
|
106
135
|
except Exception as e:
|
|
107
136
|
if args.log_level == "DEBUG":
|
|
108
137
|
traceback.print_exc()
|
qlever/commands/settings.py
CHANGED
qlever/commands/system_info.py
CHANGED
|
@@ -56,7 +56,9 @@ class SystemInfoCommand(QleverCommand):
|
|
|
56
56
|
|
|
57
57
|
def execute(self, args) -> bool:
|
|
58
58
|
# Say what the command is doing.
|
|
59
|
-
self.show(
|
|
59
|
+
self.show(
|
|
60
|
+
"Show system information and Qleverfile", only_show=args.show
|
|
61
|
+
)
|
|
60
62
|
if args.show:
|
|
61
63
|
return True
|
|
62
64
|
|
|
@@ -80,13 +82,15 @@ class SystemInfoCommand(QleverCommand):
|
|
|
80
82
|
memory_total = psutil.virtual_memory().total / (1024.0**3)
|
|
81
83
|
memory_available = psutil.virtual_memory().available / (1024.0**3)
|
|
82
84
|
log.info(
|
|
83
|
-
f"RAM: {memory_total:.1f} GB total, "
|
|
85
|
+
f"RAM: {memory_total:.1f} GB total, "
|
|
86
|
+
f"{memory_available:.1f} GB available"
|
|
84
87
|
)
|
|
85
88
|
num_cores = psutil.cpu_count(logical=False)
|
|
86
89
|
num_threads = psutil.cpu_count(logical=True)
|
|
87
90
|
cpu_freq = psutil.cpu_freq().max / 1000
|
|
88
91
|
log.info(
|
|
89
|
-
f"CPU: {num_cores} Cores, "
|
|
92
|
+
f"CPU: {num_cores} Cores, "
|
|
93
|
+
f"{num_threads} Threads @ {cpu_freq:.2f} GHz"
|
|
90
94
|
)
|
|
91
95
|
|
|
92
96
|
cwd = Path.cwd()
|
qlever/commands/ui.py
CHANGED
|
@@ -27,7 +27,13 @@ class UiCommand(QleverCommand):
|
|
|
27
27
|
return {
|
|
28
28
|
"data": ["name"],
|
|
29
29
|
"server": ["host_name", "port"],
|
|
30
|
-
"ui": [
|
|
30
|
+
"ui": [
|
|
31
|
+
"ui_port",
|
|
32
|
+
"ui_config",
|
|
33
|
+
"ui_system",
|
|
34
|
+
"ui_image",
|
|
35
|
+
"ui_container",
|
|
36
|
+
],
|
|
31
37
|
}
|
|
32
38
|
|
|
33
39
|
def additional_arguments(self, subparser) -> None:
|
|
@@ -35,7 +41,9 @@ class UiCommand(QleverCommand):
|
|
|
35
41
|
|
|
36
42
|
def execute(self, args) -> bool:
|
|
37
43
|
# If QLEVER_IS_RUNNING_IN_CONTAINER is set, this command is disabled.
|
|
38
|
-
qlever_is_running_in_container = environ.get(
|
|
44
|
+
qlever_is_running_in_container = environ.get(
|
|
45
|
+
"QLEVER_IS_RUNNING_IN_CONTAINER"
|
|
46
|
+
)
|
|
39
47
|
if qlever_is_running_in_container:
|
|
40
48
|
log.error(
|
|
41
49
|
"The environment variable `QLEVER_OVERRIDE_DISABLE_UI` is set, "
|
|
@@ -67,7 +75,9 @@ class UiCommand(QleverCommand):
|
|
|
67
75
|
f'{args.ui_config} {server_url}"'
|
|
68
76
|
)
|
|
69
77
|
self.show(
|
|
70
|
-
"\n".join(
|
|
78
|
+
"\n".join(
|
|
79
|
+
["Stop running containers", pull_cmd, run_cmd, exec_cmd]
|
|
80
|
+
),
|
|
71
81
|
only_show=args.show,
|
|
72
82
|
)
|
|
73
83
|
if qlever_is_running_in_container:
|
|
@@ -77,7 +87,9 @@ class UiCommand(QleverCommand):
|
|
|
77
87
|
|
|
78
88
|
# Stop running containers.
|
|
79
89
|
for container_system in Containerize.supported_systems():
|
|
80
|
-
Containerize.stop_and_remove_container(
|
|
90
|
+
Containerize.stop_and_remove_container(
|
|
91
|
+
container_system, args.ui_container
|
|
92
|
+
)
|
|
81
93
|
|
|
82
94
|
# Check if the UI port is already being used.
|
|
83
95
|
if is_port_used(args.ui_port):
|
qlever/log.py
CHANGED
|
@@ -10,6 +10,7 @@ class QleverLogFormatter(logging.Formatter):
|
|
|
10
10
|
"""
|
|
11
11
|
Custom formatter for logging.
|
|
12
12
|
"""
|
|
13
|
+
|
|
13
14
|
def format(self, record):
|
|
14
15
|
message = record.getMessage()
|
|
15
16
|
if record.levelno == logging.DEBUG:
|
|
@@ -34,7 +35,7 @@ log_levels = {
|
|
|
34
35
|
"WARNING": logging.WARNING,
|
|
35
36
|
"ERROR": logging.ERROR,
|
|
36
37
|
"CRITICAL": logging.CRITICAL,
|
|
37
|
-
"NO_LOG": logging.CRITICAL + 1
|
|
38
|
+
"NO_LOG": logging.CRITICAL + 1,
|
|
38
39
|
}
|
|
39
40
|
|
|
40
41
|
|