qlever 0.5.11__py3-none-any.whl → 0.5.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.dblp +1 -1
- qlever/Qleverfiles/Qleverfile.pubchem +102 -26
- qlever/Qleverfiles/Qleverfile.uniprot +48 -16
- qlever/Qleverfiles/Qleverfile.wikidata +1 -3
- qlever/commands/add_text_index.py +2 -1
- qlever/commands/cache_stats.py +1 -1
- qlever/commands/clear_cache.py +4 -2
- qlever/commands/example_queries.py +120 -50
- qlever/commands/get_data.py +1 -1
- qlever/commands/index.py +148 -77
- qlever/commands/index_stats.py +90 -59
- qlever/commands/log.py +12 -2
- qlever/commands/query.py +66 -27
- qlever/commands/setup_config.py +1 -1
- qlever/commands/start.py +9 -3
- qlever/commands/status.py +2 -1
- qlever/commands/stop.py +4 -6
- qlever/commands/system_info.py +1 -1
- qlever/commands/ui.py +3 -1
- qlever/commands/warmup.py +1 -1
- qlever/qlever_main.py +16 -9
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/METADATA +1 -1
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/RECORD +27 -27
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/LICENSE +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/WHEEL +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.11.dist-info → qlever-0.5.15.dist-info}/top_level.txt +0 -0
qlever/commands/get_data.py
CHANGED
qlever/commands/index.py
CHANGED
|
@@ -3,12 +3,12 @@ from __future__ import annotations
|
|
|
3
3
|
import glob
|
|
4
4
|
import json
|
|
5
5
|
import shlex
|
|
6
|
+
import re
|
|
6
7
|
|
|
7
8
|
from qlever.command import QleverCommand
|
|
8
9
|
from qlever.containerize import Containerize
|
|
9
10
|
from qlever.log import log
|
|
10
|
-
from qlever.util import
|
|
11
|
-
run_command)
|
|
11
|
+
from qlever.util import get_existing_index_files, get_total_file_size, run_command
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class IndexCommand(QleverCommand):
|
|
@@ -20,24 +20,36 @@ class IndexCommand(QleverCommand):
|
|
|
20
20
|
pass
|
|
21
21
|
|
|
22
22
|
def description(self) -> str:
|
|
23
|
-
return
|
|
23
|
+
return "Build the index for a given RDF dataset"
|
|
24
24
|
|
|
25
25
|
def should_have_qleverfile(self) -> bool:
|
|
26
26
|
return True
|
|
27
27
|
|
|
28
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
29
|
-
return {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
"
|
|
28
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
29
|
+
return {
|
|
30
|
+
"data": ["name", "format"],
|
|
31
|
+
"index": [
|
|
32
|
+
"input_files",
|
|
33
|
+
"cat_input_files",
|
|
34
|
+
"multi_input_json",
|
|
35
|
+
"parallel_parsing",
|
|
36
|
+
"settings_json",
|
|
37
|
+
"index_binary",
|
|
38
|
+
"only_pso_and_pos_permutations",
|
|
39
|
+
"use_patterns",
|
|
40
|
+
"text_index",
|
|
41
|
+
"stxxl_memory",
|
|
42
|
+
],
|
|
43
|
+
"runtime": ["system", "image", "index_container"],
|
|
44
|
+
}
|
|
35
45
|
|
|
36
46
|
def additional_arguments(self, subparser) -> None:
|
|
37
47
|
subparser.add_argument(
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
48
|
+
"--overwrite-existing",
|
|
49
|
+
action="store_true",
|
|
50
|
+
default=False,
|
|
51
|
+
help="Overwrite an existing index, think twice before using.",
|
|
52
|
+
)
|
|
41
53
|
|
|
42
54
|
# Exception for invalid JSON.
|
|
43
55
|
class InvalidInputJson(Exception):
|
|
@@ -48,22 +60,29 @@ class IndexCommand(QleverCommand):
|
|
|
48
60
|
|
|
49
61
|
# Helper function to get command line options from JSON.
|
|
50
62
|
def get_input_options_for_json(self, args) -> str:
|
|
51
|
-
# Parse the JSON.
|
|
63
|
+
# Parse the JSON. If `args.multi_input_json` looks like JSONL, turn
|
|
64
|
+
# it into a JSON array.
|
|
52
65
|
try:
|
|
66
|
+
jsonl_line_regex = re.compile(r"^\s*\{.*\}\s*$")
|
|
67
|
+
jsonl_lines = args.multi_input_json.split("\n")
|
|
68
|
+
if all(re.match(jsonl_line_regex, line) for line in jsonl_lines):
|
|
69
|
+
args.multi_input_json = "[" + ", ".join(jsonl_lines) + "]"
|
|
53
70
|
input_specs = json.loads(args.multi_input_json)
|
|
54
71
|
except Exception as e:
|
|
55
72
|
raise self.InvalidInputJson(
|
|
56
|
-
|
|
57
|
-
|
|
73
|
+
f"Failed to parse `MULTI_INPUT_JSON` as either JSON or JSONL ({e})",
|
|
74
|
+
args.multi_input_json,
|
|
75
|
+
)
|
|
58
76
|
# Check that it is an array of length at least one.
|
|
59
77
|
if not isinstance(input_specs, list):
|
|
60
78
|
raise self.InvalidInputJson(
|
|
61
|
-
|
|
62
|
-
|
|
79
|
+
"`MULTI_INPUT_JSON` must be a JSON array", args.multi_input_json
|
|
80
|
+
)
|
|
63
81
|
if len(input_specs) == 0:
|
|
64
82
|
raise self.InvalidInputJson(
|
|
65
|
-
|
|
66
|
-
|
|
83
|
+
"`MULTI_INPUT_JSON` must contain at least one element",
|
|
84
|
+
args.multi_input_json,
|
|
85
|
+
)
|
|
67
86
|
# For each of the maps, construct the corresponding command-line
|
|
68
87
|
# options to the index binary.
|
|
69
88
|
input_options = []
|
|
@@ -71,35 +90,77 @@ class IndexCommand(QleverCommand):
|
|
|
71
90
|
# Check that `input_spec` is a dictionary.
|
|
72
91
|
if not isinstance(input_spec, dict):
|
|
73
92
|
raise self.InvalidInputJson(
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
93
|
+
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON " "object",
|
|
94
|
+
input_spec,
|
|
95
|
+
)
|
|
77
96
|
# For each `input_spec`, we must have a command.
|
|
78
97
|
if "cmd" not in input_spec:
|
|
79
98
|
raise self.InvalidInputJson(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
99
|
+
f"Element {i} in `MULTI_INPUT_JSON` must contain a " "key `cmd`",
|
|
100
|
+
input_spec,
|
|
101
|
+
)
|
|
102
|
+
# If the command contains a `{}` placeholder, we need a `for-each`
|
|
103
|
+
# key specifying the pattern for the placeholder values, and vice
|
|
104
|
+
# versa.
|
|
105
|
+
if "{}" in input_spec["cmd"] and "for-each" not in input_spec:
|
|
106
|
+
raise self.InvalidInputJson(
|
|
107
|
+
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
108
|
+
"key `for-each` if the command contains a placeholder "
|
|
109
|
+
"`{}`",
|
|
110
|
+
input_spec,
|
|
111
|
+
)
|
|
112
|
+
if "for-each" in input_spec and "{}" not in input_spec["cmd"]:
|
|
113
|
+
raise self.InvalidInputJson(
|
|
114
|
+
f"Element {i} in `MULTI_INPUT_JSON` contains a "
|
|
115
|
+
"key `for-each`, but the command does not contain a "
|
|
116
|
+
"placeholder `{{}}`",
|
|
117
|
+
input_spec,
|
|
118
|
+
)
|
|
119
|
+
# Get all commands. This is just the value of the `cmd` key if no
|
|
120
|
+
# `for-each` key is specified. Otherwise, we have a command for
|
|
121
|
+
# each file matching the pattern.
|
|
122
|
+
if "for-each" not in input_spec:
|
|
123
|
+
input_cmds = [input_spec["cmd"]]
|
|
124
|
+
else:
|
|
125
|
+
try:
|
|
126
|
+
files = sorted(glob.glob(input_spec["for-each"]))
|
|
127
|
+
except Exception as e:
|
|
128
|
+
raise self.InvalidInputJson(
|
|
129
|
+
f"Element {i} in `MULTI_INPUT_JSON` contains an "
|
|
130
|
+
f"invalid `for-each` pattern: {e}",
|
|
131
|
+
input_spec,
|
|
132
|
+
)
|
|
133
|
+
input_cmds = [input_spec["cmd"].format(file) for file in files]
|
|
84
134
|
# The `format`, `graph`, and `parallel` keys are optional.
|
|
85
135
|
input_format = input_spec.get("format", args.format)
|
|
86
136
|
input_graph = input_spec.get("graph", "-")
|
|
87
137
|
input_parallel = input_spec.get("parallel", "false")
|
|
88
138
|
# There must not be any other keys.
|
|
89
|
-
extra_keys = input_spec.keys() - {
|
|
139
|
+
extra_keys = input_spec.keys() - {
|
|
140
|
+
"cmd",
|
|
141
|
+
"format",
|
|
142
|
+
"graph",
|
|
143
|
+
"parallel",
|
|
144
|
+
"for-each",
|
|
145
|
+
}
|
|
90
146
|
if extra_keys:
|
|
91
147
|
raise self.InvalidInputJson(
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
148
|
+
f"Element {i} in `MULTI_INPUT_JSON` must only contain "
|
|
149
|
+
"the keys `format`, `graph`, and `parallel`. Contains "
|
|
150
|
+
"extra keys {extra_keys}.",
|
|
151
|
+
input_spec,
|
|
152
|
+
)
|
|
96
153
|
# Add the command-line options for this input stream. We use
|
|
97
|
-
# process substitution `<(...)` as a convenient way to handle
|
|
98
|
-
#
|
|
99
|
-
#
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
154
|
+
# process substitution `<(...)` as a convenient way to handle an
|
|
155
|
+
# input stream just like a file. This is not POSIX compliant, but
|
|
156
|
+
# supported by various shells, including bash and zsh. If
|
|
157
|
+
# `for-each` is specified, add one command for each matching file.
|
|
158
|
+
for input_cmd in input_cmds:
|
|
159
|
+
input_option = f"-f <({input_cmd}) -g {input_graph}"
|
|
160
|
+
input_option += f" -F {input_format}"
|
|
161
|
+
if input_parallel == "true":
|
|
162
|
+
input_option += " -p true"
|
|
163
|
+
input_options.append(input_option)
|
|
103
164
|
# Return the concatenated command-line options.
|
|
104
165
|
return " ".join(input_options)
|
|
105
166
|
|
|
@@ -108,11 +169,13 @@ class IndexCommand(QleverCommand):
|
|
|
108
169
|
# basename of the index, and the settings file). There are two ways
|
|
109
170
|
# to specify the input: via a single stream or via multiple streams.
|
|
110
171
|
if args.cat_input_files and not args.multi_input_json:
|
|
111
|
-
index_cmd = (
|
|
112
|
-
|
|
113
|
-
|
|
172
|
+
index_cmd = (
|
|
173
|
+
f"{args.cat_input_files} | {args.index_binary}"
|
|
174
|
+
f" -i {args.name} -s {args.name}.settings.json"
|
|
175
|
+
f" -F {args.format} -f -"
|
|
176
|
+
)
|
|
114
177
|
if args.parallel_parsing:
|
|
115
|
-
index_cmd +=
|
|
178
|
+
index_cmd += f" -p {args.parallel_parsing}"
|
|
116
179
|
elif args.multi_input_json and not args.cat_input_files:
|
|
117
180
|
try:
|
|
118
181
|
input_options = self.get_input_options_for_json(args)
|
|
@@ -121,13 +184,17 @@ class IndexCommand(QleverCommand):
|
|
|
121
184
|
log.info("")
|
|
122
185
|
log.info(e.additional_info)
|
|
123
186
|
return False
|
|
124
|
-
index_cmd = (
|
|
125
|
-
|
|
126
|
-
|
|
187
|
+
index_cmd = (
|
|
188
|
+
f"{args.index_binary}"
|
|
189
|
+
f" -i {args.name} -s {args.name}.settings.json"
|
|
190
|
+
f" {input_options}"
|
|
191
|
+
)
|
|
127
192
|
else:
|
|
128
|
-
log.error(
|
|
129
|
-
|
|
130
|
-
|
|
193
|
+
log.error(
|
|
194
|
+
"Specify exactly one of `CAT_INPUT_FILES` (for a "
|
|
195
|
+
"single input stream) or `MULTI_INPUT_JSON` (for "
|
|
196
|
+
"multiple input streams)"
|
|
197
|
+
)
|
|
131
198
|
log.info("")
|
|
132
199
|
log.info("See `qlever index --help` for more information")
|
|
133
200
|
return False
|
|
@@ -137,12 +204,11 @@ class IndexCommand(QleverCommand):
|
|
|
137
204
|
index_cmd += " --only-pso-and-pos-permutations --no-patterns"
|
|
138
205
|
if not args.use_patterns:
|
|
139
206
|
index_cmd += " --no-patterns"
|
|
140
|
-
if args.text_index in
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
if args.text_index in
|
|
145
|
-
["from_literals", "from_text_records_and_literals"]:
|
|
207
|
+
if args.text_index in ["from_text_records", "from_text_records_and_literals"]:
|
|
208
|
+
index_cmd += (
|
|
209
|
+
f" -w {args.name}.wordsfile.tsv" f" -d {args.name}.docsfile.tsv"
|
|
210
|
+
)
|
|
211
|
+
if args.text_index in ["from_literals", "from_text_records_and_literals"]:
|
|
146
212
|
index_cmd += " --text-words-from-literals"
|
|
147
213
|
if args.stxxl_memory:
|
|
148
214
|
index_cmd += f" --stxxl-memory {args.stxxl_memory}"
|
|
@@ -150,38 +216,42 @@ class IndexCommand(QleverCommand):
|
|
|
150
216
|
|
|
151
217
|
# If the total file size is larger than 10 GB, set ulimit (such that a
|
|
152
218
|
# large number of open files is allowed).
|
|
153
|
-
total_file_size = get_total_file_size(
|
|
154
|
-
shlex.split(args.input_files))
|
|
219
|
+
total_file_size = get_total_file_size(shlex.split(args.input_files))
|
|
155
220
|
if total_file_size > 1e10:
|
|
156
221
|
index_cmd = f"ulimit -Sn 1048576; {index_cmd}"
|
|
157
222
|
|
|
158
223
|
# Run the command in a container (if so desired).
|
|
159
224
|
if args.system in Containerize.supported_systems():
|
|
160
225
|
index_cmd = Containerize().containerize_command(
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
226
|
+
index_cmd,
|
|
227
|
+
args.system,
|
|
228
|
+
"run --rm",
|
|
229
|
+
args.image,
|
|
230
|
+
args.index_container,
|
|
231
|
+
volumes=[("$(pwd)", "/index")],
|
|
232
|
+
working_directory="/index",
|
|
233
|
+
)
|
|
167
234
|
|
|
168
235
|
# Command for writing the settings JSON to a file.
|
|
169
|
-
settings_json_cmd = (
|
|
170
|
-
|
|
236
|
+
settings_json_cmd = (
|
|
237
|
+
f"echo {shlex.quote(args.settings_json)} " f"> {args.name}.settings.json"
|
|
238
|
+
)
|
|
171
239
|
|
|
172
240
|
# Show the command line.
|
|
173
241
|
self.show(f"{settings_json_cmd}\n{index_cmd}", only_show=args.show)
|
|
174
242
|
if args.show:
|
|
175
|
-
return
|
|
243
|
+
return True
|
|
176
244
|
|
|
177
245
|
# When running natively, check if the binary exists and works.
|
|
178
246
|
if args.system == "native":
|
|
179
247
|
try:
|
|
180
248
|
run_command(f"{args.index_binary} --help")
|
|
181
249
|
except Exception as e:
|
|
182
|
-
log.error(
|
|
183
|
-
|
|
184
|
-
|
|
250
|
+
log.error(
|
|
251
|
+
f'Running "{args.index_binary}" failed, '
|
|
252
|
+
f"set `--index-binary` to a different binary or "
|
|
253
|
+
f"set `--system to a container system`"
|
|
254
|
+
)
|
|
185
255
|
log.info("")
|
|
186
256
|
log.info(f"The error message was: {e}")
|
|
187
257
|
return False
|
|
@@ -189,28 +259,29 @@ class IndexCommand(QleverCommand):
|
|
|
189
259
|
# Check if all of the input files exist.
|
|
190
260
|
for pattern in shlex.split(args.input_files):
|
|
191
261
|
if len(glob.glob(pattern)) == 0:
|
|
192
|
-
log.error(f
|
|
262
|
+
log.error(f'No file matching "{pattern}" found')
|
|
193
263
|
log.info("")
|
|
194
|
-
log.info(
|
|
195
|
-
|
|
264
|
+
log.info(
|
|
265
|
+
"Did you call `qlever get-data`? If you did, check "
|
|
266
|
+
"GET_DATA_CMD and INPUT_FILES in the QLeverfile"
|
|
267
|
+
)
|
|
196
268
|
return False
|
|
197
269
|
|
|
198
270
|
# Check if index files (name.index.*) already exist.
|
|
199
271
|
existing_index_files = get_existing_index_files(args.name)
|
|
200
272
|
if len(existing_index_files) > 0 and not args.overwrite_existing:
|
|
201
273
|
log.error(
|
|
202
|
-
|
|
203
|
-
|
|
274
|
+
f'Index files for basename "{args.name}" found, if you '
|
|
275
|
+
f"want to overwrite them, use --overwrite-existing"
|
|
276
|
+
)
|
|
204
277
|
log.info("")
|
|
205
278
|
log.info(f"Index files found: {existing_index_files}")
|
|
206
279
|
return False
|
|
207
280
|
|
|
208
281
|
# Remove already existing container.
|
|
209
|
-
if args.system in Containerize.supported_systems()
|
|
210
|
-
and args.overwrite_existing:
|
|
282
|
+
if args.system in Containerize.supported_systems() and args.overwrite_existing:
|
|
211
283
|
if Containerize.is_running(args.system, args.index_container):
|
|
212
|
-
log.info("Another index process is running, trying to stop "
|
|
213
|
-
"it ...")
|
|
284
|
+
log.info("Another index process is running, trying to stop " "it ...")
|
|
214
285
|
log.info("")
|
|
215
286
|
try:
|
|
216
287
|
run_command(f"{args.system} rm -f {args.index_container}")
|
qlever/commands/index_stats.py
CHANGED
|
@@ -18,32 +18,45 @@ class IndexStatsCommand(QleverCommand):
|
|
|
18
18
|
pass
|
|
19
19
|
|
|
20
20
|
def description(self) -> str:
|
|
21
|
-
return
|
|
21
|
+
return "Breakdown of the time and space used for the index build"
|
|
22
22
|
|
|
23
23
|
def should_have_qleverfile(self) -> bool:
|
|
24
24
|
return False
|
|
25
25
|
|
|
26
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
26
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
27
27
|
return {"data": ["name"]}
|
|
28
28
|
|
|
29
29
|
def additional_arguments(self, subparser) -> None:
|
|
30
|
-
subparser.add_argument(
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
subparser.add_argument(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
30
|
+
subparser.add_argument(
|
|
31
|
+
"--only-time",
|
|
32
|
+
action="store_true",
|
|
33
|
+
default=False,
|
|
34
|
+
help="Show only the time used",
|
|
35
|
+
)
|
|
36
|
+
subparser.add_argument(
|
|
37
|
+
"--only-space",
|
|
38
|
+
action="store_true",
|
|
39
|
+
default=False,
|
|
40
|
+
help="Show only the space used",
|
|
41
|
+
)
|
|
42
|
+
subparser.add_argument(
|
|
43
|
+
"--ignore-text-index",
|
|
44
|
+
action="store_true",
|
|
45
|
+
default=False,
|
|
46
|
+
help="Ignore the text index",
|
|
47
|
+
)
|
|
48
|
+
subparser.add_argument(
|
|
49
|
+
"--time-unit",
|
|
50
|
+
choices=["s", "min", "h", "auto"],
|
|
51
|
+
default="auto",
|
|
52
|
+
help="The time unit",
|
|
53
|
+
)
|
|
54
|
+
subparser.add_argument(
|
|
55
|
+
"--size-unit",
|
|
56
|
+
choices=["B", "MB", "GB", "TB", "auto"],
|
|
57
|
+
default="auto",
|
|
58
|
+
help="The size unit",
|
|
59
|
+
)
|
|
47
60
|
|
|
48
61
|
def execute_time(self, args, log_file_name) -> bool:
|
|
49
62
|
"""
|
|
@@ -65,8 +78,9 @@ class IndexStatsCommand(QleverCommand):
|
|
|
65
78
|
with open(text_log_file_name, "r") as text_log_file:
|
|
66
79
|
lines.extend(text_log_file.readlines())
|
|
67
80
|
except Exception as e:
|
|
68
|
-
log.error(
|
|
69
|
-
|
|
81
|
+
log.error(
|
|
82
|
+
f"Problem reading text index log file " f"{text_log_file_name}: {e}"
|
|
83
|
+
)
|
|
70
84
|
return False
|
|
71
85
|
|
|
72
86
|
# Helper function that finds the next line matching the given `regex`,
|
|
@@ -95,12 +109,14 @@ class IndexStatsCommand(QleverCommand):
|
|
|
95
109
|
if regex_match:
|
|
96
110
|
try:
|
|
97
111
|
return datetime.strptime(
|
|
98
|
-
|
|
99
|
-
|
|
112
|
+
re.match(timestamp_regex, line).group(), timestamp_format
|
|
113
|
+
), regex_match
|
|
100
114
|
except Exception as e:
|
|
101
|
-
log.error(
|
|
102
|
-
|
|
103
|
-
|
|
115
|
+
log.error(
|
|
116
|
+
f"Could not parse timestamp of form "
|
|
117
|
+
f'"{timestamp_regex}" from line '
|
|
118
|
+
f' "{line.rstrip()}" ({e})'
|
|
119
|
+
)
|
|
104
120
|
# If we get here, we did not find a matching line.
|
|
105
121
|
if not update_current_line:
|
|
106
122
|
current_line = current_line_backup
|
|
@@ -119,26 +135,32 @@ class IndexStatsCommand(QleverCommand):
|
|
|
119
135
|
# file (old format: "Creating a pair" + names of permutations in
|
|
120
136
|
# line "Writing meta data for ..."; new format: name of
|
|
121
137
|
# permutations already in line "Creating permutations ...").
|
|
122
|
-
perm_begin, _ = find_next_line(
|
|
123
|
-
|
|
138
|
+
perm_begin, _ = find_next_line(
|
|
139
|
+
r"INFO:\s*Creating a pair", update_current_line=False
|
|
140
|
+
)
|
|
124
141
|
if perm_begin is None:
|
|
125
142
|
perm_begin, perm_info = find_next_line(
|
|
126
143
|
r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
|
|
127
|
-
update_current_line=False
|
|
144
|
+
update_current_line=False,
|
|
145
|
+
)
|
|
128
146
|
else:
|
|
129
147
|
_, perm_info = find_next_line(
|
|
130
148
|
r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
|
|
131
|
-
update_current_line=False
|
|
149
|
+
update_current_line=False,
|
|
150
|
+
)
|
|
132
151
|
if perm_info is None:
|
|
133
152
|
break
|
|
134
153
|
perm_begin_and_info.append((perm_begin, perm_info))
|
|
135
|
-
convert_end = (
|
|
136
|
-
|
|
154
|
+
convert_end = (
|
|
155
|
+
perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None
|
|
156
|
+
)
|
|
137
157
|
normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
|
|
138
|
-
text_begin, _ = find_next_line(
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
158
|
+
text_begin, _ = find_next_line(
|
|
159
|
+
r"INFO:\s*Adding text index", update_current_line=False
|
|
160
|
+
)
|
|
161
|
+
text_end, _ = find_next_line(
|
|
162
|
+
r"INFO:\s*Text index build comp", update_current_line=False
|
|
163
|
+
)
|
|
142
164
|
if args.ignore_text_index:
|
|
143
165
|
text_begin = text_end = None
|
|
144
166
|
|
|
@@ -147,9 +169,11 @@ class IndexStatsCommand(QleverCommand):
|
|
|
147
169
|
log.error("Missing line that index build has started")
|
|
148
170
|
return False
|
|
149
171
|
if overall_begin and not merge_begin:
|
|
150
|
-
log.error(
|
|
151
|
-
|
|
152
|
-
|
|
172
|
+
log.error(
|
|
173
|
+
"According to the log file, the index build "
|
|
174
|
+
"has started, but is still in its first "
|
|
175
|
+
"phase (parsing the input)"
|
|
176
|
+
)
|
|
153
177
|
return False
|
|
154
178
|
|
|
155
179
|
# Helper function that shows the duration for a phase (if the start and
|
|
@@ -187,22 +211,24 @@ class IndexStatsCommand(QleverCommand):
|
|
|
187
211
|
show_duration("Convert to global IDs", [(convert_begin, convert_end)])
|
|
188
212
|
for i in range(len(perm_begin_and_info)):
|
|
189
213
|
perm_begin, perm_info = perm_begin_and_info[i]
|
|
190
|
-
perm_end =
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
214
|
+
perm_end = (
|
|
215
|
+
perm_begin_and_info[i + 1][0]
|
|
216
|
+
if i + 1 < len(perm_begin_and_info)
|
|
217
|
+
else normal_end
|
|
218
|
+
)
|
|
219
|
+
perm_info_text = (
|
|
220
|
+
perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}"
|
|
221
|
+
)
|
|
222
|
+
show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)])
|
|
196
223
|
show_duration("Text index", [(text_begin, text_end)])
|
|
197
224
|
if text_begin and text_end:
|
|
198
225
|
log.info("")
|
|
199
|
-
show_duration(
|
|
200
|
-
|
|
201
|
-
|
|
226
|
+
show_duration(
|
|
227
|
+
"TOTAL time", [(overall_begin, normal_end), (text_begin, text_end)]
|
|
228
|
+
)
|
|
202
229
|
elif normal_end:
|
|
203
230
|
log.info("")
|
|
204
|
-
show_duration("TOTAL time",
|
|
205
|
-
[(overall_begin, normal_end)])
|
|
231
|
+
show_duration("TOTAL time", [(overall_begin, normal_end)])
|
|
206
232
|
return True
|
|
207
233
|
|
|
208
234
|
def execute_space(self, args) -> bool:
|
|
@@ -252,24 +278,29 @@ class IndexStatsCommand(QleverCommand):
|
|
|
252
278
|
return True
|
|
253
279
|
|
|
254
280
|
def execute(self, args) -> bool:
|
|
255
|
-
|
|
281
|
+
return_value = True
|
|
256
282
|
|
|
257
283
|
# The "time" part of the command.
|
|
258
284
|
if not args.only_space:
|
|
259
285
|
log_file_name = f"{args.name}.index-log.txt"
|
|
260
|
-
self.show(
|
|
261
|
-
|
|
262
|
-
|
|
286
|
+
self.show(
|
|
287
|
+
f"Breakdown of the time used for "
|
|
288
|
+
f"building the index, based on the timestamps for key "
|
|
289
|
+
f'lines in "{log_file_name}"',
|
|
290
|
+
only_show=args.show,
|
|
291
|
+
)
|
|
263
292
|
if not args.show:
|
|
264
|
-
|
|
293
|
+
return_value &= self.execute_time(args, log_file_name)
|
|
265
294
|
if not args.only_time:
|
|
266
295
|
log.info("")
|
|
267
296
|
|
|
268
297
|
# The "space" part of the command.
|
|
269
298
|
if not args.only_time:
|
|
270
|
-
self.show(
|
|
271
|
-
|
|
299
|
+
self.show(
|
|
300
|
+
"Breakdown of the space used for building the index",
|
|
301
|
+
only_show=args.show,
|
|
302
|
+
)
|
|
272
303
|
if not args.show:
|
|
273
|
-
|
|
304
|
+
return_value &= self.execute_space(args)
|
|
274
305
|
|
|
275
|
-
return
|
|
306
|
+
return return_value
|
qlever/commands/log.py
CHANGED
|
@@ -47,10 +47,20 @@ class LogCommand(QleverCommand):
|
|
|
47
47
|
log_cmd += f" {log_file}"
|
|
48
48
|
self.show(log_cmd, only_show=args.show)
|
|
49
49
|
if args.show:
|
|
50
|
-
return
|
|
50
|
+
return True
|
|
51
51
|
|
|
52
52
|
# Execute the command.
|
|
53
53
|
log.info(f"Follow log file {log_file}, press Ctrl-C to stop"
|
|
54
54
|
f" following (will not stop the server)")
|
|
55
55
|
log.info("")
|
|
56
|
-
|
|
56
|
+
try:
|
|
57
|
+
subprocess.run(log_cmd, shell=True)
|
|
58
|
+
return True
|
|
59
|
+
except Exception as e:
|
|
60
|
+
log.error(e)
|
|
61
|
+
return False
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
|