qlever-0.5.8-py3-none-any.whl → qlever-0.5.9-py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.dblp +13 -9
- qlever/Qleverfiles/Qleverfile.dblp-plus +2 -2
- qlever/Qleverfiles/Qleverfile.default +1 -1
- qlever/Qleverfiles/Qleverfile.fbeasy +4 -4
- qlever/Qleverfiles/Qleverfile.freebase +2 -2
- qlever/Qleverfiles/Qleverfile.imdb +1 -1
- qlever/Qleverfiles/Qleverfile.orkg +30 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +1 -1
- qlever/Qleverfiles/Qleverfile.vvz +3 -3
- qlever/Qleverfiles/Qleverfile.wikidata +29 -17
- qlever/Qleverfiles/Qleverfile.yago-4 +4 -4
- qlever/commands/example_queries.py +250 -150
- qlever/commands/index.py +96 -8
- qlever/commands/setup_config.py +47 -31
- qlever/commands/system_info.py +126 -0
- qlever/commands/ui.py +50 -23
- qlever/containerize.py +67 -33
- qlever/qleverfile.py +10 -3
- qlever/util.py +55 -30
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/METADATA +1 -1
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/RECORD +25 -24
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/WHEEL +1 -1
- qlever/__main__.py +0 -1476
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/LICENSE +0 -0
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.8.dist-info → qlever-0.5.9.dist-info}/top_level.txt +0 -0
qlever/commands/example_queries.py
CHANGED
|
@@ -22,67 +22,97 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
22
22
|
|
|
23
23
|
def __init__(self):
|
|
24
24
|
self.presets = {
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
"https://qlever.cs.uni-freiburg.de/api/wikidata"
|
|
29
|
-
}
|
|
25
|
+
"virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
|
|
26
|
+
"qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
|
|
27
|
+
}
|
|
30
28
|
|
|
31
29
|
def description(self) -> str:
|
|
32
|
-
return
|
|
30
|
+
return "Show how much of the cache is currently being used"
|
|
33
31
|
|
|
34
32
|
def should_have_qleverfile(self) -> bool:
|
|
35
33
|
return False
|
|
36
34
|
|
|
37
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
35
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
38
36
|
return {"server": ["port"], "ui": ["ui_config"]}
|
|
39
37
|
|
|
40
38
|
def additional_arguments(self, subparser) -> None:
|
|
41
|
-
subparser.add_argument(
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
subparser.add_argument(
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
subparser.add_argument(
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
subparser.add_argument(
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
39
|
+
subparser.add_argument(
|
|
40
|
+
"--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
|
|
41
|
+
)
|
|
42
|
+
subparser.add_argument(
|
|
43
|
+
"--sparql-endpoint-preset",
|
|
44
|
+
choices=self.presets.keys(),
|
|
45
|
+
help="Shortcut for setting the SPARQL endpoint",
|
|
46
|
+
)
|
|
47
|
+
subparser.add_argument(
|
|
48
|
+
"--get-queries-cmd",
|
|
49
|
+
type=str,
|
|
50
|
+
help="Command to get example queries as TSV " "(description, query)",
|
|
51
|
+
)
|
|
52
|
+
subparser.add_argument(
|
|
53
|
+
"--query-ids",
|
|
54
|
+
type=str,
|
|
55
|
+
default="1-$",
|
|
56
|
+
help="Query IDs as comma-separated list of " "ranges (e.g., 1-5,7,12-$)",
|
|
57
|
+
)
|
|
58
|
+
subparser.add_argument(
|
|
59
|
+
"--query-regex",
|
|
60
|
+
type=str,
|
|
61
|
+
help="Only consider example queries matching "
|
|
62
|
+
"this regex (using grep -Pi)",
|
|
63
|
+
)
|
|
64
|
+
subparser.add_argument(
|
|
65
|
+
"--download-or-count",
|
|
66
|
+
choices=["download", "count"],
|
|
67
|
+
default="count",
|
|
68
|
+
help="Whether to download the full result "
|
|
69
|
+
"or just compute the size of the result",
|
|
70
|
+
)
|
|
71
|
+
subparser.add_argument(
|
|
72
|
+
"--limit", type=int, help="Limit on the number of results"
|
|
73
|
+
)
|
|
74
|
+
subparser.add_argument(
|
|
75
|
+
"--remove-offset-and-limit",
|
|
76
|
+
action="store_true",
|
|
77
|
+
default=False,
|
|
78
|
+
help="Remove OFFSET and LIMIT from the query",
|
|
79
|
+
)
|
|
80
|
+
subparser.add_argument(
|
|
81
|
+
"--accept",
|
|
82
|
+
type=str,
|
|
83
|
+
choices=[
|
|
84
|
+
"text/tab-separated-values",
|
|
85
|
+
"text/csv",
|
|
86
|
+
"application/sparql-results+json",
|
|
87
|
+
"text/turtle",
|
|
88
|
+
],
|
|
89
|
+
default="text/tab-separated-values",
|
|
90
|
+
help="Accept header for the SPARQL query",
|
|
91
|
+
)
|
|
92
|
+
subparser.add_argument(
|
|
93
|
+
"--clear-cache",
|
|
94
|
+
choices=["yes", "no"],
|
|
95
|
+
default="yes",
|
|
96
|
+
help="Clear the cache before each query",
|
|
97
|
+
)
|
|
98
|
+
subparser.add_argument(
|
|
99
|
+
"--width-query-description",
|
|
100
|
+
type=int,
|
|
101
|
+
default=40,
|
|
102
|
+
help="Width for printing the query description",
|
|
103
|
+
)
|
|
104
|
+
subparser.add_argument(
|
|
105
|
+
"--width-error-message",
|
|
106
|
+
type=int,
|
|
107
|
+
default=80,
|
|
108
|
+
help="Width for printing the error message " "(0 = no limit)",
|
|
109
|
+
)
|
|
110
|
+
subparser.add_argument(
|
|
111
|
+
"--width-result-size",
|
|
112
|
+
type=int,
|
|
113
|
+
default=14,
|
|
114
|
+
help="Width for printing the result size",
|
|
115
|
+
)
|
|
86
116
|
|
|
87
117
|
def execute(self, args) -> bool:
|
|
88
118
|
# We can't have both `--remove-offset-and-limit` and `--limit`.
|
|
@@ -93,9 +123,13 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
93
123
|
# If `args.accept` is `application/sparql-results+json`, we need `jq`.
|
|
94
124
|
if args.accept == "application/sparql-results+json":
|
|
95
125
|
try:
|
|
96
|
-
subprocess.run(
|
|
97
|
-
|
|
98
|
-
|
|
126
|
+
subprocess.run(
|
|
127
|
+
"jq --version",
|
|
128
|
+
shell=True,
|
|
129
|
+
check=True,
|
|
130
|
+
stdout=subprocess.DEVNULL,
|
|
131
|
+
stderr=subprocess.DEVNULL,
|
|
132
|
+
)
|
|
99
133
|
except Exception as e:
|
|
100
134
|
log.error(f"Please install `jq` for {args.accept} ({e})")
|
|
101
135
|
return False
|
|
@@ -111,38 +145,44 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
111
145
|
return False
|
|
112
146
|
|
|
113
147
|
# Clear cache only works for QLever.
|
|
114
|
-
is_qlever =
|
|
115
|
-
|
|
148
|
+
is_qlever = not args.sparql_endpoint or args.sparql_endpoint.startswith(
|
|
149
|
+
"https://qlever"
|
|
150
|
+
)
|
|
116
151
|
if args.clear_cache == "yes" and not is_qlever:
|
|
117
152
|
log.warning("Clearing the cache only works for QLever")
|
|
118
153
|
args.clear_cache = "no"
|
|
119
154
|
|
|
120
155
|
# Show what the command will do.
|
|
121
|
-
get_queries_cmd = (
|
|
122
|
-
|
|
123
|
-
|
|
156
|
+
get_queries_cmd = (
|
|
157
|
+
args.get_queries_cmd
|
|
158
|
+
if args.get_queries_cmd
|
|
159
|
+
else f"curl -sv https://qlever.cs.uni-freiburg.de/"
|
|
160
|
+
f"api/examples/{args.ui_config}"
|
|
161
|
+
)
|
|
124
162
|
sed_arg = args.query_ids.replace(",", "p;").replace("-", ",") + "p"
|
|
125
163
|
get_queries_cmd += f" | sed -n '{sed_arg}'"
|
|
126
164
|
if args.query_regex:
|
|
127
165
|
get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
|
|
128
|
-
sparql_endpoint = (
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
166
|
+
sparql_endpoint = (
|
|
167
|
+
args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
|
|
168
|
+
)
|
|
169
|
+
self.show(
|
|
170
|
+
f"Obtain queries via: {get_queries_cmd}\n"
|
|
171
|
+
f"SPARQL endpoint: {sparql_endpoint}\n"
|
|
172
|
+
f"Accept header: {args.accept}\n"
|
|
173
|
+
f"Clear cache before each query:"
|
|
174
|
+
f" {args.clear_cache.upper()}\n"
|
|
175
|
+
f"Download result for each query or just count:"
|
|
176
|
+
f" {args.download_or_count.upper()}"
|
|
177
|
+
+ (f" with LIMIT {args.limit}" if args.limit else ""),
|
|
178
|
+
only_show=args.show,
|
|
179
|
+
)
|
|
139
180
|
if args.show:
|
|
140
181
|
return False
|
|
141
182
|
|
|
142
183
|
# Get the example queries.
|
|
143
184
|
try:
|
|
144
|
-
example_query_lines = run_command(get_queries_cmd,
|
|
145
|
-
return_output=True)
|
|
185
|
+
example_query_lines = run_command(get_queries_cmd, return_output=True)
|
|
146
186
|
if len(example_query_lines) == 0:
|
|
147
187
|
log.error("No example queries matching the criteria found")
|
|
148
188
|
return False
|
|
@@ -152,11 +192,11 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
152
192
|
return False
|
|
153
193
|
|
|
154
194
|
# Launch the queries one after the other and for each print: the
|
|
155
|
-
# description, the result size, and the query
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
195
|
+
# description, the result size (number of rows), and the query
|
|
196
|
+
# processing time (seconds).
|
|
197
|
+
query_times = []
|
|
198
|
+
result_sizes = []
|
|
199
|
+
num_failed = 0
|
|
160
200
|
for example_query_line in example_query_lines:
|
|
161
201
|
# Parse description and query.
|
|
162
202
|
description, query = example_query_line.split("\t")
|
|
@@ -176,13 +216,17 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
176
216
|
# Remove OFFSET and LIMIT (after the last closing bracket).
|
|
177
217
|
if args.remove_offset_and_limit or args.limit:
|
|
178
218
|
closing_bracket_idx = query.rfind("}")
|
|
179
|
-
regexes = [
|
|
180
|
-
|
|
219
|
+
regexes = [
|
|
220
|
+
re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
|
|
221
|
+
re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
|
|
222
|
+
]
|
|
181
223
|
for regex in regexes:
|
|
182
224
|
match = re.search(regex, query[closing_bracket_idx:])
|
|
183
225
|
if match:
|
|
184
|
-
query =
|
|
185
|
-
|
|
226
|
+
query = (
|
|
227
|
+
query[: closing_bracket_idx + match.start()]
|
|
228
|
+
+ query[closing_bracket_idx + match.end() :]
|
|
229
|
+
)
|
|
186
230
|
|
|
187
231
|
# Limit query.
|
|
188
232
|
if args.limit:
|
|
@@ -191,19 +235,29 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
191
235
|
# Count query.
|
|
192
236
|
if args.download_or_count == "count":
|
|
193
237
|
# First find out if there is a FROM clause.
|
|
194
|
-
regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*",
|
|
195
|
-
re.IGNORECASE)
|
|
238
|
+
regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE)
|
|
196
239
|
match_from_clause = re.search(regex_from_clause, query)
|
|
197
240
|
from_clause = " "
|
|
198
241
|
if match_from_clause:
|
|
199
242
|
from_clause = match_from_clause.group(0)
|
|
200
|
-
query = (
|
|
201
|
-
|
|
243
|
+
query = (
|
|
244
|
+
query[: match_from_clause.start()]
|
|
245
|
+
+ " "
|
|
246
|
+
+ query[match_from_clause.end() :]
|
|
247
|
+
)
|
|
202
248
|
# Now we can add the outer SELECT COUNT(*).
|
|
203
|
-
query =
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
249
|
+
query = (
|
|
250
|
+
re.sub(
|
|
251
|
+
r"SELECT ",
|
|
252
|
+
"SELECT (COUNT(*) AS ?qlever_count_)"
|
|
253
|
+
+ from_clause
|
|
254
|
+
+ "WHERE { SELECT ",
|
|
255
|
+
query,
|
|
256
|
+
count=1,
|
|
257
|
+
flags=re.IGNORECASE,
|
|
258
|
+
)
|
|
259
|
+
+ " }"
|
|
260
|
+
)
|
|
207
261
|
|
|
208
262
|
# A bit of pretty-printing.
|
|
209
263
|
query = re.sub(r"\s+", " ", query)
|
|
@@ -211,21 +265,27 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
211
265
|
|
|
212
266
|
# Launch query.
|
|
213
267
|
try:
|
|
214
|
-
curl_cmd = (
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
268
|
+
curl_cmd = (
|
|
269
|
+
f"curl -s {sparql_endpoint}"
|
|
270
|
+
f' -w "HTTP code: %{{http_code}}\\n"'
|
|
271
|
+
f' -H "Accept: {args.accept}"'
|
|
272
|
+
f" --data-urlencode query={shlex.quote(query)}"
|
|
273
|
+
)
|
|
218
274
|
log.debug(curl_cmd)
|
|
219
|
-
result_file = (
|
|
220
|
-
|
|
275
|
+
result_file = (
|
|
276
|
+
f"qlever.example_queries.result." f"{abs(hash(curl_cmd))}.tmp"
|
|
277
|
+
)
|
|
221
278
|
start_time = time.time()
|
|
222
|
-
http_code = run_curl_command(
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
279
|
+
http_code = run_curl_command(
|
|
280
|
+
sparql_endpoint,
|
|
281
|
+
headers={"Accept": args.accept},
|
|
282
|
+
params={"query": query},
|
|
283
|
+
result_file=result_file,
|
|
284
|
+
).strip()
|
|
226
285
|
if http_code != "200":
|
|
227
|
-
raise Exception(
|
|
228
|
-
|
|
286
|
+
raise Exception(
|
|
287
|
+
f"HTTP code {http_code}" f" {Path(result_file).read_text()}"
|
|
288
|
+
)
|
|
229
289
|
time_seconds = time.time() - start_time
|
|
230
290
|
error_msg = None
|
|
231
291
|
except Exception as e:
|
|
@@ -240,30 +300,34 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
240
300
|
if args.download_or_count == "count":
|
|
241
301
|
if args.accept == "text/tab-separated-values":
|
|
242
302
|
result_size = run_command(
|
|
243
|
-
|
|
244
|
-
|
|
303
|
+
f"sed 1d {result_file}", return_output=True
|
|
304
|
+
)
|
|
245
305
|
else:
|
|
246
306
|
result_size = run_command(
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
307
|
+
f'jq -r ".results.bindings[0]'
|
|
308
|
+
f" | to_entries[0].value.value"
|
|
309
|
+
f' | tonumber" {result_file}',
|
|
310
|
+
return_output=True,
|
|
311
|
+
)
|
|
251
312
|
else:
|
|
252
|
-
if (
|
|
253
|
-
|
|
313
|
+
if (
|
|
314
|
+
args.accept == "text/tab-separated-values"
|
|
315
|
+
or args.accept == "text/csv"
|
|
316
|
+
):
|
|
254
317
|
result_size = run_command(
|
|
255
|
-
|
|
256
|
-
|
|
318
|
+
f"sed 1d {result_file} | wc -l", return_output=True
|
|
319
|
+
)
|
|
257
320
|
elif args.accept == "text/turtle":
|
|
258
321
|
result_size = run_command(
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
322
|
+
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
|
|
323
|
+
f"{result_file} | wc -l",
|
|
324
|
+
return_output=True,
|
|
325
|
+
)
|
|
262
326
|
else:
|
|
263
327
|
result_size = run_command(
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
328
|
+
f'jq -r ".results.bindings | length"' f" {result_file}",
|
|
329
|
+
return_output=True,
|
|
330
|
+
)
|
|
267
331
|
result_size = int(result_size)
|
|
268
332
|
except Exception as e:
|
|
269
333
|
error_msg = str(e)
|
|
@@ -274,43 +338,79 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
274
338
|
|
|
275
339
|
# Print description, time, result in tabular form.
|
|
276
340
|
if len(description) > args.width_query_description:
|
|
277
|
-
description = description[:args.width_query_description - 3]
|
|
341
|
+
description = description[: args.width_query_description - 3]
|
|
278
342
|
description += "..."
|
|
279
343
|
if error_msg is None:
|
|
280
|
-
log.info(
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
344
|
+
log.info(
|
|
345
|
+
f"{description:<{args.width_query_description}} "
|
|
346
|
+
f"{time_seconds:6.2f} s "
|
|
347
|
+
f"{result_size:>{args.width_result_size},}"
|
|
348
|
+
)
|
|
349
|
+
query_times.append(time_seconds)
|
|
350
|
+
result_sizes.append(result_size)
|
|
286
351
|
else:
|
|
287
|
-
|
|
288
|
-
if (
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
352
|
+
num_failed += 1
|
|
353
|
+
if (
|
|
354
|
+
args.width_error_message > 0
|
|
355
|
+
and len(error_msg) > args.width_error_message
|
|
356
|
+
and args.log_level != "DEBUG"
|
|
357
|
+
):
|
|
358
|
+
error_msg = error_msg[: args.width_error_message - 3]
|
|
292
359
|
error_msg += "..."
|
|
293
|
-
log.error(
|
|
294
|
-
|
|
295
|
-
|
|
360
|
+
log.error(
|
|
361
|
+
f"{description:<{args.width_query_description}} "
|
|
362
|
+
f"failed "
|
|
363
|
+
f"{colored(error_msg, 'red')}"
|
|
364
|
+
)
|
|
296
365
|
|
|
297
|
-
#
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
if
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
366
|
+
# Check that each query has a time and a result size, or it failed.
|
|
367
|
+
assert len(result_sizes) == len(query_times)
|
|
368
|
+
assert len(query_times) + num_failed == len(example_query_lines)
|
|
369
|
+
|
|
370
|
+
# Show statistics.
|
|
371
|
+
if len(query_times) > 0:
|
|
372
|
+
n = len(query_times)
|
|
373
|
+
total_query_time = sum(query_times)
|
|
374
|
+
average_query_time = total_query_time / n
|
|
375
|
+
median_query_time = sorted(query_times)[n // 2]
|
|
376
|
+
total_result_size = sum(result_sizes)
|
|
377
|
+
average_result_size = round(total_result_size / n)
|
|
378
|
+
median_result_size = sorted(result_sizes)[n // 2]
|
|
379
|
+
query_or_queries = "query" if n == 1 else "queries"
|
|
380
|
+
description = f"TOTAL for {n} {query_or_queries}"
|
|
381
|
+
log.info("")
|
|
382
|
+
log.info(
|
|
383
|
+
f"{description:<{args.width_query_description}} "
|
|
384
|
+
f"{total_query_time:6.2f} s "
|
|
385
|
+
f"{total_result_size:>14,}"
|
|
386
|
+
)
|
|
387
|
+
description = f"AVERAGE for {n} {query_or_queries}"
|
|
388
|
+
log.info(
|
|
389
|
+
f"{description:<{args.width_query_description}} "
|
|
390
|
+
f"{average_query_time:6.2f} s "
|
|
391
|
+
f"{average_result_size:>14,}"
|
|
392
|
+
)
|
|
393
|
+
description = f"MEDIAN for {n} {query_or_queries}"
|
|
394
|
+
log.info(
|
|
395
|
+
f"{description:<{args.width_query_description}} "
|
|
396
|
+
f"{median_query_time:6.2f} s "
|
|
397
|
+
f"{median_result_size:>14,}"
|
|
398
|
+
)
|
|
399
|
+
|
|
400
|
+
# Show number of failed queries.
|
|
401
|
+
if num_failed > 0:
|
|
402
|
+
log.info("")
|
|
403
|
+
description = "Number of FAILED queries"
|
|
404
|
+
num_failed_string = f"{num_failed:>6}"
|
|
405
|
+
if num_failed == len(example_query_lines):
|
|
406
|
+
num_failed_string += " [all]"
|
|
407
|
+
log.info(
|
|
408
|
+
colored(
|
|
409
|
+
f"{description:<{args.width_query_description}} "
|
|
410
|
+
f"{num_failed:>24}",
|
|
411
|
+
"red",
|
|
412
|
+
)
|
|
413
|
+
)
|
|
314
414
|
|
|
315
415
|
# Return success (has nothing to do with how many queries failed).
|
|
316
416
|
return True
|
qlever/commands/index.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import glob
|
|
4
|
+
import json
|
|
4
5
|
import shlex
|
|
5
6
|
|
|
6
7
|
from qlever.command import QleverCommand
|
|
@@ -26,8 +27,8 @@ class IndexCommand(QleverCommand):
|
|
|
26
27
|
|
|
27
28
|
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
28
29
|
return {"data": ["name", "format"],
|
|
29
|
-
"index": ["input_files", "cat_input_files", "
|
|
30
|
-
"index_binary",
|
|
30
|
+
"index": ["input_files", "cat_input_files", "multi_input_json",
|
|
31
|
+
"settings_json", "index_binary",
|
|
31
32
|
"only_pso_and_pos_permutations", "use_patterns",
|
|
32
33
|
"text_index", "stxxl_memory"],
|
|
33
34
|
"runtime": ["system", "image", "index_container"]}
|
|
@@ -38,12 +39,98 @@ class IndexCommand(QleverCommand):
|
|
|
38
39
|
default=False,
|
|
39
40
|
help="Overwrite an existing index, think twice before using.")
|
|
40
41
|
|
|
42
|
+
# Exception for invalid JSON.
|
|
43
|
+
class InvalidInputJson(Exception):
|
|
44
|
+
def __init__(self, error_message, additional_info):
|
|
45
|
+
self.error_message = error_message
|
|
46
|
+
self.additional_info = additional_info
|
|
47
|
+
super().__init__()
|
|
48
|
+
|
|
49
|
+
# Helper function to get command line options from JSON.
|
|
50
|
+
def get_input_options_for_json(self, args) -> str:
|
|
51
|
+
# Parse the JSON.
|
|
52
|
+
try:
|
|
53
|
+
input_specs = json.loads(args.multi_input_json)
|
|
54
|
+
except Exception as e:
|
|
55
|
+
raise self.InvalidInputJson(
|
|
56
|
+
f"Failed to parse `MULTI_INPUT_JSON` ({e})",
|
|
57
|
+
args.multi_input_json)
|
|
58
|
+
# Check that it is an array of length at least one.
|
|
59
|
+
if not isinstance(input_specs, list):
|
|
60
|
+
raise self.InvalidInputJson(
|
|
61
|
+
"`MULTI_INPUT_JSON` must be a JSON array",
|
|
62
|
+
args.multi_input_json)
|
|
63
|
+
if len(input_specs) == 0:
|
|
64
|
+
raise self.InvalidInputJson(
|
|
65
|
+
"`MULTI_INPUT_JSON` must contain at least one element",
|
|
66
|
+
args.multi_input_json)
|
|
67
|
+
# For each of the maps, construct the corresponding command-line
|
|
68
|
+
# options to the index binary.
|
|
69
|
+
input_options = []
|
|
70
|
+
for i, input_spec in enumerate(input_specs):
|
|
71
|
+
# Check that `input_spec` is a dictionary.
|
|
72
|
+
if not isinstance(input_spec, dict):
|
|
73
|
+
raise self.InvalidInputJson(
|
|
74
|
+
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
|
|
75
|
+
"object",
|
|
76
|
+
input_spec)
|
|
77
|
+
# For each `input_spec`, we must have a command.
|
|
78
|
+
if "cmd" not in input_spec:
|
|
79
|
+
raise self.InvalidInputJson(
|
|
80
|
+
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
81
|
+
"key `cmd`",
|
|
82
|
+
input_spec)
|
|
83
|
+
input_cmd = input_spec["cmd"]
|
|
84
|
+
# The `format`, `graph`, and `parallel` keys are optional.
|
|
85
|
+
input_format = input_spec.get("format", args.format)
|
|
86
|
+
input_graph = input_spec.get("graph", "-")
|
|
87
|
+
input_parallel = input_spec.get("parallel", "false")
|
|
88
|
+
# There must not be any other keys.
|
|
89
|
+
extra_keys = input_spec.keys() - {"cmd", "format", "graph", "parallel"}
|
|
90
|
+
if extra_keys:
|
|
91
|
+
raise self.InvalidInputJson(
|
|
92
|
+
f"Element {i} in `MULTI_INPUT_JSON` must only contain "
|
|
93
|
+
"the keys `format`, `graph`, and `parallel`. Contains "
|
|
94
|
+
"extra keys {extra_keys}.",
|
|
95
|
+
input_spec)
|
|
96
|
+
# Add the command-line options for this input stream. We use
|
|
97
|
+
# process substitution `<(...)` as a convenient way to handle
|
|
98
|
+
# an input stream just like a file. This is not POSIX compliant,
|
|
99
|
+
# but supported by various shells, including bash and zsh.
|
|
100
|
+
input_options.append(
|
|
101
|
+
f"-f <({input_cmd}) -F {input_format} "
|
|
102
|
+
f"-g \"{input_graph}\" -p {input_parallel}")
|
|
103
|
+
# Return the concatenated command-line options.
|
|
104
|
+
return " ".join(input_options)
|
|
105
|
+
|
|
41
106
|
def execute(self, args) -> bool:
|
|
42
|
-
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
107
|
+
# The mandatory part of the command line (specifying the input, the
|
|
108
|
+
# basename of the index, and the settings file). There are two ways
|
|
109
|
+
# to specify the input: via a single stream or via multiple streams.
|
|
110
|
+
if args.cat_input_files and not args.multi_input_json:
|
|
111
|
+
index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
|
|
112
|
+
f" -i {args.name} -s {args.name}.settings.json"
|
|
113
|
+
f" -F {args.format} -f -")
|
|
114
|
+
elif args.multi_input_json and not args.cat_input_files:
|
|
115
|
+
try:
|
|
116
|
+
input_options = self.get_input_options_for_json(args)
|
|
117
|
+
except self.InvalidInputJson as e:
|
|
118
|
+
log.error(e.error_message)
|
|
119
|
+
log.info("")
|
|
120
|
+
log.info(e.additional_info)
|
|
121
|
+
return False
|
|
122
|
+
index_cmd = (f"{args.index_binary}"
|
|
123
|
+
f" -i {args.name} -s {args.name}.settings.json"
|
|
124
|
+
f" {input_options}")
|
|
125
|
+
else:
|
|
126
|
+
log.error("Specify exactly one of `CAT_INPUT_FILES` (for a "
|
|
127
|
+
"single input stream) or `MULTI_INPUT_JSON` (for "
|
|
128
|
+
"multiple input streams)")
|
|
129
|
+
log.info("")
|
|
130
|
+
log.info("See `qlever index --help` for more information")
|
|
131
|
+
return False
|
|
132
|
+
|
|
133
|
+
# Add remaining options.
|
|
47
134
|
if args.only_pso_and_pos_permutations:
|
|
48
135
|
index_cmd += " --only-pso-and-pos-permutations --no-patterns"
|
|
49
136
|
if not args.use_patterns:
|
|
@@ -120,7 +207,8 @@ class IndexCommand(QleverCommand):
|
|
|
120
207
|
if args.system in Containerize.supported_systems() \
|
|
121
208
|
and args.overwrite_existing:
|
|
122
209
|
if Containerize.is_running(args.system, args.index_container):
|
|
123
|
-
log.info("Another index process is running, trying to stop
|
|
210
|
+
log.info("Another index process is running, trying to stop "
|
|
211
|
+
"it ...")
|
|
124
212
|
log.info("")
|
|
125
213
|
try:
|
|
126
214
|
run_command(f"{args.system} rm -f {args.index_container}")
|