qlever 0.5.15__py3-none-any.whl → 0.5.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.ohm-planet +15 -12
- qlever/Qleverfiles/Qleverfile.osm-planet +17 -15
- qlever/Qleverfiles/Qleverfile.uniprot +2 -3
- qlever/__init__.py +9 -4
- qlever/command.py +6 -5
- qlever/commands/add_text_index.py +47 -28
- qlever/commands/example_queries.py +138 -46
- qlever/commands/extract_queries.py +113 -0
- qlever/commands/index.py +41 -14
- qlever/commands/query.py +32 -3
- qlever/commands/settings.py +110 -0
- qlever/commands/start.py +215 -104
- qlever/commands/stop.py +39 -26
- qlever/commands/system_info.py +7 -3
- qlever/commands/ui.py +16 -4
- qlever/log.py +2 -1
- qlever/qlever_old.py +607 -369
- qlever/qleverfile.py +29 -6
- qlever/util.py +34 -17
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/METADATA +2 -2
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/RECORD +25 -23
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/WHEEL +1 -1
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/LICENSE +0 -0
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.15.dist-info → qlever-0.5.18.dist-info}/top_level.txt +0 -0
|
@@ -17,14 +17,15 @@ from qlever.util import run_command, run_curl_command
|
|
|
17
17
|
|
|
18
18
|
class ExampleQueriesCommand(QleverCommand):
|
|
19
19
|
"""
|
|
20
|
-
Class for
|
|
20
|
+
Class for running a given sequence of example queries and showing
|
|
21
|
+
their processing times and result sizes.
|
|
21
22
|
"""
|
|
22
23
|
|
|
23
24
|
def __init__(self):
|
|
24
25
|
pass
|
|
25
26
|
|
|
26
27
|
def description(self) -> str:
|
|
27
|
-
return "
|
|
28
|
+
return "Run the given queries and show their processing times and result sizes"
|
|
28
29
|
|
|
29
30
|
def should_have_qleverfile(self) -> bool:
|
|
30
31
|
return False
|
|
@@ -51,13 +52,15 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
51
52
|
subparser.add_argument(
|
|
52
53
|
"--get-queries-cmd",
|
|
53
54
|
type=str,
|
|
54
|
-
help="Command to get example queries as TSV "
|
|
55
|
+
help="Command to get example queries as TSV "
|
|
56
|
+
"(description, query)",
|
|
55
57
|
)
|
|
56
58
|
subparser.add_argument(
|
|
57
59
|
"--query-ids",
|
|
58
60
|
type=str,
|
|
59
61
|
default="1-$",
|
|
60
|
-
help="Query IDs as comma-separated list of "
|
|
62
|
+
help="Query IDs as comma-separated list of "
|
|
63
|
+
"ranges (e.g., 1-5,7,12-$)",
|
|
61
64
|
)
|
|
62
65
|
subparser.add_argument(
|
|
63
66
|
"--query-regex",
|
|
@@ -68,7 +71,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
68
71
|
subparser.add_argument(
|
|
69
72
|
"--download-or-count",
|
|
70
73
|
choices=["download", "count"],
|
|
71
|
-
default="
|
|
74
|
+
default="download",
|
|
72
75
|
help="Whether to download the full result "
|
|
73
76
|
"or just compute the size of the result",
|
|
74
77
|
)
|
|
@@ -88,10 +91,14 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
88
91
|
"text/tab-separated-values",
|
|
89
92
|
"text/csv",
|
|
90
93
|
"application/sparql-results+json",
|
|
94
|
+
"application/qlever-results+json",
|
|
91
95
|
"text/turtle",
|
|
96
|
+
"AUTO",
|
|
92
97
|
],
|
|
93
98
|
default="application/sparql-results+json",
|
|
94
|
-
help="Accept header for the SPARQL query"
|
|
99
|
+
help="Accept header for the SPARQL query; AUTO means "
|
|
100
|
+
"`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
|
|
101
|
+
"`application/sparql-results+json` for all others",
|
|
95
102
|
)
|
|
96
103
|
subparser.add_argument(
|
|
97
104
|
"--clear-cache",
|
|
@@ -117,6 +124,13 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
117
124
|
default=14,
|
|
118
125
|
help="Width for printing the result size",
|
|
119
126
|
)
|
|
127
|
+
subparser.add_argument(
|
|
128
|
+
"--add-query-type-to-description",
|
|
129
|
+
action="store_true",
|
|
130
|
+
default=False,
|
|
131
|
+
help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
|
|
132
|
+
"UNKNOWN) to the description",
|
|
133
|
+
)
|
|
120
134
|
subparser.add_argument(
|
|
121
135
|
"--show-query",
|
|
122
136
|
choices=["always", "never", "on-error"],
|
|
@@ -130,19 +144,35 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
130
144
|
help="When showing the query, also show the prefixes",
|
|
131
145
|
)
|
|
132
146
|
|
|
133
|
-
def
|
|
134
|
-
remove_prefixes_cmd =
|
|
147
|
+
def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
    """
    Return `query` pretty-printed by piping it through the
    `sparqling/sparql-formatter` Docker image.

    If `show_prefixes` is false, lines starting with `PREFIX` (matched
    case-insensitively) are removed from the formatted output. Empty lines
    are always removed.

    This is best effort: if the formatting command fails for any reason,
    the error is logged and the original query (with trailing whitespace
    stripped) is returned instead.
    """
    remove_prefixes_cmd = (
        " | sed '/^PREFIX /Id'" if not show_prefixes else ""
    )
    pretty_print_query_cmd = (
        f"echo {shlex.quote(query)}"
        f" | docker run -i --rm sparqling/sparql-formatter"
        f"{remove_prefixes_cmd} | grep -v '^$'"
    )
    try:
        # NOTE(review): `run_command(..., return_output=True)` is assumed
        # to return the command's output as a string -- confirm in
        # `qlever.util`.
        query_pretty_printed = run_command(
            pretty_print_query_cmd, return_output=True
        )
        return query_pretty_printed.rstrip()
    except Exception as e:
        # Bind the exception and use an f-string, so that the log message
        # shows the actual reason instead of the literal text `{e}`.
        log.error(
            "Failed to pretty-print query, "
            f"returning original query: {e}"
        )
        return query.rstrip()
|
|
167
|
+
|
|
168
|
+
def sparql_query_type(self, query: str) -> str:
    """
    Return the type of the given SPARQL query in upper case: `SELECT`,
    `ASK`, `CONSTRUCT`, or `DESCRIBE`. The type is the first of these
    keywords (matched case-insensitively) that is followed by a whitespace
    character. If there is no such keyword, return `UNKNOWN`.
    """
    keyword = re.search(
        r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE
    )
    return keyword.group(1).upper() if keyword else "UNKNOWN"
|
|
146
176
|
|
|
147
177
|
def execute(self, args) -> bool:
|
|
148
178
|
# We can't have both `--remove-offset-and-limit` and `--limit`.
|
|
@@ -150,8 +180,13 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
150
180
|
log.error("Cannot have both --remove-offset-and-limit and --limit")
|
|
151
181
|
return False
|
|
152
182
|
|
|
153
|
-
# If `args.accept` is `application/sparql-results+json
|
|
154
|
-
|
|
183
|
+
# If `args.accept` is `application/sparql-results+json` or
|
|
184
|
+
# `application/qlever-results+json` or `AUTO`, we need `jq`.
|
|
185
|
+
if (
|
|
186
|
+
args.accept == "application/sparql-results+json"
|
|
187
|
+
or args.accept == "application/qlever-results+json"
|
|
188
|
+
or args.accept == "AUTO"
|
|
189
|
+
):
|
|
155
190
|
try:
|
|
156
191
|
subprocess.run(
|
|
157
192
|
"jq --version",
|
|
@@ -174,8 +209,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
174
209
|
return False
|
|
175
210
|
|
|
176
211
|
# Clear cache only works for QLever.
|
|
177
|
-
is_qlever =
|
|
178
|
-
|
|
212
|
+
is_qlever = (
|
|
213
|
+
not args.sparql_endpoint
|
|
214
|
+
or args.sparql_endpoint.startswith("https://qlever")
|
|
179
215
|
)
|
|
180
216
|
if args.clear_cache == "yes" and not is_qlever:
|
|
181
217
|
log.warning("Clearing the cache only works for QLever")
|
|
@@ -193,7 +229,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
193
229
|
if args.query_regex:
|
|
194
230
|
get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
|
|
195
231
|
sparql_endpoint = (
|
|
196
|
-
args.sparql_endpoint
|
|
232
|
+
args.sparql_endpoint
|
|
233
|
+
if args.sparql_endpoint
|
|
234
|
+
else f"localhost:{args.port}"
|
|
197
235
|
)
|
|
198
236
|
self.show(
|
|
199
237
|
f"Obtain queries via: {get_queries_cmd}\n"
|
|
@@ -211,7 +249,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
211
249
|
|
|
212
250
|
# Get the example queries.
|
|
213
251
|
try:
|
|
214
|
-
example_query_lines = run_command(
|
|
252
|
+
example_query_lines = run_command(
|
|
253
|
+
get_queries_cmd, return_output=True
|
|
254
|
+
)
|
|
215
255
|
if len(example_query_lines) == 0:
|
|
216
256
|
log.error("No example queries matching the criteria found")
|
|
217
257
|
return False
|
|
@@ -220,6 +260,12 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
220
260
|
log.error(f"Failed to get example queries: {e}")
|
|
221
261
|
return False
|
|
222
262
|
|
|
263
|
+
# We want the width of the query description to be an uneven number (in
|
|
264
|
+
# case we have to truncate it, in which case we want to have a " ... "
|
|
265
|
+
# in the middle).
|
|
266
|
+
width_query_description_half = args.width_query_description // 2
|
|
267
|
+
width_query_description = 2 * width_query_description_half + 1
|
|
268
|
+
|
|
223
269
|
# Launch the queries one after the other and for each print: the
|
|
224
270
|
# description, the result size (number of rows), and the query
|
|
225
271
|
# processing time (seconds).
|
|
@@ -227,13 +273,16 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
227
273
|
result_sizes = []
|
|
228
274
|
num_failed = 0
|
|
229
275
|
for example_query_line in example_query_lines:
|
|
230
|
-
# Parse description and query.
|
|
276
|
+
# Parse description and query, and determine query type.
|
|
231
277
|
description, query = example_query_line.split("\t")
|
|
232
278
|
if len(query) == 0:
|
|
233
279
|
log.error("Could not parse description and query, line is:")
|
|
234
280
|
log.info("")
|
|
235
281
|
log.info(example_query_line)
|
|
236
282
|
return False
|
|
283
|
+
query_type = self.sparql_query_type(query)
|
|
284
|
+
if args.add_query_type_to_description or args.accept == "AUTO":
|
|
285
|
+
description = f"{description} [{query_type}]"
|
|
237
286
|
|
|
238
287
|
# Clear the cache.
|
|
239
288
|
if args.clear_cache == "yes":
|
|
@@ -267,7 +316,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
267
316
|
# Count query.
|
|
268
317
|
if args.download_or_count == "count":
|
|
269
318
|
# First find out if there is a FROM clause.
|
|
270
|
-
regex_from_clause = re.compile(
|
|
319
|
+
regex_from_clause = re.compile(
|
|
320
|
+
r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
|
|
321
|
+
)
|
|
271
322
|
match_from_clause = re.search(regex_from_clause, query)
|
|
272
323
|
from_clause = " "
|
|
273
324
|
if match_from_clause:
|
|
@@ -296,24 +347,39 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
296
347
|
query = re.sub(r"\s*\.\s*\}", " }", query)
|
|
297
348
|
if args.show_query == "always":
|
|
298
349
|
log.info("")
|
|
299
|
-
|
|
350
|
+
log.info(
|
|
351
|
+
colored(
|
|
352
|
+
self.pretty_printed_query(query, args.show_prefixes),
|
|
353
|
+
"cyan",
|
|
354
|
+
)
|
|
355
|
+
)
|
|
356
|
+
|
|
357
|
+
# Accept header. For "AUTO", use `text/turtle` for CONSTRUCT and DESCRIBE
|
|
358
|
+
# queries and `application/sparql-results+json` for all others.
|
|
359
|
+
accept_header = args.accept
|
|
360
|
+
if accept_header == "AUTO":
|
|
361
|
+
if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
|
|
362
|
+
accept_header = "text/turtle"
|
|
363
|
+
else:
|
|
364
|
+
accept_header = "application/sparql-results+json"
|
|
300
365
|
|
|
301
366
|
# Launch query.
|
|
302
367
|
try:
|
|
303
368
|
curl_cmd = (
|
|
304
369
|
f"curl -s {sparql_endpoint}"
|
|
305
370
|
f' -w "HTTP code: %{{http_code}}\\n"'
|
|
306
|
-
f' -H "Accept: {
|
|
371
|
+
f' -H "Accept: {accept_header}"'
|
|
307
372
|
f" --data-urlencode query={shlex.quote(query)}"
|
|
308
373
|
)
|
|
309
374
|
log.debug(curl_cmd)
|
|
310
375
|
result_file = (
|
|
311
|
-
f"qlever.example_queries.result."
|
|
376
|
+
f"qlever.example_queries.result."
|
|
377
|
+
f"{abs(hash(curl_cmd))}.tmp"
|
|
312
378
|
)
|
|
313
379
|
start_time = time.time()
|
|
314
380
|
http_code = run_curl_command(
|
|
315
381
|
sparql_endpoint,
|
|
316
|
-
headers={"Accept":
|
|
382
|
+
headers={"Accept": accept_header},
|
|
317
383
|
params={"query": query},
|
|
318
384
|
result_file=result_file,
|
|
319
385
|
).strip()
|
|
@@ -323,7 +389,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
323
389
|
else:
|
|
324
390
|
error_msg = {
|
|
325
391
|
"short": f"HTTP code: {http_code}",
|
|
326
|
-
"long": re.sub(
|
|
392
|
+
"long": re.sub(
|
|
393
|
+
r"\s+", " ", Path(result_file).read_text()
|
|
394
|
+
),
|
|
327
395
|
}
|
|
328
396
|
except Exception as e:
|
|
329
397
|
if args.log_level == "DEBUG":
|
|
@@ -336,8 +404,12 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
336
404
|
# Get result size (via the command line, in order to avoid loading
|
|
337
405
|
# a potentially large JSON file into Python, which is slow).
|
|
338
406
|
if error_msg is None:
|
|
339
|
-
# CASE 0:
|
|
340
|
-
|
|
407
|
+
# CASE 0: The result is empty despite a 200 HTTP code (not a
|
|
408
|
+
# problem for CONSTRUCT and DESCRIBE queries).
|
|
409
|
+
if Path(result_file).stat().st_size == 0 and (
|
|
410
|
+
not query_type == "CONSTRUCT"
|
|
411
|
+
and not query_type == "DESCRIBE"
|
|
412
|
+
):
|
|
341
413
|
result_size = 0
|
|
342
414
|
error_msg = {
|
|
343
415
|
"short": "Empty result",
|
|
@@ -347,7 +419,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
347
419
|
|
|
348
420
|
# CASE 1: Just counting the size of the result (TSV or JSON).
|
|
349
421
|
elif args.download_or_count == "count":
|
|
350
|
-
if
|
|
422
|
+
if accept_header == "text/tab-separated-values":
|
|
351
423
|
result_size = run_command(
|
|
352
424
|
f"sed 1d {result_file}", return_output=True
|
|
353
425
|
)
|
|
@@ -370,21 +442,28 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
370
442
|
# CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
|
|
371
443
|
else:
|
|
372
444
|
if (
|
|
373
|
-
|
|
374
|
-
or
|
|
445
|
+
accept_header == "text/tab-separated-values"
|
|
446
|
+
or accept_header == "text/csv"
|
|
375
447
|
):
|
|
376
448
|
result_size = run_command(
|
|
377
449
|
f"sed 1d {result_file} | wc -l", return_output=True
|
|
378
450
|
)
|
|
379
|
-
elif
|
|
451
|
+
elif accept_header == "text/turtle":
|
|
380
452
|
result_size = run_command(
|
|
381
|
-
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
|
|
453
|
+
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
|
|
454
|
+
f"{result_file} | wc -l",
|
|
455
|
+
return_output=True,
|
|
456
|
+
)
|
|
457
|
+
elif accept_header == "application/qlever-results+json":
|
|
458
|
+
result_size = run_command(
|
|
459
|
+
f'jq -r ".resultsize" {result_file}',
|
|
382
460
|
return_output=True,
|
|
383
461
|
)
|
|
384
462
|
else:
|
|
385
463
|
try:
|
|
386
464
|
result_size = run_command(
|
|
387
|
-
f'jq -r ".results.bindings | length"'
|
|
465
|
+
f'jq -r ".results.bindings | length"'
|
|
466
|
+
f" {result_file}",
|
|
388
467
|
return_output=True,
|
|
389
468
|
)
|
|
390
469
|
except Exception as e:
|
|
@@ -398,13 +477,16 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
398
477
|
Path(result_file).unlink(missing_ok=True)
|
|
399
478
|
|
|
400
479
|
# Print description, time, result in tabular form.
|
|
401
|
-
if len(description) >
|
|
402
|
-
description =
|
|
403
|
-
|
|
480
|
+
if len(description) > width_query_description:
|
|
481
|
+
description = (
|
|
482
|
+
description[: width_query_description_half - 2]
|
|
483
|
+
+ " ... "
|
|
484
|
+
+ description[-width_query_description_half + 2 :]
|
|
485
|
+
)
|
|
404
486
|
if error_msg is None:
|
|
405
487
|
result_size = int(result_size)
|
|
406
488
|
log.info(
|
|
407
|
-
f"{description:<{
|
|
489
|
+
f"{description:<{width_query_description}} "
|
|
408
490
|
f"{time_seconds:6.2f} s "
|
|
409
491
|
f"{result_size:>{args.width_result_size},}"
|
|
410
492
|
)
|
|
@@ -419,18 +501,28 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
419
501
|
and args.show_query != "on-error"
|
|
420
502
|
):
|
|
421
503
|
error_msg["long"] = (
|
|
422
|
-
error_msg["long"][: args.width_error_message - 3]
|
|
504
|
+
error_msg["long"][: args.width_error_message - 3]
|
|
505
|
+
+ "..."
|
|
423
506
|
)
|
|
424
|
-
seperator_short_long =
|
|
507
|
+
seperator_short_long = (
|
|
508
|
+
"\n" if args.show_query == "on-error" else " "
|
|
509
|
+
)
|
|
425
510
|
log.info(
|
|
426
|
-
f"{description:<{
|
|
511
|
+
f"{description:<{width_query_description}} "
|
|
427
512
|
f"{colored('FAILED ', 'red')}"
|
|
428
513
|
f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
|
|
429
514
|
f"{seperator_short_long}"
|
|
430
515
|
f"{colored(error_msg['long'], 'red')}"
|
|
431
516
|
)
|
|
432
517
|
if args.show_query == "on-error":
|
|
433
|
-
|
|
518
|
+
log.info(
|
|
519
|
+
colored(
|
|
520
|
+
self.pretty_printed_query(
|
|
521
|
+
query, args.show_prefixes
|
|
522
|
+
),
|
|
523
|
+
"cyan",
|
|
524
|
+
)
|
|
525
|
+
)
|
|
434
526
|
log.info("")
|
|
435
527
|
|
|
436
528
|
# Check that each query has a time and a result size, or it failed.
|
|
@@ -450,19 +542,19 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
450
542
|
description = f"TOTAL for {n} {query_or_queries}"
|
|
451
543
|
log.info("")
|
|
452
544
|
log.info(
|
|
453
|
-
f"{description:<{
|
|
545
|
+
f"{description:<{width_query_description}} "
|
|
454
546
|
f"{total_query_time:6.2f} s "
|
|
455
547
|
f"{total_result_size:>14,}"
|
|
456
548
|
)
|
|
457
549
|
description = f"AVERAGE for {n} {query_or_queries}"
|
|
458
550
|
log.info(
|
|
459
|
-
f"{description:<{
|
|
551
|
+
f"{description:<{width_query_description}} "
|
|
460
552
|
f"{average_query_time:6.2f} s "
|
|
461
553
|
f"{average_result_size:>14,}"
|
|
462
554
|
)
|
|
463
555
|
description = f"MEDIAN for {n} {query_or_queries}"
|
|
464
556
|
log.info(
|
|
465
|
-
f"{description:<{
|
|
557
|
+
f"{description:<{width_query_description}} "
|
|
466
558
|
f"{median_query_time:6.2f} s "
|
|
467
559
|
f"{median_result_size:>14,}"
|
|
468
560
|
)
|
|
@@ -476,7 +568,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
476
568
|
num_failed_string += " [all]"
|
|
477
569
|
log.info(
|
|
478
570
|
colored(
|
|
479
|
-
f"{description:<{
|
|
571
|
+
f"{description:<{width_query_description}} "
|
|
480
572
|
f"{num_failed:>24}",
|
|
481
573
|
"red",
|
|
482
574
|
)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from qlever.command import QleverCommand
|
|
6
|
+
from qlever.log import log
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ExtractQueriesCommand(QleverCommand):
    """
    Class for executing the `extract-queries` command.

    The command scans a server log line by line, collects the text that
    follows each "Processing the following SPARQL query" line up to the
    next timestamped log line, and writes one `description<TAB>query` line
    per query to the output file.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Extract all SPARQL queries from the server log"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--description-base",
            type=str,
            default="Log extract",
            help="Base name for the query descriptions"
            " (default: `Log extract`)",
        )
        subparser.add_argument(
            "--log-file",
            type=str,
            help="Name of the log file to extract queries from"
            " (default: `<name>.server-log.txt`)",
        )
        subparser.add_argument(
            "--output-file",
            type=str,
            default="log-queries.txt",
            help="Output file for the extracted queries (default: `log-queries.txt`)",
        )

    def execute(self, args) -> bool:
        """
        Extract the queries from the log file and write them to
        `args.output_file`. Returns `True` on success (also when only
        showing what would be done). Errors from opening or reading the
        files are not caught here and propagate to the caller.
        """
        # Show what the command does.
        if args.log_file is not None:
            log_file_name = args.log_file
        else:
            log_file_name = f"{args.name}.server-log.txt"
        self.show(
            f"Extract SPARQL queries from `{log_file_name}`"
            f" and write them to `{args.output_file}`",
            only_show=args.show,
        )
        if args.show:
            return True

        # Regex for log entries of the form
        # 2025-01-14 04:47:44.950 - INFO
        log_line_regex = re.compile(
            r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - [A-Z]+:"
        )
        # An "Alive check" message contains a tag, which we use as the base
        # name of the query description.
        alive_check_regex = re.compile(r"Alive check with message \"(.*)\"")
        # Lines that consist only of a comment are dropped from the query.
        comment_line_regex = re.compile(r"^\s*#")

        query = None
        description_base = args.description_base
        description_base_count = {}
        tsv_line_short_width = 150

        # Read the log file line by line. The context manager makes sure
        # that both files are closed even if an exception occurs.
        with open(log_file_name, "r") as log_file, open(
            args.output_file, "w"
        ) as queries_file:
            for line in log_file:
                match = alive_check_regex.search(line)
                if match:
                    description_base = match.group(1)
                    continue

                # A new query in the log. Queries are numbered separately
                # for each description base.
                if "Processing the following SPARQL query" in line:
                    query = []
                    query_index = (
                        description_base_count.get(description_base, 0) + 1
                    )
                    description_base_count[description_base] = query_index
                    continue

                # If we have started a query: extend until we meet the next
                # log line, then push the query. Remove comments.
                #
                # NOTE: A query that is still open when the file ends is
                # not written, because a query is only pushed when the next
                # log line is encountered.
                if query is not None:
                    if not log_line_regex.match(line):
                        if not comment_line_regex.match(line):
                            line = re.sub(r" #.*", "", line)
                            query.append(line)
                    else:
                        query = re.sub(r"\s+", " ", "\n".join(query)).strip()
                        description = (
                            f"{description_base}, Query #{query_index}"
                        )
                        tsv_line = f"{description}\t{query}"
                        # Log only a shortened version of the line, but
                        # write the full line to the output file.
                        tsv_line_short = (
                            tsv_line
                            if len(tsv_line) < tsv_line_short_width
                            else tsv_line[:tsv_line_short_width] + "..."
                        )
                        log.info(tsv_line_short)
                        print(tsv_line, file=queries_file)
                        query = None

        return True
|
qlever/commands/index.py
CHANGED
|
@@ -2,13 +2,17 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import glob
|
|
4
4
|
import json
|
|
5
|
-
import shlex
|
|
6
5
|
import re
|
|
6
|
+
import shlex
|
|
7
7
|
|
|
8
8
|
from qlever.command import QleverCommand
|
|
9
9
|
from qlever.containerize import Containerize
|
|
10
10
|
from qlever.log import log
|
|
11
|
-
from qlever.util import
|
|
11
|
+
from qlever.util import (
|
|
12
|
+
get_existing_index_files,
|
|
13
|
+
get_total_file_size,
|
|
14
|
+
run_command,
|
|
15
|
+
)
|
|
12
16
|
|
|
13
17
|
|
|
14
18
|
class IndexCommand(QleverCommand):
|
|
@@ -36,9 +40,11 @@ class IndexCommand(QleverCommand):
|
|
|
36
40
|
"settings_json",
|
|
37
41
|
"index_binary",
|
|
38
42
|
"only_pso_and_pos_permutations",
|
|
43
|
+
"ulimit",
|
|
39
44
|
"use_patterns",
|
|
40
45
|
"text_index",
|
|
41
46
|
"stxxl_memory",
|
|
47
|
+
"parser_buffer_size",
|
|
42
48
|
],
|
|
43
49
|
"runtime": ["system", "image", "index_container"],
|
|
44
50
|
}
|
|
@@ -48,7 +54,7 @@ class IndexCommand(QleverCommand):
|
|
|
48
54
|
"--overwrite-existing",
|
|
49
55
|
action="store_true",
|
|
50
56
|
default=False,
|
|
51
|
-
help="Overwrite an existing index, think twice before using
|
|
57
|
+
help="Overwrite an existing index, think twice before using this",
|
|
52
58
|
)
|
|
53
59
|
|
|
54
60
|
# Exception for invalid JSON.
|
|
@@ -76,7 +82,8 @@ class IndexCommand(QleverCommand):
|
|
|
76
82
|
# Check that it is an array of length at least one.
|
|
77
83
|
if not isinstance(input_specs, list):
|
|
78
84
|
raise self.InvalidInputJson(
|
|
79
|
-
"`MULTI_INPUT_JSON` must be a JSON array",
|
|
85
|
+
"`MULTI_INPUT_JSON` must be a JSON array",
|
|
86
|
+
args.multi_input_json,
|
|
80
87
|
)
|
|
81
88
|
if len(input_specs) == 0:
|
|
82
89
|
raise self.InvalidInputJson(
|
|
@@ -90,13 +97,15 @@ class IndexCommand(QleverCommand):
|
|
|
90
97
|
# Check that `input_spec` is a dictionary.
|
|
91
98
|
if not isinstance(input_spec, dict):
|
|
92
99
|
raise self.InvalidInputJson(
|
|
93
|
-
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
|
|
100
|
+
f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
|
|
101
|
+
"object",
|
|
94
102
|
input_spec,
|
|
95
103
|
)
|
|
96
104
|
# For each `input_spec`, we must have a command.
|
|
97
105
|
if "cmd" not in input_spec:
|
|
98
106
|
raise self.InvalidInputJson(
|
|
99
|
-
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
107
|
+
f"Element {i} in `MULTI_INPUT_JSON` must contain a "
|
|
108
|
+
"key `cmd`",
|
|
100
109
|
input_spec,
|
|
101
110
|
)
|
|
102
111
|
# If the command contains a `{}` placeholder, we need a `for-each`
|
|
@@ -147,7 +156,7 @@ class IndexCommand(QleverCommand):
|
|
|
147
156
|
raise self.InvalidInputJson(
|
|
148
157
|
f"Element {i} in `MULTI_INPUT_JSON` must only contain "
|
|
149
158
|
"the keys `format`, `graph`, and `parallel`. Contains "
|
|
150
|
-
"extra keys {extra_keys}.",
|
|
159
|
+
f"extra keys {extra_keys}.",
|
|
151
160
|
input_spec,
|
|
152
161
|
)
|
|
153
162
|
# Add the command-line options for this input stream. We use
|
|
@@ -204,20 +213,31 @@ class IndexCommand(QleverCommand):
|
|
|
204
213
|
index_cmd += " --only-pso-and-pos-permutations --no-patterns"
|
|
205
214
|
if not args.use_patterns:
|
|
206
215
|
index_cmd += " --no-patterns"
|
|
207
|
-
if args.text_index in [
|
|
216
|
+
if args.text_index in [
|
|
217
|
+
"from_text_records",
|
|
218
|
+
"from_text_records_and_literals",
|
|
219
|
+
]:
|
|
208
220
|
index_cmd += (
|
|
209
|
-
f" -w {args.name}.wordsfile.tsv"
|
|
221
|
+
f" -w {args.name}.wordsfile.tsv"
|
|
222
|
+
f" -d {args.name}.docsfile.tsv"
|
|
210
223
|
)
|
|
211
|
-
if args.text_index in [
|
|
224
|
+
if args.text_index in [
|
|
225
|
+
"from_literals",
|
|
226
|
+
"from_text_records_and_literals",
|
|
227
|
+
]:
|
|
212
228
|
index_cmd += " --text-words-from-literals"
|
|
213
229
|
if args.stxxl_memory:
|
|
214
230
|
index_cmd += f" --stxxl-memory {args.stxxl_memory}"
|
|
231
|
+
if args.parser_buffer_size:
|
|
232
|
+
index_cmd += f" --parser-buffer-size {args.parser_buffer_size}"
|
|
215
233
|
index_cmd += f" | tee {args.name}.index-log.txt"
|
|
216
234
|
|
|
217
235
|
# If the total file size is larger than 10 GB, set ulimit (such that a
|
|
218
236
|
# large number of open files is allowed).
|
|
219
237
|
total_file_size = get_total_file_size(shlex.split(args.input_files))
|
|
220
|
-
if
|
|
238
|
+
if args.ulimit is not None:
|
|
239
|
+
index_cmd = f"ulimit -Sn {args.ulimit}; {index_cmd}"
|
|
240
|
+
elif total_file_size > 1e10:
|
|
221
241
|
index_cmd = f"ulimit -Sn 1048576; {index_cmd}"
|
|
222
242
|
|
|
223
243
|
# Run the command in a container (if so desired).
|
|
@@ -234,7 +254,8 @@ class IndexCommand(QleverCommand):
|
|
|
234
254
|
|
|
235
255
|
# Command for writing the settings JSON to a file.
|
|
236
256
|
settings_json_cmd = (
|
|
237
|
-
f"echo {shlex.quote(args.settings_json)} "
|
|
257
|
+
f"echo {shlex.quote(args.settings_json)} "
|
|
258
|
+
f"> {args.name}.settings.json"
|
|
238
259
|
)
|
|
239
260
|
|
|
240
261
|
# Show the command line.
|
|
@@ -279,9 +300,15 @@ class IndexCommand(QleverCommand):
|
|
|
279
300
|
return False
|
|
280
301
|
|
|
281
302
|
# Remove already existing container.
|
|
282
|
-
if
|
|
303
|
+
if (
|
|
304
|
+
args.system in Containerize.supported_systems()
|
|
305
|
+
and args.overwrite_existing
|
|
306
|
+
):
|
|
283
307
|
if Containerize.is_running(args.system, args.index_container):
|
|
284
|
-
log.info(
|
|
308
|
+
log.info(
|
|
309
|
+
"Another index process is running, trying to stop "
|
|
310
|
+
"it ..."
|
|
311
|
+
)
|
|
285
312
|
log.info("")
|
|
286
313
|
try:
|
|
287
314
|
run_command(f"{args.system} rm -f {args.index_container}")
|