qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.dblp +1 -1
- qlever/Qleverfiles/Qleverfile.pubchem +102 -26
- qlever/Qleverfiles/Qleverfile.uniprot +48 -16
- qlever/commands/add_text_index.py +2 -1
- qlever/commands/cache_stats.py +1 -1
- qlever/commands/clear_cache.py +4 -2
- qlever/commands/example_queries.py +236 -75
- qlever/commands/extract_queries.py +113 -0
- qlever/commands/get_data.py +1 -1
- qlever/commands/index.py +51 -11
- qlever/commands/index_stats.py +90 -59
- qlever/commands/log.py +12 -2
- qlever/commands/query.py +66 -27
- qlever/commands/settings.py +110 -0
- qlever/commands/setup_config.py +1 -1
- qlever/commands/start.py +222 -105
- qlever/commands/status.py +2 -1
- qlever/commands/stop.py +43 -32
- qlever/commands/system_info.py +1 -1
- qlever/commands/ui.py +3 -1
- qlever/commands/warmup.py +1 -1
- qlever/qlever_main.py +16 -9
- qlever/util.py +34 -17
- {qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/METADATA +2 -2
- qlever-0.5.17.dist-info/RECORD +54 -0
- {qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/WHEEL +1 -1
- qlever-0.5.12.dist-info/RECORD +0 -52
- {qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/LICENSE +0 -0
- {qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/top_level.txt +0 -0
|
@@ -21,10 +21,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
21
21
|
"""
|
|
22
22
|
|
|
23
23
|
def __init__(self):
|
|
24
|
-
|
|
25
|
-
"virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
|
|
26
|
-
"qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
|
|
27
|
-
}
|
|
24
|
+
pass
|
|
28
25
|
|
|
29
26
|
def description(self) -> str:
|
|
30
27
|
return "Show how much of the cache is currently being used"
|
|
@@ -41,19 +38,28 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
41
38
|
)
|
|
42
39
|
subparser.add_argument(
|
|
43
40
|
"--sparql-endpoint-preset",
|
|
44
|
-
choices=
|
|
45
|
-
|
|
41
|
+
choices=[
|
|
42
|
+
"https://qlever.dev/api/wikidata",
|
|
43
|
+
"https://qlever.dev/api/uniprot",
|
|
44
|
+
"https://qlever.dev/api/pubchem",
|
|
45
|
+
"https://qlever.dev/api/osm-planet",
|
|
46
|
+
"https://wikidata.demo.openlinksw.com/sparql",
|
|
47
|
+
"https://sparql.uniprot.org/sparql",
|
|
48
|
+
],
|
|
49
|
+
help="SPARQL endpoint from fixed list (to save typing)",
|
|
46
50
|
)
|
|
47
51
|
subparser.add_argument(
|
|
48
52
|
"--get-queries-cmd",
|
|
49
53
|
type=str,
|
|
50
|
-
help="Command to get example queries as TSV "
|
|
54
|
+
help="Command to get example queries as TSV "
|
|
55
|
+
"(description, query)",
|
|
51
56
|
)
|
|
52
57
|
subparser.add_argument(
|
|
53
58
|
"--query-ids",
|
|
54
59
|
type=str,
|
|
55
60
|
default="1-$",
|
|
56
|
-
help="Query IDs as comma-separated list of "
|
|
61
|
+
help="Query IDs as comma-separated list of "
|
|
62
|
+
"ranges (e.g., 1-5,7,12-$)",
|
|
57
63
|
)
|
|
58
64
|
subparser.add_argument(
|
|
59
65
|
"--query-regex",
|
|
@@ -64,7 +70,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
64
70
|
subparser.add_argument(
|
|
65
71
|
"--download-or-count",
|
|
66
72
|
choices=["download", "count"],
|
|
67
|
-
default="
|
|
73
|
+
default="download",
|
|
68
74
|
help="Whether to download the full result "
|
|
69
75
|
"or just compute the size of the result",
|
|
70
76
|
)
|
|
@@ -84,10 +90,14 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
84
90
|
"text/tab-separated-values",
|
|
85
91
|
"text/csv",
|
|
86
92
|
"application/sparql-results+json",
|
|
93
|
+
"application/qlever-results+json",
|
|
87
94
|
"text/turtle",
|
|
95
|
+
"AUTO",
|
|
88
96
|
],
|
|
89
|
-
default="
|
|
90
|
-
help="Accept header for the SPARQL query"
|
|
97
|
+
default="application/sparql-results+json",
|
|
98
|
+
help="Accept header for the SPARQL query; AUTO means "
|
|
99
|
+
"`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
|
|
100
|
+
"`application/sparql-results+json` for all others",
|
|
91
101
|
)
|
|
92
102
|
subparser.add_argument(
|
|
93
103
|
"--clear-cache",
|
|
@@ -98,7 +108,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
98
108
|
subparser.add_argument(
|
|
99
109
|
"--width-query-description",
|
|
100
110
|
type=int,
|
|
101
|
-
default=
|
|
111
|
+
default=70,
|
|
102
112
|
help="Width for printing the query description",
|
|
103
113
|
)
|
|
104
114
|
subparser.add_argument(
|
|
@@ -113,6 +123,55 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
113
123
|
default=14,
|
|
114
124
|
help="Width for printing the result size",
|
|
115
125
|
)
|
|
126
|
+
subparser.add_argument(
|
|
127
|
+
"--add-query-type-to-description",
|
|
128
|
+
action="store_true",
|
|
129
|
+
default=False,
|
|
130
|
+
help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
|
|
131
|
+
"UNKNOWN) to the description",
|
|
132
|
+
)
|
|
133
|
+
subparser.add_argument(
|
|
134
|
+
"--show-query",
|
|
135
|
+
choices=["always", "never", "on-error"],
|
|
136
|
+
default="never",
|
|
137
|
+
help="Show the queries that will be executed (always, never, on error)",
|
|
138
|
+
)
|
|
139
|
+
subparser.add_argument(
|
|
140
|
+
"--show-prefixes",
|
|
141
|
+
action="store_true",
|
|
142
|
+
default=False,
|
|
143
|
+
help="When showing the query, also show the prefixes",
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
    """
    Return a pretty-printed version of `query`.

    The query is piped through the `sparqling/sparql-formatter` Docker
    image. Empty lines are removed from the formatter output and, unless
    `show_prefixes` is true, so are lines starting with `PREFIX `
    (matched case-insensitively by `sed`).

    If the formatting command fails, the error is logged and the
    original query (with trailing whitespace stripped) is returned, so
    this method is best-effort and does not propagate the failure.
    """
    # Only filter out the PREFIX lines when the caller does not want them.
    remove_prefixes_cmd = (
        " | sed '/^PREFIX /Id'" if not show_prefixes else ""
    )
    # `shlex.quote` keeps the query a single shell word for `echo`.
    pretty_print_query_cmd = (
        f"echo {shlex.quote(query)}"
        f" | docker run -i --rm sparqling/sparql-formatter"
        f"{remove_prefixes_cmd} | grep -v '^$'"
    )
    try:
        query_pretty_printed = run_command(
            pretty_print_query_cmd, return_output=True
        )
        return query_pretty_printed.rstrip()
    except Exception as e:
        # Bind the exception and interpolate it, so that the log shows
        # the actual reason instead of the literal text "{e}".
        log.error(
            "Failed to pretty-print query, "
            f"returning original query: {e}"
        )
        return query.rstrip()
|
|
166
|
+
|
|
167
|
+
def sparql_query_type(self, query: str) -> str:
    """
    Return the form of the given SPARQL query as an upper-case string.

    The result is the first of `SELECT`, `ASK`, `CONSTRUCT`, `DESCRIBE`
    found in `query` (case-insensitive), or `UNKNOWN` if none is found.

    A keyword is only accepted if it is
    - not the tail of a longer token: it must not be directly preceded
      by a word character or by one of `? $ : / # < . -` (variables,
      prefixed names, IRIs), and
    - directly followed by whitespace or by one of `{ * ( ? $ <`, so
      that compact spellings like `ASK{ ... }` or `SELECT*` are
      recognized as well.

    NOTE: Comments and string literals are not parsed, so a keyword
    inside them that comes before the actual query form is still
    reported.
    """
    match = re.search(
        r"(?<![\w?$:/#<.-])(SELECT|ASK|CONSTRUCT|DESCRIBE)(?=[\s{*(?$<])",
        query,
        re.IGNORECASE,
    )
    if match:
        return match.group(1).upper()
    return "UNKNOWN"
|
|
116
175
|
|
|
117
176
|
def execute(self, args) -> bool:
|
|
118
177
|
# We can't have both `--remove-offset-and-limit` and `--limit`.
|
|
@@ -120,8 +179,13 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
120
179
|
log.error("Cannot have both --remove-offset-and-limit and --limit")
|
|
121
180
|
return False
|
|
122
181
|
|
|
123
|
-
# If `args.accept` is `application/sparql-results+json
|
|
124
|
-
|
|
182
|
+
# If `args.accept` is `application/sparql-results+json` or
|
|
183
|
+
# `application/qlever-results+json` or `AUTO`, we need `jq`.
|
|
184
|
+
if (
|
|
185
|
+
args.accept == "application/sparql-results+json"
|
|
186
|
+
or args.accept == "application/qlever-results+json"
|
|
187
|
+
or args.accept == "AUTO"
|
|
188
|
+
):
|
|
125
189
|
try:
|
|
126
190
|
subprocess.run(
|
|
127
191
|
"jq --version",
|
|
@@ -135,9 +199,8 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
135
199
|
return False
|
|
136
200
|
|
|
137
201
|
# Handle shortcuts for SPARQL endpoint.
|
|
138
|
-
if args.sparql_endpoint_preset
|
|
139
|
-
args.sparql_endpoint =
|
|
140
|
-
args.ui_config = args.sparql_endpoint_preset.split("-")[1]
|
|
202
|
+
if args.sparql_endpoint_preset:
|
|
203
|
+
args.sparql_endpoint = args.sparql_endpoint_preset
|
|
141
204
|
|
|
142
205
|
# Limit only works with full result.
|
|
143
206
|
if args.limit and args.download_or_count == "count":
|
|
@@ -145,8 +208,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
145
208
|
return False
|
|
146
209
|
|
|
147
210
|
# Clear cache only works for QLever.
|
|
148
|
-
is_qlever =
|
|
149
|
-
|
|
211
|
+
is_qlever = (
|
|
212
|
+
not args.sparql_endpoint
|
|
213
|
+
or args.sparql_endpoint.startswith("https://qlever")
|
|
150
214
|
)
|
|
151
215
|
if args.clear_cache == "yes" and not is_qlever:
|
|
152
216
|
log.warning("Clearing the cache only works for QLever")
|
|
@@ -164,7 +228,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
164
228
|
if args.query_regex:
|
|
165
229
|
get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
|
|
166
230
|
sparql_endpoint = (
|
|
167
|
-
args.sparql_endpoint
|
|
231
|
+
args.sparql_endpoint
|
|
232
|
+
if args.sparql_endpoint
|
|
233
|
+
else f"localhost:{args.port}"
|
|
168
234
|
)
|
|
169
235
|
self.show(
|
|
170
236
|
f"Obtain queries via: {get_queries_cmd}\n"
|
|
@@ -178,11 +244,13 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
178
244
|
only_show=args.show,
|
|
179
245
|
)
|
|
180
246
|
if args.show:
|
|
181
|
-
return
|
|
247
|
+
return True
|
|
182
248
|
|
|
183
249
|
# Get the example queries.
|
|
184
250
|
try:
|
|
185
|
-
example_query_lines = run_command(
|
|
251
|
+
example_query_lines = run_command(
|
|
252
|
+
get_queries_cmd, return_output=True
|
|
253
|
+
)
|
|
186
254
|
if len(example_query_lines) == 0:
|
|
187
255
|
log.error("No example queries matching the criteria found")
|
|
188
256
|
return False
|
|
@@ -191,6 +259,12 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
191
259
|
log.error(f"Failed to get example queries: {e}")
|
|
192
260
|
return False
|
|
193
261
|
|
|
262
|
+
# We want the width of the query description to be an uneven number (in
|
|
263
|
+
# case we have to truncate it, in which case we want to have a " ... "
|
|
264
|
+
# in the middle).
|
|
265
|
+
width_query_description_half = args.width_query_description // 2
|
|
266
|
+
width_query_description = 2 * width_query_description_half + 1
|
|
267
|
+
|
|
194
268
|
# Launch the queries one after the other and for each print: the
|
|
195
269
|
# description, the result size (number of rows), and the query
|
|
196
270
|
# processing time (seconds).
|
|
@@ -198,20 +272,26 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
198
272
|
result_sizes = []
|
|
199
273
|
num_failed = 0
|
|
200
274
|
for example_query_line in example_query_lines:
|
|
201
|
-
# Parse description and query.
|
|
275
|
+
# Parse description and query, and determine query type.
|
|
202
276
|
description, query = example_query_line.split("\t")
|
|
203
277
|
if len(query) == 0:
|
|
204
278
|
log.error("Could not parse description and query, line is:")
|
|
205
279
|
log.info("")
|
|
206
280
|
log.info(example_query_line)
|
|
207
281
|
return False
|
|
282
|
+
query_type = self.sparql_query_type(query)
|
|
283
|
+
if args.add_query_type_to_description or args.accept == "AUTO":
|
|
284
|
+
description = f"{description} [{query_type}]"
|
|
208
285
|
|
|
209
286
|
# Clear the cache.
|
|
210
287
|
if args.clear_cache == "yes":
|
|
211
288
|
args.server_url = sparql_endpoint
|
|
212
289
|
args.complete = False
|
|
290
|
+
clear_cache_successful = False
|
|
213
291
|
with mute_log():
|
|
214
|
-
ClearCacheCommand().execute(args)
|
|
292
|
+
clear_cache_successful = ClearCacheCommand().execute(args)
|
|
293
|
+
if not clear_cache_successful:
|
|
294
|
+
log.warn("Failed to clear the cache")
|
|
215
295
|
|
|
216
296
|
# Remove OFFSET and LIMIT (after the last closing bracket).
|
|
217
297
|
if args.remove_offset_and_limit or args.limit:
|
|
@@ -235,7 +315,9 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
235
315
|
# Count query.
|
|
236
316
|
if args.download_or_count == "count":
|
|
237
317
|
# First find out if there is a FROM clause.
|
|
238
|
-
regex_from_clause = re.compile(
|
|
318
|
+
regex_from_clause = re.compile(
|
|
319
|
+
r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
|
|
320
|
+
)
|
|
239
321
|
match_from_clause = re.search(regex_from_clause, query)
|
|
240
322
|
from_clause = " "
|
|
241
323
|
if match_from_clause:
|
|
@@ -262,87 +344,148 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
262
344
|
# A bit of pretty-printing.
|
|
263
345
|
query = re.sub(r"\s+", " ", query)
|
|
264
346
|
query = re.sub(r"\s*\.\s*\}", " }", query)
|
|
347
|
+
if args.show_query == "always":
|
|
348
|
+
log.info("")
|
|
349
|
+
log.info(
|
|
350
|
+
colored(
|
|
351
|
+
self.pretty_printed_query(query, args.show_prefixes),
|
|
352
|
+
"cyan",
|
|
353
|
+
)
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
# Accept header. For "AUTO", use `text/turtle` for CONSTRUCT and DESCRIBE
|
|
357
|
+
# queries and `application/sparql-results+json` for all others.
|
|
358
|
+
accept_header = args.accept
|
|
359
|
+
if accept_header == "AUTO":
|
|
360
|
+
if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
|
|
361
|
+
accept_header = "text/turtle"
|
|
362
|
+
else:
|
|
363
|
+
accept_header = "application/sparql-results+json"
|
|
265
364
|
|
|
266
365
|
# Launch query.
|
|
267
366
|
try:
|
|
268
367
|
curl_cmd = (
|
|
269
368
|
f"curl -s {sparql_endpoint}"
|
|
270
369
|
f' -w "HTTP code: %{{http_code}}\\n"'
|
|
271
|
-
f' -H "Accept: {
|
|
370
|
+
f' -H "Accept: {accept_header}"'
|
|
272
371
|
f" --data-urlencode query={shlex.quote(query)}"
|
|
273
372
|
)
|
|
274
373
|
log.debug(curl_cmd)
|
|
275
374
|
result_file = (
|
|
276
|
-
f"qlever.example_queries.result."
|
|
375
|
+
f"qlever.example_queries.result."
|
|
376
|
+
f"{abs(hash(curl_cmd))}.tmp"
|
|
277
377
|
)
|
|
278
378
|
start_time = time.time()
|
|
279
379
|
http_code = run_curl_command(
|
|
280
380
|
sparql_endpoint,
|
|
281
|
-
headers={"Accept":
|
|
381
|
+
headers={"Accept": accept_header},
|
|
282
382
|
params={"query": query},
|
|
283
383
|
result_file=result_file,
|
|
284
384
|
).strip()
|
|
285
|
-
if http_code
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
385
|
+
if http_code == "200":
|
|
386
|
+
time_seconds = time.time() - start_time
|
|
387
|
+
error_msg = None
|
|
388
|
+
else:
|
|
389
|
+
error_msg = {
|
|
390
|
+
"short": f"HTTP code: {http_code}",
|
|
391
|
+
"long": re.sub(
|
|
392
|
+
r"\s+", " ", Path(result_file).read_text()
|
|
393
|
+
),
|
|
394
|
+
}
|
|
291
395
|
except Exception as e:
|
|
292
396
|
if args.log_level == "DEBUG":
|
|
293
397
|
traceback.print_exc()
|
|
294
|
-
error_msg =
|
|
398
|
+
error_msg = {
|
|
399
|
+
"short": "Exception",
|
|
400
|
+
"long": re.sub(r"\s+", " ", str(e)),
|
|
401
|
+
}
|
|
295
402
|
|
|
296
403
|
# Get result size (via the command line, in order to avoid loading
|
|
297
404
|
# a potentially large JSON file into Python, which is slow).
|
|
298
405
|
if error_msg is None:
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
406
|
+
# CASE 0: The result is empty despite a 200 HTTP code (not a
|
|
407
|
+
# problem for CONSTRUCT and DESCRIBE queries).
|
|
408
|
+
if Path(result_file).stat().st_size == 0 and (
|
|
409
|
+
not query_type == "CONSTRUCT"
|
|
410
|
+
and not query_type == "DESCRIBE"
|
|
411
|
+
):
|
|
412
|
+
result_size = 0
|
|
413
|
+
error_msg = {
|
|
414
|
+
"short": "Empty result",
|
|
415
|
+
"long": "curl returned with code 200, "
|
|
416
|
+
"but the result is empty",
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
# CASE 1: Just counting the size of the result (TSV or JSON).
|
|
420
|
+
elif args.download_or_count == "count":
|
|
421
|
+
if accept_header == "text/tab-separated-values":
|
|
422
|
+
result_size = run_command(
|
|
423
|
+
f"sed 1d {result_file}", return_output=True
|
|
424
|
+
)
|
|
425
|
+
else:
|
|
426
|
+
try:
|
|
306
427
|
result_size = run_command(
|
|
307
428
|
f'jq -r ".results.bindings[0]'
|
|
308
429
|
f" | to_entries[0].value.value"
|
|
309
430
|
f' | tonumber" {result_file}',
|
|
310
431
|
return_output=True,
|
|
311
432
|
)
|
|
433
|
+
except Exception as e:
|
|
434
|
+
error_msg = {
|
|
435
|
+
"short": "Malformed JSON",
|
|
436
|
+
"long": "curl returned with code 200, "
|
|
437
|
+
"but the JSON is malformed: "
|
|
438
|
+
+ re.sub(r"\s+", " ", str(e)),
|
|
439
|
+
}
|
|
440
|
+
|
|
441
|
+
# CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
|
|
442
|
+
else:
|
|
443
|
+
if (
|
|
444
|
+
accept_header == "text/tab-separated-values"
|
|
445
|
+
or accept_header == "text/csv"
|
|
446
|
+
):
|
|
447
|
+
result_size = run_command(
|
|
448
|
+
f"sed 1d {result_file} | wc -l", return_output=True
|
|
449
|
+
)
|
|
450
|
+
elif accept_header == "text/turtle":
|
|
451
|
+
result_size = run_command(
|
|
452
|
+
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
|
|
453
|
+
f"{result_file} | wc -l",
|
|
454
|
+
return_output=True,
|
|
455
|
+
)
|
|
456
|
+
elif accept_header == "application/qlever-results+json":
|
|
457
|
+
result_size = run_command(
|
|
458
|
+
f'jq -r ".resultsize" {result_file}',
|
|
459
|
+
return_output=True,
|
|
460
|
+
)
|
|
312
461
|
else:
|
|
313
|
-
|
|
314
|
-
args.accept == "text/tab-separated-values"
|
|
315
|
-
or args.accept == "text/csv"
|
|
316
|
-
):
|
|
317
|
-
result_size = run_command(
|
|
318
|
-
f"sed 1d {result_file} | wc -l", return_output=True
|
|
319
|
-
)
|
|
320
|
-
elif args.accept == "text/turtle":
|
|
321
|
-
result_size = run_command(
|
|
322
|
-
f"sed '1d;/^@prefix/d;/^\\s*$/d' "
|
|
323
|
-
f"{result_file} | wc -l",
|
|
324
|
-
return_output=True,
|
|
325
|
-
)
|
|
326
|
-
else:
|
|
462
|
+
try:
|
|
327
463
|
result_size = run_command(
|
|
328
|
-
f'jq -r ".results.bindings | length"'
|
|
464
|
+
f'jq -r ".results.bindings | length"'
|
|
465
|
+
f" {result_file}",
|
|
329
466
|
return_output=True,
|
|
330
467
|
)
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
468
|
+
except Exception as e:
|
|
469
|
+
error_msg = {
|
|
470
|
+
"short": "Malformed JSON",
|
|
471
|
+
"long": re.sub(r"\s+", " ", str(e)),
|
|
472
|
+
}
|
|
334
473
|
|
|
335
474
|
# Remove the result file (unless in debug mode).
|
|
336
475
|
if args.log_level != "DEBUG":
|
|
337
476
|
Path(result_file).unlink(missing_ok=True)
|
|
338
477
|
|
|
339
478
|
# Print description, time, result in tabular form.
|
|
340
|
-
if len(description) >
|
|
341
|
-
description =
|
|
342
|
-
|
|
479
|
+
if len(description) > width_query_description:
|
|
480
|
+
description = (
|
|
481
|
+
description[: width_query_description_half - 2]
|
|
482
|
+
+ " ... "
|
|
483
|
+
+ description[-width_query_description_half + 2 :]
|
|
484
|
+
)
|
|
343
485
|
if error_msg is None:
|
|
486
|
+
result_size = int(result_size)
|
|
344
487
|
log.info(
|
|
345
|
-
f"{description:<{
|
|
488
|
+
f"{description:<{width_query_description}} "
|
|
346
489
|
f"{time_seconds:6.2f} s "
|
|
347
490
|
f"{result_size:>{args.width_result_size},}"
|
|
348
491
|
)
|
|
@@ -352,16 +495,34 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
352
495
|
num_failed += 1
|
|
353
496
|
if (
|
|
354
497
|
args.width_error_message > 0
|
|
355
|
-
and len(error_msg) > args.width_error_message
|
|
498
|
+
and len(error_msg["long"]) > args.width_error_message
|
|
356
499
|
and args.log_level != "DEBUG"
|
|
500
|
+
and args.show_query != "on-error"
|
|
357
501
|
):
|
|
358
|
-
error_msg =
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
502
|
+
error_msg["long"] = (
|
|
503
|
+
error_msg["long"][: args.width_error_message - 3]
|
|
504
|
+
+ "..."
|
|
505
|
+
)
|
|
506
|
+
seperator_short_long = (
|
|
507
|
+
"\n" if args.show_query == "on-error" else " "
|
|
508
|
+
)
|
|
509
|
+
log.info(
|
|
510
|
+
f"{description:<{width_query_description}} "
|
|
511
|
+
f"{colored('FAILED ', 'red')}"
|
|
512
|
+
f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
|
|
513
|
+
f"{seperator_short_long}"
|
|
514
|
+
f"{colored(error_msg['long'], 'red')}"
|
|
364
515
|
)
|
|
516
|
+
if args.show_query == "on-error":
|
|
517
|
+
log.info(
|
|
518
|
+
colored(
|
|
519
|
+
self.pretty_printed_query(
|
|
520
|
+
query, args.show_prefixes
|
|
521
|
+
),
|
|
522
|
+
"cyan",
|
|
523
|
+
)
|
|
524
|
+
)
|
|
525
|
+
log.info("")
|
|
365
526
|
|
|
366
527
|
# Check that each query has a time and a result size, or it failed.
|
|
367
528
|
assert len(result_sizes) == len(query_times)
|
|
@@ -380,19 +541,19 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
380
541
|
description = f"TOTAL for {n} {query_or_queries}"
|
|
381
542
|
log.info("")
|
|
382
543
|
log.info(
|
|
383
|
-
f"{description:<{
|
|
544
|
+
f"{description:<{width_query_description}} "
|
|
384
545
|
f"{total_query_time:6.2f} s "
|
|
385
546
|
f"{total_result_size:>14,}"
|
|
386
547
|
)
|
|
387
548
|
description = f"AVERAGE for {n} {query_or_queries}"
|
|
388
549
|
log.info(
|
|
389
|
-
f"{description:<{
|
|
550
|
+
f"{description:<{width_query_description}} "
|
|
390
551
|
f"{average_query_time:6.2f} s "
|
|
391
552
|
f"{average_result_size:>14,}"
|
|
392
553
|
)
|
|
393
554
|
description = f"MEDIAN for {n} {query_or_queries}"
|
|
394
555
|
log.info(
|
|
395
|
-
f"{description:<{
|
|
556
|
+
f"{description:<{width_query_description}} "
|
|
396
557
|
f"{median_query_time:6.2f} s "
|
|
397
558
|
f"{median_result_size:>14,}"
|
|
398
559
|
)
|
|
@@ -406,7 +567,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
406
567
|
num_failed_string += " [all]"
|
|
407
568
|
log.info(
|
|
408
569
|
colored(
|
|
409
|
-
f"{description:<{
|
|
570
|
+
f"{description:<{width_query_description}} "
|
|
410
571
|
f"{num_failed:>24}",
|
|
411
572
|
"red",
|
|
412
573
|
)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from qlever.command import QleverCommand
|
|
6
|
+
from qlever.log import log
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ExtractQueriesCommand(QleverCommand):
    """
    Class for executing the `extract-queries` command.

    The command scans a server log for SPARQL queries and writes them to
    an output file, one query per line, as TSV with the two columns
    description and query.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Extract all SPARQL queries from the server log"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        # Only `name` is needed, for the default name of the log file.
        return {"data": ["name"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--description-base",
            type=str,
            default="Log extract",
            help="Base name for the query descriptions"
            " (default: `Log extract`)",
        )
        subparser.add_argument(
            "--log-file",
            type=str,
            help="Name of the log file to extract queries from"
            " (default: `<name>.server-log.txt`)",
        )
        subparser.add_argument(
            "--output-file",
            type=str,
            default="log-queries.txt",
            help="Output file for the extracted queries (default: `log-queries.txt`)",
        )

    def execute(self, args) -> bool:
        """
        Extract the queries from the log file and write them as TSV.

        Reads `args.log_file` (or `<args.name>.server-log.txt` if it is
        not given) line by line and writes one `description<TAB>query`
        line per extracted query to `args.output_file`. Returns `True`,
        also when `args.show` is set and nothing is extracted.

        NOTE: A query is only written once the next log line after it
        has been read. A query that is still open when the file ends,
        or when the next query starts right away, is not written.
        """
        # Show what the command does.
        if args.log_file is not None:
            log_file_name = args.log_file
        else:
            log_file_name = f"{args.name}.server-log.txt"
        self.show(
            f"Extract SPARQL queries from `{log_file_name}`"
            f" and write them to `{args.output_file}`",
            only_show=args.show,
        )
        if args.show:
            return True

        # Regex for log entries of the form
        # 2025-01-14 04:47:44.950 - INFO:
        log_line_regex = re.compile(
            r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - [A-Z]+:"
        )
        # An "Alive check" message contains a tag, which we use as the base
        # name of the query description.
        alive_check_regex = re.compile(r"Alive check with message \"(.*)\"")
        # Lines that consist only of a comment, and trailing comments.
        comment_line_regex = re.compile(r"^\s*#")
        trailing_comment_regex = re.compile(r" #.*")

        query = None
        description_base = args.description_base
        description_base_count = {}
        tsv_line_short_width = 150

        # Read the log file line by line. The context managers make sure
        # that both files are closed, even if an exception is raised while
        # processing a line.
        with open(log_file_name, "r") as log_file, \
                open(args.output_file, "w") as queries_file:
            for line in log_file:
                match = alive_check_regex.search(line)
                if match:
                    description_base = match.group(1)
                    continue

                # A new query in the log. Queries are numbered separately
                # for each description base, starting from 1.
                if "Processing the following SPARQL query" in line:
                    query = []
                    query_index = (
                        description_base_count.get(description_base, 0) + 1
                    )
                    description_base_count[description_base] = query_index
                    continue

                # Nothing to do for lines outside of a query.
                if query is None:
                    continue

                # If we have started a query: extend until we meet the next
                # log line, then push the query. Remove comments.
                if not log_line_regex.match(line):
                    if not comment_line_regex.match(line):
                        line = trailing_comment_regex.sub("", line)
                        query.append(line)
                else:
                    # Collapse all whitespace, so that the query fits on a
                    # single TSV line.
                    query = re.sub(r"\s+", " ", "\n".join(query)).strip()
                    description = f"{description_base}, Query #{query_index}"
                    tsv_line = f"{description}\t{query}"
                    # Log a shortened version, write the full line.
                    tsv_line_short = (
                        tsv_line
                        if len(tsv_line) < tsv_line_short_width
                        else tsv_line[:tsv_line_short_width] + "..."
                    )
                    log.info(tsv_line_short)
                    print(tsv_line, file=queries_file)
                    query = None

        return True
|
qlever/commands/get_data.py
CHANGED