qlever 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

@@ -1,605 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import re
4
- import shlex
5
- import subprocess
6
- import time
7
- import traceback
8
- from pathlib import Path
9
-
10
- from termcolor import colored
11
-
12
- from qlever.command import QleverCommand
13
- from qlever.commands.clear_cache import ClearCacheCommand
14
- from qlever.log import log, mute_log
15
- from qlever.util import run_command, run_curl_command
16
-
17
-
18
- class ExampleQueriesCommand(QleverCommand):
19
- """
20
- Class for running a given sequence of example queries and showing
21
- their processing times and result sizes.
22
- """
23
-
24
- def __init__(self):
25
- pass
26
-
27
- def description(self) -> str:
28
- return "Run the given queries and show their processing times and result sizes"
29
-
30
- def should_have_qleverfile(self) -> bool:
31
- return False
32
-
33
- def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
34
- return {"server": ["host_name", "port"], "ui": ["ui_config"]}
35
-
36
- def additional_arguments(self, subparser) -> None:
37
- subparser.add_argument(
38
- "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
39
- )
40
- subparser.add_argument(
41
- "--sparql-endpoint-preset",
42
- choices=[
43
- "https://qlever.dev/api/wikidata",
44
- "https://qlever.dev/api/uniprot",
45
- "https://qlever.dev/api/pubchem",
46
- "https://qlever.dev/api/osm-planet",
47
- "https://wikidata.demo.openlinksw.com/sparql",
48
- "https://sparql.uniprot.org/sparql",
49
- ],
50
- help="SPARQL endpoint from fixed list (to save typing)",
51
- )
52
- subparser.add_argument(
53
- "--get-queries-cmd",
54
- type=str,
55
- help="Command to get example queries as TSV "
56
- "(description, query)",
57
- )
58
- subparser.add_argument(
59
- "--query-ids",
60
- type=str,
61
- default="1-$",
62
- help="Query IDs as comma-separated list of "
63
- "ranges (e.g., 1-5,7,12-$)",
64
- )
65
- subparser.add_argument(
66
- "--query-regex",
67
- type=str,
68
- help="Only consider example queries matching "
69
- "this regex (using grep -Pi)",
70
- )
71
- subparser.add_argument(
72
- "--download-or-count",
73
- choices=["download", "count"],
74
- default="download",
75
- help="Whether to download the full result "
76
- "or just compute the size of the result",
77
- )
78
- subparser.add_argument(
79
- "--limit", type=int, help="Limit on the number of results"
80
- )
81
- subparser.add_argument(
82
- "--remove-offset-and-limit",
83
- action="store_true",
84
- default=False,
85
- help="Remove OFFSET and LIMIT from the query",
86
- )
87
- subparser.add_argument(
88
- "--accept",
89
- type=str,
90
- choices=[
91
- "text/tab-separated-values",
92
- "text/csv",
93
- "application/sparql-results+json",
94
- "application/qlever-results+json",
95
- "text/turtle",
96
- "AUTO",
97
- ],
98
- default="application/sparql-results+json",
99
- help="Accept header for the SPARQL query; AUTO means "
100
- "`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
101
- "`application/sparql-results+json` for all others",
102
- )
103
- subparser.add_argument(
104
- "--clear-cache",
105
- choices=["yes", "no"],
106
- default="no",
107
- help="Clear the cache before each query (only works for QLever)",
108
- )
109
- subparser.add_argument(
110
- "--width-query-description",
111
- type=int,
112
- default=70,
113
- help="Width for printing the query description",
114
- )
115
- subparser.add_argument(
116
- "--width-error-message",
117
- type=int,
118
- default=80,
119
- help="Width for printing the error message " "(0 = no limit)",
120
- )
121
- subparser.add_argument(
122
- "--width-result-size",
123
- type=int,
124
- default=14,
125
- help="Width for printing the result size",
126
- )
127
- subparser.add_argument(
128
- "--add-query-type-to-description",
129
- action="store_true",
130
- default=False,
131
- help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
132
- "UNKNOWN) to the description",
133
- )
134
- subparser.add_argument(
135
- "--show-query",
136
- choices=["always", "never", "on-error"],
137
- default="never",
138
- help="Show the queries that will be executed (always, never, on error)",
139
- )
140
- subparser.add_argument(
141
- "--show-prefixes",
142
- action="store_true",
143
- default=False,
144
- help="When showing the query, also show the prefixes",
145
- )
146
-
147
- def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
148
- remove_prefixes_cmd = (
149
- " | sed '/^PREFIX /Id'" if not show_prefixes else ""
150
- )
151
- pretty_print_query_cmd = (
152
- f"echo {shlex.quote(query)}"
153
- f" | docker run -i --rm sparqling/sparql-formatter"
154
- f"{remove_prefixes_cmd} | grep -v '^$'"
155
- )
156
- try:
157
- query_pretty_printed = run_command(
158
- pretty_print_query_cmd, return_output=True
159
- )
160
- return query_pretty_printed.rstrip()
161
- except Exception:
162
- log.error(
163
- "Failed to pretty-print query, "
164
- "returning original query: {e}"
165
- )
166
- return query.rstrip()
167
-
168
- def sparql_query_type(self, query: str) -> str:
169
- match = re.search(
170
- r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE
171
- )
172
- if match:
173
- return match.group(1).upper()
174
- else:
175
- return "UNKNOWN"
176
-
177
- def execute(self, args) -> bool:
178
- # We can't have both `--remove-offset-and-limit` and `--limit`.
179
- if args.remove_offset_and_limit and args.limit:
180
- log.error("Cannot have both --remove-offset-and-limit and --limit")
181
- return False
182
-
183
- # If `args.accept` is `application/sparql-results+json` or
184
- # `application/qlever-results+json` or `AUTO`, we need `jq`.
185
- if (
186
- args.accept == "application/sparql-results+json"
187
- or args.accept == "application/qlever-results+json"
188
- or args.accept == "AUTO"
189
- ):
190
- try:
191
- subprocess.run(
192
- "jq --version",
193
- shell=True,
194
- check=True,
195
- stdout=subprocess.DEVNULL,
196
- stderr=subprocess.DEVNULL,
197
- )
198
- except Exception as e:
199
- log.error(f"Please install `jq` for {args.accept} ({e})")
200
- return False
201
-
202
- # Handle shortcuts for SPARQL endpoint.
203
- if args.sparql_endpoint_preset:
204
- args.sparql_endpoint = args.sparql_endpoint_preset
205
-
206
- # Limit only works with full result.
207
- if args.limit and args.download_or_count == "count":
208
- log.error("Limit only works with full result")
209
- return False
210
-
211
- # Clear cache only works for QLever.
212
- is_qlever = (
213
- not args.sparql_endpoint
214
- or args.sparql_endpoint.startswith("https://qlever")
215
- )
216
- if args.clear_cache == "yes":
217
- if is_qlever:
218
- log.warning(
219
- "Clearing the cache before each query"
220
- " (only works for QLever)"
221
- )
222
- else:
223
- log.warning(
224
- "Clearing the cache only works for QLever"
225
- ", option `--clear-cache` is ignored"
226
- )
227
- args.clear_cache = "no"
228
-
229
- # Show what the command will do.
230
- get_queries_cmd = (
231
- args.get_queries_cmd
232
- if args.get_queries_cmd
233
- else f"curl -sv https://qlever.cs.uni-freiburg.de/"
234
- f"api/examples/{args.ui_config}"
235
- )
236
- sed_arg = args.query_ids.replace(",", "p;").replace("-", ",") + "p"
237
- get_queries_cmd += f" | sed -n '{sed_arg}'"
238
- if args.query_regex:
239
- get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
240
- sparql_endpoint = (
241
- args.sparql_endpoint
242
- if args.sparql_endpoint
243
- else f"{args.host_name}:{args.port}"
244
- )
245
- self.show(
246
- f"Obtain queries via: {get_queries_cmd}\n"
247
- f"SPARQL endpoint: {sparql_endpoint}\n"
248
- f"Accept header: {args.accept}\n"
249
- f"Download result for each query or just count:"
250
- f" {args.download_or_count.upper()}"
251
- + (f" with LIMIT {args.limit}" if args.limit else ""),
252
- only_show=args.show,
253
- )
254
- if args.show:
255
- return True
256
-
257
- # Get the example queries.
258
- try:
259
- example_query_lines = run_command(
260
- get_queries_cmd, return_output=True
261
- )
262
- if len(example_query_lines) == 0:
263
- log.error("No example queries matching the criteria found")
264
- return False
265
- example_query_lines = example_query_lines.splitlines()
266
- except Exception as e:
267
- log.error(f"Failed to get example queries: {e}")
268
- return False
269
-
270
- # We want the width of the query description to be an uneven number (in
271
- # case we have to truncate it, in which case we want to have a " ... "
272
- # in the middle).
273
- width_query_description_half = args.width_query_description // 2
274
- width_query_description = 2 * width_query_description_half + 1
275
-
276
- # Launch the queries one after the other and for each print: the
277
- # description, the result size (number of rows), and the query
278
- # processing time (seconds).
279
- query_times = []
280
- result_sizes = []
281
- num_failed = 0
282
- for example_query_line in example_query_lines:
283
- # Parse description and query, and determine query type.
284
- description, query = example_query_line.split("\t")
285
- if len(query) == 0:
286
- log.error("Could not parse description and query, line is:")
287
- log.info("")
288
- log.info(example_query_line)
289
- return False
290
- query_type = self.sparql_query_type(query)
291
- if args.add_query_type_to_description or args.accept == "AUTO":
292
- description = f"{description} [{query_type}]"
293
-
294
- # Clear the cache.
295
- if args.clear_cache == "yes":
296
- args.server_url = sparql_endpoint
297
- args.complete = False
298
- clear_cache_successful = False
299
- with mute_log():
300
- clear_cache_successful = ClearCacheCommand().execute(args)
301
- if not clear_cache_successful:
302
- log.warn("Failed to clear the cache")
303
-
304
- # Remove OFFSET and LIMIT (after the last closing bracket).
305
- if args.remove_offset_and_limit or args.limit:
306
- closing_bracket_idx = query.rfind("}")
307
- regexes = [
308
- re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
309
- re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
310
- ]
311
- for regex in regexes:
312
- match = re.search(regex, query[closing_bracket_idx:])
313
- if match:
314
- query = (
315
- query[: closing_bracket_idx + match.start()]
316
- + query[closing_bracket_idx + match.end() :]
317
- )
318
-
319
- # Limit query.
320
- if args.limit:
321
- query += f" LIMIT {args.limit}"
322
-
323
- # Count query.
324
- if args.download_or_count == "count":
325
- # First find out if there is a FROM clause.
326
- regex_from_clause = re.compile(
327
- r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
328
- )
329
- match_from_clause = re.search(regex_from_clause, query)
330
- from_clause = " "
331
- if match_from_clause:
332
- from_clause = match_from_clause.group(0)
333
- query = (
334
- query[: match_from_clause.start()]
335
- + " "
336
- + query[match_from_clause.end() :]
337
- )
338
- # Now we can add the outer SELECT COUNT(*).
339
- query = (
340
- re.sub(
341
- r"SELECT ",
342
- "SELECT (COUNT(*) AS ?qlever_count_)"
343
- + from_clause
344
- + "WHERE { SELECT ",
345
- query,
346
- count=1,
347
- flags=re.IGNORECASE,
348
- )
349
- + " }"
350
- )
351
-
352
- # A bit of pretty-printing.
353
- query = re.sub(r"\s+", " ", query)
354
- query = re.sub(r"\s*\.\s*\}", " }", query)
355
- if args.show_query == "always":
356
- log.info("")
357
- log.info(
358
- colored(
359
- self.pretty_printed_query(query, args.show_prefixes),
360
- "cyan",
361
- )
362
- )
363
-
364
- # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT
365
- # queries and `application/sparql-results+json` for all others.
366
- accept_header = args.accept
367
- if accept_header == "AUTO":
368
- if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
369
- accept_header = "text/turtle"
370
- else:
371
- accept_header = "application/sparql-results+json"
372
-
373
- # Launch query.
374
- try:
375
- curl_cmd = (
376
- f"curl -s {sparql_endpoint}"
377
- f' -w "HTTP code: %{{http_code}}\\n"'
378
- f' -H "Accept: {accept_header}"'
379
- f" --data-urlencode query={shlex.quote(query)}"
380
- )
381
- log.debug(curl_cmd)
382
- result_file = (
383
- f"qlever.example_queries.result."
384
- f"{abs(hash(curl_cmd))}.tmp"
385
- )
386
- start_time = time.time()
387
- http_code = run_curl_command(
388
- sparql_endpoint,
389
- headers={"Accept": accept_header},
390
- params={"query": query},
391
- result_file=result_file,
392
- ).strip()
393
- if http_code == "200":
394
- time_seconds = time.time() - start_time
395
- error_msg = None
396
- else:
397
- error_msg = {
398
- "short": f"HTTP code: {http_code}",
399
- "long": re.sub(
400
- r"\s+", " ", Path(result_file).read_text()
401
- ),
402
- }
403
- except Exception as e:
404
- if args.log_level == "DEBUG":
405
- traceback.print_exc()
406
- error_msg = {
407
- "short": "Exception",
408
- "long": re.sub(r"\s+", " ", str(e)),
409
- }
410
-
411
- # Get result size (via the command line, in order to avoid loading
412
- # a potentially large JSON file into Python, which is slow).
413
- if error_msg is None:
414
- single_int_result = None
415
- # CASE 0: The result is empty despite a 200 HTTP code (not a
416
- # problem for CONSTRUCT and DESCRIBE queries).
417
- if Path(result_file).stat().st_size == 0 and (
418
- not query_type == "CONSTRUCT"
419
- and not query_type == "DESCRIBE"
420
- ):
421
- result_size = 0
422
- error_msg = {
423
- "short": "Empty result",
424
- "long": "curl returned with code 200, "
425
- "but the result is empty",
426
- }
427
-
428
- # CASE 1: Just counting the size of the result (TSV or JSON).
429
- elif args.download_or_count == "count":
430
- if accept_header == "text/tab-separated-values":
431
- result_size = run_command(
432
- f"sed 1d {result_file}", return_output=True
433
- )
434
- else:
435
- try:
436
- result_size = run_command(
437
- f'jq -r ".results.bindings[0]'
438
- f" | to_entries[0].value.value"
439
- f' | tonumber" {result_file}',
440
- return_output=True,
441
- )
442
- except Exception as e:
443
- error_msg = {
444
- "short": "Malformed JSON",
445
- "long": "curl returned with code 200, "
446
- "but the JSON is malformed: "
447
- + re.sub(r"\s+", " ", str(e)),
448
- }
449
-
450
- # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
451
- else:
452
- if (
453
- accept_header == "text/tab-separated-values"
454
- or accept_header == "text/csv"
455
- ):
456
- result_size = run_command(
457
- f"sed 1d {result_file} | wc -l", return_output=True
458
- )
459
- elif accept_header == "text/turtle":
460
- result_size = run_command(
461
- f"sed '1d;/^@prefix/d;/^\\s*$/d' "
462
- f"{result_file} | wc -l",
463
- return_output=True,
464
- )
465
- elif accept_header == "application/qlever-results+json":
466
- result_size = run_command(
467
- f'jq -r ".resultsize" {result_file}',
468
- return_output=True,
469
- )
470
- else:
471
- try:
472
- result_size = int(
473
- run_command(
474
- f'jq -r ".results.bindings | length"'
475
- f" {result_file}",
476
- return_output=True,
477
- ).rstrip()
478
- )
479
- except Exception as e:
480
- error_msg = {
481
- "short": "Malformed JSON",
482
- "long": re.sub(r"\s+", " ", str(e)),
483
- }
484
- if result_size == 1:
485
- try:
486
- single_int_result = int(
487
- run_command(
488
- f'jq -e -r ".results.bindings[0][] | .value"'
489
- f" {result_file}",
490
- return_output=True,
491
- ).rstrip()
492
- )
493
- except Exception:
494
- pass
495
-
496
- # Remove the result file (unless in debug mode).
497
- if args.log_level != "DEBUG":
498
- Path(result_file).unlink(missing_ok=True)
499
-
500
- # Print description, time, result in tabular form.
501
- if len(description) > width_query_description:
502
- description = (
503
- description[: width_query_description_half - 2]
504
- + " ... "
505
- + description[-width_query_description_half + 2 :]
506
- )
507
- if error_msg is None:
508
- result_size = int(result_size)
509
- single_int_result = (
510
- f" [single int result: {single_int_result:,}]"
511
- if single_int_result is not None
512
- else ""
513
- )
514
- log.info(
515
- f"{description:<{width_query_description}} "
516
- f"{time_seconds:6.2f} s "
517
- f"{result_size:>{args.width_result_size},}"
518
- f"{single_int_result}"
519
- )
520
- query_times.append(time_seconds)
521
- result_sizes.append(result_size)
522
- else:
523
- num_failed += 1
524
- if (
525
- args.width_error_message > 0
526
- and len(error_msg["long"]) > args.width_error_message
527
- and args.log_level != "DEBUG"
528
- and args.show_query != "on-error"
529
- ):
530
- error_msg["long"] = (
531
- error_msg["long"][: args.width_error_message - 3]
532
- + "..."
533
- )
534
- seperator_short_long = (
535
- "\n" if args.show_query == "on-error" else " "
536
- )
537
- log.info(
538
- f"{description:<{width_query_description}} "
539
- f"{colored('FAILED ', 'red')}"
540
- f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
541
- f"{seperator_short_long}"
542
- f"{colored(error_msg['long'], 'red')}"
543
- )
544
- if args.show_query == "on-error":
545
- log.info(
546
- colored(
547
- self.pretty_printed_query(
548
- query, args.show_prefixes
549
- ),
550
- "cyan",
551
- )
552
- )
553
- log.info("")
554
-
555
- # Check that each query has a time and a result size, or it failed.
556
- assert len(result_sizes) == len(query_times)
557
- assert len(query_times) + num_failed == len(example_query_lines)
558
-
559
- # Show statistics.
560
- if len(query_times) > 0:
561
- n = len(query_times)
562
- total_query_time = sum(query_times)
563
- average_query_time = total_query_time / n
564
- median_query_time = sorted(query_times)[n // 2]
565
- total_result_size = sum(result_sizes)
566
- average_result_size = round(total_result_size / n)
567
- median_result_size = sorted(result_sizes)[n // 2]
568
- query_or_queries = "query" if n == 1 else "queries"
569
- description = f"TOTAL for {n} {query_or_queries}"
570
- log.info("")
571
- log.info(
572
- f"{description:<{width_query_description}} "
573
- f"{total_query_time:6.2f} s "
574
- f"{total_result_size:>14,}"
575
- )
576
- description = f"AVERAGE for {n} {query_or_queries}"
577
- log.info(
578
- f"{description:<{width_query_description}} "
579
- f"{average_query_time:6.2f} s "
580
- f"{average_result_size:>14,}"
581
- )
582
- description = f"MEDIAN for {n} {query_or_queries}"
583
- log.info(
584
- f"{description:<{width_query_description}} "
585
- f"{median_query_time:6.2f} s "
586
- f"{median_result_size:>14,}"
587
- )
588
-
589
- # Show number of failed queries.
590
- if num_failed > 0:
591
- log.info("")
592
- description = "Number of FAILED queries"
593
- num_failed_string = f"{num_failed:>6}"
594
- if num_failed == len(example_query_lines):
595
- num_failed_string += " [all]"
596
- log.info(
597
- colored(
598
- f"{description:<{width_query_description}} "
599
- f"{num_failed:>24}",
600
- "red",
601
- )
602
- )
603
-
604
- # Return success (has nothing to do with how many queries failed).
605
- return True