qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qlever/Qleverfiles/Qleverfile.dblp +36 -0
- qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
- qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
- qlever/Qleverfiles/Qleverfile.default +51 -0
- qlever/Qleverfiles/Qleverfile.dnb +40 -0
- qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
- qlever/Qleverfiles/Qleverfile.freebase +28 -0
- qlever/Qleverfiles/Qleverfile.imdb +36 -0
- qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
- qlever/Qleverfiles/Qleverfile.olympics +31 -0
- qlever/Qleverfiles/Qleverfile.orkg +30 -0
- qlever/Qleverfiles/Qleverfile.osm-country +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
- qlever/Qleverfiles/Qleverfile.pubchem +131 -0
- qlever/Qleverfiles/Qleverfile.scientists +29 -0
- qlever/Qleverfiles/Qleverfile.uniprot +74 -0
- qlever/Qleverfiles/Qleverfile.vvz +31 -0
- qlever/Qleverfiles/Qleverfile.wikidata +42 -0
- qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
- qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
- qlever/__init__.py +44 -1380
- qlever/command.py +87 -0
- qlever/commands/__init__.py +0 -0
- qlever/commands/add_text_index.py +115 -0
- qlever/commands/benchmark_queries.py +1019 -0
- qlever/commands/cache_stats.py +125 -0
- qlever/commands/clear_cache.py +88 -0
- qlever/commands/extract_queries.py +120 -0
- qlever/commands/get_data.py +48 -0
- qlever/commands/index.py +333 -0
- qlever/commands/index_stats.py +306 -0
- qlever/commands/log.py +66 -0
- qlever/commands/materialized_view.py +110 -0
- qlever/commands/query.py +142 -0
- qlever/commands/rebuild_index.py +176 -0
- qlever/commands/reset_updates.py +59 -0
- qlever/commands/settings.py +115 -0
- qlever/commands/setup_config.py +97 -0
- qlever/commands/start.py +336 -0
- qlever/commands/status.py +50 -0
- qlever/commands/stop.py +90 -0
- qlever/commands/system_info.py +130 -0
- qlever/commands/ui.py +271 -0
- qlever/commands/update.py +90 -0
- qlever/commands/update_wikidata.py +1204 -0
- qlever/commands/warmup.py +41 -0
- qlever/config.py +223 -0
- qlever/containerize.py +167 -0
- qlever/log.py +55 -0
- qlever/qlever_main.py +79 -0
- qlever/qleverfile.py +530 -0
- qlever/util.py +330 -0
- qlever-0.5.41.dist-info/METADATA +127 -0
- qlever-0.5.41.dist-info/RECORD +59 -0
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
- qlever-0.5.41.dist-info/entry_points.txt +2 -0
- qlever-0.5.41.dist-info/top_level.txt +1 -0
- build/lib/qlever/__init__.py +0 -1383
- build/lib/qlever/__main__.py +0 -4
- qlever/__main__.py +0 -4
- qlever-0.2.5.dist-info/METADATA +0 -277
- qlever-0.2.5.dist-info/RECORD +0 -12
- qlever-0.2.5.dist-info/entry_points.txt +0 -2
- qlever-0.2.5.dist-info/top_level.txt +0 -4
- src/qlever/__init__.py +0 -1383
- src/qlever/__main__.py +0 -4
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0
qlever/commands/benchmark_queries.py
@@ -0,0 +1,1019 @@
from __future__ import annotations

import csv
import json
import re
import shlex
import subprocess
import time
import traceback
from io import StringIO
from pathlib import Path
from typing import Any

import rdflib
import yaml
from termcolor import colored

from qlever.command import QleverCommand
from qlever.commands.clear_cache import ClearCacheCommand
from qlever.commands.ui import dict_to_yaml
from qlever.log import log, mute_log
from qlever.util import run_command, run_curl_command


class BenchmarkQueriesCommand(QleverCommand):
    """
    Class for running a given sequence of benchmark or example queries and
    showing their processing times and result sizes.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return (
            "Run the given benchmark or example queries and show their "
            "processing times and result sizes"
        )

    def should_have_qleverfile(self) -> bool:
        return False

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"server": ["host_name", "port"], "ui": ["ui_config"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
        )
        subparser.add_argument(
            "--sparql-endpoint-preset",
            choices=[
                "https://qlever.dev/api/wikidata",
                "https://qlever.dev/api/uniprot",
                "https://qlever.dev/api/pubchem",
                "https://qlever.dev/api/osm-planet",
                "https://wikidata.demo.openlinksw.com/sparql",
                "https://sparql.uniprot.org/sparql",
            ],
            help="SPARQL endpoint from fixed list (to save typing)",
        )
        subparser.add_argument(
            "--queries-tsv",
            type=str,
            default=None,
            help=(
                "Path to a TSV file containing benchmark queries "
                "(query_description, full_sparql_query)"
            ),
        )
        subparser.add_argument(
            "--queries-yml",
            type=str,
            default=None,
            help=(
                "Path to a YAML file containing benchmark queries. "
                "The YAML file should have a top-level "
                "key called 'queries', which is a list of dictionaries. "
                "Each dictionary should contain 'query' for the query "
                "description and 'sparql' for the full SPARQL query."
            ),
        )
        subparser.add_argument(
            "--query-ids",
            type=str,
            default="1-$",
            help="Query IDs as comma-separated list of "
            "ranges (e.g., 1-5,7,12-$)",
        )
        subparser.add_argument(
            "--query-regex",
            type=str,
            help="Only consider example queries matching "
            "this regex (using grep -Pi)",
        )
        subparser.add_argument(
            "--example-queries",
            action="store_true",
            default=False,
            help=(
                "Run the example queries for the given --ui-config "
                "instead of the benchmark queries from a TSV or YML file"
            ),
        )
        subparser.add_argument(
            "--download-or-count",
            choices=["download", "count"],
            default="download",
            help="Whether to download the full result "
            "or just compute the size of the result",
        )
        subparser.add_argument(
            "--limit", type=int, help="Limit on the number of results"
        )
        subparser.add_argument(
            "--remove-offset-and-limit",
            action="store_true",
            default=False,
            help="Remove OFFSET and LIMIT from the query",
        )
        subparser.add_argument(
            "--accept",
            type=str,
            choices=[
                "text/tab-separated-values",
                "text/csv",
                "application/sparql-results+json",
                "application/qlever-results+json",
                "application/octet-stream",
                "text/turtle",
                "AUTO",
            ],
            default="application/sparql-results+json",
            help="Accept header for the SPARQL query; AUTO means "
            "`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
            "`application/sparql-results+json` for all others",
        )
        subparser.add_argument(
            "--clear-cache",
            choices=["yes", "no"],
            default="no",
            help="Clear the cache before each query (only works for QLever)",
        )
        subparser.add_argument(
            "--width-query-description",
            type=int,
            default=70,
            help="Width for printing the query description",
        )
        subparser.add_argument(
            "--width-error-message",
            type=int,
            default=50,
            help="Width for printing the error message (0 = no limit)",
        )
        subparser.add_argument(
            "--width-result-size",
            type=int,
            default=14,
            help="Width for printing the result size",
        )
        subparser.add_argument(
            "--add-query-type-to-description",
            action="store_true",
            default=False,
            help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
            "UNKNOWN) to the description",
        )
        subparser.add_argument(
            "--show-query",
            choices=["always", "never", "on-error"],
            default="never",
            help="Show the queries that will be executed (always, never, on error)",
        )
        subparser.add_argument(
            "--show-prefixes",
            action="store_true",
            default=False,
            help="When showing the query, also show the prefixes",
        )
        subparser.add_argument(
            "--results-dir",
            type=str,
            default=".",
            help=(
                "The directory where the YML result file would be saved "
                "for the evaluation web app (Default = current working directory)"
            ),
        )
        subparser.add_argument(
            "--result-file",
            type=str,
            default=None,
            help=(
                "Base name used for the result YML file, should be of the "
                "form `<dataset>.<engine>`, e.g., `wikidata.qlever`"
            ),
        )
        subparser.add_argument(
            "--max-results-output-file",
            type=int,
            default=5,
            help=(
                "Maximum number of results per query in the output result "
                "YML file (Default = 5)"
            ),
        )

    def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
        remove_prefixes_cmd = (
            " | sed '/^PREFIX /Id'" if not show_prefixes else ""
        )
        pretty_print_query_cmd = (
            f"echo {shlex.quote(query)}"
            f" | docker run -i --rm sparqling/sparql-formatter"
            f"{remove_prefixes_cmd} | grep -v '^$'"
        )
        try:
            query_pretty_printed = run_command(
                pretty_print_query_cmd, return_output=True
            )
            return query_pretty_printed.rstrip()
        except Exception as e:
            log.error(
                f"Failed to pretty-print query, returning original query: {e}"
            )
            return query.rstrip()

    def sparql_query_type(self, query: str) -> str:
        match = re.search(
            r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE
        )
        if match:
            return match.group(1).upper()
        else:
            return "UNKNOWN"

    @staticmethod
    def filter_queries(
        queries: list[tuple[str, str]], query_ids: str, query_regex: str
    ) -> list[tuple[str, str]]:
        """
        Given a list of queries (tuple of query desc and full sparql query),
        filter them and keep the ones which are a part of query_ids
        or match with query_regex
        """
        # Get the list of query indices to keep
        total_queries = len(queries)
        query_indices = []
        for part in query_ids.split(","):
            if "-" in part:
                start, end = part.split("-")
                if end == "$":
                    end = total_queries
                query_indices.extend(range(int(start) - 1, int(end)))
            else:
                idx = int(part) if part != "$" else total_queries
                query_indices.append(idx - 1)

        try:
            filtered_queries = []
            pattern = (
                re.compile(query_regex, re.IGNORECASE) if query_regex else None
            )
            for query_idx in query_indices:
                if query_idx >= total_queries:
                    continue

                query_desc, sparql = queries[query_idx]

                # Only include queries that match the query_regex if present
                if pattern and not (
                    pattern.search(query_desc) or pattern.search(sparql)
                ):
                    continue

                filtered_queries.append((query_desc, sparql))
            return filtered_queries
        except Exception as exc:
            log.error(f"Error filtering queries: {exc}")
            return []

    @staticmethod
    def parse_queries_tsv(queries_cmd: str) -> list[tuple[str, str]]:
        """
        Execute the given bash command to fetch tsv queries and return a
        list of queries i.e. tuple(query_description, full_sparql_query)
        """
        try:
            tsv_queries_str = run_command(queries_cmd, return_output=True)
            if len(tsv_queries_str) == 0:
                log.error("No queries found in the TSV queries file")
                return []
            return [
                tuple(line.split("\t"))
                for line in tsv_queries_str.strip().splitlines()
            ]
        except Exception as exc:
            log.error(f"Failed to read the TSV queries file: {exc}")
            return []

    @staticmethod
    def parse_queries_yml(queries_file: str) -> list[tuple[str, str]]:
        """
        Parse a YML file, validate its structure and return a list of
        queries i.e. tuple(query_description, full_sparql_query)
        """
        with open(queries_file, "r", encoding="utf-8") as q_file:
            try:
                data = yaml.safe_load(q_file)  # Load YAML safely
            except yaml.YAMLError as exc:
                log.error(f"Error parsing {queries_file} file: {exc}")
                return []

        # Validate the structure
        if not isinstance(data, dict) or "queries" not in data:
            log.error(
                "Error: YAML file must contain a top-level 'queries' key"
            )
            return []

        if not isinstance(data["queries"], list):
            log.error("Error: 'queries' key in YML file must hold a list.")
            return []

        for item in data["queries"]:
            if (
                not isinstance(item, dict)
                or "query" not in item
                or "sparql" not in item
            ):
                log.error(
                    "Error: Each item in 'queries' must contain "
                    "'query' and 'sparql' keys."
                )
                return []

        return [(query["query"], query["sparql"]) for query in data["queries"]]

    def get_result_size(
        self,
        count_only: bool,
        query_type: str,
        accept_header: str,
        result_file: str,
    ) -> tuple[int, dict[str, str] | None]:
        """
        Get the result size and error_msg dict (if query failed) for
        different accept headers
        """

        def get_json_error_msg(e: Exception) -> dict[str, str]:
            error_msg = {
                "short": "Malformed JSON",
                "long": "curl returned with code 200, "
                "but the JSON is malformed: " + re.sub(r"\s+", " ", str(e)),
            }
            return error_msg

        result_size = 0
        error_msg = None
        # CASE 0: The result is empty despite a 200 HTTP code (not a
        # problem for CONSTRUCT and DESCRIBE queries).
        if Path(result_file).stat().st_size == 0 and (
            not query_type == "CONSTRUCT" and not query_type == "DESCRIBE"
        ):
            result_size = 0
            error_msg = {
                "short": "Empty result",
                "long": "curl returned with code 200, but the result is empty",
            }

        # CASE 1: Just counting the size of the result (TSV or JSON).
        elif count_only:
            if accept_header in ("text/tab-separated-values", "text/csv"):
                result_size = run_command(
                    f"sed 1d {result_file}", return_output=True
                )
            elif accept_header == "application/qlever-results+json":
                try:
                    # sed cmd to get the number between 2nd and 3rd double_quotes
                    result_size = run_command(
                        f"jq '.res[0]' {result_file}"
                        " | sed 's/[^0-9]*\\([0-9]*\\).*/\\1/'",
                        return_output=True,
                    )
                except Exception as e:
                    error_msg = get_json_error_msg(e)
            else:
                try:
                    result_size = run_command(
                        f'jq -r ".results.bindings[0]'
                        f" | to_entries[0].value.value"
                        f' | tonumber" {result_file}',
                        return_output=True,
                    )
                except Exception as e:
                    error_msg = get_json_error_msg(e)

        # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
        else:
            if accept_header in ("text/tab-separated-values", "text/csv"):
                result_size = run_command(
                    f"sed 1d {result_file} | wc -l", return_output=True
                )
            elif accept_header == "text/turtle":
                result_size = run_command(
                    f"sed '1d;/^@prefix/d;/^\\s*$/d' {result_file} | wc -l",
                    return_output=True,
                )
            elif accept_header == "application/qlever-results+json":
                try:
                    result_size = run_command(
                        f'jq -r ".resultsize" {result_file}',
                        return_output=True,
                    )
                except Exception as e:
                    error_msg = get_json_error_msg(e)
            else:
                try:
                    result_size = int(
                        run_command(
                            f'jq -r ".results.bindings | length"'
                            f" {result_file}",
                            return_output=True,
                        ).rstrip()
                    )
                except Exception as e:
                    error_msg = get_json_error_msg(e)
        return int(result_size), error_msg

    @staticmethod
    def get_single_int_result(result_file: str) -> int | None:
        """
        When downloading the full result of a query with accept header as
        application/sparql-results+json and result_size == 1, get the single
        integer result value (if any).
        """
        single_int_result = None
        try:
            single_int_result = int(
                run_command(
                    f'jq -e -r ".results.bindings[0][] | .value"'
                    f" {result_file}",
                    return_output=True,
                ).rstrip()
            )
        except Exception:
            pass
        return single_int_result

    def execute(self, args) -> bool:
        # We can't have both `--remove-offset-and-limit` and `--limit`.
        if args.remove_offset_and_limit and args.limit:
            log.error("Cannot have both --remove-offset-and-limit and --limit")
            return False

        # Extract dataset and sparql_engine name from result file
        dataset, engine = None, None
        if args.result_file is not None:
            result_file_parts = args.result_file.split(".")
            if len(result_file_parts) != 2:
                log.error(
                    "The argument of --result-file should be of the form "
                    "`<dataset>.<engine>`, e.g., `wikidata.qlever`"
                )
                return False
            results_dir_path = Path(args.results_dir)
            if results_dir_path.exists():
                if not results_dir_path.is_dir():
                    log.error(
                        f"{results_dir_path} exists but is not a directory"
                    )
                    return False
            else:
                log.info(
                    f"Creating results directory: {results_dir_path.absolute()}"
                )
                results_dir_path.mkdir(parents=True, exist_ok=True)
            dataset, engine = result_file_parts

        # If `args.accept` is `application/sparql-results+json` or
        # `application/qlever-results+json` or `AUTO`, we need `jq`.
        if args.accept in (
            "application/sparql-results+json",
            "application/qlever-results+json",
            "AUTO",
        ):
            try:
                subprocess.run(
                    "jq --version",
                    shell=True,
                    check=True,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            except Exception as e:
                log.error(f"Please install `jq` for {args.accept} ({e})")
                return False

        if not any((args.queries_tsv, args.queries_yml, args.example_queries)):
            log.error(
                "No benchmark or example queries to read! Either pass benchmark "
                "queries using --queries-tsv or --queries-yml, or pass the "
                "argument --example-queries to run example queries for the "
                f"given ui_config {args.ui_config}"
            )
            return False

        if all((args.queries_tsv, args.queries_yml)):
            log.error("Cannot have both --queries-tsv and --queries-yml")
            return False

        if any((args.queries_tsv, args.queries_yml)) and args.example_queries:
            queries_file_arg = "tsv" if args.queries_tsv else "yml"
            log.error(
                f"Cannot have both --queries-{queries_file_arg} and "
                "--example-queries"
            )
            return False

        # Handle shortcuts for SPARQL endpoint.
        if args.sparql_endpoint_preset:
            args.sparql_endpoint = args.sparql_endpoint_preset

        # Limit only works with full result.
        if args.limit and args.download_or_count == "count":
            log.error("Limit only works with full result")
            return False

        # Clear cache only works for QLever.
        is_qlever = (
            not args.sparql_endpoint
            or args.sparql_endpoint.startswith("https://qlever")
        )
        if engine is not None:
            is_qlever = is_qlever or "qlever" in engine.lower()
        if args.clear_cache == "yes":
            if is_qlever:
                log.warning(
                    "Clearing the cache before each query"
                    " (only works for QLever)"
                )
            else:
                log.warning(
                    "Clearing the cache only works for QLever"
                    ", option `--clear-cache` is ignored"
                )
                args.clear_cache = "no"

        # Show what the command will do.
        example_queries_cmd = (
            f"curl -sv https://qlever.dev/api/examples/{args.ui_config}"
        )
        sparql_endpoint = (
            args.sparql_endpoint
            if args.sparql_endpoint
            else f"{args.host_name}:{args.port}"
        )

        self.show(
            f"Obtain queries via: {args.queries_yml or args.queries_tsv or example_queries_cmd}\n"
            f"SPARQL endpoint: {sparql_endpoint}\n"
            f"Accept header: {args.accept}\n"
            f"Download result for each query or just count:"
            f" {args.download_or_count.upper()}"
            + (f" with LIMIT {args.limit}" if args.limit else ""),
            only_show=args.show,
        )
        if args.show:
            return True

        if args.queries_yml:
            queries = self.parse_queries_yml(args.queries_yml)
        elif args.queries_tsv:
            queries = self.parse_queries_tsv(f"cat {args.queries_tsv}")
        else:
            queries = self.parse_queries_tsv(example_queries_cmd)

        filtered_queries = self.filter_queries(
            queries, args.query_ids, args.query_regex
        )

        if len(filtered_queries) == 0 or not filtered_queries[0]:
            log.error("No queries to process!")
            return False

        # We want the width of the query description to be an uneven number (in
        # case we have to truncated it, in which case we want to have a " ... "
        # in the middle).
        width_query_description_half = args.width_query_description // 2
        width_query_description = 2 * width_query_description_half + 1

        # Launch the queries one after the other and for each print: the
        # description, the result size (number of rows), and the query
        # processing time (seconds).
        query_times = []
        result_sizes = []
        result_yml_query_records = {"queries": []}
        num_failed = 0
        for description, query in filtered_queries:
            if len(query) == 0:
                log.error("Could not parse description and query, line is:")
                log.info("")
                log.info(f"{description}\t{query}")
                return False
            query_type = self.sparql_query_type(query)
            if args.add_query_type_to_description or args.accept == "AUTO":
                description = f"{description} [{query_type}]"

            # Clear the cache.
            if args.clear_cache == "yes":
                args.server_url = sparql_endpoint
                args.complete = False
                clear_cache_successful = False
                with mute_log():
                    clear_cache_successful = ClearCacheCommand().execute(args)
                if not clear_cache_successful:
                    log.warn("Failed to clear the cache")

            # Remove OFFSET and LIMIT (after the last closing bracket).
            if args.remove_offset_and_limit or args.limit:
                closing_bracket_idx = query.rfind("}")
                regexes = [
                    re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
                    re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
                ]
                for regex in regexes:
                    match = re.search(regex, query[closing_bracket_idx:])
                    if match:
                        query = (
                            query[: closing_bracket_idx + match.start()]
                            + query[closing_bracket_idx + match.end() :]
                        )

            # Limit query.
            if args.limit:
                query += f" LIMIT {args.limit}"

            # Count query.
            if args.download_or_count == "count":
                # First find out if there is a FROM clause.
                regex_from_clause = re.compile(
                    r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
                )
                match_from_clause = re.search(regex_from_clause, query)
                from_clause = " "
                if match_from_clause:
                    from_clause = match_from_clause.group(0)
                    query = (
                        query[: match_from_clause.start()]
                        + " "
                        + query[match_from_clause.end() :]
                    )
                # Now we can add the outer SELECT COUNT(*).
                query = (
                    re.sub(
                        r"SELECT ",
                        "SELECT (COUNT(*) AS ?qlever_count_)"
                        + from_clause
                        + "WHERE { SELECT ",
                        query,
                        count=1,
                        flags=re.IGNORECASE,
                    )
                    + " }"
                )

            # A bit of pretty-printing.
            query = re.sub(r"\s+", " ", query)
            query = re.sub(r"\s*\.\s*\}", " }", query)
            if args.show_query == "always":
                log.info("")
                log.info(
                    colored(
                        self.pretty_printed_query(query, args.show_prefixes),
                        "cyan",
                    )
                )

            # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT
            # queries and `application/sparql-results+json` for all others.
            accept_header = args.accept
            if accept_header == "AUTO":
                if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
                    accept_header = "text/turtle"
                else:
                    accept_header = "application/sparql-results+json"

            # Launch query.
            curl_cmd = (
                f"curl -Ls {sparql_endpoint}"
                f' -w "HTTP code: %{{http_code}}\\n"'
                f' -H "Accept: {accept_header}"'
                f" --data-urlencode query={shlex.quote(query)}"
            )
            log.debug(curl_cmd)
            result_file = (
                f"qlever.example_queries.result.{abs(hash(curl_cmd))}.tmp"
            )
            start_time = time.time()
            try:
                http_code = run_curl_command(
                    sparql_endpoint,
                    headers={"Accept": accept_header},
                    params={"query": query},
                    result_file=result_file,
                ).strip()
                if http_code == "200":
                    time_seconds = time.time() - start_time
                    error_msg = None
                else:
                    time_seconds = time.time() - start_time
                    error_msg = {
                        "short": f"HTTP code: {http_code}",
                        "long": re.sub(
                            r"\s+", " ", Path(result_file).read_text()
                        ),
                    }
            except Exception as e:
                time_seconds = time.time() - start_time
                if args.log_level == "DEBUG":
                    traceback.print_exc()
                error_msg = {
                    "short": "Exception",
                    "long": re.sub(r"\s+", " ", str(e)),
                }

            # Get result size (via the command line, in order to avoid loading
            # a potentially large JSON file into Python, which is slow).
            if error_msg is None:
                result_size, error_msg = self.get_result_size(
                    args.download_or_count == "count",
                    query_type,
                    accept_header,
                    result_file,
                )
                single_int_result = None
                if (
                    result_size == 1
                    and accept_header == "application/sparql-results+json"
                    and args.download_or_count == "download"
                ):
                    single_int_result = self.get_single_int_result(result_file)

            # Get the result yaml record if output file needs to be generated
            if args.result_file is not None:
                result_length = None if error_msg is not None else 1
                result_length = (
                    result_size
                    if args.download_or_count == "download"
                    and result_length is not None
                    else result_length
                )
                query_results = (
                    error_msg if error_msg is not None else result_file
                )
                query_record = self.get_result_yml_query_record(
                    query=description,
                    sparql=self.pretty_printed_query(
                        query, args.show_prefixes
                    ),
                    client_time=time_seconds,
                    result=query_results,
                    result_size=result_length,
                    max_result_size=args.max_results_output_file,
                    accept_header=accept_header,
                )
                result_yml_query_records["queries"].append(query_record)

            # Print description, time, result in tabular form.
            if len(description) > width_query_description:
                description = (
                    description[: width_query_description_half - 2]
                    + " ... "
                    + description[-width_query_description_half + 2 :]
                )
            if error_msg is None:
                result_size = int(result_size)
                single_int_result = (
                    f" [single int result: {single_int_result:,}]"
                    if single_int_result is not None
                    else ""
                )
                log.info(
                    f"{description:<{width_query_description}} "
                    f"{time_seconds:6.2f} s "
                    f"{result_size:>{args.width_result_size},}"
                    f"{single_int_result}"
                )
                query_times.append(time_seconds)
                result_sizes.append(result_size)
            else:
                num_failed += 1
                if (
                    args.width_error_message > 0
                    and len(error_msg["long"]) > args.width_error_message
                    and args.log_level != "DEBUG"
                    and args.show_query != "on-error"
                ):
                    error_msg["long"] = (
                        error_msg["long"][: args.width_error_message - 3]
                        + "..."
                    )
                seperator_short_long = (
                    "\n" if args.show_query == "on-error" else " "
                )
                log.info(
                    f"{description:<{width_query_description}} "
                    f"{colored('FAILED ', 'red')}"
                    f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
                    f"{seperator_short_long}"
                    f"{colored(error_msg['long'], 'red')}"
                )
                if args.show_query == "on-error":
                    log.info(
                        colored(
                            self.pretty_printed_query(
                                query, args.show_prefixes
                            ),
                            "cyan",
                        )
                    )
                    log.info("")

            # Remove the result file (unless in debug mode).
            if args.log_level != "DEBUG":
                Path(result_file).unlink(missing_ok=True)

        # Check that each query has a time and a result size, or it failed.
        assert len(result_sizes) == len(query_times)
        assert len(query_times) + num_failed == len(filtered_queries)

        if args.result_file:
            if len(result_yml_query_records["queries"]) != 0:
                outfile_name = f"{dataset}.{engine}.results.yaml"
                outfile = Path(args.results_dir) / outfile_name
                self.write_query_records_to_result_file(
                    query_data=result_yml_query_records,
                    out_file=outfile,
                )
            else:
                log.error(
                    f"Nothing to write to output result YML file: {args.result_file}"
                )

        # Show statistics.
        if len(query_times) > 0:
            n = len(query_times)
            total_query_time = sum(query_times)
            average_query_time = total_query_time / n
            median_query_time = sorted(query_times)[n // 2]
            total_result_size = sum(result_sizes)
            average_result_size = round(total_result_size / n)
            median_result_size = sorted(result_sizes)[n // 2]
            query_or_queries = "query" if n == 1 else "queries"
            description = f"TOTAL for {n} {query_or_queries}"
            log.info("")
            log.info(
                f"{description:<{width_query_description}} "
                f"{total_query_time:6.2f} s "
                f"{total_result_size:>14,}"
            )
            description = f"AVERAGE for {n} {query_or_queries}"
            log.info(
                f"{description:<{width_query_description}} "
                f"{average_query_time:6.2f} s "
                f"{average_result_size:>14,}"
            )
            description = f"MEDIAN for {n} {query_or_queries}"
            log.info(
                f"{description:<{width_query_description}} "
                f"{median_query_time:6.2f} s "
                f"{median_result_size:>14,}"
            )

        # Show number of failed queries.
        if num_failed > 0:
            log.info("")
            description = "Number of FAILED queries"
            num_failed_string = f"{num_failed:>6}"
            if num_failed == len(filtered_queries):
                num_failed_string += " [all]"
            log.info(
                colored(
                    f"{description:<{width_query_description}} "
                    f"{num_failed:>24}",
                    "red",
                )
            )

        # Return success (has nothing to do with how many queries failed).
        return True

    def get_result_yml_query_record(
        self,
        query: str,
        sparql: str,
        client_time: float,
        result: str | dict[str, str],
        result_size: int | None,
        max_result_size: int,
        accept_header: str,
    ) -> dict[str, Any]:
        """
        Construct a dictionary with query information for output result yaml file
        """
        record = {
            "query": query,
            "sparql": sparql,
            "runtime_info": {},
        }
        if result_size is None:
            results = f"{result['short']}: {result['long']}"
            headers = []
        else:
            record["result_size"] = result_size
            result_size = (
                max_result_size
                if result_size > max_result_size
                else result_size
            )
            headers, results = self.get_query_results(
                result, result_size, accept_header
            )
            if accept_header == "application/qlever-results+json":
                runtime_info_cmd = (
                    f"jq 'if .runtimeInformation then"
                    f" .runtimeInformation else"
                    f' "null" end\' {result}'
                )
                runtime_info_str = run_command(
                    runtime_info_cmd, return_output=True
                )
                if runtime_info_str != "null":
                    record["runtime_info"] = json.loads(runtime_info_str)
        record["runtime_info"]["client_time"] = client_time
        record["headers"] = headers
        record["results"] = results
        return record

    def get_query_results(
        self, result_file: str, result_size: int, accept_header: str
    ) -> tuple[list[str], list[list[str]]]:
        """
        Return headers and query results as a tuple for various accept headers
        """
        if accept_header in ("text/tab-separated-values", "text/csv"):
            separator = "," if accept_header == "text/csv" else "\t"
            get_result_cmd = f"sed -n '1,{result_size + 1}p' {result_file}"
            results_str = run_command(get_result_cmd, return_output=True)
            results = results_str.splitlines()
            reader = csv.reader(StringIO(results_str), delimiter=separator)
            headers = next(reader)
            results = [row for row in reader]
            return headers, results

        elif accept_header == "application/qlever-results+json":
            get_result_cmd = (
                f"jq '{{headers: .selected, results: .res[0:{result_size}]}}' "
                f"{result_file}"
            )
            results_str = run_command(get_result_cmd, return_output=True)
            results_json = json.loads(results_str)
            return results_json["headers"], results_json["results"]

        elif accept_header == "application/sparql-results+json":
            get_result_cmd = (
                f"jq '{{headers: .head.vars, "
                f"bindings: .results.bindings[0:{result_size}]}}' "
                f"{result_file}"
            )
            results_str = run_command(get_result_cmd, return_output=True)
            results_json = json.loads(results_str)
            results = []
            bindings = results_json.get("bindings", [])
            for binding in bindings:
                result = []
                if not binding or not isinstance(binding, dict):
                    results.append([])
                    continue
                for obj in binding.values():
                    value = '"' + obj["value"] + '"'
                    if obj["type"] == "uri":
                        value = "<" + value.strip('"') + ">"
                    elif "datatype" in obj:
                        value += "^^<" + obj["datatype"] + ">"
                    elif "xml:lang" in obj:
                        value += "@" + obj["xml:lang"]
                    result.append(value)
                results.append(result)
            return results_json["headers"], results

        else:  # text/turtle
            graph = rdflib.Graph()
            graph.parse(result_file, format="turtle")
            headers = ["?subject", "?predicate", "?object"]
            results = []
            for i, (s, p, o) in enumerate(graph):
                if i >= result_size:
                    break
                results.append([str(s), str(p), str(o)])
            return headers, results

    @staticmethod
    def write_query_records_to_result_file(
        query_data: dict[str, list[dict[str, Any]]], out_file: Path
    ) -> None:
        """
        Write yaml record for all queries to output yaml file
        """
        config_yaml = dict_to_yaml(query_data)
        with open(out_file, "w") as eval_yaml_file:
            eval_yaml_file.write(config_yaml)
        log.info("")
        log.info(
            f"Generated result yaml file: {out_file.stem}{out_file.suffix} "
            f"in the directory {out_file.parent.resolve()}"
        )