qlever 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/__init__.py +2 -0
- qlever/commands/example_queries.py +101 -32
- qlever/commands/get_data.py +2 -0
- qlever/commands/index_stats.py +30 -17
- qlever/commands/setup_config.py +2 -0
- qlever/log.py +1 -3
- qlever/qlever_main.py +2 -0
- qlever/qlever_old.py +1476 -0
- qlever/util.py +41 -26
- qlever-0.4.3.dist-info/METADATA +100 -0
- {qlever-0.4.1.dist-info → qlever-0.4.3.dist-info}/RECORD +15 -14
- {qlever-0.4.1.dist-info → qlever-0.4.3.dist-info}/entry_points.txt +1 -1
- qlever-0.4.1.dist-info/METADATA +0 -301
- {qlever-0.4.1.dist-info → qlever-0.4.3.dist-info}/LICENSE +0 -0
- {qlever-0.4.1.dist-info → qlever-0.4.3.dist-info}/WHEEL +0 -0
- {qlever-0.4.1.dist-info → qlever-0.4.3.dist-info}/top_level.txt +0 -0
qlever/__init__.py
CHANGED
|
@@ -4,13 +4,15 @@ import re
|
|
|
4
4
|
import shlex
|
|
5
5
|
import subprocess
|
|
6
6
|
import time
|
|
7
|
+
import traceback
|
|
8
|
+
from pathlib import Path
|
|
7
9
|
|
|
8
10
|
from termcolor import colored
|
|
9
11
|
|
|
10
12
|
from qlever.command import QleverCommand
|
|
11
13
|
from qlever.commands.clear_cache import ClearCacheCommand
|
|
12
14
|
from qlever.log import log, mute_log
|
|
13
|
-
from qlever.util import run_command
|
|
15
|
+
from qlever.util import run_command, run_curl_command
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class ExampleQueriesCommand(QleverCommand):
|
|
@@ -57,12 +59,27 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
57
59
|
"or just compute the size of the result")
|
|
58
60
|
subparser.add_argument("--limit", type=int,
|
|
59
61
|
help="Limit on the number of results")
|
|
62
|
+
subparser.add_argument("--accept", type=str,
|
|
63
|
+
choices=["text/tab-separated-values",
|
|
64
|
+
"application/sparql-results+json"],
|
|
65
|
+
default="text/tab-separated-values",
|
|
66
|
+
help="Accept header for the SPARQL query")
|
|
60
67
|
subparser.add_argument("--clear-cache",
|
|
61
68
|
choices=["yes", "no"],
|
|
62
69
|
default="yes",
|
|
63
70
|
help="Clear the cache before each query")
|
|
64
71
|
|
|
65
72
|
def execute(self, args) -> bool:
|
|
73
|
+
# If `args.accept` is `application/sparql-results+json`, we need `jq`.
|
|
74
|
+
if args.accept == "application/sparql-results+json":
|
|
75
|
+
try:
|
|
76
|
+
subprocess.run("jq --version", shell=True, check=True,
|
|
77
|
+
stdout=subprocess.DEVNULL,
|
|
78
|
+
stderr=subprocess.DEVNULL)
|
|
79
|
+
except Exception as e:
|
|
80
|
+
log.error(f"Please install `jq` for {args.accept} ({e})")
|
|
81
|
+
return False
|
|
82
|
+
|
|
66
83
|
# Handle shortcuts for SPARQL endpoint.
|
|
67
84
|
if args.sparql_endpoint_preset in self.presets:
|
|
68
85
|
args.sparql_endpoint = self.presets[args.sparql_endpoint_preset]
|
|
@@ -92,6 +109,7 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
92
109
|
else f"localhost:{args.port}")
|
|
93
110
|
self.show(f"Obtain queries via: {get_queries_cmd}\n"
|
|
94
111
|
f"SPARQL endpoint: {sparql_endpoint}\n"
|
|
112
|
+
f"Accept header: {args.accept}\n"
|
|
95
113
|
f"Clear cache before each query:"
|
|
96
114
|
f" {args.clear_cache.upper()}\n"
|
|
97
115
|
f"Download result for each query or just count:"
|
|
@@ -103,7 +121,8 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
103
121
|
|
|
104
122
|
# Get the example queries.
|
|
105
123
|
try:
|
|
106
|
-
example_query_lines = run_command(get_queries_cmd,
|
|
124
|
+
example_query_lines = run_command(get_queries_cmd,
|
|
125
|
+
return_output=True)
|
|
107
126
|
if len(example_query_lines) == 0:
|
|
108
127
|
log.error("No example queries matching the criteria found")
|
|
109
128
|
return False
|
|
@@ -114,9 +133,10 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
114
133
|
|
|
115
134
|
# Launch the queries one after the other and for each print: the
|
|
116
135
|
# description, the result size, and the query processing time.
|
|
117
|
-
count = 0
|
|
118
136
|
total_time_seconds = 0.0
|
|
119
137
|
total_result_size = 0
|
|
138
|
+
count_succeeded = 0
|
|
139
|
+
count_failed = 0
|
|
120
140
|
for example_query_line in example_query_lines:
|
|
121
141
|
# Parse description and query.
|
|
122
142
|
description, query = example_query_line.split("\t")
|
|
@@ -155,44 +175,93 @@ class ExampleQueriesCommand(QleverCommand):
|
|
|
155
175
|
+ f" }} LIMIT {args.limit}"
|
|
156
176
|
|
|
157
177
|
# Launch query.
|
|
158
|
-
query_cmd = (f"curl -sv {sparql_endpoint}"
|
|
159
|
-
f" -H \"Accept: text/tab-separated-values\""
|
|
160
|
-
f" --data-urlencode query={shlex.quote(query)}")
|
|
161
|
-
if args.download_or_count == "count":
|
|
162
|
-
query_cmd += " | sed 1d"
|
|
163
|
-
else:
|
|
164
|
-
query_cmd += " | sed 1d | wc -l"
|
|
165
178
|
try:
|
|
166
|
-
|
|
179
|
+
curl_cmd = (f"curl -s {sparql_endpoint}"
|
|
180
|
+
f" -w \"HTTP code: %{{http_code}}\\n\""
|
|
181
|
+
f" -H \"Accept: {args.accept}\""
|
|
182
|
+
f" --data-urlencode query={shlex.quote(query)}")
|
|
183
|
+
log.debug(curl_cmd)
|
|
184
|
+
result_file = (f"qlever.example_queries.result."
|
|
185
|
+
f"{abs(hash(curl_cmd))}.tmp")
|
|
167
186
|
start_time = time.time()
|
|
168
|
-
|
|
169
|
-
|
|
187
|
+
http_code = run_curl_command(sparql_endpoint,
|
|
188
|
+
headers={"Accept": args.accept},
|
|
189
|
+
params={"query": query},
|
|
190
|
+
result_file=result_file).strip()
|
|
191
|
+
if http_code != "200":
|
|
192
|
+
raise Exception(f"HTTP code {http_code}"
|
|
193
|
+
f" {Path(result_file).read_text()}")
|
|
170
194
|
time_seconds = time.time() - start_time
|
|
171
|
-
|
|
172
|
-
result_string = f"{result_size:>14,}"
|
|
195
|
+
error_msg = None
|
|
173
196
|
except Exception as e:
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
197
|
+
if args.log_level == "DEBUG":
|
|
198
|
+
traceback.print_exc()
|
|
199
|
+
error_msg = re.sub(r"\s+", " ", str(e))
|
|
200
|
+
|
|
201
|
+
# Get result size (via the command line, in order to avoid loading
|
|
202
|
+
# a potentially large JSON file into Python, which is slow).
|
|
203
|
+
if error_msg is None:
|
|
204
|
+
try:
|
|
205
|
+
if args.download_or_count == "count":
|
|
206
|
+
if args.accept == "text/tab-separated-values":
|
|
207
|
+
result_size = run_command(
|
|
208
|
+
f"sed 1d {result_file}",
|
|
209
|
+
return_output=True)
|
|
210
|
+
else:
|
|
211
|
+
result_size = run_command(
|
|
212
|
+
f"jq -r \".results.bindings[0]"
|
|
213
|
+
f" | to_entries[0].value.value"
|
|
214
|
+
f" | tonumber\" {result_file}",
|
|
215
|
+
return_output=True)
|
|
216
|
+
else:
|
|
217
|
+
if args.accept == "text/tab-separated-values":
|
|
218
|
+
result_size = run_command(
|
|
219
|
+
f"sed 1d {result_file} | wc -l",
|
|
220
|
+
return_output=True)
|
|
221
|
+
else:
|
|
222
|
+
result_size = run_command(
|
|
223
|
+
f"jq -r \".results.bindings | length\""
|
|
224
|
+
f" {result_file}",
|
|
225
|
+
return_output=True)
|
|
226
|
+
result_size = int(result_size)
|
|
227
|
+
except Exception as e:
|
|
228
|
+
error_msg = str(e)
|
|
178
229
|
|
|
179
230
|
# Print description, time, result in tabular form.
|
|
180
231
|
if (len(description) > 60):
|
|
181
232
|
description = description[:57] + "..."
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
233
|
+
if error_msg is None:
|
|
234
|
+
log.info(f"{description:<60} {time_seconds:6.2f} s "
|
|
235
|
+
f"{result_size:14,}")
|
|
236
|
+
count_succeeded += 1
|
|
237
|
+
total_time_seconds += time_seconds
|
|
238
|
+
total_result_size += result_size
|
|
239
|
+
else:
|
|
240
|
+
count_failed += 1
|
|
241
|
+
if (len(error_msg) > 60) and args.log_level != "DEBUG":
|
|
242
|
+
error_msg = error_msg[:57] + "..."
|
|
243
|
+
log.error(f"{description:<60} failed "
|
|
244
|
+
f"{colored(error_msg, 'red')}")
|
|
187
245
|
|
|
188
246
|
# Print total time.
|
|
189
247
|
log.info("")
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
248
|
+
if count_succeeded > 0:
|
|
249
|
+
query_or_queries = "query" if count_succeeded == 1 else "queries"
|
|
250
|
+
description = (f"TOTAL for {count_succeeded} {query_or_queries}")
|
|
251
|
+
log.info(f"{description:<60} "
|
|
252
|
+
f"{total_time_seconds:6.2f} s "
|
|
253
|
+
f"{total_result_size:>14,}")
|
|
254
|
+
description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
|
|
255
|
+
log.info(f"{description:<60} "
|
|
256
|
+
f"{total_time_seconds / count_succeeded:6.2f} s "
|
|
257
|
+
f"{round(total_result_size / count_succeeded):>14,}")
|
|
258
|
+
else:
|
|
259
|
+
if count_failed == 1:
|
|
260
|
+
log.info(colored("One query failed", "red"))
|
|
261
|
+
elif count_failed > 1:
|
|
262
|
+
log.info(colored("All queries failed", "red"))
|
|
263
|
+
|
|
264
|
+
# Return success (has nothing to do with how many queries failed).
|
|
265
|
+
if args.log_level != "DEBUG":
|
|
266
|
+
Path(result_file).unlink(missing_ok=True)
|
|
198
267
|
return True
|
qlever/commands/get_data.py
CHANGED
qlever/commands/index_stats.py
CHANGED
|
@@ -71,14 +71,17 @@ class IndexStatsCommand(QleverCommand):
|
|
|
71
71
|
|
|
72
72
|
# Helper function that finds the next line matching the given `regex`,
|
|
73
73
|
# starting from `current_line`, and extracts the time. Returns a tuple
|
|
74
|
-
# of the time and the regex match object.
|
|
75
|
-
#
|
|
76
|
-
# `
|
|
77
|
-
#
|
|
78
|
-
#
|
|
74
|
+
# of the time and the regex match object.
|
|
75
|
+
#
|
|
76
|
+
# If `update_current_line` is `False`, then `current_line` will not be
|
|
77
|
+
# updated by this call.
|
|
78
|
+
#
|
|
79
|
+
# Otherwise, and this is the default behavior, `current_line` will be
|
|
80
|
+
# updated to the line after the first match, or one beyond the last
|
|
81
|
+
# line if no match is found.
|
|
79
82
|
current_line = 0
|
|
80
83
|
|
|
81
|
-
def find_next_line(regex,
|
|
84
|
+
def find_next_line(regex, update_current_line=True):
|
|
82
85
|
nonlocal lines
|
|
83
86
|
nonlocal current_line
|
|
84
87
|
current_line_backup = current_line
|
|
@@ -99,7 +102,7 @@ class IndexStatsCommand(QleverCommand):
|
|
|
99
102
|
f"\"{timestamp_regex}\" from line "
|
|
100
103
|
f" \"{line.rstrip()}\" ({e})")
|
|
101
104
|
# If we get here, we did not find a matching line.
|
|
102
|
-
if
|
|
105
|
+
if not update_current_line:
|
|
103
106
|
current_line = current_line_backup
|
|
104
107
|
return None, None
|
|
105
108
|
|
|
@@ -110,24 +113,34 @@ class IndexStatsCommand(QleverCommand):
|
|
|
110
113
|
convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
|
|
111
114
|
perm_begin_and_info = []
|
|
112
115
|
while True:
|
|
113
|
-
|
|
116
|
+
# Find the next line that starts a permutation.
|
|
117
|
+
#
|
|
118
|
+
# NOTE: Should work for the old and new format of the index log
|
|
119
|
+
# file (old format: "Creating a pair" + names of permutations in
|
|
120
|
+
# line "Writing meta data for ..."; new format: names of
|
|
121
|
+
# permutations already in line "Creating permutations ...").
|
|
122
|
+
perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair",
|
|
123
|
+
update_current_line=False)
|
|
114
124
|
if perm_begin is None:
|
|
125
|
+
perm_begin, perm_info = find_next_line(
|
|
126
|
+
r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
|
|
127
|
+
update_current_line=False)
|
|
128
|
+
else:
|
|
129
|
+
_, perm_info = find_next_line(
|
|
130
|
+
r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
|
|
131
|
+
update_current_line=False)
|
|
132
|
+
if perm_info is None:
|
|
115
133
|
break
|
|
116
|
-
_, perm_info = find_next_line(r"INFO:\s*Writing meta data for"
|
|
117
|
-
r" ([A-Z]+ and [A-Z]+)", True)
|
|
118
|
-
# if perm_info is None:
|
|
119
|
-
# break
|
|
120
134
|
perm_begin_and_info.append((perm_begin, perm_info))
|
|
121
135
|
convert_end = (perm_begin_and_info[0][0] if
|
|
122
136
|
len(perm_begin_and_info) > 0 else None)
|
|
123
137
|
normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
|
|
124
|
-
text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
|
|
125
|
-
|
|
138
|
+
text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
|
|
139
|
+
update_current_line=False)
|
|
140
|
+
text_end, _ = find_next_line(r"INFO:\s*Text index build comp",
|
|
141
|
+
update_current_line=False)
|
|
126
142
|
if args.ignore_text_index:
|
|
127
143
|
text_begin = text_end = None
|
|
128
|
-
# print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info)
|
|
129
|
-
# print("DEBUG:", overall_begin)
|
|
130
|
-
# print("DEBUG:", normal_end)
|
|
131
144
|
|
|
132
145
|
# Check whether at least the first phase is done.
|
|
133
146
|
if overall_begin is None:
|
qlever/commands/setup_config.py
CHANGED
qlever/log.py
CHANGED