qlever 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

qlever/__init__.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import sys
2
4
  from pathlib import Path
3
5
 
@@ -4,13 +4,15 @@ import re
4
4
  import shlex
5
5
  import subprocess
6
6
  import time
7
+ import traceback
8
+ from pathlib import Path
7
9
 
8
10
  from termcolor import colored
9
11
 
10
12
  from qlever.command import QleverCommand
11
13
  from qlever.commands.clear_cache import ClearCacheCommand
12
14
  from qlever.log import log, mute_log
13
- from qlever.util import run_command
15
+ from qlever.util import run_command, run_curl_command
14
16
 
15
17
 
16
18
  class ExampleQueriesCommand(QleverCommand):
@@ -57,12 +59,27 @@ class ExampleQueriesCommand(QleverCommand):
57
59
  "or just compute the size of the result")
58
60
  subparser.add_argument("--limit", type=int,
59
61
  help="Limit on the number of results")
62
+ subparser.add_argument("--accept", type=str,
63
+ choices=["text/tab-separated-values",
64
+ "application/sparql-results+json"],
65
+ default="text/tab-separated-values",
66
+ help="Accept header for the SPARQL query")
60
67
  subparser.add_argument("--clear-cache",
61
68
  choices=["yes", "no"],
62
69
  default="yes",
63
70
  help="Clear the cache before each query")
64
71
 
65
72
  def execute(self, args) -> bool:
73
+ # If `args.accept` is `application/sparql-results+json`, we need `jq`.
74
+ if args.accept == "application/sparql-results+json":
75
+ try:
76
+ subprocess.run("jq --version", shell=True, check=True,
77
+ stdout=subprocess.DEVNULL,
78
+ stderr=subprocess.DEVNULL)
79
+ except Exception as e:
80
+ log.error(f"Please install `jq` for {args.accept} ({e})")
81
+ return False
82
+
66
83
  # Handle shortcuts for SPARQL endpoint.
67
84
  if args.sparql_endpoint_preset in self.presets:
68
85
  args.sparql_endpoint = self.presets[args.sparql_endpoint_preset]
@@ -92,6 +109,7 @@ class ExampleQueriesCommand(QleverCommand):
92
109
  else f"localhost:{args.port}")
93
110
  self.show(f"Obtain queries via: {get_queries_cmd}\n"
94
111
  f"SPARQL endpoint: {sparql_endpoint}\n"
112
+ f"Accept header: {args.accept}\n"
95
113
  f"Clear cache before each query:"
96
114
  f" {args.clear_cache.upper()}\n"
97
115
  f"Download result for each query or just count:"
@@ -103,7 +121,8 @@ class ExampleQueriesCommand(QleverCommand):
103
121
 
104
122
  # Get the example queries.
105
123
  try:
106
- example_query_lines = run_command(get_queries_cmd, return_output=True)
124
+ example_query_lines = run_command(get_queries_cmd,
125
+ return_output=True)
107
126
  if len(example_query_lines) == 0:
108
127
  log.error("No example queries matching the criteria found")
109
128
  return False
@@ -114,9 +133,10 @@ class ExampleQueriesCommand(QleverCommand):
114
133
 
115
134
  # Launch the queries one after the other and for each print: the
116
135
  # description, the result size, and the query processing time.
117
- count = 0
118
136
  total_time_seconds = 0.0
119
137
  total_result_size = 0
138
+ count_succeeded = 0
139
+ count_failed = 0
120
140
  for example_query_line in example_query_lines:
121
141
  # Parse description and query.
122
142
  description, query = example_query_line.split("\t")
@@ -155,44 +175,93 @@ class ExampleQueriesCommand(QleverCommand):
155
175
  + f" }} LIMIT {args.limit}"
156
176
 
157
177
  # Launch query.
158
- query_cmd = (f"curl -sv {sparql_endpoint}"
159
- f" -H \"Accept: text/tab-separated-values\""
160
- f" --data-urlencode query={shlex.quote(query)}")
161
- if args.download_or_count == "count":
162
- query_cmd += " | sed 1d"
163
- else:
164
- query_cmd += " | sed 1d | wc -l"
165
178
  try:
166
- log.debug(query_cmd)
179
+ curl_cmd = (f"curl -s {sparql_endpoint}"
180
+ f" -w \"HTTP code: %{{http_code}}\\n\""
181
+ f" -H \"Accept: {args.accept}\""
182
+ f" --data-urlencode query={shlex.quote(query)}")
183
+ log.debug(curl_cmd)
184
+ result_file = (f"qlever.example_queries.result."
185
+ f"{abs(hash(curl_cmd))}.tmp")
167
186
  start_time = time.time()
168
- result_size = run_command(query_cmd, return_output=True)
169
- result_size = int(result_size.strip())
187
+ http_code = run_curl_command(sparql_endpoint,
188
+ headers={"Accept": args.accept},
189
+ params={"query": query},
190
+ result_file=result_file).strip()
191
+ if http_code != "200":
192
+ raise Exception(f"HTTP code {http_code}"
193
+ f" {Path(result_file).read_text()}")
170
194
  time_seconds = time.time() - start_time
171
- time_string = f"{time_seconds:.2f}"
172
- result_string = f"{result_size:>14,}"
195
+ error_msg = None
173
196
  except Exception as e:
174
- time_seconds = 0.0
175
- time_string = "---"
176
- result_size = 0
177
- result_string = colored(f" FAILED {e}", "red")
197
+ if args.log_level == "DEBUG":
198
+ traceback.print_exc()
199
+ error_msg = re.sub(r"\s+", " ", str(e))
200
+
201
+ # Get result size (via the command line, in order to avoid loading
202
+ # a potentially large JSON file into Python, which is slow).
203
+ if error_msg is None:
204
+ try:
205
+ if args.download_or_count == "count":
206
+ if args.accept == "text/tab-separated-values":
207
+ result_size = run_command(
208
+ f"sed 1d {result_file}",
209
+ return_output=True)
210
+ else:
211
+ result_size = run_command(
212
+ f"jq -r \".results.bindings[0]"
213
+ f" | to_entries[0].value.value"
214
+ f" | tonumber\" {result_file}",
215
+ return_output=True)
216
+ else:
217
+ if args.accept == "text/tab-separated-values":
218
+ result_size = run_command(
219
+ f"sed 1d {result_file} | wc -l",
220
+ return_output=True)
221
+ else:
222
+ result_size = run_command(
223
+ f"jq -r \".results.bindings | length\""
224
+ f" {result_file}",
225
+ return_output=True)
226
+ result_size = int(result_size)
227
+ except Exception as e:
228
+ error_msg = str(e)
178
229
 
179
230
  # Print description, time, result in tabular form.
180
231
  if (len(description) > 60):
181
232
  description = description[:57] + "..."
182
- log.info(f"{description:<60} {time_string:>6} s "
183
- f"{result_string}")
184
- count += 1
185
- total_time_seconds += time_seconds
186
- total_result_size += result_size
233
+ if error_msg is None:
234
+ log.info(f"{description:<60} {time_seconds:6.2f} s "
235
+ f"{result_size:14,}")
236
+ count_succeeded += 1
237
+ total_time_seconds += time_seconds
238
+ total_result_size += result_size
239
+ else:
240
+ count_failed += 1
241
+ if (len(error_msg) > 60) and args.log_level != "DEBUG":
242
+ error_msg = error_msg[:57] + "..."
243
+ log.error(f"{description:<60} failed "
244
+ f"{colored(error_msg, 'red')}")
187
245
 
188
246
  # Print total time.
189
247
  log.info("")
190
- description = (f"TOTAL for {count} "
191
- f"{'query' if count == 1 else 'queries'}")
192
- log.info(f"{description:<60} {total_time_seconds:6.2f} s "
193
- f"{total_result_size:>14,}")
194
- description = (f"AVERAGE for {count} "
195
- f"{'query' if count == 1 else 'queries'}")
196
- log.info(f"{description:<60} {total_time_seconds / count:6.2f} s "
197
- f"{round(total_result_size / count):>14,}")
248
+ if count_succeeded > 0:
249
+ query_or_queries = "query" if count_succeeded == 1 else "queries"
250
+ description = (f"TOTAL for {count_succeeded} {query_or_queries}")
251
+ log.info(f"{description:<60} "
252
+ f"{total_time_seconds:6.2f} s "
253
+ f"{total_result_size:>14,}")
254
+ description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
255
+ log.info(f"{description:<60} "
256
+ f"{total_time_seconds / count_succeeded:6.2f} s "
257
+ f"{round(total_result_size / count_succeeded):>14,}")
258
+ else:
259
+ if count_failed == 1:
260
+ log.info(colored("One query failed", "red"))
261
+ elif count_failed > 1:
262
+ log.info(colored("All queries failed", "red"))
263
+
264
+ # Return success (has nothing to do with how many queries failed).
265
+ if args.log_level != "DEBUG":
266
+ Path(result_file).unlink(missing_ok=True)
198
267
  return True
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import shlex
2
4
  import subprocess
3
5
 
@@ -71,14 +71,17 @@ class IndexStatsCommand(QleverCommand):
71
71
 
72
72
  # Helper function that finds the next line matching the given `regex`,
73
73
  # starting from `current_line`, and extracts the time. Returns a tuple
74
- # of the time and the regex match object. If a match is found,
75
- # `current_line` is updated to the line after the match. Otherwise,
76
- # `current_line` will be one beyond the last line, unless
77
- # `line_is_optional` is true, in which case it will be the same as when
78
- # the function was entered.
74
+ # of the time and the regex match object.
75
+ #
76
+ # If `update_current_line` is `False`, then `current_line` will not be
77
+ # updated by this call.
78
+ #
79
+ # Otherwise, and this is the default behavior, `current_line` will be
80
+ # updated to the line after the first match, or one beyond the last
81
+ # line if no match is found.
79
82
  current_line = 0
80
83
 
81
- def find_next_line(regex, line_is_optional=False):
84
+ def find_next_line(regex, update_current_line=True):
82
85
  nonlocal lines
83
86
  nonlocal current_line
84
87
  current_line_backup = current_line
@@ -99,7 +102,7 @@ class IndexStatsCommand(QleverCommand):
99
102
  f"\"{timestamp_regex}\" from line "
100
103
  f" \"{line.rstrip()}\" ({e})")
101
104
  # If we get here, we did not find a matching line.
102
- if line_is_optional:
105
+ if not update_current_line:
103
106
  current_line = current_line_backup
104
107
  return None, None
105
108
 
@@ -110,24 +113,34 @@ class IndexStatsCommand(QleverCommand):
110
113
  convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
111
114
  perm_begin_and_info = []
112
115
  while True:
113
- perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True)
116
+ # Find the next line that starts a permutation.
117
+ #
118
+ # NOTE: Should work for the old and new format of the index log
119
+ # file (old format: "Creating a pair" + names of permutations in
120
+ # line "Writing meta data for ..."; new format: name of
121
+ # permutations already in line "Creating permutations ...").
122
+ perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair",
123
+ update_current_line=False)
114
124
  if perm_begin is None:
125
+ perm_begin, perm_info = find_next_line(
126
+ r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
127
+ update_current_line=False)
128
+ else:
129
+ _, perm_info = find_next_line(
130
+ r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
131
+ update_current_line=False)
132
+ if perm_info is None:
115
133
  break
116
- _, perm_info = find_next_line(r"INFO:\s*Writing meta data for"
117
- r" ([A-Z]+ and [A-Z]+)", True)
118
- # if perm_info is None:
119
- # break
120
134
  perm_begin_and_info.append((perm_begin, perm_info))
121
135
  convert_end = (perm_begin_and_info[0][0] if
122
136
  len(perm_begin_and_info) > 0 else None)
123
137
  normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
124
- text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True)
125
- text_end, _ = find_next_line(r"INFO:\s*Text index build comp", True)
138
+ text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
139
+ update_current_line=False)
140
+ text_end, _ = find_next_line(r"INFO:\s*Text index build comp",
141
+ update_current_line=False)
126
142
  if args.ignore_text_index:
127
143
  text_begin = text_end = None
128
- # print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info)
129
- # print("DEBUG:", overall_begin)
130
- # print("DEBUG:", normal_end)
131
144
 
132
145
  # Check whether at least the first phase is done.
133
146
  if overall_begin is None:
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import subprocess
2
4
  from pathlib import Path
3
5
 
qlever/log.py CHANGED
@@ -1,6 +1,4 @@
1
- # Copyright 2024, University of Freiburg,
2
- # Chair of Algorithms and Data Structures
3
- # Author: Hannah Bast <bast@cs.uni-freiburg.de>
1
+ from __future__ import annotations
4
2
 
5
3
  import logging
6
4
  from contextlib import contextmanager
qlever/qlever_main.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # Chair of Algorithms and Data Structures
6
6
  # Author: Hannah Bast <bast@cs.uni-freiburg.de>
7
7
 
8
+ from __future__ import annotations
9
+
8
10
  import re
9
11
  import traceback
10
12