qlever 0.5.23__py3-none-any.whl → 0.5.24__py3-none-any.whl


@@ -56,6 +56,8 @@ class CacheStatsCommand(QleverCommand):
                 shell=True)
             cache_stats_dict = json.loads(cache_stats)
             cache_settings_dict = json.loads(cache_settings)
+            if isinstance(cache_settings_dict, list):
+                cache_settings_dict = cache_settings_dict[0]
         except Exception as e:
             log.error(f"Failed to get cache stats and settings: {e}")
             return False
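The added `isinstance` guard normalizes the JSON returned by the server; the same pattern appears again in the `SettingsCommand` hunk further down, presumably because the endpoint may wrap its settings object in a one-element JSON array. A minimal standalone sketch of the idea (not taken from the package; the sample payloads are invented):

    import json

    def normalize_settings(settings_json: str) -> dict:
        """Parse the server response and unwrap a one-element list, if any."""
        settings = json.loads(settings_json)
        if isinstance(settings, list):
            settings = settings[0]
        return settings

    # Both calls yield {'cache-max-num-entries': '1000'}.
    print(normalize_settings('{"cache-max-num-entries": "1000"}'))
    print(normalize_settings('[{"cache-max-num-entries": "1000"}]'))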
qlever/commands/index.py CHANGED
@@ -39,6 +39,7 @@ class IndexCommand(QleverCommand):
                 "multi_input_json",
                 "parallel_parsing",
                 "settings_json",
+                "vocabulary_type",
                 "index_binary",
                 "only_pso_and_pos_permutations",
                 "ulimit",
@@ -184,6 +185,7 @@ class IndexCommand(QleverCommand):
             index_cmd = (
                 f"{args.cat_input_files} | {args.index_binary}"
                 f" -i {args.name} -s {args.name}.settings.json"
+                f" --vocabulary-type {args.vocabulary_type}"
                 f" -F {args.format} -f -"
             )
             if args.parallel_parsing:
@@ -199,6 +201,7 @@ class IndexCommand(QleverCommand):
                 index_cmd = (
                     f"{args.index_binary}"
                     f" -i {args.name} -s {args.name}.settings.json"
+                    f" --vocabulary-type {args.vocabulary_type}"
                     f" {input_options}"
                 )
             else:
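To see how the new `vocabulary_type` setting ends up on the command line, here is a minimal sketch that fills the f-string from the hunk above with placeholder values (`IndexBuilderMain`, `example-vocab`, and the file names are illustrative, not taken from this diff):

    from types import SimpleNamespace

    # Placeholder arguments; in qlever they come from the Qleverfile.
    args = SimpleNamespace(
        cat_input_files="cat data.ttl",
        index_binary="IndexBuilderMain",
        name="my-dataset",
        format="ttl",
        vocabulary_type="example-vocab",
    )
    index_cmd = (
        f"{args.cat_input_files} | {args.index_binary}"
        f" -i {args.name} -s {args.name}.settings.json"
        f" --vocabulary-type {args.vocabulary_type}"
        f" -F {args.format} -f -"
    )
    print(index_cmd)
    # cat data.ttl | IndexBuilderMain -i my-dataset -s my-dataset.settings.json --vocabulary-type example-vocab -F ttl -f -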
qlever/commands/query.py CHANGED
@@ -72,6 +72,7 @@ class QueryCommand(QleverCommand):
                 "application/sparql-results+json",
                 "application/sparql-results+xml",
                 "application/qlever-results+json",
+                "application/octet-stream",
             ],
             default="text/tab-separated-values",
             help="Accept header for the SPARQL query",
@@ -34,6 +34,8 @@ class SettingsCommand(QleverCommand):
         "cache-max-size-single-entry",
         "cache-service-results",
         "default-query-timeout",
+        "division-by-zero-is-undef",
+        "enable-prefilter-on-index-scans",
         "group-by-disable-index-scan-optimizations",
         "group-by-hash-map-enabled",
         "lazy-index-scan-max-size-materialization",
@@ -44,6 +46,9 @@ class SettingsCommand(QleverCommand):
         "request-body-limit",
         "service-max-value-rows",
         "sort-estimate-cancellation-factor",
+        "spatial-join-prefilter-max-size",
+        "spatial-join-max-num-threads",
+        "syntax-test-mode",
         "throw-on-unbound-variables",
         "use-binsearch-transitive-path",
     ]
@@ -97,6 +102,8 @@ class SettingsCommand(QleverCommand):
         try:
             settings_json = run_command(curl_cmd, return_output=True)
             settings_dict = json.loads(settings_json)
+            if isinstance(settings_dict, list):
+                settings_dict = settings_dict[0]
         except Exception as e:
             log.error(f"setting command failed: {e}")
             return False
qlever/commands/ui.py CHANGED
@@ -13,13 +13,16 @@ from qlever.util import is_port_used, run_command
 
 # Return a YAML string for the given dictionary. Format values with
 # newlines using the "|" style.
-def dict_to_yaml(dictionary):
-    # Custom representer for yaml, which uses the "|" style only for
-    # multiline strings.
-    #
-    # NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style
-    # does not work as expected.
-    class MultiLineDumper(yaml.Dumper):
+def dict_to_yaml(dictionary: dict) -> str:
+    """
+    Custom representer for yaml, which uses the "|" style only for
+    multiline strings.
+
+    NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style
+    does not work as expected.
+    """
+
+    class MultiLineDumper(yaml.SafeDumper):
         def represent_scalar(self, tag, value, style=None):
             value = value.replace("\r\n", "\n")
             if isinstance(value, str) and "\n" in value:
@@ -30,6 +33,7 @@ def dict_to_yaml(dictionary):
     return yaml.dump(
         dictionary,
         sort_keys=False,
+        allow_unicode=True,
         Dumper=MultiLineDumper,
     )
 
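For illustration, a standalone sketch of the effect of the reworked dumper (it re-implements the class from the hunk above rather than importing it from qlever; the sample dictionary is invented). `SafeDumper` with `allow_unicode=True` keeps non-ASCII values readable, and multiline strings come out in `|` block style:

    import yaml

    class MultiLineDumper(yaml.SafeDumper):
        def represent_scalar(self, tag, value, style=None):
            # Use the "|" block style only for strings that contain newlines.
            if isinstance(value, str):
                value = value.replace("\r\n", "\n")
                if "\n" in value:
                    style = "|"
            return super().represent_scalar(tag, value, style=style)

    config = {
        "name": "olympics",
        "query": "SELECT ?s ?p ?o\r\nWHERE { ?s ?p ?o }",
        "label": "Bevölkerung",
    }
    print(yaml.dump(config, sort_keys=False, allow_unicode=True, Dumper=MultiLineDumper))
    # name: olympics
    # query: |-
    #   SELECT ?s ?p ?o
    #   WHERE { ?s ?p ?o }
    # label: Bevölkerung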
@@ -0,0 +1,551 @@
+from __future__ import annotations
+
+import json
+import re
+import signal
+import time
+from datetime import datetime, timezone
+
+import rdflib.term
+import requests
+import requests_sse
+from rdflib import Graph
+from termcolor import colored
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+# Monkey patch `rdflib.term._castLexicalToPython` to avoid casting of literals
+# to Python types. We do not need it (all we want it convert Turtle to N-Triples),
+# and we can speed up parsing by a factor of about 2.
+def custom_cast_lexical_to_python(lexical, datatype):
+    return None  # Your desired behavior
+
+
+rdflib.term._castLexicalToPython = custom_cast_lexical_to_python
+
+
+class UpdateWikidataCommand(QleverCommand):
+    """
+    Class for executing the `update` command.
+    """
+
+    def __init__(self):
+        # SPARQL query to get the date until which the updates of the
+        # SPARQL endpoint are complete.
+        self.sparql_updates_complete_until_query = (
+            "PREFIX wikibase: <http://wikiba.se/ontology#> "
+            "PREFIX schema: <http://schema.org/> "
+            "SELECT * WHERE { "
+            "{ SELECT (MIN(?date_modified) AS ?updates_complete_until) { "
+            "wikibase:Dump schema:dateModified ?date_modified } } "
+            "UNION { wikibase:Dump wikibase:updatesCompleteUntil ?updates_complete_until } "
+            "} ORDER BY DESC(?updates_complete_until) LIMIT 1"
+        )
+        # URL of the Wikidata SSE stream.
+        self.wikidata_update_stream_url = (
+            "https://stream.wikimedia.org/v2/"
+            "stream/rdf-streaming-updater.mutation.v2"
+        )
+        # Remember if Ctrl+C was pressed, so we can handle it gracefully.
+        self.ctrl_c_pressed = False
+
+    def description(self) -> str:
+        return "Update from given SSE stream"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {"server": ["host_name", "port", "access_token"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "sse_stream_url",
+            nargs="?",
+            type=str,
+            default=self.wikidata_update_stream_url,
+            help="URL of the SSE stream to update from",
+        )
+        subparser.add_argument(
+            "--batch-size",
+            type=int,
+            default=100000,
+            help="Group this many messages together into one update "
+            "(default: one update for each message); NOTE: this simply "
+            "concatenates the `rdf_added_data` and `rdf_deleted_data` fields, "
+            "which is not 100%% correct; as soon as chaining is supported, "
+            "this will be fixed",
+        )
+        subparser.add_argument(
+            "--lag-seconds",
+            type=int,
+            default=1,
+            help="When a message is encountered that is within this many "
+            "seconds of the current time, finish the current batch "
+            "(and show a warning that this happened)",
+        )
+        subparser.add_argument(
+            "--since",
+            type=str,
+            help="Consume stream messages since this date "
+            "(default: determine automatically from the SPARQL endpoint)",
+        )
+        subparser.add_argument(
+            "--topics",
+            type=str,
+            default="eqiad.rdf-streaming-updater.mutation",
+            help="Comma-separated list of topics to consume from the SSE stream"
+            " (default: only eqiad.rdf-streaming-updater.mutation)",
+        )
+        subparser.add_argument(
+            "--min-or-max-date",
+            choices=["min", "max"],
+            default="max",
+            help="Use the minimum or maximum date of the batch for the "
+            "`updatesCompleteUntil` property (default: maximum)",
+        )
+        subparser.add_argument(
+            "--wait-between-batches",
+            type=int,
+            default=3600,
+            help="Wait this many seconds between batches that were "
+            "finished due to a message that is within `lag_seconds` of "
+            "the current time (default: 3600s)",
+        )
+
+    # Handle Ctrl+C gracefully by finishing the current batch and then exiting.
+    def handle_ctrl_c(self, signal_received, frame):
+        if self.ctrl_c_pressed:
+            log.warn("\rCtrl+C pressed again, undoing the previous Ctrl+C")
+            self.ctrl_c_pressed = False
+        else:
+            self.ctrl_c_pressed = True
+            log.warn(
+                "\rCtrl+C pressed, will finish the current batch and then exit"
+                " [press Ctrl+C again to continue]"
+            )
+
+    def execute(self, args) -> bool:
+        # cURL command to get the date until which the updates of the
+        # SPARQL endpoint are complete.
+        sparql_endpoint = f"http://{args.host_name}:{args.port}"
+        curl_cmd_updates_complete_until = (
+            f"curl -s {sparql_endpoint}"
+            f' -H "Accept: text/csv"'
+            f' -H "Content-type: application/sparql-query"'
+            f' --data "{self.sparql_updates_complete_until_query}"'
+        )
+
+        # Construct the command and show it.
+        lag_seconds_str = (
+            f"{args.lag_seconds} second{'s' if args.lag_seconds > 1 else ''}"
+        )
+        cmd_description = []
+        if args.since:
+            cmd_description.append(f"SINCE={args.since}")
+        else:
+            cmd_description.append(
+                f"SINCE=$({curl_cmd_updates_complete_until} | sed 1d)"
+            )
+        cmd_description.append(
+            f"Process SSE stream from {args.sse_stream_url}?since=$SINCE "
+            f"in batches of {args.batch_size:,} messages "
+            f"(less if a message is encountered that is within "
+            f"{lag_seconds_str} of the current time)"
+        )
+        self.show("\n".join(cmd_description), only_show=args.show)
+        if args.show:
+            return True
+
+        # Compute the `since` date if not given.
+        if not args.since:
+            try:
+                args.since = run_command(
+                    f"{curl_cmd_updates_complete_until} | sed 1d",
+                    return_output=True,
+                ).strip()
+            except Exception as e:
+                log.error(
+                    f"Error running `{curl_cmd_updates_complete_until}`: {e}"
+                )
+                return False
+
+        # Special handling of Ctrl+C, see `handle_ctrl_c` above.
+        signal.signal(signal.SIGINT, self.handle_ctrl_c)
+        log.warn(
+            "Press Ctrl+C to finish the current batch and end gracefully, "
+            "press Ctrl+C again to continue with the next batch"
+        )
+        log.info("")
+        log.info(f"SINCE={args.since}")
+        log.info("")
+        args.sse_stream_url = f"{args.sse_stream_url}?since={args.since}"
+
+        # Initialize the SSE stream and all the statistics variables.
+        source = requests_sse.EventSource(
+            args.sse_stream_url,
+            headers={
+                "Accept": "text/event-stream",
+                "User-Agent": "qlever update-wikidata",
+            },
+        )
+        source.connect()
+        current_batch_size = 0
+        batch_count = 0
+        total_num_ops = 0
+        total_time_s = 0
+        start_time = time.perf_counter()
+        topics_to_consider = set(args.topics.split(","))
+        wait_before_next_batch = False
+
+        # Iterating over all messages in the stream.
+        for event in source:
+            # Beginning of a new batch of messages.
+            if current_batch_size == 0:
+                date_list = []
+                delta_to_now_list = []
+                batch_assembly_start_time = time.perf_counter()
+                insert_triples = set()
+                delete_triples = set()
+                if wait_before_next_batch:
+                    log.info(
+                        f"Waiting {args.wait_between_batches} "
+                        f"second{'s' if args.wait_between_batches > 1 else ''} "
+                        f"before processing the next batch"
+                    )
+                    log.info("")
+                    time.sleep(args.wait_between_batches)
+                    wait_before_next_batch = False
+
+            # Check if the `args.batch_size` is reached (note that we come here
+            # after a `continue` due to an error).
+            if self.ctrl_c_pressed:
+                break
+
+            # Process the message. Skip messages that are not of type `message`
+            # (should not happen), have no field `data` (should not happen
+            # either), or where the topic is not in `args.topics`.
+            if event.type != "message" or not event.data:
+                continue
+            event_data = json.loads(event.data)
+            topic = event_data.get("meta").get("topic")
+            if topic not in topics_to_consider:
+                continue
+
+            try:
+                # event_id = json.loads(event.last_event_id)
+                # date_ms_since_epoch = event_id[0].get("timestamp")
+                # date = time.strftime(
+                #     "%Y-%m-%dT%H:%M:%SZ",
+                #     time.gmtime(date_ms_since_epoch / 1000.0),
+                # )
+                date = event_data.get("meta").get("dt")
+                # date = event_data.get("dt")
+                date = re.sub(r"\.\d*Z$", "Z", date)
+                # entity_id = event_data.get("entity_id")
+                # operation = event_data.get("operation")
+                rdf_added_data = event_data.get("rdf_added_data")
+                rdf_deleted_data = event_data.get("rdf_deleted_data")
+
+                # Process the to-be-deleted triples.
+                if rdf_deleted_data is not None:
+                    try:
+                        rdf_deleted_data = rdf_deleted_data.get("data")
+                        graph = Graph()
+                        log.debug(f"RDF deleted data: {rdf_deleted_data}")
+                        graph.parse(data=rdf_deleted_data, format="turtle")
+                        for s, p, o in graph:
+                            triple = f"{s.n3()} {p.n3()} {o.n3()}"
+                            # NOTE: In case there was a previous `insert` of that
+                            # triple, it is safe to remove that `insert`, but not
+                            # the `delete` (in case the triple is contained in the
+                            # original data).
+                            if triple in insert_triples:
+                                insert_triples.remove(triple)
+                            delete_triples.add(triple)
+                    except Exception as e:
+                        log.error(f"Error reading `rdf_deleted_data`: {e}")
+                        return False
+
+                # Process the to-be-added triples.
+                if rdf_added_data is not None:
+                    try:
+                        rdf_added_data = rdf_added_data.get("data")
+                        graph = Graph()
+                        log.debug("RDF added data: {rdf_added_data}")
+                        graph.parse(data=rdf_added_data, format="turtle")
+                        for s, p, o in graph:
+                            triple = f"{s.n3()} {p.n3()} {o.n3()}"
+                            # NOTE: In case there was a previous `delete` of that
+                            # triple, it is safe to remove that `delete`, but not
+                            # the `insert` (in case the triple is not contained in
+                            # the original data).
+                            if triple in delete_triples:
+                                delete_triples.remove(triple)
+                            insert_triples.add(triple)
+                    except Exception as e:
+                        log.error(f"Error reading `rdf_added_data`: {e}")
+                        return False
+
+            except Exception as e:
+                log.error(f"Error reading data from message: {e}")
+                log.info(event)
+                continue
+
+            # Continue assembling until either the batch size is reached, or
+            # we encounter a message that is within `args.lag_seconds` of the
+            # current time.
+            current_batch_size += 1
+            date_as_epoch_s = (
+                datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
+                .replace(tzinfo=timezone.utc)
+                .timestamp()
+            )
+            now_as_epoch_s = time.time()
+            delta_to_now_s = now_as_epoch_s - date_as_epoch_s
+            log.debug(
+                f"DATE: {date_as_epoch_s:.0f} [{date}], "
+                f"NOW: {now_as_epoch_s:.0f}, "
+                f"DELTA: {now_as_epoch_s - date_as_epoch_s:.0f}"
+            )
+            date_list.append(date)
+            delta_to_now_list.append(delta_to_now_s)
+            if (
+                current_batch_size < args.batch_size
+                and not self.ctrl_c_pressed
+            ):
+                if delta_to_now_s < args.lag_seconds:
+                    log.warn(
+                        f"Encountered message with date {date}, which is within "
+                        f"{args.lag_seconds} "
+                        f"second{'s' if args.lag_seconds > 1 else ''} "
+                        f"of the current time, finishing the current batch"
+                    )
+                else:
+                    continue
+
+            # Process the current batch of messages.
+            batch_assembly_end_time = time.perf_counter()
+            batch_assembly_time_ms = int(
+                1000 * (batch_assembly_end_time - batch_assembly_start_time)
+            )
+            batch_count += 1
+            date_list.sort()
+            delta_to_now_list.sort()
+            min_delta_to_now_s = delta_to_now_list[0]
+            if min_delta_to_now_s < 10:
+                min_delta_to_now_s = f"{min_delta_to_now_s:.1f}"
+            else:
+                min_delta_to_now_s = f"{int(min_delta_to_now_s):,}"
+            log.info(
+                f"Processing batch #{batch_count} "
+                f"with {current_batch_size:,} "
+                f"message{'s' if current_batch_size > 1 else ''}, "
+                f"date range: {date_list[0]} - {date_list[-1]} "
+                f"[assembly time: {batch_assembly_time_ms:,} ms, "
+                f"min delta to NOW: {min_delta_to_now_s} s]"
+            )
+            wait_before_next_batch = (
+                args.wait_between_batches is not None
+                and current_batch_size < args.batch_size
+            )
+            current_batch_size = 0
+
+            # Add the min and max date of the batch to `insert_triples`.
+            #
+            # NOTE: The min date means that we have *all* updates until that
+            # date. The max date is the date of the latest update we have seen.
+            # However, there may still be earlier updates that we have not seen
+            # yet. Wikidata uses `schema:dateModified` for the latter semantics,
+            # so we use it here as well. For the other semantics, we invent
+            # a new property `wikibase:updatesCompleteUntil`.
+            insert_triples.add(
+                f"<http://wikiba.se/ontology#Dump> "
+                f"<http://schema.org/dateModified> "
+                f'"{date_list[-1]}"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
+            )
+            updates_complete_until = (
+                date_list[-1]
+                if args.min_or_max_date == "max"
+                else date_list[0]
+            )
+            insert_triples.add(
+                f"<http://wikiba.se/ontology#Dump> "
+                f"<http://wikiba.se/ontology#updatesCompleteUntil> "
+                f'"{updates_complete_until}"'
+                f"^^<http://www.w3.org/2001/XMLSchema#dateTime>"
+            )
+
+            # Construct update operation.
+            delete_block = " . \n ".join(delete_triples)
+            insert_block = " . \n ".join(insert_triples)
+            delete_insert_operation = (
+                f"DELETE {{\n {delete_block} .\n}} "
+                f"INSERT {{\n {insert_block} .\n}} "
+                f"WHERE {{ }}\n"
+            )
+
+            # Construct curl command. For batch size 1, send the operation via
+            # `--data-urlencode`, otherwise write to file and send via `--data-binary`.
+            curl_cmd = (
+                f"curl -s -X POST {sparql_endpoint}"
+                f" -H 'Authorization: Bearer {args.access_token}'"
+                f" -H 'Content-Type: application/sparql-update'"
+            )
+            update_arg_file_name = f"update.sparql.{batch_count}"
+            with open(update_arg_file_name, "w") as f:
+                f.write(delete_insert_operation)
+            curl_cmd += f" --data-binary @{update_arg_file_name}"
+            log.info(colored(curl_cmd, "blue"))
+
+            # Run it (using `curl` for batch size up to 1000, otherwise
+            # `requests`).
+            try:
+                headers = {
+                    "Authorization": f"Bearer {args.access_token}",
+                    "Content-Type": "application/sparql-update",
+                }
+                response = requests.post(
+                    url=sparql_endpoint,
+                    headers=headers,
+                    data=delete_insert_operation,
+                )
+                result = response.text
+                with open(f"update.result.{batch_count}", "w") as f:
+                    f.write(result)
+            except Exception as e:
+                log.warn(f"Error running `requests.post`: {e}")
+                log.info("")
+                continue
+
+            # Results should be a JSON, parse it.
+            try:
+                result = json.loads(result)
+                if isinstance(result, list):
+                    result = result[0]
+            except Exception as e:
+                log.error(
+                    f"Error parsing JSON result: {e}"
+                    f", the first 1000 characters are:"
+                )
+                log.info(result[:1000])
+                log.info("")
+                continue
+
+            # Check if the result contains a QLever exception.
+            if "exception" in result:
+                error_msg = result["exception"]
+                log.error(f"QLever exception: {error_msg}")
+                log.info("")
+                continue
+
+            # Helper function for getting the value of `result["time"][...]`
+            # without the "ms" suffix.
+            def get_time_ms(*keys: str) -> int:
+                value = result["time"]
+                for key in keys:
+                    value = value[key]
+                return int(value)
+                # return int(re.sub(r"ms$", "", value))
+
+            # Show statistics of the update operation.
+            try:
+                ins_after = result["delta-triples"]["after"]["inserted"]
+                del_after = result["delta-triples"]["after"]["deleted"]
+                ops_after = result["delta-triples"]["after"]["total"]
+                num_ins = int(result["delta-triples"]["operation"]["inserted"])
+                num_del = int(result["delta-triples"]["operation"]["deleted"])
+                num_ops = int(result["delta-triples"]["operation"]["total"])
+                time_ms = get_time_ms("total")
+                time_us_per_op = int(1000 * time_ms / num_ops)
+                log.info(
+                    colored(
+                        f"NUM_OPS: {num_ops:+6,} -> {ops_after:6,}, "
+                        f"INS: {num_ins:+6,} -> {ins_after:6,}, "
+                        f"DEL: {num_del:+6,} -> {del_after:6,}, "
+                        f"TIME: {time_ms:7,} ms, "
+                        f"TIME/OP: {time_us_per_op:,} µs",
+                        attrs=["bold"],
+                    )
+                )
+
+                # Also show a detailed breakdown of the total time.
+                time_preparation = get_time_ms(
+                    "execution", "processUpdateImpl", "preparation"
+                )
+                time_insert = get_time_ms(
+                    "execution", "processUpdateImpl", "insertTriples", "total"
+                )
+                time_delete = get_time_ms(
+                    "execution", "processUpdateImpl", "deleteTriples", "total"
+                )
+                time_snapshot = get_time_ms("execution", "snapshotCreation")
+                time_writeback = get_time_ms("execution", "diskWriteback")
+                time_unaccounted = time_ms - (
+                    time_delete
+                    + time_insert
+                    + time_preparation
+                    + time_snapshot
+                    + time_writeback
+                )
+                log.info(
+                    f"PREPARATION: {100 * time_preparation / time_ms:2.0f}%, "
+                    # f"PLANNING: {100 * time_planning / time_ms:2.0f}%, "
+                    f"INSERT: {100 * time_insert / time_ms:2.0f}%, "
+                    f"DELETE: {100 * time_delete / time_ms:2.0f}%, "
+                    f"SNAPSHOT: {100 * time_snapshot / time_ms:2.0f}%, "
+                    f"WRITEBACK: {100 * time_writeback / time_ms:2.0f}%, "
+                    f"UNACCOUNTED: {100 * time_unaccounted / time_ms:2.0f}%",
+                )
+
+                # Show the totals so far.
+                total_num_ops += num_ops
+                total_time_s += time_ms / 1000.0
+                elapsed_time_s = time.perf_counter() - start_time
+                time_us_per_op = int(1e6 * total_time_s / total_num_ops)
+                log.info(
+                    colored(
+                        f"TOTAL NUM_OPS SO FAR: {total_num_ops:8,}, "
+                        f"TOTAL UPDATE TIME SO FAR: {total_time_s:4.0f} s, "
+                        f"ELAPSED TIME SO FAR: {elapsed_time_s:4.0f} s, "
+                        f"AVG TIME/OP SO FAR: {time_us_per_op:,} µs",
+                        attrs=["bold"],
+                    )
+                )
+
+            except Exception as e:
+                log.warn(
+                    f"Error extracting statistics: {e}, "
+                    f"curl command was: {curl_cmd}"
+                )
+                # Show traceback for debugging.
+                import traceback
+
+                traceback.print_exc()
+                log.info("")
+                continue
+
+            # Stop after processing the specified number of batches.
+            log.info("")
+
+        # Final statistics after all batches have been processed.
+        elapsed_time_s = time.perf_counter() - start_time
+        time_us_per_op = int(1e6 * total_time_s / total_num_ops)
+        log.info(
+            f"Processed {batch_count} "
+            f"{'batches' if batch_count > 1 else 'batch'} "
+            f"terminating update command"
+        )
+        log.info(
+            colored(
+                f"TOTAL NUM_OPS: {total_num_ops:8,}, "
+                f"TOTAL TIME: {total_time_s:4.0f} s, "
+                f"ELAPSED TIME: {elapsed_time_s:4.0f} s, "
+                f"AVG TIME/OP: {time_us_per_op:,} µs",
+                attrs=["bold"],
+            )
+        )
+        return True
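For a sense of what this new command actually sends to the endpoint, here is a minimal sketch of the SPARQL update it constructs (mirroring the `delete_insert_operation` code above; the two triples are invented placeholders):

    delete_triples = {
        '<http://www.wikidata.org/entity/Q42> <http://schema.org/description> "old description"@en',
    }
    insert_triples = {
        '<http://www.wikidata.org/entity/Q42> <http://schema.org/description> "new description"@en',
    }
    delete_block = " . \n ".join(delete_triples)
    insert_block = " . \n ".join(insert_triples)
    delete_insert_operation = (
        f"DELETE {{\n {delete_block} .\n}} "
        f"INSERT {{\n {insert_block} .\n}} "
        f"WHERE {{ }}\n"
    )
    print(delete_insert_operation)
    # DELETE {
    #  <http://www.wikidata.org/entity/Q42> <http://schema.org/description> "old description"@en .
    # } INSERT {
    #  <http://www.wikidata.org/entity/Q42> <http://schema.org/description> "new description"@en .
    # } WHERE { }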
qlever/qlever_main.py CHANGED
@@ -40,8 +40,7 @@ def main():
         if not command_successful:
             exit(1)
     except KeyboardInterrupt:
-        log.info("")
-        log.info("Ctrl-C pressed, exiting ...")
+        log.warn("\rCtrl-C pressed, exiting ...")
         log.info("")
         exit(1)
     except Exception as e: