qlever 0.5.23__py3-none-any.whl → 0.5.25__py3-none-any.whl

@@ -0,0 +1,554 @@
+from __future__ import annotations
+
+import json
+import re
+import signal
+import time
+from datetime import datetime, timezone
+
+import rdflib.term
+import requests
+import requests_sse
+from rdflib import Graph
+from termcolor import colored
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+# Monkey patch `rdflib.term._castLexicalToPython` to avoid casting of literals
+# to Python types. We do not need it (all we want is to convert Turtle to
+# N-Triples), and skipping it speeds up parsing by a factor of about 2.
+def custom_cast_lexical_to_python(lexical, datatype):
+    return None  # Skip casting, keep literals in their lexical form.
+
+
+rdflib.term._castLexicalToPython = custom_cast_lexical_to_python
+
+
+class UpdateWikidataCommand(QleverCommand):
+    """
+    Class for executing the `update-wikidata` command.
+    """
+
+    def __init__(self):
+        # SPARQL query to get the date until which the updates of the
+        # SPARQL endpoint are complete.
+        self.sparql_updates_complete_until_query = (
+            "PREFIX wikibase: <http://wikiba.se/ontology#> "
+            "PREFIX schema: <http://schema.org/> "
+            "SELECT * WHERE { "
+            "{ SELECT (MIN(?date_modified) AS ?updates_complete_until) { "
+            "wikibase:Dump schema:dateModified ?date_modified } } "
+            "UNION { wikibase:Dump wikibase:updatesCompleteUntil ?updates_complete_until } "
+            "} ORDER BY DESC(?updates_complete_until) LIMIT 1"
+        )
+        # URL of the Wikidata SSE stream.
+        self.wikidata_update_stream_url = (
+            "https://stream.wikimedia.org/v2/"
+            "stream/rdf-streaming-updater.mutation.v2"
+        )
+        # Remember if Ctrl+C was pressed, so we can handle it gracefully.
+        self.ctrl_c_pressed = False
+
+    def description(self) -> str:
+        return "Update from given SSE stream"
+
+    def should_have_qleverfile(self) -> bool:
+        return True
+
+    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
+        return {"server": ["host_name", "port", "access_token"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "sse_stream_url",
+            nargs="?",
+            type=str,
+            default=self.wikidata_update_stream_url,
+            help="URL of the SSE stream to update from",
+        )
+        subparser.add_argument(
+            "--batch-size",
+            type=int,
+            default=100000,
+            help="Group this many messages together into one update "
+            "(default: 100000); NOTE: this simply "
+            "concatenates the `rdf_added_data` and `rdf_deleted_data` fields, "
+            "which is not 100%% correct; as soon as chaining is supported, "
+            "this will be fixed",
+        )
+        subparser.add_argument(
+            "--lag-seconds",
+            type=int,
+            default=1,
+            help="When a message is encountered that is within this many "
+            "seconds of the current time, finish the current batch "
+            "(and show a warning that this happened)",
+        )
+        subparser.add_argument(
+            "--since",
+            type=str,
+            help="Consume stream messages since this date "
+            "(default: determine automatically from the SPARQL endpoint)",
+        )
+        subparser.add_argument(
+            "--topics",
+            type=str,
+            default="eqiad.rdf-streaming-updater.mutation",
+            help="Comma-separated list of topics to consume from the SSE stream"
+            " (default: only eqiad.rdf-streaming-updater.mutation)",
+        )
+        subparser.add_argument(
+            "--min-or-max-date",
+            choices=["min", "max"],
+            default="max",
+            help="Use the minimum or maximum date of the batch for the "
+            "`updatesCompleteUntil` property (default: maximum)",
+        )
+        subparser.add_argument(
+            "--wait-between-batches",
+            type=int,
+            default=3600,
+            help="Wait this many seconds between batches that were "
+            "finished due to a message that is within `lag_seconds` of "
+            "the current time (default: 3600s)",
+        )
+
+    # Handle Ctrl+C gracefully by finishing the current batch and then exiting.
+    def handle_ctrl_c(self, signal_received, frame):
+        if self.ctrl_c_pressed:
+            log.warn("\rCtrl+C pressed again, undoing the previous Ctrl+C")
+            self.ctrl_c_pressed = False
+        else:
+            self.ctrl_c_pressed = True
+            log.warn(
+                "\rCtrl+C pressed, will finish the current batch and then exit"
+                " [press Ctrl+C again to continue]"
+            )
+
+    def execute(self, args) -> bool:
+        # cURL command to get the date until which the updates of the
+        # SPARQL endpoint are complete.
+        sparql_endpoint = f"http://{args.host_name}:{args.port}"
+        curl_cmd_updates_complete_until = (
+            f"curl -s {sparql_endpoint}"
+            f' -H "Accept: text/csv"'
+            f' -H "Content-type: application/sparql-query"'
+            f' --data "{self.sparql_updates_complete_until_query}"'
+        )
+
+        # Construct the command and show it.
+        lag_seconds_str = (
+            f"{args.lag_seconds} second{'s' if args.lag_seconds > 1 else ''}"
+        )
+        cmd_description = []
+        if args.since:
+            cmd_description.append(f"SINCE={args.since}")
+        else:
+            cmd_description.append(
+                f"SINCE=$({curl_cmd_updates_complete_until} | sed 1d)"
+            )
+        cmd_description.append(
+            f"Process SSE stream from {args.sse_stream_url}?since=$SINCE "
+            f"in batches of {args.batch_size:,} messages "
+            f"(less if a message is encountered that is within "
+            f"{lag_seconds_str} of the current time)"
+        )
+        self.show("\n".join(cmd_description), only_show=args.show)
+        if args.show:
+            return True
+
+        # Compute the `since` date if not given.
+        if not args.since:
+            try:
+                args.since = run_command(
+                    f"{curl_cmd_updates_complete_until} | sed 1d",
+                    return_output=True,
+                ).strip()
+            except Exception as e:
+                log.error(
+                    f"Error running `{curl_cmd_updates_complete_until}`: {e}"
+                )
+                return False
+
+        # Special handling of Ctrl+C, see `handle_ctrl_c` above.
+        signal.signal(signal.SIGINT, self.handle_ctrl_c)
+        log.warn(
+            "Press Ctrl+C to finish the current batch and end gracefully, "
+            "press Ctrl+C again to continue with the next batch"
+        )
+        log.info("")
+        log.info(f"SINCE={args.since}")
+        log.info("")
+        args.sse_stream_url = f"{args.sse_stream_url}?since={args.since}"
+
+        # Initialize the SSE stream and all the statistics variables.
+        source = requests_sse.EventSource(
+            args.sse_stream_url,
+            headers={
+                "Accept": "text/event-stream",
+                "User-Agent": "qlever update-wikidata",
+            },
+        )
+        source.connect()
+        current_batch_size = 0
+        batch_count = 0
+        total_num_ops = 0
+        total_time_s = 0
+        start_time = time.perf_counter()
+        topics_to_consider = set(args.topics.split(","))
+        wait_before_next_batch = False
+
+        # Iterating over all messages in the stream.
+        for event in source:
+            # Beginning of a new batch of messages.
+            if current_batch_size == 0:
+                date_list = []
+                delta_to_now_list = []
+                batch_assembly_start_time = time.perf_counter()
+                insert_triples = set()
+                delete_triples = set()
+                if wait_before_next_batch:
+                    log.info(
+                        f"Waiting {args.wait_between_batches} "
+                        f"second{'s' if args.wait_between_batches > 1 else ''} "
+                        f"before processing the next batch"
+                    )
+                    log.info("")
+                    wait_before_next_batch = False
+                    for _ in range(args.wait_between_batches):
+                        if self.ctrl_c_pressed:
+                            break
+                        time.sleep(1)
+
+            # Stop if Ctrl+C was pressed (note that we come here
+            # after a `continue` due to an error).
+            if self.ctrl_c_pressed:
+                break
+
+            # Process the message. Skip messages that are not of type `message`
+            # (should not happen), have no field `data` (should not happen
+            # either), or where the topic is not in `args.topics`.
+            if event.type != "message" or not event.data:
+                continue
+            event_data = json.loads(event.data)
+            topic = event_data.get("meta").get("topic")
+            if topic not in topics_to_consider:
+                continue
+
+            try:
+                # event_id = json.loads(event.last_event_id)
+                # date_ms_since_epoch = event_id[0].get("timestamp")
+                # date = time.strftime(
+                #     "%Y-%m-%dT%H:%M:%SZ",
+                #     time.gmtime(date_ms_since_epoch / 1000.0),
+                # )
+                date = event_data.get("meta").get("dt")
+                # date = event_data.get("dt")
+                date = re.sub(r"\.\d*Z$", "Z", date)
+                # entity_id = event_data.get("entity_id")
+                # operation = event_data.get("operation")
+                rdf_added_data = event_data.get("rdf_added_data")
+                rdf_deleted_data = event_data.get("rdf_deleted_data")
+
+                # Process the to-be-deleted triples.
+                if rdf_deleted_data is not None:
+                    try:
+                        rdf_deleted_data = rdf_deleted_data.get("data")
+                        graph = Graph()
+                        log.debug(f"RDF deleted data: {rdf_deleted_data}")
+                        graph.parse(data=rdf_deleted_data, format="turtle")
+                        for s, p, o in graph:
+                            triple = f"{s.n3()} {p.n3()} {o.n3()}"
+                            # NOTE: In case there was a previous `insert` of that
+                            # triple, it is safe to remove that `insert`, but not
+                            # the `delete` (in case the triple is contained in the
+                            # original data).
+                            if triple in insert_triples:
+                                insert_triples.remove(triple)
+                            delete_triples.add(triple)
+                    except Exception as e:
+                        log.error(f"Error reading `rdf_deleted_data`: {e}")
+                        return False
+
+                # Process the to-be-added triples.
+                if rdf_added_data is not None:
+                    try:
+                        rdf_added_data = rdf_added_data.get("data")
+                        graph = Graph()
+                        log.debug(f"RDF added data: {rdf_added_data}")
+                        graph.parse(data=rdf_added_data, format="turtle")
+                        for s, p, o in graph:
+                            triple = f"{s.n3()} {p.n3()} {o.n3()}"
+                            # NOTE: In case there was a previous `delete` of that
+                            # triple, it is safe to remove that `delete`, but not
+                            # the `insert` (in case the triple is not contained in
+                            # the original data).
+                            if triple in delete_triples:
+                                delete_triples.remove(triple)
+                            insert_triples.add(triple)
+                    except Exception as e:
+                        log.error(f"Error reading `rdf_added_data`: {e}")
+                        return False
+
+            except Exception as e:
+                log.error(f"Error reading data from message: {e}")
+                log.info(event)
+                continue
+
+            # Continue assembling until either the batch size is reached, or
+            # we encounter a message that is within `args.lag_seconds` of the
+            # current time.
+            current_batch_size += 1
+            date_as_epoch_s = (
+                datetime.strptime(date, "%Y-%m-%dT%H:%M:%SZ")
+                .replace(tzinfo=timezone.utc)
+                .timestamp()
+            )
+            now_as_epoch_s = time.time()
+            delta_to_now_s = now_as_epoch_s - date_as_epoch_s
+            log.debug(
+                f"DATE: {date_as_epoch_s:.0f} [{date}], "
+                f"NOW: {now_as_epoch_s:.0f}, "
+                f"DELTA: {now_as_epoch_s - date_as_epoch_s:.0f}"
+            )
+            date_list.append(date)
+            delta_to_now_list.append(delta_to_now_s)
+            if (
+                current_batch_size < args.batch_size
+                and not self.ctrl_c_pressed
+            ):
+                if delta_to_now_s < args.lag_seconds:
+                    log.warn(
+                        f"Encountered message with date {date}, which is within "
+                        f"{args.lag_seconds} "
+                        f"second{'s' if args.lag_seconds > 1 else ''} "
+                        f"of the current time, finishing the current batch"
+                    )
+                else:
+                    continue
+
+            # Process the current batch of messages.
+            batch_assembly_end_time = time.perf_counter()
+            batch_assembly_time_ms = int(
+                1000 * (batch_assembly_end_time - batch_assembly_start_time)
+            )
+            batch_count += 1
+            date_list.sort()
+            delta_to_now_list.sort()
+            min_delta_to_now_s = delta_to_now_list[0]
+            if min_delta_to_now_s < 10:
+                min_delta_to_now_s = f"{min_delta_to_now_s:.1f}"
+            else:
+                min_delta_to_now_s = f"{int(min_delta_to_now_s):,}"
+            log.info(
+                f"Processing batch #{batch_count} "
+                f"with {current_batch_size:,} "
+                f"message{'s' if current_batch_size > 1 else ''}, "
+                f"date range: {date_list[0]} - {date_list[-1]} "
+                f"[assembly time: {batch_assembly_time_ms:,} ms, "
+                f"min delta to NOW: {min_delta_to_now_s} s]"
+            )
+            wait_before_next_batch = (
+                args.wait_between_batches is not None
+                and current_batch_size < args.batch_size
+            )
+            current_batch_size = 0
+
+            # Add the min and max date of the batch to `insert_triples`.
+            #
+            # NOTE: The min date means that we have *all* updates until that
+            # date. The max date is the date of the latest update we have seen.
+            # However, there may still be earlier updates that we have not seen
+            # yet. Wikidata uses `schema:dateModified` for the latter semantics,
+            # so we use it here as well. For the other semantics, we invent
+            # a new property `wikibase:updatesCompleteUntil`.
+            insert_triples.add(
+                f"<http://wikiba.se/ontology#Dump> "
+                f"<http://schema.org/dateModified> "
+                f'"{date_list[-1]}"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
+            )
+            updates_complete_until = (
+                date_list[-1]
+                if args.min_or_max_date == "max"
+                else date_list[0]
+            )
+            insert_triples.add(
+                f"<http://wikiba.se/ontology#Dump> "
+                f"<http://wikiba.se/ontology#updatesCompleteUntil> "
+                f'"{updates_complete_until}"'
+                f"^^<http://www.w3.org/2001/XMLSchema#dateTime>"
+            )
+
+            # Construct update operation.
+            delete_block = " . \n ".join(delete_triples)
+            insert_block = " . \n ".join(insert_triples)
+            delete_insert_operation = (
+                f"DELETE {{\n {delete_block} .\n}} "
+                f"INSERT {{\n {insert_block} .\n}} "
+                f"WHERE {{ }}\n"
+            )
+
+            # Construct the equivalent curl command (only logged for reference):
+            # write the operation to a file and send it via `--data-binary`.
+            curl_cmd = (
+                f"curl -s -X POST {sparql_endpoint}"
+                f" -H 'Authorization: Bearer {args.access_token}'"
+                f" -H 'Content-Type: application/sparql-update'"
+            )
+            update_arg_file_name = f"update.sparql.{batch_count}"
+            with open(update_arg_file_name, "w") as f:
+                f.write(delete_insert_operation)
+            curl_cmd += f" --data-binary @{update_arg_file_name}"
+            log.info(colored(curl_cmd, "blue"))
+
+            # Send the update operation via `requests` (the curl command
+            # above is only logged for reference).
+            try:
+                headers = {
+                    "Authorization": f"Bearer {args.access_token}",
+                    "Content-Type": "application/sparql-update",
+                }
+                response = requests.post(
+                    url=sparql_endpoint,
+                    headers=headers,
+                    data=delete_insert_operation,
+                )
+                result = response.text
+                with open(f"update.result.{batch_count}", "w") as f:
+                    f.write(result)
+            except Exception as e:
+                log.warn(f"Error running `requests.post`: {e}")
+                log.info("")
+                continue
+
+            # Results should be a JSON, parse it.
+            try:
+                result = json.loads(result)
+                if isinstance(result, list):
+                    result = result[0]
+            except Exception as e:
+                log.error(
+                    f"Error parsing JSON result: {e}"
+                    f", the first 1000 characters are:"
+                )
+                log.info(result[:1000])
+                log.info("")
+                continue
+
+            # Check if the result contains a QLever exception.
+            if "exception" in result:
+                error_msg = result["exception"]
+                log.error(f"QLever exception: {error_msg}")
+                log.info("")
+                continue
+
+            # Helper function for getting the value of `result["time"][...]`
+            # as an integer number of milliseconds.
+            def get_time_ms(*keys: str) -> int:
+                value = result["time"]
+                for key in keys:
+                    value = value[key]
+                return int(value)
+                # return int(re.sub(r"ms$", "", value))
+
+            # Show statistics of the update operation.
+            try:
+                ins_after = result["delta-triples"]["after"]["inserted"]
+                del_after = result["delta-triples"]["after"]["deleted"]
+                ops_after = result["delta-triples"]["after"]["total"]
+                num_ins = int(result["delta-triples"]["operation"]["inserted"])
+                num_del = int(result["delta-triples"]["operation"]["deleted"])
+                num_ops = int(result["delta-triples"]["operation"]["total"])
+                time_ms = get_time_ms("total")
+                time_us_per_op = int(1000 * time_ms / num_ops)
+                log.info(
+                    colored(
+                        f"NUM_OPS: {num_ops:+6,} -> {ops_after:6,}, "
+                        f"INS: {num_ins:+6,} -> {ins_after:6,}, "
+                        f"DEL: {num_del:+6,} -> {del_after:6,}, "
+                        f"TIME: {time_ms:7,} ms, "
+                        f"TIME/OP: {time_us_per_op:,} µs",
+                        attrs=["bold"],
+                    )
+                )
+
+                # Also show a detailed breakdown of the total time.
+                time_preparation = get_time_ms(
+                    "execution", "processUpdateImpl", "preparation"
+                )
+                time_insert = get_time_ms(
+                    "execution", "processUpdateImpl", "insertTriples", "total"
+                )
+                time_delete = get_time_ms(
+                    "execution", "processUpdateImpl", "deleteTriples", "total"
+                )
+                time_snapshot = get_time_ms("execution", "snapshotCreation")
+                time_writeback = get_time_ms("execution", "diskWriteback")
+                time_unaccounted = time_ms - (
+                    time_delete
+                    + time_insert
+                    + time_preparation
+                    + time_snapshot
+                    + time_writeback
+                )
+                log.info(
+                    f"PREPARATION: {100 * time_preparation / time_ms:2.0f}%, "
+                    # f"PLANNING: {100 * time_planning / time_ms:2.0f}%, "
+                    f"INSERT: {100 * time_insert / time_ms:2.0f}%, "
+                    f"DELETE: {100 * time_delete / time_ms:2.0f}%, "
+                    f"SNAPSHOT: {100 * time_snapshot / time_ms:2.0f}%, "
+                    f"WRITEBACK: {100 * time_writeback / time_ms:2.0f}%, "
+                    f"UNACCOUNTED: {100 * time_unaccounted / time_ms:2.0f}%",
+                )
+
+                # Show the totals so far.
+                total_num_ops += num_ops
+                total_time_s += time_ms / 1000.0
+                elapsed_time_s = time.perf_counter() - start_time
+                time_us_per_op = int(1e6 * total_time_s / total_num_ops)
+                log.info(
+                    colored(
+                        f"TOTAL NUM_OPS SO FAR: {total_num_ops:8,}, "
+                        f"TOTAL UPDATE TIME SO FAR: {total_time_s:4.0f} s, "
+                        f"ELAPSED TIME SO FAR: {elapsed_time_s:4.0f} s, "
+                        f"AVG TIME/OP SO FAR: {time_us_per_op:,} µs",
+                        attrs=["bold"],
+                    )
+                )
+
+            except Exception as e:
+                log.warn(
+                    f"Error extracting statistics: {e}, "
+                    f"curl command was: {curl_cmd}"
+                )
+                # Show traceback for debugging.
+                import traceback
+
+                traceback.print_exc()
+                log.info("")
+                continue
+
+            # Empty line between batches.
+            log.info("")
+
+        # Final statistics after all batches have been processed.
+        elapsed_time_s = time.perf_counter() - start_time
+        time_us_per_op = int(1e6 * total_time_s / total_num_ops)
+        log.info(
+            f"Processed {batch_count} "
+            f"{'batches' if batch_count > 1 else 'batch'}, "
+            f"terminating update command"
+        )
+        log.info(
+            colored(
+                f"TOTAL NUM_OPS: {total_num_ops:8,}, "
+                f"TOTAL TIME: {total_time_s:4.0f} s, "
+                f"ELAPSED TIME: {elapsed_time_s:4.0f} s, "
+                f"AVG TIME/OP: {time_us_per_op:,} µs",
+                attrs=["bold"],
+            )
+        )
+        return True
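
The batch loop above always issues an unconditional SPARQL 1.1 `DELETE ... INSERT ... WHERE { }` operation and sends it with `Content-Type: application/sparql-update` and a bearer token. Below is a minimal standalone sketch of that request, outside the qlever CLI; the endpoint URL, access token, and triples are placeholders for illustration, not values taken from the stream:

```python
import requests

# Placeholder endpoint and access token (assumptions for illustration only).
SPARQL_ENDPOINT = "http://localhost:7001"
ACCESS_TOKEN = "my-access-token"

# Placeholder triples in N-Triples syntax.
delete_triples = ['<http://example.org/s> <http://example.org/p> "old"']
insert_triples = ['<http://example.org/s> <http://example.org/p> "new"']

# Same construction as in the command above: join the triples and wrap them
# in an unconditional DELETE/INSERT with an empty WHERE clause.
delete_block = " . \n ".join(delete_triples)
insert_block = " . \n ".join(insert_triples)
operation = (
    f"DELETE {{\n {delete_block} .\n}} "
    f"INSERT {{\n {insert_block} .\n}} "
    f"WHERE {{ }}\n"
)

# POST the operation as a SPARQL update, exactly like the command does.
response = requests.post(
    url=SPARQL_ENDPOINT,
    headers={
        "Authorization": f"Bearer {ACCESS_TOKEN}",
        "Content-Type": "application/sparql-update",
    },
    data=operation,
)
print(response.text)
```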
qlever/qlever_main.py CHANGED
@@ -40,8 +40,7 @@ def main():
         if not command_successful:
             exit(1)
     except KeyboardInterrupt:
-        log.info("")
-        log.info("Ctrl-C pressed, exiting ...")
+        log.warn("\rCtrl-C pressed, exiting ...")
         log.info("")
         exit(1)
     except Exception as e:
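
The `\r` at the start of the new message serves the same purpose as in the `handle_ctrl_c` handler above: the terminal echoes `^C` when Ctrl-C is pressed, and the carriage return moves the cursor back to column 0 so the warning overwrites that echo. A minimal sketch of the idea, independent of the qlever logging setup:

```python
import time

try:
    # Wait until the user presses Ctrl-C.
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    # "\r" returns to the start of the line, so the message is printed
    # over the "^C" that the terminal has already echoed there.
    print("\rCtrl-C pressed, exiting ...")
```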
qlever/qleverfile.py CHANGED
@@ -116,6 +116,20 @@ class Qleverfile:
             "files (default: 1048576 when the total size of the input files "
             "is larger than 10 GB)",
         )
+        index_args["vocabulary_type"] = arg(
+            "--vocabulary-type",
+            type=str,
+            choices=[
+                "on-disk-compressed",
+                "on-disk-uncompressed",
+                "in-memory-compressed",
+                "in-memory-uncompressed",
+                "on-disk-compressed-geo-split",
+            ],
+            default="on-disk-compressed",
+            help="The type of the vocabulary to use for the index "
+            "(default: `on-disk-compressed`)",
+        )
         index_args["index_binary"] = arg(
             "--index-binary",
             type=str,
@@ -137,6 +151,14 @@ class Qleverfile:
             "large enough to contain the end of at least one statement "
             "(default: 10M)",
         )
+        index_args["encode_as_id"] = arg(
+            "--encode-as-id",
+            type=str,
+            help="Space-separated list of IRI prefixes (without angle "
+            "brackets); IRIs that start with one of these prefixes, followed "
+            "by a sequence of digits, do not require a vocabulary entry but "
+            "are directly encoded in the ID (default: none)",
+        )
         index_args["only_pso_and_pos_permutations"] = arg(
             "--only-pso-and-pos-permutations",
             action="store_true",
qlever/util.py CHANGED
@@ -99,7 +99,7 @@ def run_curl_command(
     default_result_file = "/tmp/qlever.curl.result"
     actual_result_file = result_file if result_file else default_result_file
     curl_cmd = (
-        f'curl -s -o "{actual_result_file}"'
+        f'curl -Ls -o "{actual_result_file}"'
         f' -w "%{{http_code}}\n" {url}'
         + "".join([f' -H "{key}: {value}"' for key, value in headers.items()])
         + "".join(
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: qlever
-Version: 0.5.23
+Version: 0.5.25
 Summary: Command-line tool for using the QLever graph database
 Author-email: Hannah Bast <bast@cs.uni-freiburg.de>
 License: Apache-2.0
@@ -15,6 +15,8 @@ Requires-Dist: psutil
 Requires-Dist: termcolor
 Requires-Dist: argcomplete
 Requires-Dist: pyyaml
+Requires-Dist: rdflib
+Requires-Dist: requests-sse
 Dynamic: license-file
 
 # QLever
@@ -56,7 +58,7 @@ qlever setup-config olympics # Get Qleverfile (config file) for this dataset
 qlever get-data # Download the dataset
 qlever index # Build index data structures for this dataset
 qlever start # Start a QLever server using that index
-qlever example-queries # Launch some example queries
+qlever query # Launch an example query
 qlever ui # Launch the QLever UI
 ```