qlever 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

@@ -22,67 +22,97 @@ class ExampleQueriesCommand(QleverCommand):
22
22
 
23
23
  def __init__(self):
24
24
  self.presets = {
25
- "virtuoso-wikidata":
26
- "https://wikidata.demo.openlinksw.com/sparql",
27
- "qlever-wikidata":
28
- "https://qlever.cs.uni-freiburg.de/api/wikidata"
29
- }
25
+ "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
26
+ "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
27
+ }
30
28
 
31
29
  def description(self) -> str:
32
- return ("Show how much of the cache is currently being used")
30
+ return "Show how much of the cache is currently being used"
33
31
 
34
32
  def should_have_qleverfile(self) -> bool:
35
33
  return False
36
34
 
37
- def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
35
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
38
36
  return {"server": ["port"], "ui": ["ui_config"]}
39
37
 
40
38
  def additional_arguments(self, subparser) -> None:
41
- subparser.add_argument("--sparql-endpoint", type=str,
42
- help="URL of the SPARQL endpoint")
43
- subparser.add_argument("--sparql-endpoint-preset",
44
- choices=self.presets.keys(),
45
- help="Shortcut for setting the SPARQL endpoint")
46
- subparser.add_argument("--get-queries-cmd", type=str,
47
- help="Command to get example queries as TSV "
48
- "(description, query)")
49
- subparser.add_argument("--query-ids", type=str,
50
- default="1-$",
51
- help="Query IDs as comma-separated list of "
52
- "ranges (e.g., 1-5,7,12-$)")
53
- subparser.add_argument("--query-regex", type=str,
54
- help="Only consider example queries matching "
55
- "this regex (using grep -Pi)")
56
- subparser.add_argument("--download-or-count",
57
- choices=["download", "count"], default="count",
58
- help="Whether to download the full result "
59
- "or just compute the size of the result")
60
- subparser.add_argument("--limit", type=int,
61
- help="Limit on the number of results")
62
- subparser.add_argument("--remove-offset-and-limit",
63
- action="store_true", default=False,
64
- help="Remove OFFSET and LIMIT from the query")
65
- subparser.add_argument("--accept", type=str,
66
- choices=["text/tab-separated-values",
67
- "text/csv",
68
- "application/sparql-results+json",
69
- "text/turtle"],
70
- default="text/tab-separated-values",
71
- help="Accept header for the SPARQL query")
72
- subparser.add_argument("--clear-cache",
73
- choices=["yes", "no"],
74
- default="yes",
75
- help="Clear the cache before each query")
76
- subparser.add_argument("--width-query-description", type=int,
77
- default=40,
78
- help="Width for printing the query description")
79
- subparser.add_argument("--width-error-message", type=int,
80
- default=80,
81
- help="Width for printing the error message "
82
- "(0 = no limit)")
83
- subparser.add_argument("--width-result-size", type=int,
84
- default=14,
85
- help="Width for printing the result size")
39
+ subparser.add_argument(
40
+ "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
41
+ )
42
+ subparser.add_argument(
43
+ "--sparql-endpoint-preset",
44
+ choices=self.presets.keys(),
45
+ help="Shortcut for setting the SPARQL endpoint",
46
+ )
47
+ subparser.add_argument(
48
+ "--get-queries-cmd",
49
+ type=str,
50
+ help="Command to get example queries as TSV " "(description, query)",
51
+ )
52
+ subparser.add_argument(
53
+ "--query-ids",
54
+ type=str,
55
+ default="1-$",
56
+ help="Query IDs as comma-separated list of " "ranges (e.g., 1-5,7,12-$)",
57
+ )
58
+ subparser.add_argument(
59
+ "--query-regex",
60
+ type=str,
61
+ help="Only consider example queries matching "
62
+ "this regex (using grep -Pi)",
63
+ )
64
+ subparser.add_argument(
65
+ "--download-or-count",
66
+ choices=["download", "count"],
67
+ default="count",
68
+ help="Whether to download the full result "
69
+ "or just compute the size of the result",
70
+ )
71
+ subparser.add_argument(
72
+ "--limit", type=int, help="Limit on the number of results"
73
+ )
74
+ subparser.add_argument(
75
+ "--remove-offset-and-limit",
76
+ action="store_true",
77
+ default=False,
78
+ help="Remove OFFSET and LIMIT from the query",
79
+ )
80
+ subparser.add_argument(
81
+ "--accept",
82
+ type=str,
83
+ choices=[
84
+ "text/tab-separated-values",
85
+ "text/csv",
86
+ "application/sparql-results+json",
87
+ "text/turtle",
88
+ ],
89
+ default="text/tab-separated-values",
90
+ help="Accept header for the SPARQL query",
91
+ )
92
+ subparser.add_argument(
93
+ "--clear-cache",
94
+ choices=["yes", "no"],
95
+ default="yes",
96
+ help="Clear the cache before each query",
97
+ )
98
+ subparser.add_argument(
99
+ "--width-query-description",
100
+ type=int,
101
+ default=40,
102
+ help="Width for printing the query description",
103
+ )
104
+ subparser.add_argument(
105
+ "--width-error-message",
106
+ type=int,
107
+ default=80,
108
+ help="Width for printing the error message " "(0 = no limit)",
109
+ )
110
+ subparser.add_argument(
111
+ "--width-result-size",
112
+ type=int,
113
+ default=14,
114
+ help="Width for printing the result size",
115
+ )
86
116
 
87
117
  def execute(self, args) -> bool:
88
118
  # We can't have both `--remove-offset-and-limit` and `--limit`.
@@ -93,9 +123,13 @@ class ExampleQueriesCommand(QleverCommand):
93
123
  # If `args.accept` is `application/sparql-results+json`, we need `jq`.
94
124
  if args.accept == "application/sparql-results+json":
95
125
  try:
96
- subprocess.run("jq --version", shell=True, check=True,
97
- stdout=subprocess.DEVNULL,
98
- stderr=subprocess.DEVNULL)
126
+ subprocess.run(
127
+ "jq --version",
128
+ shell=True,
129
+ check=True,
130
+ stdout=subprocess.DEVNULL,
131
+ stderr=subprocess.DEVNULL,
132
+ )
99
133
  except Exception as e:
100
134
  log.error(f"Please install `jq` for {args.accept} ({e})")
101
135
  return False
@@ -111,38 +145,44 @@ class ExampleQueriesCommand(QleverCommand):
111
145
  return False
112
146
 
113
147
  # Clear cache only works for QLever.
114
- is_qlever = (not args.sparql_endpoint
115
- or args.sparql_endpoint.startswith("https://qlever"))
148
+ is_qlever = not args.sparql_endpoint or args.sparql_endpoint.startswith(
149
+ "https://qlever"
150
+ )
116
151
  if args.clear_cache == "yes" and not is_qlever:
117
152
  log.warning("Clearing the cache only works for QLever")
118
153
  args.clear_cache = "no"
119
154
 
120
155
  # Show what the command will do.
121
- get_queries_cmd = (args.get_queries_cmd if args.get_queries_cmd
122
- else f"curl -sv https://qlever.cs.uni-freiburg.de/"
123
- f"api/examples/{args.ui_config}")
156
+ get_queries_cmd = (
157
+ args.get_queries_cmd
158
+ if args.get_queries_cmd
159
+ else f"curl -sv https://qlever.cs.uni-freiburg.de/"
160
+ f"api/examples/{args.ui_config}"
161
+ )
124
162
  sed_arg = args.query_ids.replace(",", "p;").replace("-", ",") + "p"
125
163
  get_queries_cmd += f" | sed -n '{sed_arg}'"
126
164
  if args.query_regex:
127
165
  get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
128
- sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint
129
- else f"localhost:{args.port}")
130
- self.show(f"Obtain queries via: {get_queries_cmd}\n"
131
- f"SPARQL endpoint: {sparql_endpoint}\n"
132
- f"Accept header: {args.accept}\n"
133
- f"Clear cache before each query:"
134
- f" {args.clear_cache.upper()}\n"
135
- f"Download result for each query or just count:"
136
- f" {args.download_or_count.upper()}" +
137
- (f" with LIMIT {args.limit}" if args.limit else ""),
138
- only_show=args.show)
166
+ sparql_endpoint = (
167
+ args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
168
+ )
169
+ self.show(
170
+ f"Obtain queries via: {get_queries_cmd}\n"
171
+ f"SPARQL endpoint: {sparql_endpoint}\n"
172
+ f"Accept header: {args.accept}\n"
173
+ f"Clear cache before each query:"
174
+ f" {args.clear_cache.upper()}\n"
175
+ f"Download result for each query or just count:"
176
+ f" {args.download_or_count.upper()}"
177
+ + (f" with LIMIT {args.limit}" if args.limit else ""),
178
+ only_show=args.show,
179
+ )
139
180
  if args.show:
140
181
  return False
141
182
 
142
183
  # Get the example queries.
143
184
  try:
144
- example_query_lines = run_command(get_queries_cmd,
145
- return_output=True)
185
+ example_query_lines = run_command(get_queries_cmd, return_output=True)
146
186
  if len(example_query_lines) == 0:
147
187
  log.error("No example queries matching the criteria found")
148
188
  return False
@@ -152,11 +192,11 @@ class ExampleQueriesCommand(QleverCommand):
152
192
  return False
153
193
 
154
194
  # Launch the queries one after the other and for each print: the
155
- # description, the result size, and the query processing time.
156
- total_time_seconds = 0.0
157
- total_result_size = 0
158
- count_succeeded = 0
159
- count_failed = 0
195
+ # description, the result size (number of rows), and the query
196
+ # processing time (seconds).
197
+ query_times = []
198
+ result_sizes = []
199
+ num_failed = 0
160
200
  for example_query_line in example_query_lines:
161
201
  # Parse description and query.
162
202
  description, query = example_query_line.split("\t")
@@ -176,13 +216,17 @@ class ExampleQueriesCommand(QleverCommand):
176
216
  # Remove OFFSET and LIMIT (after the last closing bracket).
177
217
  if args.remove_offset_and_limit or args.limit:
178
218
  closing_bracket_idx = query.rfind("}")
179
- regexes = [re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
180
- re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)]
219
+ regexes = [
220
+ re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
221
+ re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
222
+ ]
181
223
  for regex in regexes:
182
224
  match = re.search(regex, query[closing_bracket_idx:])
183
225
  if match:
184
- query = query[:closing_bracket_idx + match.start()] + \
185
- query[closing_bracket_idx + match.end():]
226
+ query = (
227
+ query[: closing_bracket_idx + match.start()]
228
+ + query[closing_bracket_idx + match.end() :]
229
+ )
186
230
 
187
231
  # Limit query.
188
232
  if args.limit:
@@ -191,19 +235,29 @@ class ExampleQueriesCommand(QleverCommand):
191
235
  # Count query.
192
236
  if args.download_or_count == "count":
193
237
  # First find out if there is a FROM clause.
194
- regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*",
195
- re.IGNORECASE)
238
+ regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE)
196
239
  match_from_clause = re.search(regex_from_clause, query)
197
240
  from_clause = " "
198
241
  if match_from_clause:
199
242
  from_clause = match_from_clause.group(0)
200
- query = (query[:match_from_clause.start()] + " " +
201
- query[match_from_clause.end():])
243
+ query = (
244
+ query[: match_from_clause.start()]
245
+ + " "
246
+ + query[match_from_clause.end() :]
247
+ )
202
248
  # Now we can add the outer SELECT COUNT(*).
203
- query = re.sub(r"SELECT ",
204
- "SELECT (COUNT(*) AS ?qlever_count_)"
205
- + from_clause + "WHERE { SELECT ",
206
- query, count=1, flags=re.IGNORECASE) + " }"
249
+ query = (
250
+ re.sub(
251
+ r"SELECT ",
252
+ "SELECT (COUNT(*) AS ?qlever_count_)"
253
+ + from_clause
254
+ + "WHERE { SELECT ",
255
+ query,
256
+ count=1,
257
+ flags=re.IGNORECASE,
258
+ )
259
+ + " }"
260
+ )
207
261
 
208
262
  # A bit of pretty-printing.
209
263
  query = re.sub(r"\s+", " ", query)
@@ -211,21 +265,27 @@ class ExampleQueriesCommand(QleverCommand):
211
265
 
212
266
  # Launch query.
213
267
  try:
214
- curl_cmd = (f"curl -s {sparql_endpoint}"
215
- f" -w \"HTTP code: %{{http_code}}\\n\""
216
- f" -H \"Accept: {args.accept}\""
217
- f" --data-urlencode query={shlex.quote(query)}")
268
+ curl_cmd = (
269
+ f"curl -s {sparql_endpoint}"
270
+ f' -w "HTTP code: %{{http_code}}\\n"'
271
+ f' -H "Accept: {args.accept}"'
272
+ f" --data-urlencode query={shlex.quote(query)}"
273
+ )
218
274
  log.debug(curl_cmd)
219
- result_file = (f"qlever.example_queries.result."
220
- f"{abs(hash(curl_cmd))}.tmp")
275
+ result_file = (
276
+ f"qlever.example_queries.result." f"{abs(hash(curl_cmd))}.tmp"
277
+ )
221
278
  start_time = time.time()
222
- http_code = run_curl_command(sparql_endpoint,
223
- headers={"Accept": args.accept},
224
- params={"query": query},
225
- result_file=result_file).strip()
279
+ http_code = run_curl_command(
280
+ sparql_endpoint,
281
+ headers={"Accept": args.accept},
282
+ params={"query": query},
283
+ result_file=result_file,
284
+ ).strip()
226
285
  if http_code != "200":
227
- raise Exception(f"HTTP code {http_code}"
228
- f" {Path(result_file).read_text()}")
286
+ raise Exception(
287
+ f"HTTP code {http_code}" f" {Path(result_file).read_text()}"
288
+ )
229
289
  time_seconds = time.time() - start_time
230
290
  error_msg = None
231
291
  except Exception as e:
@@ -240,30 +300,34 @@ class ExampleQueriesCommand(QleverCommand):
240
300
  if args.download_or_count == "count":
241
301
  if args.accept == "text/tab-separated-values":
242
302
  result_size = run_command(
243
- f"sed 1d {result_file}",
244
- return_output=True)
303
+ f"sed 1d {result_file}", return_output=True
304
+ )
245
305
  else:
246
306
  result_size = run_command(
247
- f"jq -r \".results.bindings[0]"
248
- f" | to_entries[0].value.value"
249
- f" | tonumber\" {result_file}",
250
- return_output=True)
307
+ f'jq -r ".results.bindings[0]'
308
+ f" | to_entries[0].value.value"
309
+ f' | tonumber" {result_file}',
310
+ return_output=True,
311
+ )
251
312
  else:
252
- if (args.accept == "text/tab-separated-values"
253
- or args.accept == "text/csv"):
313
+ if (
314
+ args.accept == "text/tab-separated-values"
315
+ or args.accept == "text/csv"
316
+ ):
254
317
  result_size = run_command(
255
- f"sed 1d {result_file} | wc -l",
256
- return_output=True)
318
+ f"sed 1d {result_file} | wc -l", return_output=True
319
+ )
257
320
  elif args.accept == "text/turtle":
258
321
  result_size = run_command(
259
- f"sed '1d;/^@prefix/d;/^\\s*$/d' "
260
- f"{result_file} | wc -l",
261
- return_output=True)
322
+ f"sed '1d;/^@prefix/d;/^\\s*$/d' "
323
+ f"{result_file} | wc -l",
324
+ return_output=True,
325
+ )
262
326
  else:
263
327
  result_size = run_command(
264
- f"jq -r \".results.bindings | length\""
265
- f" {result_file}",
266
- return_output=True)
328
+ f'jq -r ".results.bindings | length"' f" {result_file}",
329
+ return_output=True,
330
+ )
267
331
  result_size = int(result_size)
268
332
  except Exception as e:
269
333
  error_msg = str(e)
@@ -274,43 +338,79 @@ class ExampleQueriesCommand(QleverCommand):
274
338
 
275
339
  # Print description, time, result in tabular form.
276
340
  if len(description) > args.width_query_description:
277
- description = description[:args.width_query_description - 3]
341
+ description = description[: args.width_query_description - 3]
278
342
  description += "..."
279
343
  if error_msg is None:
280
- log.info(f"{description:<{args.width_query_description}} "
281
- f"{time_seconds:6.2f} s "
282
- f"{result_size:>{args.width_result_size},}")
283
- count_succeeded += 1
284
- total_time_seconds += time_seconds
285
- total_result_size += result_size
344
+ log.info(
345
+ f"{description:<{args.width_query_description}} "
346
+ f"{time_seconds:6.2f} s "
347
+ f"{result_size:>{args.width_result_size},}"
348
+ )
349
+ query_times.append(time_seconds)
350
+ result_sizes.append(result_size)
286
351
  else:
287
- count_failed += 1
288
- if (args.width_error_message > 0
289
- and len(error_msg) > args.width_error_message
290
- and args.log_level != "DEBUG"):
291
- error_msg = error_msg[:args.width_error_message - 3]
352
+ num_failed += 1
353
+ if (
354
+ args.width_error_message > 0
355
+ and len(error_msg) > args.width_error_message
356
+ and args.log_level != "DEBUG"
357
+ ):
358
+ error_msg = error_msg[: args.width_error_message - 3]
292
359
  error_msg += "..."
293
- log.error(f"{description:<{args.width_query_description}} "
294
- f"failed "
295
- f"{colored(error_msg, 'red')}")
360
+ log.error(
361
+ f"{description:<{args.width_query_description}} "
362
+ f"failed "
363
+ f"{colored(error_msg, 'red')}"
364
+ )
296
365
 
297
- # Print total time.
298
- log.info("")
299
- if count_succeeded > 0:
300
- query_or_queries = "query" if count_succeeded == 1 else "queries"
301
- description = (f"TOTAL for {count_succeeded} {query_or_queries}")
302
- log.info(f"{description:<{args.width_query_description}} "
303
- f"{total_time_seconds:6.2f} s "
304
- f"{total_result_size:>14,}")
305
- description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
306
- log.info(f"{description:<{args.width_query_description}} "
307
- f"{total_time_seconds / count_succeeded:6.2f} s "
308
- f"{round(total_result_size / count_succeeded):>14,}")
309
- else:
310
- if count_failed == 1:
311
- log.info(colored("One query failed", "red"))
312
- elif count_failed > 1:
313
- log.info(colored("All queries failed", "red"))
366
+ # Check that each query has a time and a result size, or it failed.
367
+ assert len(result_sizes) == len(query_times)
368
+ assert len(query_times) + num_failed == len(example_query_lines)
369
+
370
+ # Show statistics.
371
+ if len(query_times) > 0:
372
+ n = len(query_times)
373
+ total_query_time = sum(query_times)
374
+ average_query_time = total_query_time / n
375
+ median_query_time = sorted(query_times)[n // 2]
376
+ total_result_size = sum(result_sizes)
377
+ average_result_size = round(total_result_size / n)
378
+ median_result_size = sorted(result_sizes)[n // 2]
379
+ query_or_queries = "query" if n == 1 else "queries"
380
+ description = f"TOTAL for {n} {query_or_queries}"
381
+ log.info("")
382
+ log.info(
383
+ f"{description:<{args.width_query_description}} "
384
+ f"{total_query_time:6.2f} s "
385
+ f"{total_result_size:>14,}"
386
+ )
387
+ description = f"AVERAGE for {n} {query_or_queries}"
388
+ log.info(
389
+ f"{description:<{args.width_query_description}} "
390
+ f"{average_query_time:6.2f} s "
391
+ f"{average_result_size:>14,}"
392
+ )
393
+ description = f"MEDIAN for {n} {query_or_queries}"
394
+ log.info(
395
+ f"{description:<{args.width_query_description}} "
396
+ f"{median_query_time:6.2f} s "
397
+ f"{median_result_size:>14,}"
398
+ )
399
+
400
+ # Show number of failed queries.
401
+ if num_failed > 0:
402
+ log.info("")
403
+ description = "Number of FAILED queries"
404
+ num_failed_string = f"{num_failed:>6}"
405
+ if num_failed == len(example_query_lines):
406
+ num_failed_string += " [all]"
407
+ log.info(
408
+ colored(
409
+ f"{description:<{args.width_query_description}} "
410
+ f"{num_failed:>24}",
411
+ "red",
412
+ )
413
+ )
314
414
 
315
415
  # Return success (has nothing to do with how many queries failed).
316
416
  return True
qlever/commands/index.py CHANGED
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import glob
4
+ import json
4
5
  import shlex
5
6
 
6
7
  from qlever.command import QleverCommand
@@ -26,8 +27,8 @@ class IndexCommand(QleverCommand):
26
27
 
27
28
  def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
28
29
  return {"data": ["name", "format"],
29
- "index": ["input_files", "cat_input_files", "settings_json",
30
- "index_binary",
30
+ "index": ["input_files", "cat_input_files", "multi_input_json",
31
+ "parallel_parsing", "settings_json", "index_binary",
31
32
  "only_pso_and_pos_permutations", "use_patterns",
32
33
  "text_index", "stxxl_memory"],
33
34
  "runtime": ["system", "image", "index_container"]}
@@ -38,12 +39,100 @@ class IndexCommand(QleverCommand):
38
39
  default=False,
39
40
  help="Overwrite an existing index, think twice before using.")
40
41
 
42
+ # Exception for invalid JSON.
43
+ class InvalidInputJson(Exception):
44
+ def __init__(self, error_message, additional_info):
45
+ self.error_message = error_message
46
+ self.additional_info = additional_info
47
+ super().__init__()
48
+
49
+ # Helper function to get command line options from JSON.
50
+ def get_input_options_for_json(self, args) -> str:
51
+ # Parse the JSON.
52
+ try:
53
+ input_specs = json.loads(args.multi_input_json)
54
+ except Exception as e:
55
+ raise self.InvalidInputJson(
56
+ f"Failed to parse `MULTI_INPUT_JSON` ({e})",
57
+ args.multi_input_json)
58
+ # Check that it is an array of length at least one.
59
+ if not isinstance(input_specs, list):
60
+ raise self.InvalidInputJson(
61
+ "`MULTI_INPUT_JSON` must be a JSON array",
62
+ args.multi_input_json)
63
+ if len(input_specs) == 0:
64
+ raise self.InvalidInputJson(
65
+ "`MULTI_INPUT_JSON` must contain at least one element",
66
+ args.multi_input_json)
67
+ # For each of the maps, construct the corresponding command-line
68
+ # options to the index binary.
69
+ input_options = []
70
+ for i, input_spec in enumerate(input_specs):
71
+ # Check that `input_spec` is a dictionary.
72
+ if not isinstance(input_spec, dict):
73
+ raise self.InvalidInputJson(
74
+ f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
75
+ "object",
76
+ input_spec)
77
+ # For each `input_spec`, we must have a command.
78
+ if "cmd" not in input_spec:
79
+ raise self.InvalidInputJson(
80
+ f"Element {i} in `MULTI_INPUT_JSON` must contain a "
81
+ "key `cmd`",
82
+ input_spec)
83
+ input_cmd = input_spec["cmd"]
84
+ # The `format`, `graph`, and `parallel` keys are optional.
85
+ input_format = input_spec.get("format", args.format)
86
+ input_graph = input_spec.get("graph", "-")
87
+ input_parallel = input_spec.get("parallel", "false")
88
+ # There must not be any other keys.
89
+ extra_keys = input_spec.keys() - {"cmd", "format", "graph", "parallel"}
90
+ if extra_keys:
91
+ raise self.InvalidInputJson(
92
+ f"Element {i} in `MULTI_INPUT_JSON` must only contain "
93
+ "the keys `format`, `graph`, and `parallel`. Contains "
94
+ "extra keys {extra_keys}.",
95
+ input_spec)
96
+ # Add the command-line options for this input stream. We use
97
+ # process substitution `<(...)` as a convenient way to handle
98
+ # an input stream just like a file. This is not POSIX compliant,
99
+ # but supported by various shells, including bash and zsh.
100
+ input_options.append(
101
+ f"-f <({input_cmd}) -F {input_format} "
102
+ f"-g \"{input_graph}\" -p {input_parallel}")
103
+ # Return the concatenated command-line options.
104
+ return " ".join(input_options)
105
+
41
106
  def execute(self, args) -> bool:
42
- # Construct the command line.
43
- index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
44
- f" -F {args.format} -f -"
45
- f" -i {args.name}"
46
- f" -s {args.name}.settings.json")
107
+ # The mandatory part of the command line (specifying the input, the
108
+ # basename of the index, and the settings file). There are two ways
109
+ # to specify the input: via a single stream or via multiple streams.
110
+ if args.cat_input_files and not args.multi_input_json:
111
+ index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
112
+ f" -i {args.name} -s {args.name}.settings.json"
113
+ f" -F {args.format} -f -")
114
+ if args.parallel_parsing:
115
+ index_cmd += (f" -p {args.parallel_parsing}")
116
+ elif args.multi_input_json and not args.cat_input_files:
117
+ try:
118
+ input_options = self.get_input_options_for_json(args)
119
+ except self.InvalidInputJson as e:
120
+ log.error(e.error_message)
121
+ log.info("")
122
+ log.info(e.additional_info)
123
+ return False
124
+ index_cmd = (f"{args.index_binary}"
125
+ f" -i {args.name} -s {args.name}.settings.json"
126
+ f" {input_options}")
127
+ else:
128
+ log.error("Specify exactly one of `CAT_INPUT_FILES` (for a "
129
+ "single input stream) or `MULTI_INPUT_JSON` (for "
130
+ "multiple input streams)")
131
+ log.info("")
132
+ log.info("See `qlever index --help` for more information")
133
+ return False
134
+
135
+ # Add remaining options.
47
136
  if args.only_pso_and_pos_permutations:
48
137
  index_cmd += " --only-pso-and-pos-permutations --no-patterns"
49
138
  if not args.use_patterns:
@@ -120,7 +209,8 @@ class IndexCommand(QleverCommand):
120
209
  if args.system in Containerize.supported_systems() \
121
210
  and args.overwrite_existing:
122
211
  if Containerize.is_running(args.system, args.index_container):
123
- log.info("Another index process is running, trying to stop it ...")
212
+ log.info("Another index process is running, trying to stop "
213
+ "it ...")
124
214
  log.info("")
125
215
  try:
126
216
  run_command(f"{args.system} rm -f {args.index_container}")