PyPI - qlever - Versions diffs - 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl - Mend

qlever 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qlever might be problematic. Click here for more details.

Files changed (26)

qlever/Qleverfiles/Qleverfile.dblp +13 -9
qlever/Qleverfiles/Qleverfile.dblp-plus +2 -2
qlever/Qleverfiles/Qleverfile.default +1 -1
qlever/Qleverfiles/Qleverfile.fbeasy +4 -4
qlever/Qleverfiles/Qleverfile.freebase +2 -2
qlever/Qleverfiles/Qleverfile.imdb +1 -1
qlever/Qleverfiles/Qleverfile.orkg +30 -0
qlever/Qleverfiles/Qleverfile.osm-planet +1 -1
qlever/Qleverfiles/Qleverfile.vvz +3 -3
qlever/Qleverfiles/Qleverfile.wikidata +28 -17
qlever/Qleverfiles/Qleverfile.yago-4 +4 -4
qlever/commands/example_queries.py +250 -150
qlever/commands/index.py +98 -8
qlever/commands/setup_config.py +47 -31
qlever/commands/system_info.py +126 -0
qlever/commands/ui.py +50 -23
qlever/containerize.py +67 -33
qlever/qleverfile.py +243 -125
qlever/util.py +55 -30
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/METADATA +1 -1
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/RECORD +25 -24
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/WHEEL +1 -1
qlever/__main__.py +0 -1476
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/LICENSE +0 -0
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/entry_points.txt +0 -0
{qlever-0.5.8.dist-info → qlever-0.5.10.dist-info}/top_level.txt +0 -0

qlever/commands/example_queries.py CHANGED Viewed

@@ -22,67 +22,97 @@ class ExampleQueriesCommand(QleverCommand):
     def __init__(self):
         self.presets = {
-                "virtuoso-wikidata":
-                "https://wikidata.demo.openlinksw.com/sparql",
-                "qlever-wikidata":
-                "https://qlever.cs.uni-freiburg.de/api/wikidata"
-                }
+            "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
+            "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
+        }
     def description(self) -> str:
-        return ("Show how much of the cache is currently being used")
+        return "Show how much of the cache is currently being used"
     def should_have_qleverfile(self) -> bool:
         return False
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
         return {"server": ["port"], "ui": ["ui_config"]}
     def additional_arguments(self, subparser) -> None:
-        subparser.add_argument("--sparql-endpoint", type=str,
-                               help="URL of the SPARQL endpoint")
-        subparser.add_argument("--sparql-endpoint-preset",
-                               choices=self.presets.keys(),
-                               help="Shortcut for setting the SPARQL endpoint")
-        subparser.add_argument("--get-queries-cmd", type=str,
-                               help="Command to get example queries as TSV "
-                               "(description, query)")
-        subparser.add_argument("--query-ids", type=str,
-                               default="1-$",
-                               help="Query IDs as comma-separated list of "
-                               "ranges (e.g., 1-5,7,12-$)")
-        subparser.add_argument("--query-regex", type=str,
-                               help="Only consider example queries matching "
-                               "this regex (using grep -Pi)")
-        subparser.add_argument("--download-or-count",
-                               choices=["download", "count"], default="count",
-                               help="Whether to download the full result "
-                               "or just compute the size of the result")
-        subparser.add_argument("--limit", type=int,
-                               help="Limit on the number of results")
-        subparser.add_argument("--remove-offset-and-limit",
-                               action="store_true", default=False,
-                               help="Remove OFFSET and LIMIT from the query")
-        subparser.add_argument("--accept", type=str,
-                               choices=["text/tab-separated-values",
-                                        "text/csv",
-                                        "application/sparql-results+json",
-                                        "text/turtle"],
-                               default="text/tab-separated-values",
-                               help="Accept header for the SPARQL query")
-        subparser.add_argument("--clear-cache",
-                               choices=["yes", "no"],
-                               default="yes",
-                               help="Clear the cache before each query")
-        subparser.add_argument("--width-query-description", type=int,
-                               default=40,
-                               help="Width for printing the query description")
-        subparser.add_argument("--width-error-message", type=int,
-                               default=80,
-                               help="Width for printing the error message "
-                               "(0 = no limit)")
-        subparser.add_argument("--width-result-size", type=int,
-                               default=14,
-                               help="Width for printing the result size")
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--sparql-endpoint-preset",
+            choices=self.presets.keys(),
+            help="Shortcut for setting the SPARQL endpoint",
+        )
+        subparser.add_argument(
+            "--get-queries-cmd",
+            type=str,
+            help="Command to get example queries as TSV " "(description, query)",
+        )
+        subparser.add_argument(
+            "--query-ids",
+            type=str,
+            default="1-$",
+            help="Query IDs as comma-separated list of " "ranges (e.g., 1-5,7,12-$)",
+        )
+        subparser.add_argument(
+            "--query-regex",
+            type=str,
+            help="Only consider example queries matching "
+            "this regex (using grep -Pi)",
+        )
+        subparser.add_argument(
+            "--download-or-count",
+            choices=["download", "count"],
+            default="count",
+            help="Whether to download the full result "
+            "or just compute the size of the result",
+        )
+        subparser.add_argument(
+            "--limit", type=int, help="Limit on the number of results"
+        )
+        subparser.add_argument(
+            "--remove-offset-and-limit",
+            action="store_true",
+            default=False,
+            help="Remove OFFSET and LIMIT from the query",
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "text/turtle",
+            ],
+            default="text/tab-separated-values",
+            help="Accept header for the SPARQL query",
+        )
+        subparser.add_argument(
+            "--clear-cache",
+            choices=["yes", "no"],
+            default="yes",
+            help="Clear the cache before each query",
+        )
+        subparser.add_argument(
+            "--width-query-description",
+            type=int,
+            default=40,
+            help="Width for printing the query description",
+        )
+        subparser.add_argument(
+            "--width-error-message",
+            type=int,
+            default=80,
+            help="Width for printing the error message " "(0 = no limit)",
+        )
+        subparser.add_argument(
+            "--width-result-size",
+            type=int,
+            default=14,
+            help="Width for printing the result size",
+        )
     def execute(self, args) -> bool:
         # We can't have both `--remove-offset-and-limit` and `--limit`.
@@ -93,9 +123,13 @@ class ExampleQueriesCommand(QleverCommand):
         # If `args.accept` is `application/sparql-results+json`, we need `jq`.
         if args.accept == "application/sparql-results+json":
             try:
-                subprocess.run("jq --version", shell=True, check=True,
-                               stdout=subprocess.DEVNULL,
-                               stderr=subprocess.DEVNULL)
+                subprocess.run(
+                    "jq --version",
+                    shell=True,
+                    check=True,
+                    stdout=subprocess.DEVNULL,
+                    stderr=subprocess.DEVNULL,
+                )
             except Exception as e:
                 log.error(f"Please install `jq` for {args.accept} ({e})")
                 return False
@@ -111,38 +145,44 @@ class ExampleQueriesCommand(QleverCommand):
             return False
         # Clear cache only works for QLever.
-        is_qlever = (not args.sparql_endpoint
-                     or args.sparql_endpoint.startswith("https://qlever"))
+        is_qlever = not args.sparql_endpoint or args.sparql_endpoint.startswith(
+            "https://qlever"
+        )
         if args.clear_cache == "yes" and not is_qlever:
             log.warning("Clearing the cache only works for QLever")
             args.clear_cache = "no"
         # Show what the command will do.
-        get_queries_cmd = (args.get_queries_cmd if args.get_queries_cmd
-                           else f"curl -sv https://qlever.cs.uni-freiburg.de/"
-                                f"api/examples/{args.ui_config}")
+        get_queries_cmd = (
+            args.get_queries_cmd
+            if args.get_queries_cmd
+            else f"curl -sv https://qlever.cs.uni-freiburg.de/"
+            f"api/examples/{args.ui_config}"
+        )
         sed_arg = args.query_ids.replace(",", "p;").replace("-", ",") + "p"
         get_queries_cmd += f" | sed -n '{sed_arg}'"
         if args.query_regex:
             get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
-        sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint
-                           else f"localhost:{args.port}")
-        self.show(f"Obtain queries via: {get_queries_cmd}\n"
-                  f"SPARQL endpoint: {sparql_endpoint}\n"
-                  f"Accept header: {args.accept}\n"
-                  f"Clear cache before each query:"
-                  f" {args.clear_cache.upper()}\n"
-                  f"Download result for each query or just count:"
-                  f" {args.download_or_count.upper()}" +
-                  (f" with LIMIT {args.limit}" if args.limit else ""),
-                  only_show=args.show)
+        sparql_endpoint = (
+            args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
+        )
+        self.show(
+            f"Obtain queries via: {get_queries_cmd}\n"
+            f"SPARQL endpoint: {sparql_endpoint}\n"
+            f"Accept header: {args.accept}\n"
+            f"Clear cache before each query:"
+            f" {args.clear_cache.upper()}\n"
+            f"Download result for each query or just count:"
+            f" {args.download_or_count.upper()}"
+            + (f" with LIMIT {args.limit}" if args.limit else ""),
+            only_show=args.show,
+        )
         if args.show:
             return False
         # Get the example queries.
         try:
-            example_query_lines = run_command(get_queries_cmd,
-                                              return_output=True)
+            example_query_lines = run_command(get_queries_cmd, return_output=True)
             if len(example_query_lines) == 0:
                 log.error("No example queries matching the criteria found")
                 return False
@@ -152,11 +192,11 @@ class ExampleQueriesCommand(QleverCommand):
             return False
         # Launch the queries one after the other and for each print: the
-        # description, the result size, and the query processing time.
-        total_time_seconds = 0.0
-        total_result_size = 0
-        count_succeeded = 0
-        count_failed = 0
+        # description, the result size (number of rows), and the query
+        # processing time (seconds).
+        query_times = []
+        result_sizes = []
+        num_failed = 0
         for example_query_line in example_query_lines:
             # Parse description and query.
             description, query = example_query_line.split("\t")
@@ -176,13 +216,17 @@ class ExampleQueriesCommand(QleverCommand):
             # Remove OFFSET and LIMIT (after the last closing bracket).
             if args.remove_offset_and_limit or args.limit:
                 closing_bracket_idx = query.rfind("}")
-                regexes = [re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
-                           re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)]
+                regexes = [
+                    re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
+                    re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE),
+                ]
                 for regex in regexes:
                     match = re.search(regex, query[closing_bracket_idx:])
                     if match:
-                        query = query[:closing_bracket_idx + match.start()] + \
-                                query[closing_bracket_idx + match.end():]
+                        query = (
+                            query[: closing_bracket_idx + match.start()]
+                            + query[closing_bracket_idx + match.end() :]
+                        )
             # Limit query.
             if args.limit:
@@ -191,19 +235,29 @@ class ExampleQueriesCommand(QleverCommand):
             # Count query.
             if args.download_or_count == "count":
                 # First find out if there is a FROM clause.
-                regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*",
-                                               re.IGNORECASE)
+                regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE)
                 match_from_clause = re.search(regex_from_clause, query)
                 from_clause = " "
                 if match_from_clause:
                     from_clause = match_from_clause.group(0)
-                    query = (query[:match_from_clause.start()] + " " +
-                             query[match_from_clause.end():])
+                    query = (
+                        query[: match_from_clause.start()]
+                        + " "
+                        + query[match_from_clause.end() :]
+                    )
                 # Now we can add the outer SELECT COUNT(*).
-                query = re.sub(r"SELECT ",
-                               "SELECT (COUNT(*) AS ?qlever_count_)"
-                               + from_clause + "WHERE { SELECT ",
-                               query, count=1, flags=re.IGNORECASE) + " }"
+                query = (
+                    re.sub(
+                        r"SELECT ",
+                        "SELECT (COUNT(*) AS ?qlever_count_)"
+                        + from_clause
+                        + "WHERE { SELECT ",
+                        query,
+                        count=1,
+                        flags=re.IGNORECASE,
+                    )
+                    + " }"
+                )
             # A bit of pretty-printing.
             query = re.sub(r"\s+", " ", query)
@@ -211,21 +265,27 @@ class ExampleQueriesCommand(QleverCommand):
             # Launch query.
             try:
-                curl_cmd = (f"curl -s {sparql_endpoint}"
-                            f" -w \"HTTP code: %{{http_code}}\\n\""
-                            f" -H \"Accept: {args.accept}\""
-                            f" --data-urlencode query={shlex.quote(query)}")
+                curl_cmd = (
+                    f"curl -s {sparql_endpoint}"
+                    f' -w "HTTP code: %{{http_code}}\\n"'
+                    f' -H "Accept: {args.accept}"'
+                    f" --data-urlencode query={shlex.quote(query)}"
+                )
                 log.debug(curl_cmd)
-                result_file = (f"qlever.example_queries.result."
-                               f"{abs(hash(curl_cmd))}.tmp")
+                result_file = (
+                    f"qlever.example_queries.result." f"{abs(hash(curl_cmd))}.tmp"
+                )
                 start_time = time.time()
-                http_code = run_curl_command(sparql_endpoint,
-                                             headers={"Accept": args.accept},
-                                             params={"query": query},
-                                             result_file=result_file).strip()
+                http_code = run_curl_command(
+                    sparql_endpoint,
+                    headers={"Accept": args.accept},
+                    params={"query": query},
+                    result_file=result_file,
+                ).strip()
                 if http_code != "200":
-                    raise Exception(f"HTTP code {http_code}"
-                                    f"  {Path(result_file).read_text()}")
+                    raise Exception(
+                        f"HTTP code {http_code}" f"  {Path(result_file).read_text()}"
+                    )
                 time_seconds = time.time() - start_time
                 error_msg = None
             except Exception as e:
@@ -240,30 +300,34 @@ class ExampleQueriesCommand(QleverCommand):
                     if args.download_or_count == "count":
                         if args.accept == "text/tab-separated-values":
                             result_size = run_command(
-                                    f"sed 1d {result_file}",
-                                    return_output=True)
+                                f"sed 1d {result_file}", return_output=True
+                            )
                         else:
                             result_size = run_command(
-                                    f"jq -r \".results.bindings[0]"
-                                    f" | to_entries[0].value.value"
-                                    f" | tonumber\" {result_file}",
-                                    return_output=True)
+                                f'jq -r ".results.bindings[0]'
+                                f" | to_entries[0].value.value"
+                                f' | tonumber" {result_file}',
+                                return_output=True,
+                            )
                     else:
-                        if (args.accept == "text/tab-separated-values"
-                                or args.accept == "text/csv"):
+                        if (
+                            args.accept == "text/tab-separated-values"
+                            or args.accept == "text/csv"
+                        ):
                             result_size = run_command(
-                                    f"sed 1d {result_file} | wc -l",
-                                    return_output=True)
+                                f"sed 1d {result_file} | wc -l", return_output=True
+                            )
                         elif args.accept == "text/turtle":
                             result_size = run_command(
-                                    f"sed '1d;/^@prefix/d;/^\\s*$/d' "
-                                    f"{result_file} | wc -l",
-                                    return_output=True)
+                                f"sed '1d;/^@prefix/d;/^\\s*$/d' "
+                                f"{result_file} | wc -l",
+                                return_output=True,
+                            )
                         else:
                             result_size = run_command(
-                                    f"jq -r \".results.bindings | length\""
-                                    f" {result_file}",
-                                    return_output=True)
+                                f'jq -r ".results.bindings | length"' f" {result_file}",
+                                return_output=True,
+                            )
                     result_size = int(result_size)
                 except Exception as e:
                     error_msg = str(e)
@@ -274,43 +338,79 @@ class ExampleQueriesCommand(QleverCommand):
             # Print description, time, result in tabular form.
             if len(description) > args.width_query_description:
-                description = description[:args.width_query_description - 3]
+                description = description[: args.width_query_description - 3]
                 description += "..."
             if error_msg is None:
-                log.info(f"{description:<{args.width_query_description}}  "
-                         f"{time_seconds:6.2f} s  "
-                         f"{result_size:>{args.width_result_size},}")
-                count_succeeded += 1
-                total_time_seconds += time_seconds
-                total_result_size += result_size
+                log.info(
+                    f"{description:<{args.width_query_description}}  "
+                    f"{time_seconds:6.2f} s  "
+                    f"{result_size:>{args.width_result_size},}"
+                )
+                query_times.append(time_seconds)
+                result_sizes.append(result_size)
             else:
-                count_failed += 1
-                if (args.width_error_message > 0
-                        and len(error_msg) > args.width_error_message
-                        and args.log_level != "DEBUG"):
-                    error_msg = error_msg[:args.width_error_message - 3]
+                num_failed += 1
+                if (
+                    args.width_error_message > 0
+                    and len(error_msg) > args.width_error_message
+                    and args.log_level != "DEBUG"
+                ):
+                    error_msg = error_msg[: args.width_error_message - 3]
                     error_msg += "..."
-                log.error(f"{description:<{args.width_query_description}}    "
-                          f"failed   "
-                          f"{colored(error_msg, 'red')}")
+                log.error(
+                    f"{description:<{args.width_query_description}}    "
+                    f"failed   "
+                    f"{colored(error_msg, 'red')}"
+                )
-        # Print total time.
-        log.info("")
-        if count_succeeded > 0:
-            query_or_queries = "query" if count_succeeded == 1 else "queries"
-            description = (f"TOTAL   for {count_succeeded} {query_or_queries}")
-            log.info(f"{description:<{args.width_query_description}}  "
-                     f"{total_time_seconds:6.2f} s  "
-                     f"{total_result_size:>14,}")
-            description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
-            log.info(f"{description:<{args.width_query_description}}  "
-                     f"{total_time_seconds / count_succeeded:6.2f} s  "
-                     f"{round(total_result_size / count_succeeded):>14,}")
-        else:
-            if count_failed == 1:
-                log.info(colored("One query failed", "red"))
-            elif count_failed > 1:
-                log.info(colored("All queries failed", "red"))
+        # Check that each query has a time and a result size, or it failed.
+        assert len(result_sizes) == len(query_times)
+        assert len(query_times) + num_failed == len(example_query_lines)
+        # Show statistics.
+        if len(query_times) > 0:
+            n = len(query_times)
+            total_query_time = sum(query_times)
+            average_query_time = total_query_time / n
+            median_query_time = sorted(query_times)[n // 2]
+            total_result_size = sum(result_sizes)
+            average_result_size = round(total_result_size / n)
+            median_result_size = sorted(result_sizes)[n // 2]
+            query_or_queries = "query" if n == 1 else "queries"
+            description = f"TOTAL   for {n} {query_or_queries}"
+            log.info("")
+            log.info(
+                f"{description:<{args.width_query_description}}  "
+                f"{total_query_time:6.2f} s  "
+                f"{total_result_size:>14,}"
+            )
+            description = f"AVERAGE for {n} {query_or_queries}"
+            log.info(
+                f"{description:<{args.width_query_description}}  "
+                f"{average_query_time:6.2f} s  "
+                f"{average_result_size:>14,}"
+            )
+            description = f"MEDIAN  for {n} {query_or_queries}"
+            log.info(
+                f"{description:<{args.width_query_description}}  "
+                f"{median_query_time:6.2f} s  "
+                f"{median_result_size:>14,}"
+            )
+        # Show number of failed queries.
+        if num_failed > 0:
+            log.info("")
+            description = "Number of FAILED queries"
+            num_failed_string = f"{num_failed:>6}"
+            if num_failed == len(example_query_lines):
+                num_failed_string += "  [all]"
+            log.info(
+                colored(
+                    f"{description:<{args.width_query_description}}  "
+                    f"{num_failed:>24}",
+                    "red",
+                )
+            )
         # Return success (has nothing to do with how many queries failed).
         return True

qlever/commands/index.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import glob
+import json
 import shlex
 from qlever.command import QleverCommand
@@ -26,8 +27,8 @@ class IndexCommand(QleverCommand):
     def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
         return {"data": ["name", "format"],
-                "index": ["input_files", "cat_input_files", "settings_json",
-                          "index_binary",
+                "index": ["input_files", "cat_input_files", "multi_input_json",
+                          "parallel_parsing", "settings_json", "index_binary",
                           "only_pso_and_pos_permutations", "use_patterns",
                           "text_index", "stxxl_memory"],
                 "runtime": ["system", "image", "index_container"]}
@@ -38,12 +39,100 @@ class IndexCommand(QleverCommand):
                 default=False,
                 help="Overwrite an existing index, think twice before using.")
+    # Exception for invalid JSON.
+    class InvalidInputJson(Exception):
+        def __init__(self, error_message, additional_info):
+            self.error_message = error_message
+            self.additional_info = additional_info
+            super().__init__()
+    # Helper function to get command line options from JSON.
+    def get_input_options_for_json(self, args) -> str:
+        # Parse the JSON.
+        try:
+            input_specs = json.loads(args.multi_input_json)
+        except Exception as e:
+            raise self.InvalidInputJson(
+                    f"Failed to parse `MULTI_INPUT_JSON` ({e})",
+                    args.multi_input_json)
+        # Check that it is an array of length at least one.
+        if not isinstance(input_specs, list):
+            raise self.InvalidInputJson(
+                    "`MULTI_INPUT_JSON` must be a JSON array",
+                    args.multi_input_json)
+        if len(input_specs) == 0:
+            raise self.InvalidInputJson(
+                    "`MULTI_INPUT_JSON` must contain at least one element",
+                    args.multi_input_json)
+        # For each of the maps, construct the corresponding command-line
+        # options to the index binary.
+        input_options = []
+        for i, input_spec in enumerate(input_specs):
+            # Check that `input_spec` is a dictionary.
+            if not isinstance(input_spec, dict):
+                raise self.InvalidInputJson(
+                        f"Element {i} in `MULTI_INPUT_JSON` must be a JSON "
+                        "object",
+                        input_spec)
+            # For each `input_spec`, we must have a command.
+            if "cmd" not in input_spec:
+                raise self.InvalidInputJson(
+                        f"Element {i} in `MULTI_INPUT_JSON` must contain a "
+                        "key `cmd`",
+                        input_spec)
+            input_cmd = input_spec["cmd"]
+            # The `format`, `graph`, and `parallel` keys are optional.
+            input_format = input_spec.get("format", args.format)
+            input_graph = input_spec.get("graph", "-")
+            input_parallel = input_spec.get("parallel", "false")
+            # There must not be any other keys.
+            extra_keys = input_spec.keys() - {"cmd", "format", "graph", "parallel"}
+            if extra_keys:
+                raise self.InvalidInputJson(
+                        f"Element {i} in `MULTI_INPUT_JSON` must only contain "
+                        "the keys `format`, `graph`, and `parallel`. Contains "
+                        "extra keys {extra_keys}.",
+                        input_spec)
+            # Add the command-line options for this input stream. We use
+            # process substitution `<(...)` as a convenient way to handle
+            # an input stream just like a file. This is not POSIX compliant,
+            # but supported by various shells, including bash and zsh.
+            input_options.append(
+                    f"-f <({input_cmd}) -F {input_format} "
+                    f"-g \"{input_graph}\" -p {input_parallel}")
+        # Return the concatenated command-line options.
+        return " ".join(input_options)
     def execute(self, args) -> bool:
-        # Construct the command line.
-        index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
-                     f" -F {args.format} -f -"
-                     f" -i {args.name}"
-                     f" -s {args.name}.settings.json")
+        # The mandatory part of the command line (specifying the input, the
+        # basename of the index, and the settings file). There are two ways
+        # to specify the input: via a single stream or via multiple streams.
+        if args.cat_input_files and not args.multi_input_json:
+            index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
+                         f" -i {args.name} -s {args.name}.settings.json"
+                         f" -F {args.format} -f -")
+            if args.parallel_parsing:
+                index_cmd += (f" -p {args.parallel_parsing}")
+        elif args.multi_input_json and not args.cat_input_files:
+            try:
+                input_options = self.get_input_options_for_json(args)
+            except self.InvalidInputJson as e:
+                log.error(e.error_message)
+                log.info("")
+                log.info(e.additional_info)
+                return False
+            index_cmd = (f"{args.index_binary}"
+                         f" -i {args.name} -s {args.name}.settings.json"
+                         f" {input_options}")
+        else:
+            log.error("Specify exactly one of `CAT_INPUT_FILES` (for a "
+                      "single input stream) or `MULTI_INPUT_JSON` (for "
+                      "multiple input streams)")
+            log.info("")
+            log.info("See `qlever index --help` for more information")
+            return False
+        # Add remaining options.
         if args.only_pso_and_pos_permutations:
             index_cmd += " --only-pso-and-pos-permutations --no-patterns"
         if not args.use_patterns:
@@ -120,7 +209,8 @@ class IndexCommand(QleverCommand):
         if args.system in Containerize.supported_systems() \
                 and args.overwrite_existing:
             if Containerize.is_running(args.system, args.index_container):
-                log.info("Another index process is running, trying to stop it ...")
+                log.info("Another index process is running, trying to stop "
+                         "it ...")
                 log.info("")
                 try:
                     run_command(f"{args.system} rm -f {args.index_container}")

qlever 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl

Potentially problematic release.

qlever 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl