PyPI - qlever - Versions diffs - 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl - Mend

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qlever might be problematic. Click here for more details.

Files changed (30)

qlever/Qleverfiles/Qleverfile.dblp +1 -1
qlever/Qleverfiles/Qleverfile.pubchem +102 -26
qlever/Qleverfiles/Qleverfile.uniprot +48 -16
qlever/commands/add_text_index.py +2 -1
qlever/commands/cache_stats.py +1 -1
qlever/commands/clear_cache.py +4 -2
qlever/commands/example_queries.py +236 -75
qlever/commands/extract_queries.py +113 -0
qlever/commands/get_data.py +1 -1
qlever/commands/index.py +51 -11
qlever/commands/index_stats.py +90 -59
qlever/commands/log.py +12 -2
qlever/commands/query.py +66 -27
qlever/commands/settings.py +110 -0
qlever/commands/setup_config.py +1 -1
qlever/commands/start.py +222 -105
qlever/commands/status.py +2 -1
qlever/commands/stop.py +43 -32
qlever/commands/system_info.py +1 -1
qlever/commands/ui.py +3 -1
qlever/commands/warmup.py +1 -1
qlever/qlever_main.py +16 -9
qlever/util.py +34 -17
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/METADATA +2 -2
qlever-0.5.17.dist-info/RECORD +54 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/WHEEL +1 -1
qlever-0.5.12.dist-info/RECORD +0 -52
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/LICENSE +0 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/entry_points.txt +0 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/top_level.txt +0 -0

qlever/commands/index.py CHANGED Viewed

@@ -99,28 +99,68 @@ class IndexCommand(QleverCommand):
                     f"Element {i} in `MULTI_INPUT_JSON` must contain a " "key `cmd`",
                     input_spec,
                 )
-            input_cmd = input_spec["cmd"]
+            # If the command contains a `{}` placeholder, we need a `for-each`
+            # key` specifying the pattern for the placeholder values, and vice
+            # versa.
+            if "{}" in input_spec["cmd"] and "for-each" not in input_spec:
+                raise self.InvalidInputJson(
+                    f"Element {i} in `MULTI_INPUT_JSON` must contain a "
+                    "key `for-each` if the command contains a placeholder "
+                    "`{}`",
+                    input_spec,
+                )
+            if "for-each" in input_spec and "{}" not in input_spec["cmd"]:
+                raise self.InvalidInputJson(
+                    f"Element {i} in `MULTI_INPUT_JSON` contains a "
+                    "key `for-each`, but the command does not contain a "
+                    "placeholder `{{}}`",
+                    input_spec,
+                )
+            # Get all commands. This is just the value of the `cmd` key if no
+            # `for-each` key is specified. Otherwise, we have a command for
+            # each file matching the pattern.
+            if "for-each" not in input_spec:
+                input_cmds = [input_spec["cmd"]]
+            else:
+                try:
+                    files = sorted(glob.glob(input_spec["for-each"]))
+                except Exception as e:
+                    raise self.InvalidInputJson(
+                        f"Element {i} in `MULTI_INPUT_JSON` contains an "
+                        f"invalid `for-each` pattern: {e}",
+                        input_spec,
+                    )
+                input_cmds = [input_spec["cmd"].format(file) for file in files]
             # The `format`, `graph`, and `parallel` keys are optional.
             input_format = input_spec.get("format", args.format)
             input_graph = input_spec.get("graph", "-")
             input_parallel = input_spec.get("parallel", "false")
             # There must not be any other keys.
-            extra_keys = input_spec.keys() - {"cmd", "format", "graph", "parallel"}
+            extra_keys = input_spec.keys() - {
+                "cmd",
+                "format",
+                "graph",
+                "parallel",
+                "for-each",
+            }
             if extra_keys:
                 raise self.InvalidInputJson(
                     f"Element {i} in `MULTI_INPUT_JSON` must only contain "
                     "the keys `format`, `graph`, and `parallel`. Contains "
-                    "extra keys {extra_keys}.",
+                    f"extra keys {extra_keys}.",
                     input_spec,
                 )
             # Add the command-line options for this input stream. We use
-            # process substitution `<(...)` as a convenient way to handle
-            # an input stream just like a file. This is not POSIX compliant,
-            # but supported by various shells, including bash and zsh.
-            input_options.append(
-                f"-f <({input_cmd}) -F {input_format} "
-                f'-g "{input_graph}" -p {input_parallel}'
-            )
+            # process substitution `<(...)` as a convenient way to handle an
+            # input stream just like a file. This is not POSIX compliant, but
+            # supported by various shells, including bash and zsh. If
+            # `for-each` is specified, add one command for each matching file.
+            for input_cmd in input_cmds:
+                input_option = f"-f <({input_cmd}) -g {input_graph}"
+                input_option += f" -F {input_format}"
+                if input_parallel == "true":
+                    input_option += " -p true"
+                input_options.append(input_option)
         # Return the concatenated command-line options.
         return " ".join(input_options)
@@ -200,7 +240,7 @@ class IndexCommand(QleverCommand):
         # Show the command line.
         self.show(f"{settings_json_cmd}\n{index_cmd}", only_show=args.show)
         if args.show:
-            return False
+            return True
         # When running natively, check if the binary exists and works.
         if args.system == "native":

qlever/commands/index_stats.py CHANGED Viewed

@@ -18,32 +18,45 @@ class IndexStatsCommand(QleverCommand):
         pass
     def description(self) -> str:
-        return ("Breakdown of the time and space used for the index build")
+        return "Breakdown of the time and space used for the index build"
     def should_have_qleverfile(self) -> bool:
         return False
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
         return {"data": ["name"]}
     def additional_arguments(self, subparser) -> None:
-        subparser.add_argument("--only-time", action="store_true",
-                               default=False,
-                               help="Show only the time used")
-        subparser.add_argument("--only-space", action="store_true",
-                               default=False,
-                               help="Show only the space used")
-        subparser.add_argument("--ignore-text-index", action="store_true",
-                               default=False,
-                               help="Ignore the text index")
-        subparser.add_argument("--time-unit",
-                               choices=["s", "min", "h", "auto"],
-                               default="auto",
-                               help="The time unit")
-        subparser.add_argument("--size-unit",
-                               choices=["B", "MB", "GB", "TB", "auto"],
-                               default="auto",
-                               help="The size unit")
+        subparser.add_argument(
+            "--only-time",
+            action="store_true",
+            default=False,
+            help="Show only the time used",
+        )
+        subparser.add_argument(
+            "--only-space",
+            action="store_true",
+            default=False,
+            help="Show only the space used",
+        )
+        subparser.add_argument(
+            "--ignore-text-index",
+            action="store_true",
+            default=False,
+            help="Ignore the text index",
+        )
+        subparser.add_argument(
+            "--time-unit",
+            choices=["s", "min", "h", "auto"],
+            default="auto",
+            help="The time unit",
+        )
+        subparser.add_argument(
+            "--size-unit",
+            choices=["B", "MB", "GB", "TB", "auto"],
+            default="auto",
+            help="The size unit",
+        )
     def execute_time(self, args, log_file_name) -> bool:
         """
@@ -65,8 +78,9 @@ class IndexStatsCommand(QleverCommand):
                 with open(text_log_file_name, "r") as text_log_file:
                     lines.extend(text_log_file.readlines())
         except Exception as e:
-            log.error(f"Problem reading text index log file "
-                      f"{text_log_file_name}: {e}")
+            log.error(
+                f"Problem reading text index log file " f"{text_log_file_name}: {e}"
+            )
             return False
         # Helper function that finds the next line matching the given `regex`,
@@ -95,12 +109,14 @@ class IndexStatsCommand(QleverCommand):
                 if regex_match:
                     try:
                         return datetime.strptime(
-                                re.match(timestamp_regex, line).group(),
-                                timestamp_format), regex_match
+                            re.match(timestamp_regex, line).group(), timestamp_format
+                        ), regex_match
                     except Exception as e:
-                        log.error(f"Could not parse timestamp of form "
-                                  f"\"{timestamp_regex}\" from line "
-                                  f" \"{line.rstrip()}\" ({e})")
+                        log.error(
+                            f"Could not parse timestamp of form "
+                            f'"{timestamp_regex}" from line '
+                            f' "{line.rstrip()}" ({e})'
+                        )
             # If we get here, we did not find a matching line.
             if not update_current_line:
                 current_line = current_line_backup
@@ -119,26 +135,32 @@ class IndexStatsCommand(QleverCommand):
             # file (old format: "Creating a pair" + names of permutations in
             # line "Writing meta data for ..."; new format: name of
             # permutations already in line "Creating permutations ...").
-            perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair",
-                                           update_current_line=False)
+            perm_begin, _ = find_next_line(
+                r"INFO:\s*Creating a pair", update_current_line=False
+            )
             if perm_begin is None:
                 perm_begin, perm_info = find_next_line(
                     r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
-                    update_current_line=False)
+                    update_current_line=False,
+                )
             else:
                 _, perm_info = find_next_line(
                     r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
-                    update_current_line=False)
+                    update_current_line=False,
+                )
             if perm_info is None:
                 break
             perm_begin_and_info.append((perm_begin, perm_info))
-        convert_end = (perm_begin_and_info[0][0] if
-                       len(perm_begin_and_info) > 0 else None)
+        convert_end = (
+            perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None
+        )
         normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
-        text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
-                                       update_current_line=False)
-        text_end, _ = find_next_line(r"INFO:\s*Text index build comp",
-                                     update_current_line=False)
+        text_begin, _ = find_next_line(
+            r"INFO:\s*Adding text index", update_current_line=False
+        )
+        text_end, _ = find_next_line(
+            r"INFO:\s*Text index build comp", update_current_line=False
+        )
         if args.ignore_text_index:
             text_begin = text_end = None
@@ -147,9 +169,11 @@ class IndexStatsCommand(QleverCommand):
             log.error("Missing line that index build has started")
             return False
         if overall_begin and not merge_begin:
-            log.error("According to the log file, the index build "
-                      "has started, but is still in its first "
-                      "phase (parsing the input)")
+            log.error(
+                "According to the log file, the index build "
+                "has started, but is still in its first "
+                "phase (parsing the input)"
+            )
             return False
         # Helper function that shows the duration for a phase (if the start and
@@ -187,22 +211,24 @@ class IndexStatsCommand(QleverCommand):
         show_duration("Convert to global IDs", [(convert_begin, convert_end)])
         for i in range(len(perm_begin_and_info)):
             perm_begin, perm_info = perm_begin_and_info[i]
-            perm_end = perm_begin_and_info[i + 1][0] if i + 1 < len(
-                    perm_begin_and_info) else normal_end
-            perm_info_text = (perm_info.group(1).replace(" and ", " & ")
-                              if perm_info else f"#{i + 1}")
-            show_duration(f"Permutation {perm_info_text}",
-                          [(perm_begin, perm_end)])
+            perm_end = (
+                perm_begin_and_info[i + 1][0]
+                if i + 1 < len(perm_begin_and_info)
+                else normal_end
+            )
+            perm_info_text = (
+                perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}"
+            )
+            show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)])
         show_duration("Text index", [(text_begin, text_end)])
         if text_begin and text_end:
             log.info("")
-            show_duration("TOTAL time",
-                          [(overall_begin, normal_end),
-                           (text_begin, text_end)])
+            show_duration(
+                "TOTAL time", [(overall_begin, normal_end), (text_begin, text_end)]
+            )
         elif normal_end:
             log.info("")
-            show_duration("TOTAL time",
-                          [(overall_begin, normal_end)])
+            show_duration("TOTAL time", [(overall_begin, normal_end)])
         return True
     def execute_space(self, args) -> bool:
@@ -252,24 +278,29 @@ class IndexStatsCommand(QleverCommand):
         return True
     def execute(self, args) -> bool:
-        ret_value = args.show
+        return_value = True
         # The "time" part of the command.
         if not args.only_space:
             log_file_name = f"{args.name}.index-log.txt"
-            self.show(f"Breakdown of the time used for "
-                      f"building the index, based on the timestamps for key "
-                      f"lines in \"{log_file_name}\"", only_show=args.show)
+            self.show(
+                f"Breakdown of the time used for "
+                f"building the index, based on the timestamps for key "
+                f'lines in "{log_file_name}"',
+                only_show=args.show,
+            )
             if not args.show:
-                ret_value &= self.execute_time(args, log_file_name)
+                return_value &= self.execute_time(args, log_file_name)
             if not args.only_time:
                 log.info("")
         # The "space" part of the command.
         if not args.only_time:
-            self.show("Breakdown of the space used for building the index",
-                      only_show=args.show)
+            self.show(
+                "Breakdown of the space used for building the index",
+                only_show=args.show,
+            )
             if not args.show:
-                ret_value &= self.execute_space(args)
+                return_value &= self.execute_space(args)
-        return ret_value
+        return return_value

qlever/commands/log.py CHANGED Viewed

@@ -47,10 +47,20 @@ class LogCommand(QleverCommand):
         log_cmd += f" {log_file}"
         self.show(log_cmd, only_show=args.show)
         if args.show:
-            return False
+            return True
         # Execute the command.
         log.info(f"Follow log file {log_file}, press Ctrl-C to stop"
                  f" following (will not stop the server)")
         log.info("")
-        subprocess.run(log_cmd, shell=True)
+        try:
+            subprocess.run(log_cmd, shell=True)
+            return True
+        except Exception as e:
+            log.error(e)
+            return False

qlever/commands/query.py CHANGED Viewed

@@ -18,42 +18,82 @@ class QueryCommand(QleverCommand):
         pass
     def description(self) -> str:
-        return ("Send a query to a SPARQL endpoint")
+        return "Send a query to a SPARQL endpoint"
     def should_have_qleverfile(self) -> bool:
         return False
-    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
-        return {"server": ["port"]}
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {"server": ["port", "access_token"]}
     def additional_arguments(self, subparser) -> None:
-        subparser.add_argument("--query", type=str,
-                               default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
-                               help="SPARQL query to send")
-        subparser.add_argument("--sparql-endpoint", type=str,
-                               help="URL of the SPARQL endpoint")
-        subparser.add_argument("--accept", type=str,
-                               choices=["text/tab-separated-values",
-                                        "text/csv",
-                                        "application/sparql-results+json",
-                                        "application/sparql-results+xml",
-                                        "application/qlever-results+json"],
-                               default="text/tab-separated-values",
-                               help="Accept header for the SPARQL query")
-        subparser.add_argument("--no-time", action="store_true",
-                               default=False,
-                               help="Do not print the (end-to-end) time taken")
+        subparser.add_argument(
+            "query",
+            type=str,
+            nargs="?",
+            default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
+            help="SPARQL query to send",
+        )
+        subparser.add_argument(
+            "--pin-to-cache",
+            action="store_true",
+            default=False,
+            help="Pin the query to the cache",
+        )
+        subparser.add_argument(
+            "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
+        )
+        subparser.add_argument(
+            "--accept",
+            type=str,
+            choices=[
+                "text/tab-separated-values",
+                "text/csv",
+                "application/sparql-results+json",
+                "application/sparql-results+xml",
+                "application/qlever-results+json",
+            ],
+            default="text/tab-separated-values",
+            help="Accept header for the SPARQL query",
+        )
+        subparser.add_argument(
+            "--no-time",
+            action="store_true",
+            default=False,
+            help="Do not print the (end-to-end) time taken",
+        )
     def execute(self, args) -> bool:
+        # When pinning to the cache, set `send=0` and request media type
+        # `application/qlever-results+json` so that we get the result size.
+        # Also, we need to provide the access token.
+        if args.pin_to_cache:
+            args.accept = "application/qlever-results+json"
+            curl_cmd_additions = (
+                f" --data pinresult=true --data send=0"
+                f" --data access-token="
+                f"{shlex.quote(args.access_token)}"
+                f" | jq .resultsize | numfmt --grouping"
+                f" | xargs -I {{}} printf"
+                f' "Result pinned to cache,'
+                f' number of rows: {{}}\\n"'
+            )
+        else:
+            curl_cmd_additions = ""
         # Show what the command will do.
-        sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint
-                           else f"localhost:{args.port}")
-        curl_cmd = (f"curl -s {sparql_endpoint}"
-                    f" -H \"Accept: {args.accept}\""
-                    f" --data-urlencode query={shlex.quote(args.query)}")
+        sparql_endpoint = (
+            args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
+        )
+        curl_cmd = (
+            f"curl -s {sparql_endpoint}"
+            f' -H "Accept: {args.accept}"'
+            f" --data-urlencode query={shlex.quote(args.query)}"
+            f"{curl_cmd_additions}"
+        )
         self.show(curl_cmd, only_show=args.show)
         if args.show:
-            return False
+            return True
         # Launch query.
         try:
@@ -62,8 +102,7 @@ class QueryCommand(QleverCommand):
             time_msecs = round(1000 * (time.time() - start_time))
             if not args.no_time and args.log_level != "NO_LOG":
                 log.info("")
-                log.info(f"Query processing time (end-to-end):"
-                         f" {time_msecs:,d} ms")
+                log.info(f"Query processing time (end-to-end):" f" {time_msecs:,d} ms")
         except Exception as e:
             if args.log_level == "DEBUG":
                 traceback.print_exc()

qlever/commands/settings.py ADDED Viewed

@@ -0,0 +1,110 @@
+from __future__ import annotations
+import json
+from termcolor import colored
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+class SettingsCommand(QleverCommand):
+    """
+    Class for executing the `settings` command.
+    """
+    def __init__(self):
+        pass
+    def description(self) -> str:
+        return "Show or set server settings (after `qlever start`)"
+    def should_have_qleverfile(self) -> bool:
+        return True
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {"server": ["port", "host_name", "access_token"]}
+    def additional_arguments(self, subparser) -> None:
+        all_keys = [
+            "always-multiply-unions",
+            "cache-max-num-entries",
+            "cache-max-size",
+            "cache-max-size-single-entry",
+            "default-query-timeout",
+            "group-by-disable-index-scan-optimizations",
+            "group-by-hash-map-enabled",
+            "lazy-index-scan-max-size-materialization",
+            "lazy-index-scan-num-threads",
+            "lazy-index-scan-queue-size",
+            "lazy-result-max-cache-size",
+            "query-planning-budget",
+            "service-max-value-rows",
+            "sort-estimate-cancellation-factor",
+            "throw-on-unbound-variables",
+            "use-binsearch-transitive-path",
+        ]
+        subparser.add_argument(
+            "runtime_parameter",
+            nargs="?",
+            help="Set the given runtime parameter (key=value)"
+            "; if no argument is given, show all settings",
+        ).completer = lambda **kwargs: [f"{key}=" for key in all_keys]
+        subparser.add_argument(
+            "--endpoint_url",
+            type=str,
+            help="An arbitrary endpoint URL "
+            "(overriding the one in the Qleverfile)",
+        )
+    def execute(self, args) -> bool:
+        # Get endpoint URL from command line or Qleverfile.
+        if args.endpoint_url:
+            endpoint_url = args.endpoint_url
+        else:
+            endpoint_url = f"http://{args.host_name}:{args.port}"
+        # Construct the `curl` command for getting or setting.
+        if args.runtime_parameter:
+            try:
+                parameter_key, parameter_value = args.runtime_parameter.split(
+                    "="
+                )
+            except ValueError:
+                log.error("Runtime parameter must be given as `key=value`")
+                return False
+            curl_cmd = (
+                f"curl -s {endpoint_url}"
+                f' --data-urlencode "{parameter_key}={parameter_value}"'
+                f' --data-urlencode "access-token={args.access_token}"'
+            )
+        else:
+            curl_cmd = (
+                f"curl -s {endpoint_url}" f" --data-urlencode cmd=get-settings"
+            )
+            parameter_key, parameter_value = None, None
+        self.show(curl_cmd, only_show=args.show)
+        if args.show:
+            return True
+        # Execute the `curl` command. Note that the `get-settings` command
+        # returns all settings in both scencarios (that is, also when setting a
+        # parameter).
+        try:
+            settings_json = run_command(curl_cmd, return_output=True)
+            settings_dict = json.loads(settings_json)
+        except Exception as e:
+            log.error(f"setting command failed: {e}")
+            return False
+        for key, value in settings_dict.items():
+            print(
+                colored(
+                    f"{key:<45}: {value}",
+                    "blue"
+                    if parameter_key and key == parameter_key
+                    else None,
+                )
+            )
+        return True

qlever/commands/setup_config.py CHANGED Viewed

@@ -60,7 +60,7 @@ class SetupConfigCommand(QleverCommand):
         setup_config_cmd += "> Qleverfile"
         self.show(setup_config_cmd, only_show=args.show)
         if args.show:
-            return False
+            return True
         # If there is already a Qleverfile in the current directory, exit.
         qleverfile_path = Path("Qleverfile")

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl

Potentially problematic release.

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl