PyPI - qlever - Versions diffs - 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl - Mend

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of qlever might be problematic. Click here for more details.

Files changed (30)

qlever/Qleverfiles/Qleverfile.dblp +1 -1
qlever/Qleverfiles/Qleverfile.pubchem +102 -26
qlever/Qleverfiles/Qleverfile.uniprot +48 -16
qlever/commands/add_text_index.py +2 -1
qlever/commands/cache_stats.py +1 -1
qlever/commands/clear_cache.py +4 -2
qlever/commands/example_queries.py +236 -75
qlever/commands/extract_queries.py +113 -0
qlever/commands/get_data.py +1 -1
qlever/commands/index.py +51 -11
qlever/commands/index_stats.py +90 -59
qlever/commands/log.py +12 -2
qlever/commands/query.py +66 -27
qlever/commands/settings.py +110 -0
qlever/commands/setup_config.py +1 -1
qlever/commands/start.py +222 -105
qlever/commands/status.py +2 -1
qlever/commands/stop.py +43 -32
qlever/commands/system_info.py +1 -1
qlever/commands/ui.py +3 -1
qlever/commands/warmup.py +1 -1
qlever/qlever_main.py +16 -9
qlever/util.py +34 -17
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/METADATA +2 -2
qlever-0.5.17.dist-info/RECORD +54 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/WHEEL +1 -1
qlever-0.5.12.dist-info/RECORD +0 -52
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/LICENSE +0 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/entry_points.txt +0 -0
{qlever-0.5.12.dist-info → qlever-0.5.17.dist-info}/top_level.txt +0 -0

qlever/commands/example_queries.py CHANGED Viewed

@@ -21,10 +21,7 @@ class ExampleQueriesCommand(QleverCommand):
     """
     def __init__(self):
-        self.presets = {
-            "virtuoso-wikidata": "https://wikidata.demo.openlinksw.com/sparql",
-            "qlever-wikidata": "https://qlever.cs.uni-freiburg.de/api/wikidata",
-        }
+        pass
     def description(self) -> str:
         return "Show how much of the cache is currently being used"
@@ -41,19 +38,28 @@ class ExampleQueriesCommand(QleverCommand):
         )
         subparser.add_argument(
             "--sparql-endpoint-preset",
-            choices=self.presets.keys(),
-            help="Shortcut for setting the SPARQL endpoint",
+            choices=[
+                "https://qlever.dev/api/wikidata",
+                "https://qlever.dev/api/uniprot",
+                "https://qlever.dev/api/pubchem",
+                "https://qlever.dev/api/osm-planet",
+                "https://wikidata.demo.openlinksw.com/sparql",
+                "https://sparql.uniprot.org/sparql",
+            ],
+            help="SPARQL endpoint from fixed list (to save typing)",
         )
         subparser.add_argument(
             "--get-queries-cmd",
             type=str,
-            help="Command to get example queries as TSV " "(description, query)",
+            help="Command to get example queries as TSV "
+            "(description, query)",
         )
         subparser.add_argument(
             "--query-ids",
             type=str,
             default="1-$",
-            help="Query IDs as comma-separated list of " "ranges (e.g., 1-5,7,12-$)",
+            help="Query IDs as comma-separated list of "
+            "ranges (e.g., 1-5,7,12-$)",
         )
         subparser.add_argument(
             "--query-regex",
@@ -64,7 +70,7 @@ class ExampleQueriesCommand(QleverCommand):
         subparser.add_argument(
             "--download-or-count",
             choices=["download", "count"],
-            default="count",
+            default="download",
             help="Whether to download the full result "
             "or just compute the size of the result",
         )
@@ -84,10 +90,14 @@ class ExampleQueriesCommand(QleverCommand):
                 "text/tab-separated-values",
                 "text/csv",
                 "application/sparql-results+json",
+                "application/qlever-results+json",
                 "text/turtle",
+                "AUTO",
             ],
-            default="text/tab-separated-values",
-            help="Accept header for the SPARQL query",
+            default="application/sparql-results+json",
+            help="Accept header for the SPARQL query; AUTO means "
+            "`text/turtle` for CONSTRUCT AND DESCRIBE queries, "
+            "`application/sparql-results+json` for all others",
         )
         subparser.add_argument(
             "--clear-cache",
@@ -98,7 +108,7 @@ class ExampleQueriesCommand(QleverCommand):
         subparser.add_argument(
             "--width-query-description",
             type=int,
-            default=40,
+            default=70,
             help="Width for printing the query description",
         )
         subparser.add_argument(
@@ -113,6 +123,55 @@ class ExampleQueriesCommand(QleverCommand):
             default=14,
             help="Width for printing the result size",
         )
+        subparser.add_argument(
+            "--add-query-type-to-description",
+            action="store_true",
+            default=False,
+            help="Add the query type (SELECT, ASK, CONSTRUCT, DESCRIBE, "
+            "UNKNOWN) to the description",
+        )
+        subparser.add_argument(
+            "--show-query",
+            choices=["always", "never", "on-error"],
+            default="never",
+            help="Show the queries that will be executed (always, never, on error)",
+        )
+        subparser.add_argument(
+            "--show-prefixes",
+            action="store_true",
+            default=False,
+            help="When showing the query, also show the prefixes",
+        )
+    def pretty_printed_query(self, query: str, show_prefixes: bool) -> str:
+        remove_prefixes_cmd = (
+            " | sed '/^PREFIX /Id'" if not show_prefixes else ""
+        )
+        pretty_print_query_cmd = (
+            f"echo {shlex.quote(query)}"
+            f" | docker run -i --rm sparqling/sparql-formatter"
+            f"{remove_prefixes_cmd} | grep -v '^$'"
+        )
+        try:
+            query_pretty_printed = run_command(
+                pretty_print_query_cmd, return_output=True
+            )
+            return query_pretty_printed.rstrip()
+        except Exception:
+            log.error(
+                "Failed to pretty-print query, "
+                "returning original query: {e}"
+            )
+            return query.rstrip()
+    def sparql_query_type(self, query: str) -> str:
+        match = re.search(
+            r"(SELECT|ASK|CONSTRUCT|DESCRIBE)\s", query, re.IGNORECASE
+        )
+        if match:
+            return match.group(1).upper()
+        else:
+            return "UNKNOWN"
     def execute(self, args) -> bool:
         # We can't have both `--remove-offset-and-limit` and `--limit`.
@@ -120,8 +179,13 @@ class ExampleQueriesCommand(QleverCommand):
             log.error("Cannot have both --remove-offset-and-limit and --limit")
             return False
-        # If `args.accept` is `application/sparql-results+json`, we need `jq`.
-        if args.accept == "application/sparql-results+json":
+        # If `args.accept` is `application/sparql-results+json` or
+        # `application/qlever-results+json` or `AUTO`, we need `jq`.
+        if (
+            args.accept == "application/sparql-results+json"
+            or args.accept == "application/qlever-results+json"
+            or args.accept == "AUTO"
+        ):
             try:
                 subprocess.run(
                     "jq --version",
@@ -135,9 +199,8 @@ class ExampleQueriesCommand(QleverCommand):
                 return False
         # Handle shotcuts for SPARQL endpoint.
-        if args.sparql_endpoint_preset in self.presets:
-            args.sparql_endpoint = self.presets[args.sparql_endpoint_preset]
-            args.ui_config = args.sparql_endpoint_preset.split("-")[1]
+        if args.sparql_endpoint_preset:
+            args.sparql_endpoint = args.sparql_endpoint_preset
         # Limit only works with full result.
         if args.limit and args.download_or_count == "count":
@@ -145,8 +208,9 @@ class ExampleQueriesCommand(QleverCommand):
             return False
         # Clear cache only works for QLever.
-        is_qlever = not args.sparql_endpoint or args.sparql_endpoint.startswith(
-            "https://qlever"
+        is_qlever = (
+            not args.sparql_endpoint
+            or args.sparql_endpoint.startswith("https://qlever")
         )
         if args.clear_cache == "yes" and not is_qlever:
             log.warning("Clearing the cache only works for QLever")
@@ -164,7 +228,9 @@ class ExampleQueriesCommand(QleverCommand):
         if args.query_regex:
             get_queries_cmd += f" | grep -Pi {shlex.quote(args.query_regex)}"
         sparql_endpoint = (
-            args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
+            args.sparql_endpoint
+            if args.sparql_endpoint
+            else f"localhost:{args.port}"
         )
         self.show(
             f"Obtain queries via: {get_queries_cmd}\n"
@@ -178,11 +244,13 @@ class ExampleQueriesCommand(QleverCommand):
             only_show=args.show,
         )
         if args.show:
-            return False
+            return True
         # Get the example queries.
         try:
-            example_query_lines = run_command(get_queries_cmd, return_output=True)
+            example_query_lines = run_command(
+                get_queries_cmd, return_output=True
+            )
             if len(example_query_lines) == 0:
                 log.error("No example queries matching the criteria found")
                 return False
@@ -191,6 +259,12 @@ class ExampleQueriesCommand(QleverCommand):
             log.error(f"Failed to get example queries: {e}")
             return False
+        # We want the width of the query description to be an uneven number (in
+        # case we have to truncated it, in which case we want to have a " ... "
+        # in the middle).
+        width_query_description_half = args.width_query_description // 2
+        width_query_description = 2 * width_query_description_half + 1
         # Launch the queries one after the other and for each print: the
         # description, the result size (number of rows), and the query
         # processing time (seconds).
@@ -198,20 +272,26 @@ class ExampleQueriesCommand(QleverCommand):
         result_sizes = []
         num_failed = 0
         for example_query_line in example_query_lines:
-            # Parse description and query.
+            # Parse description and query, and determine query type.
             description, query = example_query_line.split("\t")
             if len(query) == 0:
                 log.error("Could not parse description and query, line is:")
                 log.info("")
                 log.info(example_query_line)
                 return False
+            query_type = self.sparql_query_type(query)
+            if args.add_query_type_to_description or args.accept == "AUTO":
+                description = f"{description} [{query_type}]"
             # Clear the cache.
             if args.clear_cache == "yes":
                 args.server_url = sparql_endpoint
                 args.complete = False
+                clear_cache_successful = False
                 with mute_log():
-                    ClearCacheCommand().execute(args)
+                    clear_cache_successful = ClearCacheCommand().execute(args)
+                if not clear_cache_successful:
+                    log.warn("Failed to clear the cache")
             # Remove OFFSET and LIMIT (after the last closing bracket).
             if args.remove_offset_and_limit or args.limit:
@@ -235,7 +315,9 @@ class ExampleQueriesCommand(QleverCommand):
             # Count query.
             if args.download_or_count == "count":
                 # First find out if there is a FROM clause.
-                regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE)
+                regex_from_clause = re.compile(
+                    r"\s*FROM\s+<[^>]+>\s*", re.IGNORECASE
+                )
                 match_from_clause = re.search(regex_from_clause, query)
                 from_clause = " "
                 if match_from_clause:
@@ -262,87 +344,148 @@ class ExampleQueriesCommand(QleverCommand):
             # A bit of pretty-printing.
             query = re.sub(r"\s+", " ", query)
             query = re.sub(r"\s*\.\s*\}", " }", query)
+            if args.show_query == "always":
+                log.info("")
+                log.info(
+                    colored(
+                        self.pretty_printed_query(query, args.show_prefixes),
+                        "cyan",
+                    )
+                )
+            # Accept header. For "AUTO", use `text/turtle` for CONSTRUCT
+            # queries and `application/sparql-results+json` for all others.
+            accept_header = args.accept
+            if accept_header == "AUTO":
+                if query_type == "CONSTRUCT" or query_type == "DESCRIBE":
+                    accept_header = "text/turtle"
+                else:
+                    accept_header = "application/sparql-results+json"
             # Launch query.
             try:
                 curl_cmd = (
                     f"curl -s {sparql_endpoint}"
                     f' -w "HTTP code: %{{http_code}}\\n"'
-                    f' -H "Accept: {args.accept}"'
+                    f' -H "Accept: {accept_header}"'
                     f" --data-urlencode query={shlex.quote(query)}"
                 )
                 log.debug(curl_cmd)
                 result_file = (
-                    f"qlever.example_queries.result." f"{abs(hash(curl_cmd))}.tmp"
+                    f"qlever.example_queries.result."
+                    f"{abs(hash(curl_cmd))}.tmp"
                 )
                 start_time = time.time()
                 http_code = run_curl_command(
                     sparql_endpoint,
-                    headers={"Accept": args.accept},
+                    headers={"Accept": accept_header},
                     params={"query": query},
                     result_file=result_file,
                 ).strip()
-                if http_code != "200":
-                    raise Exception(
-                        f"HTTP code {http_code}" f"  {Path(result_file).read_text()}"
-                    )
-                time_seconds = time.time() - start_time
-                error_msg = None
+                if http_code == "200":
+                    time_seconds = time.time() - start_time
+                    error_msg = None
+                else:
+                    error_msg = {
+                        "short": f"HTTP code: {http_code}",
+                        "long": re.sub(
+                            r"\s+", " ", Path(result_file).read_text()
+                        ),
+                    }
             except Exception as e:
                 if args.log_level == "DEBUG":
                     traceback.print_exc()
-                error_msg = re.sub(r"\s+", " ", str(e))
+                error_msg = {
+                    "short": "Exception",
+                    "long": re.sub(r"\s+", " ", str(e)),
+                }
             # Get result size (via the command line, in order to avoid loading
             # a potentially large JSON file into Python, which is slow).
             if error_msg is None:
-                try:
-                    if args.download_or_count == "count":
-                        if args.accept == "text/tab-separated-values":
-                            result_size = run_command(
-                                f"sed 1d {result_file}", return_output=True
-                            )
-                        else:
+                # CASE 0: The result is empty despite a 200 HTTP code (not a
+                # problem for CONSTRUCT and DESCRIBE queries).
+                if Path(result_file).stat().st_size == 0 and (
+                    not query_type == "CONSTRUCT"
+                    and not query_type == "DESCRIBE"
+                ):
+                    result_size = 0
+                    error_msg = {
+                        "short": "Empty result",
+                        "long": "curl returned with code 200, "
+                        "but the result is empty",
+                    }
+                # CASE 1: Just counting the size of the result (TSV or JSON).
+                elif args.download_or_count == "count":
+                    if accept_header == "text/tab-separated-values":
+                        result_size = run_command(
+                            f"sed 1d {result_file}", return_output=True
+                        )
+                    else:
+                        try:
                             result_size = run_command(
                                 f'jq -r ".results.bindings[0]'
                                 f" | to_entries[0].value.value"
                                 f' | tonumber" {result_file}',
                                 return_output=True,
                             )
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": "curl returned with code 200, "
+                                "but the JSON is malformed: "
+                                + re.sub(r"\s+", " ", str(e)),
+                            }
+                # CASE 2: Downloading the full result (TSV, CSV, Turtle, JSON).
+                else:
+                    if (
+                        accept_header == "text/tab-separated-values"
+                        or accept_header == "text/csv"
+                    ):
+                        result_size = run_command(
+                            f"sed 1d {result_file} | wc -l", return_output=True
+                        )
+                    elif accept_header == "text/turtle":
+                        result_size = run_command(
+                            f"sed '1d;/^@prefix/d;/^\\s*$/d' "
+                            f"{result_file} | wc -l",
+                            return_output=True,
+                        )
+                    elif accept_header == "application/qlever-results+json":
+                        result_size = run_command(
+                            f'jq -r ".resultsize" {result_file}',
+                            return_output=True,
+                        )
                     else:
-                        if (
-                            args.accept == "text/tab-separated-values"
-                            or args.accept == "text/csv"
-                        ):
-                            result_size = run_command(
-                                f"sed 1d {result_file} | wc -l", return_output=True
-                            )
-                        elif args.accept == "text/turtle":
-                            result_size = run_command(
-                                f"sed '1d;/^@prefix/d;/^\\s*$/d' "
-                                f"{result_file} | wc -l",
-                                return_output=True,
-                            )
-                        else:
+                        try:
                             result_size = run_command(
-                                f'jq -r ".results.bindings | length"' f" {result_file}",
+                                f'jq -r ".results.bindings | length"'
+                                f" {result_file}",
                                 return_output=True,
                             )
-                    result_size = int(result_size)
-                except Exception as e:
-                    error_msg = str(e)
+                        except Exception as e:
+                            error_msg = {
+                                "short": "Malformed JSON",
+                                "long": re.sub(r"\s+", " ", str(e)),
+                            }
             # Remove the result file (unless in debug mode).
             if args.log_level != "DEBUG":
                 Path(result_file).unlink(missing_ok=True)
             # Print description, time, result in tabular form.
-            if len(description) > args.width_query_description:
-                description = description[: args.width_query_description - 3]
-                description += "..."
+            if len(description) > width_query_description:
+                description = (
+                    description[: width_query_description_half - 2]
+                    + " ... "
+                    + description[-width_query_description_half + 2 :]
+                )
             if error_msg is None:
+                result_size = int(result_size)
                 log.info(
-                    f"{description:<{args.width_query_description}}  "
+                    f"{description:<{width_query_description}}  "
                     f"{time_seconds:6.2f} s  "
                     f"{result_size:>{args.width_result_size},}"
                 )
@@ -352,16 +495,34 @@ class ExampleQueriesCommand(QleverCommand):
                 num_failed += 1
                 if (
                     args.width_error_message > 0
-                    and len(error_msg) > args.width_error_message
+                    and len(error_msg["long"]) > args.width_error_message
                     and args.log_level != "DEBUG"
+                    and args.show_query != "on-error"
                 ):
-                    error_msg = error_msg[: args.width_error_message - 3]
-                    error_msg += "..."
-                log.error(
-                    f"{description:<{args.width_query_description}}    "
-                    f"failed   "
-                    f"{colored(error_msg, 'red')}"
+                    error_msg["long"] = (
+                        error_msg["long"][: args.width_error_message - 3]
+                        + "..."
+                    )
+                seperator_short_long = (
+                    "\n" if args.show_query == "on-error" else "  "
+                )
+                log.info(
+                    f"{description:<{width_query_description}}    "
+                    f"{colored('FAILED   ', 'red')}"
+                    f"{colored(error_msg['short'], 'red'):>{args.width_result_size}}"
+                    f"{seperator_short_long}"
+                    f"{colored(error_msg['long'], 'red')}"
                 )
+                if args.show_query == "on-error":
+                    log.info(
+                        colored(
+                            self.pretty_printed_query(
+                                query, args.show_prefixes
+                            ),
+                            "cyan",
+                        )
+                    )
+                    log.info("")
         # Check that each query has a time and a result size, or it failed.
         assert len(result_sizes) == len(query_times)
@@ -380,19 +541,19 @@ class ExampleQueriesCommand(QleverCommand):
             description = f"TOTAL   for {n} {query_or_queries}"
             log.info("")
             log.info(
-                f"{description:<{args.width_query_description}}  "
+                f"{description:<{width_query_description}}  "
                 f"{total_query_time:6.2f} s  "
                 f"{total_result_size:>14,}"
             )
             description = f"AVERAGE for {n} {query_or_queries}"
             log.info(
-                f"{description:<{args.width_query_description}}  "
+                f"{description:<{width_query_description}}  "
                 f"{average_query_time:6.2f} s  "
                 f"{average_result_size:>14,}"
             )
             description = f"MEDIAN  for {n} {query_or_queries}"
             log.info(
-                f"{description:<{args.width_query_description}}  "
+                f"{description:<{width_query_description}}  "
                 f"{median_query_time:6.2f} s  "
                 f"{median_result_size:>14,}"
             )
@@ -406,7 +567,7 @@ class ExampleQueriesCommand(QleverCommand):
                 num_failed_string += "  [all]"
             log.info(
                 colored(
-                    f"{description:<{args.width_query_description}}  "
+                    f"{description:<{width_query_description}}  "
                     f"{num_failed:>24}",
                     "red",
                 )

qlever/commands/extract_queries.py ADDED Viewed

@@ -0,0 +1,113 @@
+from __future__ import annotations
+import re
+from qlever.command import QleverCommand
+from qlever.log import log
+class ExtractQueriesCommand(QleverCommand):
+    """
+    Class for executing the `extract-queries` command.
+    """
+    def __init__(self):
+        pass
+    def description(self) -> str:
+        return "Extract all SPARQL queries from the server log"
+    def should_have_qleverfile(self) -> bool:
+        return True
+    def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
+        return {"data": ["name"]}
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument(
+            "--description-base",
+            type=str,
+            default="Log extract",
+            help="Base name for the query descriptions"
+            " (default: `Log extract`)",
+        )
+        subparser.add_argument(
+            "--log-file",
+            type=str,
+            help="Name of the log file to extract queries from"
+            " (default: `<name>.server-log.txt`)",
+        )
+        subparser.add_argument(
+            "--output-file",
+            type=str,
+            default="log-queries.txt",
+            help="Output file for the extracted queries (default: `log-queries.txt`)",
+        )
+    def execute(self, args) -> bool:
+        # Show what the command does.
+        if args.log_file is not None:
+            log_file_name = args.log_file
+        else:
+            log_file_name = f"{args.name}.server-log.txt"
+        self.show(
+            f"Extract SPARQL queries from `{log_file_name}`"
+            f" and write them to `{args.output_file}`",
+            only_show=args.show,
+        )
+        if args.show:
+            return True
+        # Regex for log entries of the form
+        # 2025-01-14 04:47:44.950 - INFO
+        log_line_regex = (
+            r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - [A-Z]+:"
+        )
+        # Read the log file line by line.
+        log_file = open(log_file_name, "r")
+        queries_file = open(args.output_file, "w")
+        query = None
+        description_base = args.description_base
+        description_base_count = {}
+        tsv_line_short_width = 150
+        for line in log_file:
+            # An "Alive check" message contains a tag, which we use as the base
+            # name of the query description.
+            alive_check_regex = r"Alive check with message \"(.*)\""
+            match = re.search(alive_check_regex, line)
+            if match:
+                description_base = match.group(1)
+                continue
+            # A new query in the log.
+            if "Processing the following SPARQL query" in line:
+                query = []
+                query_index = (
+                    description_base_count.get(description_base, 0) + 1
+                )
+                description_base_count[description_base] = query_index
+                continue
+            # If we have started a query: extend until we meet the next log
+            # line, then push the query. Remove comments.
+            if query is not None:
+                if not re.match(log_line_regex, line):
+                    if not re.match(r"^\s*#", line):
+                        line = re.sub(r" #.*", "", line)
+                        query.append(line)
+                else:
+                    query = re.sub(r"\s+", " ", "\n".join(query)).strip()
+                    description = f"{description_base}, Query #{query_index}"
+                    tsv_line = f"{description}\t{query}"
+                    tsv_line_short = (
+                        tsv_line
+                        if len(tsv_line) < tsv_line_short_width
+                        else tsv_line[:tsv_line_short_width] + "..."
+                    )
+                    log.info(tsv_line_short)
+                    print(tsv_line, file=queries_file)
+                    query = None
+        log_file.close()
+        queries_file.close()
+        return True

qlever/commands/get_data.py CHANGED Viewed

@@ -31,7 +31,7 @@ class GetDataCommand(QleverCommand):
         # Construct the command line and show it.
         self.show(args.get_data_cmd, only_show=args.show)
         if args.show:
-            return False
+            return True
         # Execute the command line.
         try:

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl

Potentially problematic release.

qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl