qlever 0.4.1__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

qlever/__init__.py CHANGED
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import sys
2
4
  from pathlib import Path
3
5
 
@@ -4,13 +4,15 @@ import re
4
4
  import shlex
5
5
  import subprocess
6
6
  import time
7
+ import traceback
8
+ from pathlib import Path
7
9
 
8
10
  from termcolor import colored
9
11
 
10
12
  from qlever.command import QleverCommand
11
13
  from qlever.commands.clear_cache import ClearCacheCommand
12
14
  from qlever.log import log, mute_log
13
- from qlever.util import run_command
15
+ from qlever.util import run_command, run_curl_command
14
16
 
15
17
 
16
18
  class ExampleQueriesCommand(QleverCommand):
@@ -57,12 +59,27 @@ class ExampleQueriesCommand(QleverCommand):
57
59
  "or just compute the size of the result")
58
60
  subparser.add_argument("--limit", type=int,
59
61
  help="Limit on the number of results")
62
+ subparser.add_argument("--accept", type=str,
63
+ choices=["text/tab-separated-values",
64
+ "application/sparql-results+json"],
65
+ default="text/tab-separated-values",
66
+ help="Accept header for the SPARQL query")
60
67
  subparser.add_argument("--clear-cache",
61
68
  choices=["yes", "no"],
62
69
  default="yes",
63
70
  help="Clear the cache before each query")
64
71
 
65
72
  def execute(self, args) -> bool:
73
+ # If `args.accept` is `application/sparql-results+json`, we need `jq`.
74
+ if args.accept == "application/sparql-results+json":
75
+ try:
76
+ subprocess.run("jq --version", shell=True, check=True,
77
+ stdout=subprocess.DEVNULL,
78
+ stderr=subprocess.DEVNULL)
79
+ except Exception as e:
80
+ log.error(f"Please install `jq` for {args.accept} ({e})")
81
+ return False
82
+
66
83
  # Handle shortcuts for SPARQL endpoint.
67
84
  if args.sparql_endpoint_preset in self.presets:
68
85
  args.sparql_endpoint = self.presets[args.sparql_endpoint_preset]
@@ -92,6 +109,7 @@ class ExampleQueriesCommand(QleverCommand):
92
109
  else f"localhost:{args.port}")
93
110
  self.show(f"Obtain queries via: {get_queries_cmd}\n"
94
111
  f"SPARQL endpoint: {sparql_endpoint}\n"
112
+ f"Accept header: {args.accept}\n"
95
113
  f"Clear cache before each query:"
96
114
  f" {args.clear_cache.upper()}\n"
97
115
  f"Download result for each query or just count:"
@@ -103,7 +121,8 @@ class ExampleQueriesCommand(QleverCommand):
103
121
 
104
122
  # Get the example queries.
105
123
  try:
106
- example_query_lines = run_command(get_queries_cmd, return_output=True)
124
+ example_query_lines = run_command(get_queries_cmd,
125
+ return_output=True)
107
126
  if len(example_query_lines) == 0:
108
127
  log.error("No example queries matching the criteria found")
109
128
  return False
@@ -114,9 +133,10 @@ class ExampleQueriesCommand(QleverCommand):
114
133
 
115
134
  # Launch the queries one after the other and for each print: the
116
135
  # description, the result size, and the query processing time.
117
- count = 0
118
136
  total_time_seconds = 0.0
119
137
  total_result_size = 0
138
+ count_succeeded = 0
139
+ count_failed = 0
120
140
  for example_query_line in example_query_lines:
121
141
  # Parse description and query.
122
142
  description, query = example_query_line.split("\t")
@@ -155,44 +175,93 @@ class ExampleQueriesCommand(QleverCommand):
155
175
  + f" }} LIMIT {args.limit}"
156
176
 
157
177
  # Launch query.
158
- query_cmd = (f"curl -sv {sparql_endpoint}"
159
- f" -H \"Accept: text/tab-separated-values\""
160
- f" --data-urlencode query={shlex.quote(query)}")
161
- if args.download_or_count == "count":
162
- query_cmd += " | sed 1d"
163
- else:
164
- query_cmd += " | sed 1d | wc -l"
165
178
  try:
166
- log.debug(query_cmd)
179
+ curl_cmd = (f"curl -s {sparql_endpoint}"
180
+ f" -w \"HTTP code: %{{http_code}}\\n\""
181
+ f" -H \"Accept: {args.accept}\""
182
+ f" --data-urlencode query={shlex.quote(query)}")
183
+ log.debug(curl_cmd)
184
+ result_file = (f"qlever.example_queries.result."
185
+ f"{abs(hash(curl_cmd))}.tmp")
167
186
  start_time = time.time()
168
- result_size = run_command(query_cmd, return_output=True)
169
- result_size = int(result_size.strip())
187
+ http_code = run_curl_command(sparql_endpoint,
188
+ headers={"Accept": args.accept},
189
+ params={"query": query},
190
+ result_file=result_file).strip()
191
+ if http_code != "200":
192
+ raise Exception(f"HTTP code {http_code}"
193
+ f" {Path(result_file).read_text()}")
170
194
  time_seconds = time.time() - start_time
171
- time_string = f"{time_seconds:.2f}"
172
- result_string = f"{result_size:>14,}"
195
+ error_msg = None
173
196
  except Exception as e:
174
- time_seconds = 0.0
175
- time_string = "---"
176
- result_size = 0
177
- result_string = colored(f" FAILED {e}", "red")
197
+ if args.log_level == "DEBUG":
198
+ traceback.print_exc()
199
+ error_msg = re.sub(r"\s+", " ", str(e))
200
+
201
+ # Get result size (via the command line, in order to avoid loading
202
+ # a potentially large JSON file into Python, which is slow).
203
+ if error_msg is None:
204
+ try:
205
+ if args.download_or_count == "count":
206
+ if args.accept == "text/tab-separated-values":
207
+ result_size = run_command(
208
+ f"sed 1d {result_file}",
209
+ return_output=True)
210
+ else:
211
+ result_size = run_command(
212
+ f"jq -r \".results.bindings[0]"
213
+ f" | to_entries[0].value.value"
214
+ f" | tonumber\" {result_file}",
215
+ return_output=True)
216
+ else:
217
+ if args.accept == "text/tab-separated-values":
218
+ result_size = run_command(
219
+ f"sed 1d {result_file} | wc -l",
220
+ return_output=True)
221
+ else:
222
+ result_size = run_command(
223
+ f"jq -r \".results.bindings | length\""
224
+ f" {result_file}",
225
+ return_output=True)
226
+ result_size = int(result_size)
227
+ except Exception as e:
228
+ error_msg = str(e)
178
229
 
179
230
  # Print description, time, result in tabular form.
180
231
  if (len(description) > 60):
181
232
  description = description[:57] + "..."
182
- log.info(f"{description:<60} {time_string:>6} s "
183
- f"{result_string}")
184
- count += 1
185
- total_time_seconds += time_seconds
186
- total_result_size += result_size
233
+ if error_msg is None:
234
+ log.info(f"{description:<60} {time_seconds:6.2f} s "
235
+ f"{result_size:14,}")
236
+ count_succeeded += 1
237
+ total_time_seconds += time_seconds
238
+ total_result_size += result_size
239
+ else:
240
+ count_failed += 1
241
+ if (len(error_msg) > 60) and args.log_level != "DEBUG":
242
+ error_msg = error_msg[:57] + "..."
243
+ log.error(f"{description:<60} failed "
244
+ f"{colored(error_msg, 'red')}")
187
245
 
188
246
  # Print total time.
189
247
  log.info("")
190
- description = (f"TOTAL for {count} "
191
- f"{'query' if count == 1 else 'queries'}")
192
- log.info(f"{description:<60} {total_time_seconds:6.2f} s "
193
- f"{total_result_size:>14,}")
194
- description = (f"AVERAGE for {count} "
195
- f"{'query' if count == 1 else 'queries'}")
196
- log.info(f"{description:<60} {total_time_seconds / count:6.2f} s "
197
- f"{round(total_result_size / count):>14,}")
248
+ if count_succeeded > 0:
249
+ query_or_queries = "query" if count_succeeded == 1 else "queries"
250
+ description = (f"TOTAL for {count_succeeded} {query_or_queries}")
251
+ log.info(f"{description:<60} "
252
+ f"{total_time_seconds:6.2f} s "
253
+ f"{total_result_size:>14,}")
254
+ description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
255
+ log.info(f"{description:<60} "
256
+ f"{total_time_seconds / count_succeeded:6.2f} s "
257
+ f"{round(total_result_size / count_succeeded):>14,}")
258
+ else:
259
+ if count_failed == 1:
260
+ log.info(colored("One query failed", "red"))
261
+ elif count_failed > 1:
262
+ log.info(colored("All queries failed", "red"))
263
+
264
+ # Return success (has nothing to do with how many queries failed).
265
+ if args.log_level != "DEBUG":
266
+ Path(result_file).unlink(missing_ok=True)
198
267
  return True
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import shlex
2
4
  import subprocess
3
5
 
@@ -71,14 +71,17 @@ class IndexStatsCommand(QleverCommand):
71
71
 
72
72
  # Helper function that finds the next line matching the given `regex`,
73
73
  # starting from `current_line`, and extracts the time. Returns a tuple
74
- # of the time and the regex match object. If a match is found,
75
- # `current_line` is updated to the line after the match. Otherwise,
76
- # `current_line` will be one beyond the last line, unless
77
- # `line_is_optional` is true, in which case it will be the same as when
78
- # the function was entered.
74
+ # of the time and the regex match object.
75
+ #
76
+ # If `update_current_line` is `False`, then `current_line` will not be
77
+ # updated by this call.
78
+ #
79
+ # Otherwise, and this is the default behavior, `current_line` will be
80
+ # updated to the line after the first match, or one beyond the last
81
+ # line if no match is found.
79
82
  current_line = 0
80
83
 
81
- def find_next_line(regex, line_is_optional=False):
84
+ def find_next_line(regex, update_current_line=True):
82
85
  nonlocal lines
83
86
  nonlocal current_line
84
87
  current_line_backup = current_line
@@ -99,7 +102,7 @@ class IndexStatsCommand(QleverCommand):
99
102
  f"\"{timestamp_regex}\" from line "
100
103
  f" \"{line.rstrip()}\" ({e})")
101
104
  # If we get here, we did not find a matching line.
102
- if line_is_optional:
105
+ if not update_current_line:
103
106
  current_line = current_line_backup
104
107
  return None, None
105
108
 
@@ -110,24 +113,34 @@ class IndexStatsCommand(QleverCommand):
110
113
  convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
111
114
  perm_begin_and_info = []
112
115
  while True:
113
- perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True)
116
+ # Find the next line that starts a permutation.
117
+ #
118
+ # NOTE: Should work for the old and new format of the index log
119
+ # file (old format: "Creating a pair" + names of permutations in
120
+ # line "Writing meta data for ..."; new format: name of
121
+ # permutations already in line "Creating permutations ...").
122
+ perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair",
123
+ update_current_line=False)
114
124
  if perm_begin is None:
125
+ perm_begin, perm_info = find_next_line(
126
+ r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
127
+ update_current_line=False)
128
+ else:
129
+ _, perm_info = find_next_line(
130
+ r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
131
+ update_current_line=False)
132
+ if perm_info is None:
115
133
  break
116
- _, perm_info = find_next_line(r"INFO:\s*Writing meta data for"
117
- r" ([A-Z]+ and [A-Z]+)", True)
118
- # if perm_info is None:
119
- # break
120
134
  perm_begin_and_info.append((perm_begin, perm_info))
121
135
  convert_end = (perm_begin_and_info[0][0] if
122
136
  len(perm_begin_and_info) > 0 else None)
123
137
  normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
124
- text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True)
125
- text_end, _ = find_next_line(r"INFO:\s*Text index build comp", True)
138
+ text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
139
+ update_current_line=False)
140
+ text_end, _ = find_next_line(r"INFO:\s*Text index build comp",
141
+ update_current_line=False)
126
142
  if args.ignore_text_index:
127
143
  text_begin = text_end = None
128
- # print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info)
129
- # print("DEBUG:", overall_begin)
130
- # print("DEBUG:", normal_end)
131
144
 
132
145
  # Check whether at least the first phase is done.
133
146
  if overall_begin is None:
@@ -1,3 +1,5 @@
1
+ from __future__ import annotations
2
+
1
3
  import subprocess
2
4
  from pathlib import Path
3
5
 
qlever/log.py CHANGED
@@ -1,6 +1,4 @@
1
- # Copyright 2024, University of Freiburg,
2
- # Chair of Algorithms and Data Structures
3
- # Author: Hannah Bast <bast@cs.uni-freiburg.de>
1
+ from __future__ import annotations
4
2
 
5
3
  import logging
6
4
  from contextlib import contextmanager
qlever/qlever_main.py CHANGED
@@ -5,6 +5,8 @@
5
5
  # Chair of Algorithms and Data Structures
6
6
  # Author: Hannah Bast <bast@cs.uni-freiburg.de>
7
7
 
8
+ from __future__ import annotations
9
+
8
10
  import re
9
11
  import traceback
10
12