qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. qlever/Qleverfiles/Qleverfile.dblp +36 -0
  2. qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
  3. qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
  4. qlever/Qleverfiles/Qleverfile.default +51 -0
  5. qlever/Qleverfiles/Qleverfile.dnb +40 -0
  6. qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
  7. qlever/Qleverfiles/Qleverfile.freebase +28 -0
  8. qlever/Qleverfiles/Qleverfile.imdb +36 -0
  9. qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
  10. qlever/Qleverfiles/Qleverfile.olympics +31 -0
  11. qlever/Qleverfiles/Qleverfile.orkg +30 -0
  12. qlever/Qleverfiles/Qleverfile.osm-country +39 -0
  13. qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
  14. qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
  15. qlever/Qleverfiles/Qleverfile.pubchem +131 -0
  16. qlever/Qleverfiles/Qleverfile.scientists +29 -0
  17. qlever/Qleverfiles/Qleverfile.uniprot +74 -0
  18. qlever/Qleverfiles/Qleverfile.vvz +31 -0
  19. qlever/Qleverfiles/Qleverfile.wikidata +42 -0
  20. qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
  21. qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
  22. qlever/__init__.py +44 -1380
  23. qlever/command.py +87 -0
  24. qlever/commands/__init__.py +0 -0
  25. qlever/commands/add_text_index.py +115 -0
  26. qlever/commands/benchmark_queries.py +1019 -0
  27. qlever/commands/cache_stats.py +125 -0
  28. qlever/commands/clear_cache.py +88 -0
  29. qlever/commands/extract_queries.py +120 -0
  30. qlever/commands/get_data.py +48 -0
  31. qlever/commands/index.py +333 -0
  32. qlever/commands/index_stats.py +306 -0
  33. qlever/commands/log.py +66 -0
  34. qlever/commands/materialized_view.py +110 -0
  35. qlever/commands/query.py +142 -0
  36. qlever/commands/rebuild_index.py +176 -0
  37. qlever/commands/reset_updates.py +59 -0
  38. qlever/commands/settings.py +115 -0
  39. qlever/commands/setup_config.py +97 -0
  40. qlever/commands/start.py +336 -0
  41. qlever/commands/status.py +50 -0
  42. qlever/commands/stop.py +90 -0
  43. qlever/commands/system_info.py +130 -0
  44. qlever/commands/ui.py +271 -0
  45. qlever/commands/update.py +90 -0
  46. qlever/commands/update_wikidata.py +1204 -0
  47. qlever/commands/warmup.py +41 -0
  48. qlever/config.py +223 -0
  49. qlever/containerize.py +167 -0
  50. qlever/log.py +55 -0
  51. qlever/qlever_main.py +79 -0
  52. qlever/qleverfile.py +530 -0
  53. qlever/util.py +330 -0
  54. qlever-0.5.41.dist-info/METADATA +127 -0
  55. qlever-0.5.41.dist-info/RECORD +59 -0
  56. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
  57. qlever-0.5.41.dist-info/entry_points.txt +2 -0
  58. qlever-0.5.41.dist-info/top_level.txt +1 -0
  59. build/lib/qlever/__init__.py +0 -1383
  60. build/lib/qlever/__main__.py +0 -4
  61. qlever/__main__.py +0 -4
  62. qlever-0.2.5.dist-info/METADATA +0 -277
  63. qlever-0.2.5.dist-info/RECORD +0 -12
  64. qlever-0.2.5.dist-info/entry_points.txt +0 -2
  65. qlever-0.2.5.dist-info/top_level.txt +0 -4
  66. src/qlever/__init__.py +0 -1383
  67. src/qlever/__main__.py +0 -4
  68. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,125 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ import subprocess
6
+
7
+ from qlever.command import QleverCommand
8
+ from qlever.log import log
9
+
10
+
11
class CacheStatsCommand(QleverCommand):
    """
    Class for executing the `cache-stats` command.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Show how much of the cache is currently being used"

    def should_have_qleverfile(self) -> bool:
        return False

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"server": ["host_name", "port"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--sparql-endpoint",
            help="URL of the SPARQL endpoint, default is {host_name}:{port}",
        )
        subparser.add_argument(
            "--detailed",
            action="store_true",
            default=False,
            help="Show detailed statistics and settings",
        )

    @staticmethod
    def _format_value(value) -> str:
        """
        Format a single JSON value for the detailed table: integers get
        thousands separators, decimals are shown with two digits, everything
        else is shown as is.
        """
        # `bool` is a subclass of `int`, so check it first; otherwise `true`
        # would be displayed as `1`.
        if isinstance(value, bool):
            return str(value).lower()
        if isinstance(value, int):
            return "{:,}".format(value)
        if isinstance(value, float):
            return "{:.2f}".format(value)
        if isinstance(value, str):
            if re.match(r"^\d+$", value):
                return "{:,}".format(int(value))
            if re.match(r"^\d+\.\d+$", value):
                return "{:.2f}".format(float(value))
            return value
        # Anything else (None, lists, nested objects) is shown via `str`
        # instead of failing in `re.match`.
        return str(value)

    @classmethod
    def _show_dict_as_table(cls, key_value_pairs) -> None:
        """
        Log the given key-value pairs as a table with aligned keys.
        """
        pairs = list(key_value_pairs)
        # `default=0` so that an empty dictionary does not raise.
        max_key_len = max((len(key) for key, _ in pairs), default=0)
        for key, value in pairs:
            log.info(f"{key.ljust(max_key_len)} : {cls._format_value(value)}")

    def execute(self, args) -> bool:
        """
        Ask the server for its cache statistics and settings via `curl` and
        log either a brief summary or (with `--detailed`) all values. Returns
        `True` on success and `False` if the values could not be obtained or
        interpreted.
        """
        # Construct the two curl commands.
        sparql_endpoint = (
            args.sparql_endpoint
            if args.sparql_endpoint
            else f"{args.host_name}:{args.port}"
        )
        cache_stats_cmd = (
            f'curl -s {sparql_endpoint} --data-urlencode "cmd=cache-stats"'
        )
        cache_settings_cmd = (
            f'curl -s {sparql_endpoint} --data-urlencode "cmd=get-settings"'
        )

        # Show them.
        self.show(
            "\n".join([cache_stats_cmd, cache_settings_cmd]),
            only_show=args.show,
        )
        if args.show:
            return True

        # Execute them.
        try:
            cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
            cache_settings = subprocess.check_output(
                cache_settings_cmd, shell=True
            )
            cache_stats_dict = json.loads(cache_stats)
            cache_settings_dict = json.loads(cache_settings)
            # The settings may come wrapped in a list; use the first entry.
            if isinstance(cache_settings_dict, list):
                cache_settings_dict = cache_settings_dict[0]
        except Exception as e:
            log.error(f"Failed to get cache stats and settings: {e}")
            return False

        # Brief version.
        if not args.detailed:
            cache_size = cache_settings_dict.get("cache-max-size")
            if not isinstance(cache_size, str) or not cache_size.endswith(
                " GB"
            ):
                log.error(
                    f"Cache size {cache_size} is not in GB, "
                    f"QLever should return bytes instead"
                )
                return False
            # Missing keys or non-numeric values are reported like any other
            # failure to obtain the stats, instead of crashing.
            try:
                cache_size = float(cache_size[:-3])
                pinned_size = cache_stats_dict["cache-size-pinned"] / 1e9
                non_pinned_size = (
                    cache_stats_dict["cache-size-unpinned"] / 1e9
                )
            except (KeyError, TypeError, ValueError) as e:
                log.error(f"Failed to get cache stats and settings: {e}")
                return False
            cached_size = pinned_size + non_pinned_size
            free_size = cache_size - cached_size
            if cached_size == 0:
                log.info(f"Cache is empty, all {cache_size:.1f} GB available")
            else:
                log.info(
                    f"Pinned queries     : "
                    f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB"
                    f"  [{pinned_size / cache_size:5.1%}]"
                )
                log.info(
                    f"Non-pinned queries : "
                    f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB"
                    f"  [{non_pinned_size / cache_size:5.1%}]"
                )
                log.info(
                    f"FREE               : "
                    f"{free_size:5.1f} GB of {cache_size:5.1f} GB"
                    f"  [{1 - cached_size / cache_size:5.1%}]"
                )
            return True

        # Complete version.
        self._show_dict_as_table(cache_stats_dict.items())
        log.info("")
        self._show_dict_as_table(cache_settings_dict.items())

        return True
@@ -0,0 +1,88 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from qlever.command import QleverCommand
6
+ from qlever.commands.cache_stats import CacheStatsCommand
7
+ from qlever.log import log
8
+ from qlever.util import run_command
9
+
10
+
11
class ClearCacheCommand(QleverCommand):
    """
    Class for executing the `clear-cache` command.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Clear the query processing cache"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"server": ["host_name", "port", "access_token"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--sparql-endpoint",
            help="URL of the QLever server, default is {host_name}:{port}",
        )
        subparser.add_argument(
            "--complete",
            action="store_true",
            default=False,
            help="Clear the cache completely, including the pinned queries",
        )

    def execute(self, args) -> bool:
        """
        Send the `clear-cache` (or, with `--complete`, the
        `clear-cache-complete`) command to the server via `curl` and then
        show the brief cache statistics. Returns `False` if clearing the
        cache failed and `True` otherwise (also when only showing the
        statistics afterwards failed).
        """
        # Determine SPARQL endpoint.
        sparql_endpoint = (
            args.sparql_endpoint
            if args.sparql_endpoint
            else f"{args.host_name}:{args.port}"
        )

        # Construct command line and show it. With `--complete`, the suffix
        # turns `cmd=clear-cache` into `cmd=clear-cache-complete`, and the
        # access token is sent along.
        clear_cache_cmd = f"curl -s {sparql_endpoint} -d cmd=clear-cache"
        if args.complete:
            clear_cache_cmd += (
                "-complete"
                f' --data-urlencode access-token="{args.access_token}"'
            )
        self.show(clear_cache_cmd, only_show=args.show)
        if args.show:
            return True

        # Execute the command. The HTTP status code is appended to the
        # output (separated by a space), so that it can be split off from
        # the response body again.
        try:
            clear_cache_cmd += ' -w " %{http_code}"'
            result = run_command(clear_cache_cmd, return_output=True)
            match = re.match(r"^(.*) (\d+)$", result, re.DOTALL)
            if not match:
                raise Exception(f"Unexpected output:\n{result}")
            error_message = match.group(1).strip()
            status_code = match.group(2)
            if status_code != "200":
                raise Exception(error_message)
            message = "Cache cleared successfully"
            if args.complete:
                message += " (pinned and unpinned queries)"
            else:
                message += " (only unpinned queries)"
            log.info(message)
        except Exception as e:
            log.error(e)
            return False

        # Show cache stats (brief version). `CacheStatsCommand.execute` logs
        # the reason itself when it fails, so no exception is available (or
        # needed) for the message here.
        log.info("")
        args.detailed = False
        if not CacheStatsCommand().execute(args):
            log.error(
                "Clearing the cache was successful, but showing the "
                "cache stats failed"
            )
        return True
@@ -0,0 +1,120 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ from qlever.command import QleverCommand
6
+ from qlever.log import log
7
+
8
+
9
class ExtractQueriesCommand(QleverCommand):
    """
    Class for executing the `extract-queries` command.
    """

    # Regex for the start of a log entry, for example:
    # 2025-01-14 04:47:44.950 - INFO:
    _LOG_LINE_REGEX = re.compile(
        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - [A-Z]+:"
    )
    # Regex for an "Alive check" message; the group captures its tag.
    _ALIVE_CHECK_REGEX = re.compile(r"Alive check with message \"(.*)\"")
    # Regex for a line that consists only of a comment.
    _COMMENT_LINE_REGEX = re.compile(r"^\s*#")
    # Regex for a trailing comment at the end of a line.
    _TRAILING_COMMENT_REGEX = re.compile(r" #.*")

    def __init__(self):
        pass

    def description(self) -> str:
        return "Extract all SPARQL queries from the server log"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--description-base",
            type=str,
            default="Log extract",
            help="Base name for the query descriptions"
            " (default: `Log extract`)",
        )
        subparser.add_argument(
            "--log-file",
            type=str,
            help="Name of the log file to extract queries from"
            " (default: `<name>.server-log.txt`)",
        )
        subparser.add_argument(
            "--output-file",
            type=str,
            default="log-queries.txt",
            help="Output file for the extracted queries"
            " (default: `log-queries.txt`)",
        )
        subparser.add_argument(
            "--use-alive-check-tag-as-description-base",
            action="store_true",
            help="Use the tag from 'Alive check' messages"
            " as the base for query descriptions (default: False)",
        )

    def _extract_queries(
        self,
        log_file,
        queries_file,
        description_base: str,
        use_alive_check_tag: bool,
        tsv_line_short_width: int = 150,
    ) -> None:
        """
        Read `log_file` line by line and write one TSV line (description, tab,
        query with whitespace compressed) per query to `queries_file`. A
        shortened version of each TSV line is also logged.
        """
        query = None
        query_index = 0
        description_base_count = {}
        for line in log_file:
            # An "Alive check" message contains a tag, which we use as the
            # base name of the query description.
            if use_alive_check_tag:
                match = self._ALIVE_CHECK_REGEX.search(line)
                if match:
                    description_base = match.group(1)
                    continue

            # A new query in the log. Queries are numbered separately for
            # each description base.
            if "Processing the following SPARQL query" in line:
                query = []
                query_index = (
                    description_base_count.get(description_base, 0) + 1
                )
                description_base_count[description_base] = query_index
                continue

            # Lines outside of a query are not of interest.
            if query is None:
                continue

            # If we have started a query: extend until we meet the next log
            # line, then push the query. Remove comments.
            if not self._LOG_LINE_REGEX.match(line):
                if not self._COMMENT_LINE_REGEX.match(line):
                    query.append(self._TRAILING_COMMENT_REGEX.sub("", line))
                continue
            query_text = re.sub(r"\s+", " ", "\n".join(query)).strip()
            description = f"{description_base}, Query #{query_index}"
            tsv_line = f"{description}\t{query_text}"
            tsv_line_short = (
                tsv_line
                if len(tsv_line) < tsv_line_short_width
                else tsv_line[:tsv_line_short_width] + "..."
            )
            log.info(tsv_line_short)
            print(tsv_line, file=queries_file)
            query = None

    def execute(self, args) -> bool:
        """
        Extract the queries from the log file and write them to the output
        file. Returns `False` if one of the files could not be opened, read,
        or written, and `True` otherwise.
        """
        # Show what the command does.
        if args.log_file is not None:
            log_file_name = args.log_file
        else:
            log_file_name = f"{args.name}.server-log.txt"
        self.show(
            f"Extract SPARQL queries from `{log_file_name}`"
            f" and write them to `{args.output_file}`",
            only_show=args.show,
        )
        if args.show:
            return True

        # Read the log file line by line. The `with` statements make sure
        # that both files are closed, even if an error occurs on the way.
        # The log file is opened first, so that the output file is not
        # created when there is no log file.
        try:
            with open(log_file_name, "r") as log_file:
                with open(args.output_file, "w") as queries_file:
                    self._extract_queries(
                        log_file,
                        queries_file,
                        args.description_base,
                        args.use_alive_check_tag_as_description_base,
                    )
        except OSError as e:
            log.error(f"Problem extracting queries: {e}")
            return False
        return True
@@ -0,0 +1,48 @@
1
+ from __future__ import annotations
2
+
3
+ import shlex
4
+
5
+ from qlever.command import QleverCommand
6
+ from qlever.log import log
7
+ from qlever.util import get_total_file_size, run_command
8
+
9
+
10
class GetDataCommand(QleverCommand):
    """
    Class for executing the `get-data` command.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Get data using the GET_DATA_CMD in the Qleverfile"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name", "get_data_cmd"], "index": ["input_files"]}

    def additional_arguments(self, subparser) -> None:
        pass

    def execute(self, args) -> bool:
        """
        Run the `get_data_cmd` and afterwards print the total size of the
        files matching `input_files`. Returns `False` if the command failed
        and `True` otherwise.
        """
        # Construct the command line and show it.
        self.show(args.get_data_cmd, only_show=args.show)
        if args.show:
            return True

        # Execute the command line.
        try:
            run_command(args.get_data_cmd, show_output=True)
        except Exception as e:
            log.error(f"Problem executing \"{args.get_data_cmd}\": {e}")
            return False

        # Show the total file size in bytes and return. The value of
        # `input_files` is split like a shell command line, so that it may
        # contain several (possibly quoted) patterns.
        patterns = shlex.split(args.input_files)
        total_file_size = get_total_file_size(patterns)
        print(f"Download successful, total file size: "
              f"{total_file_size:,} bytes")
        return True