qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qlever/Qleverfiles/Qleverfile.dblp +36 -0
- qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
- qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
- qlever/Qleverfiles/Qleverfile.default +51 -0
- qlever/Qleverfiles/Qleverfile.dnb +40 -0
- qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
- qlever/Qleverfiles/Qleverfile.freebase +28 -0
- qlever/Qleverfiles/Qleverfile.imdb +36 -0
- qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
- qlever/Qleverfiles/Qleverfile.olympics +31 -0
- qlever/Qleverfiles/Qleverfile.orkg +30 -0
- qlever/Qleverfiles/Qleverfile.osm-country +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
- qlever/Qleverfiles/Qleverfile.pubchem +131 -0
- qlever/Qleverfiles/Qleverfile.scientists +29 -0
- qlever/Qleverfiles/Qleverfile.uniprot +74 -0
- qlever/Qleverfiles/Qleverfile.vvz +31 -0
- qlever/Qleverfiles/Qleverfile.wikidata +42 -0
- qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
- qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
- qlever/__init__.py +44 -1380
- qlever/command.py +87 -0
- qlever/commands/__init__.py +0 -0
- qlever/commands/add_text_index.py +115 -0
- qlever/commands/benchmark_queries.py +1019 -0
- qlever/commands/cache_stats.py +125 -0
- qlever/commands/clear_cache.py +88 -0
- qlever/commands/extract_queries.py +120 -0
- qlever/commands/get_data.py +48 -0
- qlever/commands/index.py +333 -0
- qlever/commands/index_stats.py +306 -0
- qlever/commands/log.py +66 -0
- qlever/commands/materialized_view.py +110 -0
- qlever/commands/query.py +142 -0
- qlever/commands/rebuild_index.py +176 -0
- qlever/commands/reset_updates.py +59 -0
- qlever/commands/settings.py +115 -0
- qlever/commands/setup_config.py +97 -0
- qlever/commands/start.py +336 -0
- qlever/commands/status.py +50 -0
- qlever/commands/stop.py +90 -0
- qlever/commands/system_info.py +130 -0
- qlever/commands/ui.py +271 -0
- qlever/commands/update.py +90 -0
- qlever/commands/update_wikidata.py +1204 -0
- qlever/commands/warmup.py +41 -0
- qlever/config.py +223 -0
- qlever/containerize.py +167 -0
- qlever/log.py +55 -0
- qlever/qlever_main.py +79 -0
- qlever/qleverfile.py +530 -0
- qlever/util.py +330 -0
- qlever-0.5.41.dist-info/METADATA +127 -0
- qlever-0.5.41.dist-info/RECORD +59 -0
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
- qlever-0.5.41.dist-info/entry_points.txt +2 -0
- qlever-0.5.41.dist-info/top_level.txt +1 -0
- build/lib/qlever/__init__.py +0 -1383
- build/lib/qlever/__main__.py +0 -4
- qlever/__main__.py +0 -4
- qlever-0.2.5.dist-info/METADATA +0 -277
- qlever-0.2.5.dist-info/RECORD +0 -12
- qlever-0.2.5.dist-info/entry_points.txt +0 -2
- qlever-0.2.5.dist-info/top_level.txt +0 -4
- src/qlever/__init__.py +0 -1383
- src/qlever/__main__.py +0 -4
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import re
|
|
5
|
+
import subprocess
|
|
6
|
+
|
|
7
|
+
from qlever.command import QleverCommand
|
|
8
|
+
from qlever.log import log
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class CacheStatsCommand(QleverCommand):
    """
    Class for executing the `cache-stats` command.

    Queries the server's `cache-stats` and `get-settings` endpoints via
    `curl` and shows either a brief summary of cache usage or, with
    `--detailed`, the full statistics and settings tables.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Show how much of the cache is currently being used"

    def should_have_qleverfile(self) -> bool:
        return False

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"server": ["host_name", "port"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--sparql-endpoint",
            help="URL of the SPARQL endpoint, default is {host_name}:{port}",
        )
        subparser.add_argument(
            "--detailed",
            action="store_true",
            default=False,
            help="Show detailed statistics and settings",
        )

    def execute(self, args) -> bool:
        """
        Fetch cache stats and settings from the server and display them.

        Returns `True` on success, `False` if the server could not be
        reached or returned data in an unexpected format.
        """
        # Construct the two curl commands.
        sparql_endpoint = (
            args.sparql_endpoint
            if args.sparql_endpoint
            else f"{args.host_name}:{args.port}"
        )
        cache_stats_cmd = (
            f'curl -s {sparql_endpoint} --data-urlencode "cmd=cache-stats"'
        )
        cache_settings_cmd = (
            f'curl -s {sparql_endpoint} --data-urlencode "cmd=get-settings"'
        )

        # Show them.
        self.show(
            "\n".join([cache_stats_cmd, cache_settings_cmd]),
            only_show=args.show,
        )
        if args.show:
            return True

        # Execute them.
        try:
            cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
            cache_settings = subprocess.check_output(
                cache_settings_cmd, shell=True
            )
            cache_stats_dict = json.loads(cache_stats)
            cache_settings_dict = json.loads(cache_settings)
            # Some server versions wrap the settings in a one-element list.
            if isinstance(cache_settings_dict, list):
                cache_settings_dict = cache_settings_dict[0]
        except Exception as e:
            log.error(f"Failed to get cache stats and settings: {e}")
            return False

        # Brief version.
        if not args.detailed:
            cache_size = cache_settings_dict["cache-max-size"]
            if not cache_size.endswith(" GB"):
                log.error(
                    f"Cache size {cache_size} is not in GB, "
                    "QLever should return bytes instead"
                )
                return False
            cache_size = float(cache_size[:-3])
            # The stats endpoint reports sizes in bytes; convert to GB.
            pinned_size = cache_stats_dict["cache-size-pinned"] / 1e9
            non_pinned_size = cache_stats_dict["cache-size-unpinned"] / 1e9
            cached_size = pinned_size + non_pinned_size
            free_size = cache_size - cached_size
            if cached_size == 0:
                log.info(f"Cache is empty, all {cache_size:.1f} GB available")
            else:
                log.info(
                    f"Pinned queries : "
                    f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB"
                    f" [{pinned_size / cache_size:5.1%}]"
                )
                log.info(
                    f"Non-pinned queries : "
                    f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB"
                    f" [{non_pinned_size / cache_size:5.1%}]"
                )
                log.info(
                    f"FREE : "
                    f"{free_size:5.1f} GB of {cache_size:5.1f} GB"
                    f" [{1 - cached_size / cache_size:5.1%}]"
                )
            return True

        # Complete version.
        def show_dict_as_table(key_value_pairs):
            # Align keys in a column; pretty-print integers with thousands
            # separators and floats with two decimals. Values may arrive as
            # numbers or as strings, so normalize to `str` before matching
            # (the original code raised TypeError on float values).
            max_key_len = max(
                (len(key) for key, _ in key_value_pairs), default=0
            )
            for key, value in key_value_pairs:
                if isinstance(value, (int, float)):
                    value = str(value)
                if re.match(r"^\d+$", value):
                    value = "{:,}".format(int(value))
                elif re.match(r"^\d+\.\d+$", value):
                    value = "{:.2f}".format(float(value))
                log.info(f"{key.ljust(max_key_len)} : {value}")

        show_dict_as_table(cache_stats_dict.items())
        log.info("")
        show_dict_as_table(cache_settings_dict.items())

        return True
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from qlever.command import QleverCommand
|
|
6
|
+
from qlever.commands.cache_stats import CacheStatsCommand
|
|
7
|
+
from qlever.log import log
|
|
8
|
+
from qlever.util import run_command
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ClearCacheCommand(QleverCommand):
    """
    Class for executing the `clear-cache` command.

    Sends `cmd=clear-cache` (or `cmd=clear-cache-complete`, which also
    clears pinned queries and requires the access token) to the server
    and afterwards shows the resulting cache stats.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Clear the query processing cache"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"server": ["host_name", "port", "access_token"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--sparql-endpoint",
            help="URL of the QLever server, default is {host_name}:{port}",
        )
        subparser.add_argument(
            "--complete",
            action="store_true",
            default=False,
            help="Clear the cache completely, including the pinned queries",
        )

    def execute(self, args) -> bool:
        """
        Send the clear-cache request and report success or failure.

        Returns `True` if the server answered with HTTP 200, `False`
        otherwise (the server's error message is logged).
        """
        # Determine SPARQL endpoint.
        sparql_endpoint = (
            args.sparql_endpoint
            if args.sparql_endpoint
            else (f"{args.host_name}:{args.port}")
        )

        # Construct command line and show it.
        clear_cache_cmd = f"curl -s {sparql_endpoint} -d cmd=clear-cache"
        if args.complete:
            # Turns the command into `cmd=clear-cache-complete`, which also
            # clears pinned queries and therefore needs the access token.
            clear_cache_cmd += (
                "-complete"
                f' --data-urlencode access-token="{args.access_token}"'
            )
        self.show(clear_cache_cmd, only_show=args.show)
        if args.show:
            return True

        # Execute the command.
        try:
            # Append the HTTP status code to the output so we can check it.
            clear_cache_cmd += ' -w " %{http_code}"'
            result = run_command(clear_cache_cmd, return_output=True)
            match = re.match(r"^(.*) (\d+)$", result, re.DOTALL)
            if not match:
                raise Exception(f"Unexpected output:\n{result}")
            error_message = match.group(1).strip()
            status_code = match.group(2)
            if status_code != "200":
                raise Exception(error_message)
            message = "Cache cleared successfully"
            if args.complete:
                message += " (pinned and unpinned queries)"
            else:
                message += " (only unpinned queries)"
            log.info(message)
        except Exception as e:
            log.error(e)
            return False

        # Show cache stats.
        log.info("")
        args.detailed = False
        if not CacheStatsCommand().execute(args):
            # NOTE: the original message contained a literal "{e}" (missing
            # f-prefix and no exception in scope); there is no exception to
            # report here, only the failed return value.
            log.error(
                "Clearing the cache was successful, but showing the "
                "cache stats failed"
            )
        return True
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from qlever.command import QleverCommand
|
|
6
|
+
from qlever.log import log
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class ExtractQueriesCommand(QleverCommand):
    """
    Class for executing the `extract-queries` command.

    Scans a QLever server log for "Processing the following SPARQL query"
    entries and writes each query, normalized to a single line, as a
    TSV record `<description>\t<query>` to the output file.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Extract all SPARQL queries from the server log"

    def should_have_qleverfile(self) -> bool:
        return True

    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name"]}

    def additional_arguments(self, subparser) -> None:
        subparser.add_argument(
            "--description-base",
            type=str,
            default="Log extract",
            help="Base name for the query descriptions"
            " (default: `Log extract`)",
        )
        subparser.add_argument(
            "--log-file",
            type=str,
            help="Name of the log file to extract queries from"
            " (default: `<name>.server-log.txt`)",
        )
        subparser.add_argument(
            "--output-file",
            type=str,
            default="log-queries.txt",
            help="Output file for the extracted queries (default: `log-queries.txt`)",
        )
        subparser.add_argument(
            "--use-alive-check-tag-as-description-base",
            action="store_true",
            help="Use the tag from 'Alive check' messages"
            " as the base for query descriptions (default: False)",
        )

    def execute(self, args) -> bool:
        """
        Parse the log file and write the extracted queries as TSV.

        Returns `True` after the log has been processed.
        """
        # Show what the command does.
        if args.log_file is not None:
            log_file_name = args.log_file
        else:
            log_file_name = f"{args.name}.server-log.txt"
        self.show(
            f"Extract SPARQL queries from `{log_file_name}`"
            f" and write them to `{args.output_file}`",
            only_show=args.show,
        )
        if args.show:
            return True

        # Regex for log entries of the form
        # 2025-01-14 04:47:44.950 - INFO
        log_line_regex = (
            r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3}) - [A-Z]+:"
        )

        # Read the log file line by line. Use `with` so both files are
        # closed even if parsing raises (the original code leaked the
        # handles in that case).
        with open(log_file_name, "r") as log_file, open(
            args.output_file, "w"
        ) as queries_file:
            query = None
            description_base = args.description_base
            description_base_count = {}
            tsv_line_short_width = 150
            for line in log_file:
                # An "Alive check" message contains a tag, which we use as
                # the base name of the query description.
                if args.use_alive_check_tag_as_description_base:
                    alive_check_regex = r"Alive check with message \"(.*)\""
                    match = re.search(alive_check_regex, line)
                    if match:
                        description_base = match.group(1)
                        continue

                # A new query in the log.
                if "Processing the following SPARQL query" in line:
                    query = []
                    query_index = (
                        description_base_count.get(description_base, 0) + 1
                    )
                    description_base_count[description_base] = query_index
                    continue
                # If we have started a query: extend until we meet the next
                # log line, then push the query. Remove comments.
                # NOTE(review): a query still open at end of file is
                # silently dropped — TODO confirm this is intended.
                if query is not None:
                    if not re.match(log_line_regex, line):
                        if not re.match(r"^\s*#", line):
                            line = re.sub(r" #.*", "", line)
                            query.append(line)
                    else:
                        query = re.sub(r"\s+", " ", "\n".join(query)).strip()
                        description = f"{description_base}, Query #{query_index}"
                        tsv_line = f"{description}\t{query}"
                        # Truncate long lines for console display only; the
                        # full line is always written to the output file.
                        tsv_line_short = (
                            tsv_line
                            if len(tsv_line) < tsv_line_short_width
                            else tsv_line[:tsv_line_short_width] + "..."
                        )
                        log.info(tsv_line_short)
                        print(tsv_line, file=queries_file)
                        query = None

        return True
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import shlex
|
|
4
|
+
|
|
5
|
+
from qlever.command import QleverCommand
|
|
6
|
+
from qlever.log import log
|
|
7
|
+
from qlever.util import get_total_file_size, run_command
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class GetDataCommand(QleverCommand):
    """
    Class for executing the `get-data` command.

    Runs the `GET_DATA_CMD` from the Qleverfile and reports the total
    size of the downloaded input files.
    """

    def __init__(self):
        pass

    def description(self) -> str:
        return "Get data using the GET_DATA_CMD in the Qleverfile"

    def should_have_qleverfile(self) -> bool:
        return True

    # NOTE: the original annotation was `dict[str: list[str]]`, which is a
    # slice expression, not a valid generic type; `dict[str, list[str]]`
    # matches the sibling commands.
    def relevant_qleverfile_arguments(self) -> dict[str, list[str]]:
        return {"data": ["name", "get_data_cmd"], "index": ["input_files"]}

    def additional_arguments(self, subparser) -> None:
        pass

    def execute(self, args) -> bool:
        """
        Run `GET_DATA_CMD` and show the total size of the input files.

        Returns `True` on success, `False` if the command failed.
        """
        # Construct the command line and show it.
        self.show(args.get_data_cmd, only_show=args.show)
        if args.show:
            return True

        # Execute the command line.
        try:
            run_command(args.get_data_cmd, show_output=True)
        except Exception as e:
            log.error(f"Problem executing \"{args.get_data_cmd}\": {e}")
            return False

        # Show the total file size in GB and return. Use `log.info` for
        # consistency with the other commands (the original used `print`).
        patterns = shlex.split(args.input_files)
        total_file_size = get_total_file_size(patterns)
        log.info(
            f"Download successful, total file size: "
            f"{total_file_size:,} bytes"
        )
        return True
|