qlever 0.5.23__py3-none-any.whl → 0.5.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.ohm-planet +2 -1
- qlever/Qleverfiles/Qleverfile.osm-country +3 -2
- qlever/Qleverfiles/Qleverfile.osm-planet +4 -3
- qlever/Qleverfiles/Qleverfile.uniprot +2 -1
- qlever/Qleverfiles/Qleverfile.wikipathways +1 -1
- qlever/commands/benchmark_queries.py +1022 -0
- qlever/commands/cache_stats.py +54 -32
- qlever/commands/index.py +6 -0
- qlever/commands/query.py +2 -1
- qlever/commands/settings.py +7 -0
- qlever/commands/ui.py +11 -7
- qlever/commands/update_wikidata.py +554 -0
- qlever/qlever_main.py +1 -2
- qlever/qleverfile.py +22 -0
- qlever/util.py +1 -1
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/METADATA +4 -2
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/RECORD +21 -20
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/WHEEL +1 -1
- qlever/commands/example_queries.py +0 -605
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/licenses/LICENSE +0 -0
- {qlever-0.5.23.dist-info → qlever-0.5.25.dist-info}/top_level.txt +0 -0
qlever/commands/cache_stats.py
CHANGED
|
@@ -17,45 +17,58 @@ class CacheStatsCommand(QleverCommand):
|
|
|
17
17
|
pass
|
|
18
18
|
|
|
19
19
|
def description(self) -> str:
|
|
20
|
-
return
|
|
20
|
+
return "Show how much of the cache is currently being used"
|
|
21
21
|
|
|
22
22
|
def should_have_qleverfile(self) -> bool:
|
|
23
23
|
return False
|
|
24
24
|
|
|
25
|
-
def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
|
|
25
|
+
def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
|
|
26
26
|
return {"server": ["host_name", "port"]}
|
|
27
27
|
|
|
28
28
|
def additional_arguments(self, subparser) -> None:
|
|
29
|
-
subparser.add_argument(
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
29
|
+
subparser.add_argument(
|
|
30
|
+
"--server-url",
|
|
31
|
+
help="URL of the QLever server, default is {host_name}:{port}",
|
|
32
|
+
)
|
|
33
|
+
subparser.add_argument(
|
|
34
|
+
"--detailed",
|
|
35
|
+
action="store_true",
|
|
36
|
+
default=False,
|
|
37
|
+
help="Show detailed statistics and settings",
|
|
38
|
+
)
|
|
36
39
|
|
|
37
40
|
def execute(self, args) -> bool:
|
|
38
41
|
# Construct the two curl commands.
|
|
39
|
-
server_url = (
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
42
|
+
server_url = (
|
|
43
|
+
args.server_url
|
|
44
|
+
if args.server_url
|
|
45
|
+
else f"{args.host_name}:{args.port}"
|
|
46
|
+
)
|
|
47
|
+
cache_stats_cmd = (
|
|
48
|
+
f'curl -s {server_url} --data-urlencode "cmd=cache-stats"'
|
|
49
|
+
)
|
|
50
|
+
cache_settings_cmd = (
|
|
51
|
+
f'curl -s {server_url} --data-urlencode "cmd=get-settings"'
|
|
52
|
+
)
|
|
45
53
|
|
|
46
54
|
# Show them.
|
|
47
|
-
self.show(
|
|
48
|
-
|
|
55
|
+
self.show(
|
|
56
|
+
"\n".join([cache_stats_cmd, cache_settings_cmd]),
|
|
57
|
+
only_show=args.show,
|
|
58
|
+
)
|
|
49
59
|
if args.show:
|
|
50
60
|
return True
|
|
51
61
|
|
|
52
62
|
# Execute them.
|
|
53
63
|
try:
|
|
54
64
|
cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
|
|
55
|
-
cache_settings = subprocess.check_output(
|
|
56
|
-
|
|
65
|
+
cache_settings = subprocess.check_output(
|
|
66
|
+
cache_settings_cmd, shell=True
|
|
67
|
+
)
|
|
57
68
|
cache_stats_dict = json.loads(cache_stats)
|
|
58
69
|
cache_settings_dict = json.loads(cache_settings)
|
|
70
|
+
if isinstance(cache_settings_dict, list):
|
|
71
|
+
cache_settings_dict = cache_settings_dict[0]
|
|
59
72
|
except Exception as e:
|
|
60
73
|
log.error(f"Failed to get cache stats and settings: {e}")
|
|
61
74
|
return False
|
|
@@ -64,27 +77,35 @@ class CacheStatsCommand(QleverCommand):
|
|
|
64
77
|
if not args.detailed:
|
|
65
78
|
cache_size = cache_settings_dict["cache-max-size"]
|
|
66
79
|
if not cache_size.endswith(" GB"):
|
|
67
|
-
log.error(
|
|
68
|
-
|
|
80
|
+
log.error(
|
|
81
|
+
f"Cache size {cache_size} is not in GB, "
|
|
82
|
+
f"QLever should return bytes instead"
|
|
83
|
+
)
|
|
69
84
|
return False
|
|
70
85
|
else:
|
|
71
86
|
cache_size = float(cache_size[:-3])
|
|
72
|
-
pinned_size = cache_stats_dict["
|
|
73
|
-
non_pinned_size = cache_stats_dict["
|
|
87
|
+
pinned_size = cache_stats_dict["cache-size-pinned"] / 1e9
|
|
88
|
+
non_pinned_size = cache_stats_dict["cache-size-unpinned"] / 1e9
|
|
74
89
|
cached_size = pinned_size + non_pinned_size
|
|
75
90
|
free_size = cache_size - cached_size
|
|
76
91
|
if cached_size == 0:
|
|
77
92
|
log.info(f"Cache is empty, all {cache_size:.1f} GB available")
|
|
78
93
|
else:
|
|
79
|
-
log.info(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
94
|
+
log.info(
|
|
95
|
+
f"Pinned queries : "
|
|
96
|
+
f"{pinned_size:5.1f} GB of {cache_size:5.1f} GB"
|
|
97
|
+
f" [{pinned_size / cache_size:5.1%}]"
|
|
98
|
+
)
|
|
99
|
+
log.info(
|
|
100
|
+
f"Non-pinned queries : "
|
|
101
|
+
f"{non_pinned_size:5.1f} GB of {cache_size:5.1f} GB"
|
|
102
|
+
f" [{non_pinned_size / cache_size:5.1%}]"
|
|
103
|
+
)
|
|
104
|
+
log.info(
|
|
105
|
+
f"FREE : "
|
|
106
|
+
f"{free_size:5.1f} GB of {cache_size:5.1f} GB"
|
|
107
|
+
f" [{1 - cached_size / cache_size:5.1%}]"
|
|
108
|
+
)
|
|
88
109
|
return True
|
|
89
110
|
|
|
90
111
|
# Complete version.
|
|
@@ -96,6 +117,7 @@ class CacheStatsCommand(QleverCommand):
|
|
|
96
117
|
if re.match(r"^\d+\.\d+$", value):
|
|
97
118
|
value = "{:.2f}".format(float(value))
|
|
98
119
|
log.info(f"{key.ljust(max_key_len)} : {value}")
|
|
120
|
+
|
|
99
121
|
show_dict_as_table(cache_stats_dict.items())
|
|
100
122
|
log.info("")
|
|
101
123
|
show_dict_as_table(cache_settings_dict.items())
|
qlever/commands/index.py
CHANGED
|
@@ -36,9 +36,11 @@ class IndexCommand(QleverCommand):
|
|
|
36
36
|
"index": [
|
|
37
37
|
"input_files",
|
|
38
38
|
"cat_input_files",
|
|
39
|
+
"encode_as_id",
|
|
39
40
|
"multi_input_json",
|
|
40
41
|
"parallel_parsing",
|
|
41
42
|
"settings_json",
|
|
43
|
+
"vocabulary_type",
|
|
42
44
|
"index_binary",
|
|
43
45
|
"only_pso_and_pos_permutations",
|
|
44
46
|
"ulimit",
|
|
@@ -184,6 +186,7 @@ class IndexCommand(QleverCommand):
|
|
|
184
186
|
index_cmd = (
|
|
185
187
|
f"{args.cat_input_files} | {args.index_binary}"
|
|
186
188
|
f" -i {args.name} -s {args.name}.settings.json"
|
|
189
|
+
f" --vocabulary-type {args.vocabulary_type}"
|
|
187
190
|
f" -F {args.format} -f -"
|
|
188
191
|
)
|
|
189
192
|
if args.parallel_parsing:
|
|
@@ -199,6 +202,7 @@ class IndexCommand(QleverCommand):
|
|
|
199
202
|
index_cmd = (
|
|
200
203
|
f"{args.index_binary}"
|
|
201
204
|
f" -i {args.name} -s {args.name}.settings.json"
|
|
205
|
+
f" --vocabulary-type {args.vocabulary_type}"
|
|
202
206
|
f" {input_options}"
|
|
203
207
|
)
|
|
204
208
|
else:
|
|
@@ -212,6 +216,8 @@ class IndexCommand(QleverCommand):
|
|
|
212
216
|
return False
|
|
213
217
|
|
|
214
218
|
# Add remaining options.
|
|
219
|
+
if args.encode_as_id:
|
|
220
|
+
index_cmd += f" --encode-as-id {args.encode_as_id}"
|
|
215
221
|
if args.only_pso_and_pos_permutations:
|
|
216
222
|
index_cmd += " --only-pso-and-pos-permutations --no-patterns"
|
|
217
223
|
if not args.use_patterns:
|
qlever/commands/query.py
CHANGED
|
@@ -72,6 +72,7 @@ class QueryCommand(QleverCommand):
|
|
|
72
72
|
"application/sparql-results+json",
|
|
73
73
|
"application/sparql-results+xml",
|
|
74
74
|
"application/qlever-results+json",
|
|
75
|
+
"application/octet-stream",
|
|
75
76
|
],
|
|
76
77
|
default="text/tab-separated-values",
|
|
77
78
|
help="Accept header for the SPARQL query",
|
|
@@ -94,7 +95,7 @@ class QueryCommand(QleverCommand):
|
|
|
94
95
|
if args.pin_to_cache:
|
|
95
96
|
args.accept = "application/qlever-results+json"
|
|
96
97
|
curl_cmd_additions = (
|
|
97
|
-
f" --data
|
|
98
|
+
f" --data pin-result=true --data send=0"
|
|
98
99
|
f" --data access-token="
|
|
99
100
|
f"{shlex.quote(args.access_token)}"
|
|
100
101
|
f" | jq .resultsize | numfmt --grouping"
|
qlever/commands/settings.py
CHANGED
|
@@ -34,6 +34,8 @@ class SettingsCommand(QleverCommand):
|
|
|
34
34
|
"cache-max-size-single-entry",
|
|
35
35
|
"cache-service-results",
|
|
36
36
|
"default-query-timeout",
|
|
37
|
+
"division-by-zero-is-undef",
|
|
38
|
+
"enable-prefilter-on-index-scans",
|
|
37
39
|
"group-by-disable-index-scan-optimizations",
|
|
38
40
|
"group-by-hash-map-enabled",
|
|
39
41
|
"lazy-index-scan-max-size-materialization",
|
|
@@ -44,6 +46,9 @@ class SettingsCommand(QleverCommand):
|
|
|
44
46
|
"request-body-limit",
|
|
45
47
|
"service-max-value-rows",
|
|
46
48
|
"sort-estimate-cancellation-factor",
|
|
49
|
+
"spatial-join-prefilter-max-size",
|
|
50
|
+
"spatial-join-max-num-threads",
|
|
51
|
+
"syntax-test-mode",
|
|
47
52
|
"throw-on-unbound-variables",
|
|
48
53
|
"use-binsearch-transitive-path",
|
|
49
54
|
]
|
|
@@ -97,6 +102,8 @@ class SettingsCommand(QleverCommand):
|
|
|
97
102
|
try:
|
|
98
103
|
settings_json = run_command(curl_cmd, return_output=True)
|
|
99
104
|
settings_dict = json.loads(settings_json)
|
|
105
|
+
if isinstance(settings_dict, list):
|
|
106
|
+
settings_dict = settings_dict[0]
|
|
100
107
|
except Exception as e:
|
|
101
108
|
log.error(f"setting command failed: {e}")
|
|
102
109
|
return False
|
qlever/commands/ui.py
CHANGED
|
@@ -13,13 +13,16 @@ from qlever.util import is_port_used, run_command
|
|
|
13
13
|
|
|
14
14
|
# Return a YAML string for the given dictionary. Format values with
|
|
15
15
|
# newlines using the "|" style.
|
|
16
|
-
def dict_to_yaml(dictionary):
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
16
|
+
def dict_to_yaml(dictionary: dict) -> str:
|
|
17
|
+
"""
|
|
18
|
+
Custom representer for yaml, which uses the "|" style only for
|
|
19
|
+
multiline strings.
|
|
20
|
+
|
|
21
|
+
NOTE: We replace all `\r\n` with `\n` because otherwise the `|` style
|
|
22
|
+
does not work as expected.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
class MultiLineDumper(yaml.SafeDumper):
|
|
23
26
|
def represent_scalar(self, tag, value, style=None):
|
|
24
27
|
value = value.replace("\r\n", "\n")
|
|
25
28
|
if isinstance(value, str) and "\n" in value:
|
|
@@ -30,6 +33,7 @@ def dict_to_yaml(dictionary):
|
|
|
30
33
|
return yaml.dump(
|
|
31
34
|
dictionary,
|
|
32
35
|
sort_keys=False,
|
|
36
|
+
allow_unicode=True,
|
|
33
37
|
Dumper=MultiLineDumper,
|
|
34
38
|
)
|
|
35
39
|
|