qlever 0.5.12__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

qlever/commands/index.py CHANGED
@@ -99,28 +99,68 @@ class IndexCommand(QleverCommand):
99
99
  f"Element {i} in `MULTI_INPUT_JSON` must contain a " "key `cmd`",
100
100
  input_spec,
101
101
  )
102
- input_cmd = input_spec["cmd"]
102
+ # If the command contains a `{}` placeholder, we need a `for-each`
103
+ # key specifying the pattern for the placeholder values, and vice
104
+ # versa.
105
+ if "{}" in input_spec["cmd"] and "for-each" not in input_spec:
106
+ raise self.InvalidInputJson(
107
+ f"Element {i} in `MULTI_INPUT_JSON` must contain a "
108
+ "key `for-each` if the command contains a placeholder "
109
+ "`{}`",
110
+ input_spec,
111
+ )
112
+ if "for-each" in input_spec and "{}" not in input_spec["cmd"]:
113
+ raise self.InvalidInputJson(
114
+ f"Element {i} in `MULTI_INPUT_JSON` contains a "
115
+ "key `for-each`, but the command does not contain a "
116
+ "placeholder `{{}}`",
117
+ input_spec,
118
+ )
119
+ # Get all commands. This is just the value of the `cmd` key if no
120
+ # `for-each` key is specified. Otherwise, we have a command for
121
+ # each file matching the pattern.
122
+ if "for-each" not in input_spec:
123
+ input_cmds = [input_spec["cmd"]]
124
+ else:
125
+ try:
126
+ files = sorted(glob.glob(input_spec["for-each"]))
127
+ except Exception as e:
128
+ raise self.InvalidInputJson(
129
+ f"Element {i} in `MULTI_INPUT_JSON` contains an "
130
+ f"invalid `for-each` pattern: {e}",
131
+ input_spec,
132
+ )
133
+ input_cmds = [input_spec["cmd"].format(file) for file in files]
103
134
  # The `format`, `graph`, and `parallel` keys are optional.
104
135
  input_format = input_spec.get("format", args.format)
105
136
  input_graph = input_spec.get("graph", "-")
106
137
  input_parallel = input_spec.get("parallel", "false")
107
138
  # There must not be any other keys.
108
- extra_keys = input_spec.keys() - {"cmd", "format", "graph", "parallel"}
139
+ extra_keys = input_spec.keys() - {
140
+ "cmd",
141
+ "format",
142
+ "graph",
143
+ "parallel",
144
+ "for-each",
145
+ }
109
146
  if extra_keys:
110
147
  raise self.InvalidInputJson(
111
148
  f"Element {i} in `MULTI_INPUT_JSON` must only contain "
112
149
  "the keys `format`, `graph`, and `parallel`. Contains "
113
- "extra keys {extra_keys}.",
150
+ f"extra keys {extra_keys}.",
114
151
  input_spec,
115
152
  )
116
153
  # Add the command-line options for this input stream. We use
117
- # process substitution `<(...)` as a convenient way to handle
118
- # an input stream just like a file. This is not POSIX compliant,
119
- # but supported by various shells, including bash and zsh.
120
- input_options.append(
121
- f"-f <({input_cmd}) -F {input_format} "
122
- f'-g "{input_graph}" -p {input_parallel}'
123
- )
154
+ # process substitution `<(...)` as a convenient way to handle an
155
+ # input stream just like a file. This is not POSIX compliant, but
156
+ # supported by various shells, including bash and zsh. If
157
+ # `for-each` is specified, add one command for each matching file.
158
+ for input_cmd in input_cmds:
159
+ input_option = f"-f <({input_cmd}) -g {input_graph}"
160
+ input_option += f" -F {input_format}"
161
+ if input_parallel == "true":
162
+ input_option += " -p true"
163
+ input_options.append(input_option)
124
164
  # Return the concatenated command-line options.
125
165
  return " ".join(input_options)
126
166
 
@@ -200,7 +240,7 @@ class IndexCommand(QleverCommand):
200
240
  # Show the command line.
201
241
  self.show(f"{settings_json_cmd}\n{index_cmd}", only_show=args.show)
202
242
  if args.show:
203
- return False
243
+ return True
204
244
 
205
245
  # When running natively, check if the binary exists and works.
206
246
  if args.system == "native":
@@ -18,32 +18,45 @@ class IndexStatsCommand(QleverCommand):
18
18
  pass
19
19
 
20
20
  def description(self) -> str:
21
- return ("Breakdown of the time and space used for the index build")
21
+ return "Breakdown of the time and space used for the index build"
22
22
 
23
23
  def should_have_qleverfile(self) -> bool:
24
24
  return False
25
25
 
26
- def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
26
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
27
27
  return {"data": ["name"]}
28
28
 
29
29
  def additional_arguments(self, subparser) -> None:
30
- subparser.add_argument("--only-time", action="store_true",
31
- default=False,
32
- help="Show only the time used")
33
- subparser.add_argument("--only-space", action="store_true",
34
- default=False,
35
- help="Show only the space used")
36
- subparser.add_argument("--ignore-text-index", action="store_true",
37
- default=False,
38
- help="Ignore the text index")
39
- subparser.add_argument("--time-unit",
40
- choices=["s", "min", "h", "auto"],
41
- default="auto",
42
- help="The time unit")
43
- subparser.add_argument("--size-unit",
44
- choices=["B", "MB", "GB", "TB", "auto"],
45
- default="auto",
46
- help="The size unit")
30
+ subparser.add_argument(
31
+ "--only-time",
32
+ action="store_true",
33
+ default=False,
34
+ help="Show only the time used",
35
+ )
36
+ subparser.add_argument(
37
+ "--only-space",
38
+ action="store_true",
39
+ default=False,
40
+ help="Show only the space used",
41
+ )
42
+ subparser.add_argument(
43
+ "--ignore-text-index",
44
+ action="store_true",
45
+ default=False,
46
+ help="Ignore the text index",
47
+ )
48
+ subparser.add_argument(
49
+ "--time-unit",
50
+ choices=["s", "min", "h", "auto"],
51
+ default="auto",
52
+ help="The time unit",
53
+ )
54
+ subparser.add_argument(
55
+ "--size-unit",
56
+ choices=["B", "MB", "GB", "TB", "auto"],
57
+ default="auto",
58
+ help="The size unit",
59
+ )
47
60
 
48
61
  def execute_time(self, args, log_file_name) -> bool:
49
62
  """
@@ -65,8 +78,9 @@ class IndexStatsCommand(QleverCommand):
65
78
  with open(text_log_file_name, "r") as text_log_file:
66
79
  lines.extend(text_log_file.readlines())
67
80
  except Exception as e:
68
- log.error(f"Problem reading text index log file "
69
- f"{text_log_file_name}: {e}")
81
+ log.error(
82
+ f"Problem reading text index log file " f"{text_log_file_name}: {e}"
83
+ )
70
84
  return False
71
85
 
72
86
  # Helper function that finds the next line matching the given `regex`,
@@ -95,12 +109,14 @@ class IndexStatsCommand(QleverCommand):
95
109
  if regex_match:
96
110
  try:
97
111
  return datetime.strptime(
98
- re.match(timestamp_regex, line).group(),
99
- timestamp_format), regex_match
112
+ re.match(timestamp_regex, line).group(), timestamp_format
113
+ ), regex_match
100
114
  except Exception as e:
101
- log.error(f"Could not parse timestamp of form "
102
- f"\"{timestamp_regex}\" from line "
103
- f" \"{line.rstrip()}\" ({e})")
115
+ log.error(
116
+ f"Could not parse timestamp of form "
117
+ f'"{timestamp_regex}" from line '
118
+ f' "{line.rstrip()}" ({e})'
119
+ )
104
120
  # If we get here, we did not find a matching line.
105
121
  if not update_current_line:
106
122
  current_line = current_line_backup
@@ -119,26 +135,32 @@ class IndexStatsCommand(QleverCommand):
119
135
  # file (old format: "Creating a pair" + names of permutations in
120
136
  # line "Writing meta data for ..."; new format: name of
121
137
  # permutations already in line "Creating permutations ...").
122
- perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair",
123
- update_current_line=False)
138
+ perm_begin, _ = find_next_line(
139
+ r"INFO:\s*Creating a pair", update_current_line=False
140
+ )
124
141
  if perm_begin is None:
125
142
  perm_begin, perm_info = find_next_line(
126
143
  r"INFO:\s*Creating permutations ([A-Z]+ and [A-Z]+)",
127
- update_current_line=False)
144
+ update_current_line=False,
145
+ )
128
146
  else:
129
147
  _, perm_info = find_next_line(
130
148
  r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)",
131
- update_current_line=False)
149
+ update_current_line=False,
150
+ )
132
151
  if perm_info is None:
133
152
  break
134
153
  perm_begin_and_info.append((perm_begin, perm_info))
135
- convert_end = (perm_begin_and_info[0][0] if
136
- len(perm_begin_and_info) > 0 else None)
154
+ convert_end = (
155
+ perm_begin_and_info[0][0] if len(perm_begin_and_info) > 0 else None
156
+ )
137
157
  normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
138
- text_begin, _ = find_next_line(r"INFO:\s*Adding text index",
139
- update_current_line=False)
140
- text_end, _ = find_next_line(r"INFO:\s*Text index build comp",
141
- update_current_line=False)
158
+ text_begin, _ = find_next_line(
159
+ r"INFO:\s*Adding text index", update_current_line=False
160
+ )
161
+ text_end, _ = find_next_line(
162
+ r"INFO:\s*Text index build comp", update_current_line=False
163
+ )
142
164
  if args.ignore_text_index:
143
165
  text_begin = text_end = None
144
166
 
@@ -147,9 +169,11 @@ class IndexStatsCommand(QleverCommand):
147
169
  log.error("Missing line that index build has started")
148
170
  return False
149
171
  if overall_begin and not merge_begin:
150
- log.error("According to the log file, the index build "
151
- "has started, but is still in its first "
152
- "phase (parsing the input)")
172
+ log.error(
173
+ "According to the log file, the index build "
174
+ "has started, but is still in its first "
175
+ "phase (parsing the input)"
176
+ )
153
177
  return False
154
178
 
155
179
  # Helper function that shows the duration for a phase (if the start and
@@ -187,22 +211,24 @@ class IndexStatsCommand(QleverCommand):
187
211
  show_duration("Convert to global IDs", [(convert_begin, convert_end)])
188
212
  for i in range(len(perm_begin_and_info)):
189
213
  perm_begin, perm_info = perm_begin_and_info[i]
190
- perm_end = perm_begin_and_info[i + 1][0] if i + 1 < len(
191
- perm_begin_and_info) else normal_end
192
- perm_info_text = (perm_info.group(1).replace(" and ", " & ")
193
- if perm_info else f"#{i + 1}")
194
- show_duration(f"Permutation {perm_info_text}",
195
- [(perm_begin, perm_end)])
214
+ perm_end = (
215
+ perm_begin_and_info[i + 1][0]
216
+ if i + 1 < len(perm_begin_and_info)
217
+ else normal_end
218
+ )
219
+ perm_info_text = (
220
+ perm_info.group(1).replace(" and ", " & ") if perm_info else f"#{i + 1}"
221
+ )
222
+ show_duration(f"Permutation {perm_info_text}", [(perm_begin, perm_end)])
196
223
  show_duration("Text index", [(text_begin, text_end)])
197
224
  if text_begin and text_end:
198
225
  log.info("")
199
- show_duration("TOTAL time",
200
- [(overall_begin, normal_end),
201
- (text_begin, text_end)])
226
+ show_duration(
227
+ "TOTAL time", [(overall_begin, normal_end), (text_begin, text_end)]
228
+ )
202
229
  elif normal_end:
203
230
  log.info("")
204
- show_duration("TOTAL time",
205
- [(overall_begin, normal_end)])
231
+ show_duration("TOTAL time", [(overall_begin, normal_end)])
206
232
  return True
207
233
 
208
234
  def execute_space(self, args) -> bool:
@@ -252,24 +278,29 @@ class IndexStatsCommand(QleverCommand):
252
278
  return True
253
279
 
254
280
  def execute(self, args) -> bool:
255
- ret_value = args.show
281
+ return_value = True
256
282
 
257
283
  # The "time" part of the command.
258
284
  if not args.only_space:
259
285
  log_file_name = f"{args.name}.index-log.txt"
260
- self.show(f"Breakdown of the time used for "
261
- f"building the index, based on the timestamps for key "
262
- f"lines in \"{log_file_name}\"", only_show=args.show)
286
+ self.show(
287
+ f"Breakdown of the time used for "
288
+ f"building the index, based on the timestamps for key "
289
+ f'lines in "{log_file_name}"',
290
+ only_show=args.show,
291
+ )
263
292
  if not args.show:
264
- ret_value &= self.execute_time(args, log_file_name)
293
+ return_value &= self.execute_time(args, log_file_name)
265
294
  if not args.only_time:
266
295
  log.info("")
267
296
 
268
297
  # The "space" part of the command.
269
298
  if not args.only_time:
270
- self.show("Breakdown of the space used for building the index",
271
- only_show=args.show)
299
+ self.show(
300
+ "Breakdown of the space used for building the index",
301
+ only_show=args.show,
302
+ )
272
303
  if not args.show:
273
- ret_value &= self.execute_space(args)
304
+ return_value &= self.execute_space(args)
274
305
 
275
- return ret_value
306
+ return return_value
qlever/commands/log.py CHANGED
@@ -47,10 +47,20 @@ class LogCommand(QleverCommand):
47
47
  log_cmd += f" {log_file}"
48
48
  self.show(log_cmd, only_show=args.show)
49
49
  if args.show:
50
- return False
50
+ return True
51
51
 
52
52
  # Execute the command.
53
53
  log.info(f"Follow log file {log_file}, press Ctrl-C to stop"
54
54
  f" following (will not stop the server)")
55
55
  log.info("")
56
- subprocess.run(log_cmd, shell=True)
56
+ try:
57
+ subprocess.run(log_cmd, shell=True)
58
+ return True
59
+ except Exception as e:
60
+ log.error(e)
61
+ return False
62
+
63
+
64
+
65
+
66
+
qlever/commands/query.py CHANGED
@@ -18,42 +18,82 @@ class QueryCommand(QleverCommand):
18
18
  pass
19
19
 
20
20
  def description(self) -> str:
21
- return ("Send a query to a SPARQL endpoint")
21
+ return "Send a query to a SPARQL endpoint"
22
22
 
23
23
  def should_have_qleverfile(self) -> bool:
24
24
  return False
25
25
 
26
- def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
27
- return {"server": ["port"]}
26
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
27
+ return {"server": ["port", "access_token"]}
28
28
 
29
29
  def additional_arguments(self, subparser) -> None:
30
- subparser.add_argument("--query", type=str,
31
- default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
32
- help="SPARQL query to send")
33
- subparser.add_argument("--sparql-endpoint", type=str,
34
- help="URL of the SPARQL endpoint")
35
- subparser.add_argument("--accept", type=str,
36
- choices=["text/tab-separated-values",
37
- "text/csv",
38
- "application/sparql-results+json",
39
- "application/sparql-results+xml",
40
- "application/qlever-results+json"],
41
- default="text/tab-separated-values",
42
- help="Accept header for the SPARQL query")
43
- subparser.add_argument("--no-time", action="store_true",
44
- default=False,
45
- help="Do not print the (end-to-end) time taken")
30
+ subparser.add_argument(
31
+ "query",
32
+ type=str,
33
+ nargs="?",
34
+ default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
35
+ help="SPARQL query to send",
36
+ )
37
+ subparser.add_argument(
38
+ "--pin-to-cache",
39
+ action="store_true",
40
+ default=False,
41
+ help="Pin the query to the cache",
42
+ )
43
+ subparser.add_argument(
44
+ "--sparql-endpoint", type=str, help="URL of the SPARQL endpoint"
45
+ )
46
+ subparser.add_argument(
47
+ "--accept",
48
+ type=str,
49
+ choices=[
50
+ "text/tab-separated-values",
51
+ "text/csv",
52
+ "application/sparql-results+json",
53
+ "application/sparql-results+xml",
54
+ "application/qlever-results+json",
55
+ ],
56
+ default="text/tab-separated-values",
57
+ help="Accept header for the SPARQL query",
58
+ )
59
+ subparser.add_argument(
60
+ "--no-time",
61
+ action="store_true",
62
+ default=False,
63
+ help="Do not print the (end-to-end) time taken",
64
+ )
46
65
 
47
66
  def execute(self, args) -> bool:
67
+ # When pinning to the cache, set `send=0` and request media type
68
+ # `application/qlever-results+json` so that we get the result size.
69
+ # Also, we need to provide the access token.
70
+ if args.pin_to_cache:
71
+ args.accept = "application/qlever-results+json"
72
+ curl_cmd_additions = (
73
+ f" --data pinresult=true --data send=0"
74
+ f" --data access-token="
75
+ f"{shlex.quote(args.access_token)}"
76
+ f" | jq .resultsize | numfmt --grouping"
77
+ f" | xargs -I {{}} printf"
78
+ f' "Result pinned to cache,'
79
+ f' number of rows: {{}}\\n"'
80
+ )
81
+ else:
82
+ curl_cmd_additions = ""
83
+
48
84
  # Show what the command will do.
49
- sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint
50
- else f"localhost:{args.port}")
51
- curl_cmd = (f"curl -s {sparql_endpoint}"
52
- f" -H \"Accept: {args.accept}\""
53
- f" --data-urlencode query={shlex.quote(args.query)}")
85
+ sparql_endpoint = (
86
+ args.sparql_endpoint if args.sparql_endpoint else f"localhost:{args.port}"
87
+ )
88
+ curl_cmd = (
89
+ f"curl -s {sparql_endpoint}"
90
+ f' -H "Accept: {args.accept}"'
91
+ f" --data-urlencode query={shlex.quote(args.query)}"
92
+ f"{curl_cmd_additions}"
93
+ )
54
94
  self.show(curl_cmd, only_show=args.show)
55
95
  if args.show:
56
- return False
96
+ return True
57
97
 
58
98
  # Launch query.
59
99
  try:
@@ -62,8 +102,7 @@ class QueryCommand(QleverCommand):
62
102
  time_msecs = round(1000 * (time.time() - start_time))
63
103
  if not args.no_time and args.log_level != "NO_LOG":
64
104
  log.info("")
65
- log.info(f"Query processing time (end-to-end):"
66
- f" {time_msecs:,d} ms")
105
+ log.info(f"Query processing time (end-to-end):" f" {time_msecs:,d} ms")
67
106
  except Exception as e:
68
107
  if args.log_level == "DEBUG":
69
108
  traceback.print_exc()
@@ -0,0 +1,110 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+
5
+ from termcolor import colored
6
+
7
+ from qlever.command import QleverCommand
8
+ from qlever.log import log
9
+ from qlever.util import run_command
10
+
11
+
12
+ class SettingsCommand(QleverCommand):
13
+ """
14
+ Class for executing the `settings` command.
15
+ """
16
+
17
+ def __init__(self):
18
+ pass
19
+
20
+ def description(self) -> str:
21
+ return "Show or set server settings (after `qlever start`)"
22
+
23
+ def should_have_qleverfile(self) -> bool:
24
+ return True
25
+
26
+ def relevant_qleverfile_arguments(self) -> dict[str : list[str]]:
27
+ return {"server": ["port", "host_name", "access_token"]}
28
+
29
+ def additional_arguments(self, subparser) -> None:
30
+ all_keys = [
31
+ "always-multiply-unions",
32
+ "cache-max-num-entries",
33
+ "cache-max-size",
34
+ "cache-max-size-single-entry",
35
+ "default-query-timeout",
36
+ "group-by-disable-index-scan-optimizations",
37
+ "group-by-hash-map-enabled",
38
+ "lazy-index-scan-max-size-materialization",
39
+ "lazy-index-scan-num-threads",
40
+ "lazy-index-scan-queue-size",
41
+ "lazy-result-max-cache-size",
42
+ "query-planning-budget",
43
+ "service-max-value-rows",
44
+ "sort-estimate-cancellation-factor",
45
+ "throw-on-unbound-variables",
46
+ "use-binsearch-transitive-path",
47
+ ]
48
+ subparser.add_argument(
49
+ "runtime_parameter",
50
+ nargs="?",
51
+ help="Set the given runtime parameter (key=value)"
52
+ "; if no argument is given, show all settings",
53
+ ).completer = lambda **kwargs: [f"{key}=" for key in all_keys]
54
+ subparser.add_argument(
55
+ "--endpoint_url",
56
+ type=str,
57
+ help="An arbitrary endpoint URL "
58
+ "(overriding the one in the Qleverfile)",
59
+ )
60
+
61
+ def execute(self, args) -> bool:
62
+ # Get endpoint URL from command line or Qleverfile.
63
+ if args.endpoint_url:
64
+ endpoint_url = args.endpoint_url
65
+ else:
66
+ endpoint_url = f"http://{args.host_name}:{args.port}"
67
+
68
+ # Construct the `curl` command for getting or setting.
69
+ if args.runtime_parameter:
70
+ try:
71
+ parameter_key, parameter_value = args.runtime_parameter.split(
72
+ "="
73
+ )
74
+ except ValueError:
75
+ log.error("Runtime parameter must be given as `key=value`")
76
+ return False
77
+
78
+ curl_cmd = (
79
+ f"curl -s {endpoint_url}"
80
+ f' --data-urlencode "{parameter_key}={parameter_value}"'
81
+ f' --data-urlencode "access-token={args.access_token}"'
82
+ )
83
+ else:
84
+ curl_cmd = (
85
+ f"curl -s {endpoint_url}" f" --data-urlencode cmd=get-settings"
86
+ )
87
+ parameter_key, parameter_value = None, None
88
+ self.show(curl_cmd, only_show=args.show)
89
+ if args.show:
90
+ return True
91
+
92
+ # Execute the `curl` command. Note that the `get-settings` command
93
+ # returns all settings in both scenarios (that is, also when setting a
94
+ # parameter).
95
+ try:
96
+ settings_json = run_command(curl_cmd, return_output=True)
97
+ settings_dict = json.loads(settings_json)
98
+ except Exception as e:
99
+ log.error(f"setting command failed: {e}")
100
+ return False
101
+ for key, value in settings_dict.items():
102
+ print(
103
+ colored(
104
+ f"{key:<45}: {value}",
105
+ "blue"
106
+ if parameter_key and key == parameter_key
107
+ else None,
108
+ )
109
+ )
110
+ return True
@@ -60,7 +60,7 @@ class SetupConfigCommand(QleverCommand):
60
60
  setup_config_cmd += "> Qleverfile"
61
61
  self.show(setup_config_cmd, only_show=args.show)
62
62
  if args.show:
63
- return False
63
+ return True
64
64
 
65
65
  # If there is already a Qleverfile in the current directory, exit.
66
66
  qleverfile_path = Path("Qleverfile")