qlever 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

qlever/__main__.py DELETED
@@ -1,1476 +0,0 @@
1
- #!/usr/bin/env python3
2
- # PYTHON_ARGCOMPLETE_OK
3
-
4
- # This is the `qlever` script (new version, written in Python). It serves as a
5
- # convenient command-line tool for all things QLever. See the `README.md` file
6
- # for how to use it.
7
-
8
- from configparser import ConfigParser, ExtendedInterpolation
9
- from datetime import datetime, date
10
- import os
11
- import glob
12
- import inspect
13
- import json
14
- import logging
15
- import psutil
16
- import re
17
- import shlex
18
- import shutil
19
- import socket
20
- import subprocess
21
- import sys
22
- import time
23
- import pkg_resources
24
- from termcolor import colored
25
- import traceback
26
-
27
- BLUE = "\033[34m"
28
- RED = "\033[31m"
29
- BOLD = "\033[1m"
30
- NORMAL = "\033[0m"
31
-
32
- from qlever.log import log
33
- # # Custom formatter for log messages.
34
- # class CustomFormatter(logging.Formatter):
35
- # def format(self, record):
36
- # message = record.getMessage()
37
- # if record.levelno == logging.DEBUG:
38
- # return colored(message, "magenta")
39
- # elif record.levelno == logging.WARNING:
40
- # return colored(message, "yellow")
41
- # elif record.levelno in [logging.CRITICAL, logging.ERROR]:
42
- # return colored(message, "red")
43
- # else:
44
- # return message
45
- #
46
- #
47
- # # Custom logger.
48
- # log = logging.getLogger("qlever")
49
- # log.setLevel(logging.INFO)
50
- # handler = logging.StreamHandler()
51
- # handler.setFormatter(CustomFormatter())
52
- # log.addHandler(handler)
53
-
54
-
55
- # Helper function for tracking the order of the actions in class `Actions`.
56
def track_action_rank(method):
    """
    Decorator that records the order in which actions are defined.

    The current value of the function attribute `counter` is stored on the
    decorated callable as `rank`, then the counter is advanced by one. The
    callable itself is returned unchanged.
    """

    next_rank = track_action_rank.counter
    setattr(method, "rank", next_rank)
    track_action_rank.counter = next_rank + 1
    return method


# The first decorated callable gets rank 0.
track_action_rank.counter = 0
61
-
62
-
63
- # Abort the script.
64
def abort_script(error_code=1):
    """
    Log an empty line and terminate the script with exit code `error_code`.
    """

    log.info("")
    # `sys.exit(code)` does nothing more than raise `SystemExit(code)`.
    raise SystemExit(error_code)
67
-
68
-
69
- # Show the available config names.
70
def show_available_config_names():
    """
    Log the names of all configs for which a Qleverfile is shipped.

    The Qleverfiles are expected in the directory `Qleverfiles` next to this
    script, with names of the form `Qleverfile.<config name>`. If the
    directory cannot be read or contains no such file, an error is logged and
    the script is aborted.
    """

    prefix = "Qleverfile."
    # Resolve symlinks, exactly like `action_setup_config` does, so that both
    # functions look in the same directory.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    qleverfiles_dir = os.path.join(script_dir, "Qleverfiles")
    try:
        # Only consider files that follow the naming scheme. The config name
        # is everything after the prefix (it may itself contain dots), which
        # is also what `action_setup_config` appends to `Qleverfile.`.
        config_names = sorted(
            file_name[len(prefix):]
            for file_name in os.listdir(qleverfiles_dir)
            if file_name.startswith(prefix) and len(file_name) > len(prefix))
        if not config_names:
            raise Exception(f"Directory \"{qleverfiles_dir}\" exists, but "
                            f"contains no Qleverfiles")
    except Exception as e:
        log.error(f"Could not find any Qleverfiles in \"{qleverfiles_dir}\" "
                  f"({e})")
        log.info("")
        log.info("Check that you have fully downloaded or cloned "
                 "https://github.com/ad-freiburg/qlever-control, and "
                 "not just the script itself")
        abort_script()
    # Show available config names.
    log.info(f"Available config names are: {', '.join(config_names)}")
91
-
92
-
93
- # Show the available action names.
94
def show_available_action_names():
    """
    Log an example action sequence, the list of all action names, and the
    command that enables autocompletion for them.
    """

    # One entry per logged line; empty strings produce the blank separators.
    messages = (
        "You can now execute a sequence of actions, for example:",
        "",
        f"{BLUE}qlever get-data index restart test-query ui {NORMAL}",
        "",
        f"Available action names are: {', '.join(action_names)}",
        "",
        "To get autocompletion for these, run the following or "
        "add it to your `.bashrc`:",
        "",
        f"{BLUE}eval \"$(qlever setup-autocompletion)\"{NORMAL}",
    )
    for message in messages:
        log.info(message)
105
-
106
-
107
- # We want to distinguish between exception that we throw intentionally and all
108
- # others.
109
class ActionException(Exception):
    """
    Exception that an action raises on purpose, as opposed to any other
    exception that signals an unexpected failure.
    """
111
-
112
-
113
- # This class contains all the action :-)
114
- class Actions:
115
-
116
def __init__(self):
    """
    Read the `Qleverfile` from the current directory, fill in default values
    for all options that are not mandatory, set the log level, and check the
    particulars of the installation.

    Aborts the script if the Qleverfile does not exist or cannot be read.
    The options `data.NAME` and `ui.CONFIG` as well as the sections [index]
    and [server] are read unconditionally and hence must be present.
    """

    self.config = ConfigParser(interpolation=ExtendedInterpolation())
    # Check if the Qleverfile exists.
    if not os.path.isfile("Qleverfile"):
        log.setLevel(logging.INFO)
        log.info("")
        log.error("The qlever script needs a \"Qleverfile\" "
                  "in the current directory, but I could not find it")
        log.info("")
        log.info("Run `qlever setup-config <config name>` to create a "
                 "pre-filled Qleverfile")
        log.info("")
        show_available_config_names()
        abort_script()
    files_read = self.config.read("Qleverfile")
    if not files_read:
        log.error("ConfigParser could not read \"Qleverfile\"")
        abort_script()
    self.name = self.config['data']['name']
    self.yes_values = ["1", "true", "yes"]

    # Defaults for [server] that carry over from [index].
    for option in ["with_text_index", "only_pso_and_pos_permutations",
                   "use_patterns"]:
        if option in self.config['index'] and \
                option not in self.config['server']:
            self.config['server'][option] = \
                    self.config['index'][option]

    # The default for server.URL depends on server.PORT, which itself is
    # optional. Determine the effective port first (a missing or empty
    # option falls back to the default), otherwise a Qleverfile without
    # server.PORT would fail with a KeyError before the default is applied.
    default_server_port = "7000"
    server_port = (self.config.get("server", "port", fallback="")
                   or default_server_port)

    # Default values for options that are not mandatory in the Qleverfile.
    defaults = {
        "general": {
            "log_level": "info",
            "pid": "0",
            "example_queries_url": (f"https://qlever.cs.uni-freiburg.de/"
                                    f"api/examples/"
                                    f"{self.config['ui']['config']}"),
            "example_queries_limit": "10",
            "example_queries_send": "0",
        },
        "index": {
            "binary": "IndexBuilderMain",
            "with_text_index": "false",
            "only_pso_and_pos_permutations": "false",
            "use_patterns": "true",
        },
        "server": {
            "port": default_server_port,
            "binary": "ServerMain",
            "num_threads": "8",
            "cache_max_size": "5G",
            "cache_max_size_single_entry": "1G",
            "cache_max_num_entries": "100",
            "with_text_index": "false",
            "only_pso_and_pos_permutations": "false",
            "timeout": "30s",
            "use_patterns": "true",
            "url": f"http://localhost:{server_port}",
        },
        "docker": {
            "image": "adfreiburg/qlever",
            "container_server": f"qlever.server.{self.name}",
            "container_indexer": f"qlever.indexer.{self.name}",
        },
        "ui": {
            "port": "7000",
            "image": "adfreiburg/qlever-ui",
            "container": "qlever-ui",
            "url": "https://qlever.cs.uni-freiburg.de/api",
        }
    }
    for section in defaults:
        # If the section does not exist, create it.
        if not self.config.has_section(section):
            self.config[section] = {}
        # If an option does not exist (or is empty), set it to the default
        # value.
        for option in defaults[section]:
            if not self.config[section].get(option):
                self.config[section][option] = defaults[section][option]

    # If the log level was not explicitly set by the first command-line
    # argument (see below), set it according to the Qleverfile.
    if log.level == logging.NOTSET:
        log_level = self.config['general']['log_level'].upper()
        try:
            log.setLevel(getattr(logging, log_level))
        except AttributeError:
            log.error(f"Invalid log level: \"{log_level}\"")
            abort_script()

    # Show some information (for testing purposes only).
    log.debug(f"Parsed Qleverfile, sections are: "
              f"{', '.join(self.config.sections())}")

    # Check specifics of the installation.
    self.check_installation()
213
-
214
def check_installation(self):
    """
    Probe the installation once and remember the outcome in
    `self.net_connections_enabled` and `self.docker_enabled`, so that the
    actions can adapt instead of failing.
    """

    # Some systems (like macOS) do not allow psutil.net_connections().
    try:
        psutil.net_connections()
    except Exception as e:
        self.net_connections_enabled = False
        log.debug(f"Note: psutil.net_connections() failed ({e}),"
                  f" will not scan network connections for action"
                  f" \"start\"")
    else:
        self.net_connections_enabled = True

    # Docker counts as working only if `docker info` terminates in time
    # with exit code 0 (on MacOS 12, docker hangs when installed without
    # GUI, hence the timeout).
    try:
        docker_info = subprocess.run(
                ["docker", "info"], timeout=0.5,
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        docker_works = docker_info.returncode == 0
    except Exception:
        docker_works = False
    self.docker_enabled = docker_works
    if not docker_works:
        print("Note: `docker info` failed, therefore"
              " docker.USE_DOCKER=true not supported")
244
-
245
def set_config(self, section, option, value):
    """
    Helper function that overwrites the value of an existing option in the
    configuration. If the section or the option does not exist, an error is
    logged and the script is aborted.
    """

    problem = None
    if not self.config.has_section(section):
        problem = f"Section [{section}] does not exist in Qleverfile"
    elif not self.config.has_option(section, option):
        problem = (f"Option {option.upper()} does not exist in section "
                   f"[{section}] in Qleverfile")
    if problem is not None:
        log.error(problem)
        abort_script()
    self.config[section][option] = value
259
-
260
def get_total_file_size(self, paths):
    """
    Helper function that returns the combined size, in GB (1e9 bytes), of
    all files matched by the glob patterns in `paths`.
    """

    num_bytes = sum(os.path.getsize(file_name)
                    for pattern in paths
                    for file_name in glob.glob(pattern))
    return num_bytes / 1e9
271
-
272
def alive_check(self, port):
    """
    Helper function that pings `http://localhost:<port>` via `curl` and
    returns `True` if and only if `curl` exits with code 0.
    """

    ping_message = "from the qlever script".replace(" ", "%20")
    ping_cmd = f"curl -s http://localhost:{port}/ping?msg={ping_message}"
    completed = subprocess.run(ping_cmd, shell=True,
                               stdout=subprocess.DEVNULL,
                               stderr=subprocess.DEVNULL)
    return completed.returncode == 0
284
-
285
def show_process_info(self, psutil_process,
                      cmdline_regex, show_heading=True):
    """
    Helper function that logs one table row for the given process, provided
    that its information can be retrieved and its command line matches
    `cmdline_regex`. In that case `True` is returned, and the row is
    preceded by a heading row if `show_heading` is `True`. In all other
    cases nothing is shown and `False` is returned.
    """

    row_format = "{:<8} {:<8} {:>5} {:>5} {}"
    try:
        info = psutil_process.as_dict(
                attrs=['pid', 'username', 'create_time',
                       'memory_info', 'cmdline'])
        command = " ".join(info['cmdline'])
        if re.search(cmdline_regex, command) is None:
            return False
        # Show the time of day for processes started today, and the date
        # for all others.
        started = datetime.fromtimestamp(info['create_time'])
        started_today = started.date() == date.today()
        started = started.strftime("%H:%M" if started_today else "%b%d")
        # Assemble all rows before logging anything, so that a failure
        # while gathering the values does not leave a lone heading behind.
        rows = []
        if show_heading:
            rows.append(("PID", "USER", "START", "RSS", "COMMAND"))
        rows.append((info['pid'],
                     info['username'] or "",
                     started,
                     f"{info['memory_info'].rss / 1e9:.0f}G",
                     command))
        for row in rows:
            log.info(row_format.format(*row))
        return True
    except Exception as e:
        log.debug(f"Could not get process info: {e}")
        return False
318
-
319
def show(self, action_description, only_show):
    """
    Helper function that logs the command line or description of an action
    in blue. If `only_show` is set, it additionally logs that the action is
    shown but not executed.
    """

    for message in (f"{BLUE}{action_description}{NORMAL}", ""):
        log.info(message)
    if not only_show:
        return
    log.info("You called \"qlever ... show\", therefore the action "
             "is only shown, but not executed (omit the \"show\" to "
             "execute it)")
331
-
332
@staticmethod
@track_action_rank
def action_setup_config(config_name="default"):
    """
    Action that copies the pre-filled Qleverfile for `config_name` to the
    current directory. Aborts the script if a Qleverfile already exists
    there, or if the requested Qleverfile does not exist or cannot be
    copied.
    """

    target = "Qleverfile"

    # Never overwrite an existing Qleverfile.
    if os.path.isfile(target):
        log.error("Qleverfile already exists in current directory")
        log.info("")
        log.info("If you want to create a new Qleverfile using "
                 "`qlever setup-config`, delete the existing Qleverfile "
                 "first")
        abort_script()

    # The pre-filled Qleverfiles live next to this script.
    this_dir = os.path.dirname(os.path.realpath(__file__))
    source = os.path.join(this_dir, f"Qleverfiles/Qleverfile.{config_name}")
    if not os.path.isfile(source):
        log.error(f"File \"{source}\" does not exist")
        log.info("")
        abort_script()
    try:
        shutil.copy(source, target)
    except Exception as e:
        log.error(f"Could not copy \"{source}\" to current directory: {e}")
        abort_script()

    # Tell the user what was done and what to do next.
    log.info(f"Created Qleverfile for config \"{config_name}\" "
             f"in current directory")
    log.info("")
    if config_name != "default":
        show_available_action_names()
    else:
        log.info("Since this is the default Qleverfile, you need to "
                 "edit it before you can continue")
        log.info("")
        log.info("Afterwards, run `qlever` without arguments to see "
                 "which actions are available")
    log.info("")
378
-
379
@track_action_rank
def action_show_config(self, only_show=False):
    """
    Action that shows the current configuration including the default
    values for options that are not set explicitly in the Qleverfile.

    The parameter `only_show` is accepted like for all actions, but not
    used: showing the configuration has no side effects.
    """

    print(f"{BLUE}Showing the current configuration, including default"
          f" values for options that are not set explicitly in the"
          f" Qleverfile{NORMAL}")
    for section in self.config.sections():
        print()
        print(f"[{section}]")
        options = self.config[section]
        # Pad all option names of a section to the same width. The explicit
        # default keeps `max` from raising a `ValueError` for a section
        # that has no options at all.
        max_option_length = max((len(option) for option in options),
                                default=0)
        for option in options:
            print(f"{option.upper().ljust(max_option_length)} = "
                  f"{options[option]}")

    print()
399
-
400
@track_action_rank
def action_get_data(self, only_show=False):
    """
    Action that gets the data according to GET_DATA_CMD and then prints the
    total size of the files matching index.FILE_NAMES. With `only_show`,
    the command is shown but not executed.
    """

    # Get the command line. A missing option is treated like an empty one,
    # so that the user gets the error message below instead of a KeyError.
    get_data_cmd = self.config['data'].get('get_data_cmd')
    if not get_data_cmd:
        # Reset the color at the end, so that the red does not leak into
        # whatever is written to the terminal next.
        log.error(f"{RED}No GET_DATA_CMD specified in Qleverfile{NORMAL}")
        return

    # Show it.
    self.show(get_data_cmd, only_show)
    if only_show:
        return

    # Execute the command line.
    subprocess.run(get_data_cmd, shell=True)
    total_file_size = self.get_total_file_size(
        self.config['index']['file_names'].split())
    print(f"Total file size: {total_file_size:.1f} GB")
422
-
423
@track_action_rank
def action_index(self, only_show=False):
    """
    Action that builds a QLever index according to the settings in the
    [index] section of the Qleverfile. With `only_show`, the command line
    is shown but not executed.

    Raises an `ActionException` if index files for this dataset already
    exist. Aborts the script if docker.USE_DOCKER is not set and the index
    builder binary does not work.
    """

    # Construct the command line based on the config file.
    index_config = self.config['index']
    cmdline = (f"{index_config['cat_files']} | {index_config['binary']}"
               f" -F ttl -f -"
               f" -i {self.name}"
               f" -s {self.name}.settings.json")
    only_pso_and_pos = (index_config['only_pso_and_pos_permutations']
                        in self.yes_values)
    use_patterns = index_config['use_patterns'] in self.yes_values
    if only_pso_and_pos:
        cmdline += " --only-pso-and-pos-permutations"
    # Having only two permutations implies no patterns. Add the option only
    # once, even if patterns are disabled explicitly as well.
    if only_pso_and_pos or not use_patterns:
        cmdline += " --no-patterns"
    if index_config['with_text_index'] in \
            ["from_text_records", "from_text_records_and_literals"]:
        cmdline += (f" -w {self.name}.wordsfile.tsv"
                    f" -d {self.name}.docsfile.tsv")
    if index_config['with_text_index'] in \
            ["from_literals", "from_text_records_and_literals"]:
        cmdline += " --text-words-from-literals"
    if 'stxxl_memory' in index_config:
        cmdline += f" --stxxl-memory {index_config['stxxl_memory']}"
    cmdline += f" | tee {self.name}.index-log.txt"

    # If the total file size is larger than 10 GB, set ulimit (such that a
    # large number of open files is allowed).
    total_file_size = self.get_total_file_size(
        self.config['index']['file_names'].split())
    if total_file_size > 10:
        cmdline = f"ulimit -Sn 1048576; {cmdline}"

    # If we are using Docker, run the command in a Docker container. The
    # whole pipeline is passed as a single quoted argument to `bash -c`.
    if self.config['docker']['use_docker'] in self.yes_values:
        docker_config = self.config['docker']
        cmdline = (f"docker run -it --rm -u $(id -u):$(id -g)"
                   f" -v /etc/localtime:/etc/localtime:ro"
                   f" -v $(pwd):/index -w /index"
                   f" --entrypoint bash"
                   f" --name {docker_config['container_indexer']}"
                   f" {docker_config['image']}"
                   f" -c {shlex.quote(cmdline)}")

    # Show the command line.
    self.show(f"Write value of config variable index.SETTINGS_JSON to "
              f"file {self.name}.settings.json\n"
              f"{cmdline}", only_show)
    if only_show:
        return

    # When docker.USE_DOCKER=false, check if the binary for building the
    # index exists and works.
    if self.config['docker']['use_docker'] not in self.yes_values:
        try:
            check_binary_cmd = f"{self.config['index']['binary']} --help"
            subprocess.run(check_binary_cmd, shell=True, check=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            log.error(f"Running \"{check_binary_cmd}\" failed ({e}), "
                      f"set index.BINARY to a different binary or "
                      f"set docker.USE_DOCKER=true")
            abort_script()

    # Check if index files (name.index.*) already exist.
    if glob.glob(f"{self.name}.index.*"):
        raise ActionException(
                f"Index files \"{self.name}.index.*\" already exist, "
                f"please delete them if you want to rebuild the index")

    # Write settings.json file and run the command.
    with open(f"{self.name}.settings.json", "w") as f:
        f.write(self.config['index']['settings_json'])
    subprocess.run(cmdline, shell=True)
501
-
502
@track_action_rank
def action_remove_index(self, only_show=False):
    """
    Action that deletes the index files of this dataset and reports which
    files were removed and how much space they took. With `only_show`, the
    file patterns are shown but nothing is deleted.
    """

    # Patterns for all the index files (not all of them need to be there).
    suffix_patterns = ("index.*", "patterns.*", "prefixes",
                       "meta-data.json", "vocabulary.*")
    index_fileglobs = tuple(f"{self.name}.{suffix_pattern}"
                            for suffix_pattern in suffix_patterns)

    # Show what is going to be removed.
    self.show(f"Remove index files {', '.join(index_fileglobs)}",
              only_show)
    if only_show:
        return

    # Remove the files. The patterns are expanded lazily one after the
    # other, and only regular files are deleted.
    removed = []
    num_bytes_removed = 0
    candidates = (candidate
                  for index_fileglob in index_fileglobs
                  for candidate in glob.glob(index_fileglob))
    for candidate in candidates:
        if not os.path.isfile(candidate):
            continue
        num_bytes_removed += os.path.getsize(candidate)
        os.remove(candidate)
        removed.append(candidate)

    # Report the result.
    if not removed:
        log.info("None of the listed index files found, nothing removed")
        return
    log.info(f"Removed the following index files of total size "
             f"{num_bytes_removed / 1e9:.1f} GB:")
    log.info("")
    log.info(", ".join(removed))
537
-
538
@track_action_rank
def action_start(self, only_show=False):
    """
    Action that starts the QLever server according to the settings in the
    [server] section of the Qleverfile, follows the server log until the
    server responds, and then sets the index and text description (if
    specified in the [data] section). With `only_show`, the command line is
    shown but not executed.

    Raises an `ActionException` if a QLever server already responds on the
    configured port or if another process is listening on that port. Aborts
    the script if docker.USE_DOCKER is not set and the server binary does
    not work.
    """

    # Construct the command line based on the config file. The access token
    # is optional: a missing option is treated like an empty one.
    server_config = self.config['server']
    access_token = server_config.get('access_token', fallback="") or ""
    cmdline = (f"{self.config['server']['binary']}"
               f" -i {self.name}"
               f" -j {server_config['num_threads']}"
               f" -p {server_config['port']}"
               f" -m {server_config['memory_for_queries']}"
               f" -c {server_config['cache_max_size']}"
               f" -e {server_config['cache_max_size_single_entry']}"
               f" -k {server_config['cache_max_num_entries']}")
    if server_config['timeout']:
        cmdline += f" -s {server_config['timeout']}"
    if access_token:
        # Quote the token, so that special characters survive the shell.
        cmdline += f" -a {shlex.quote(access_token)}"
    if server_config['only_pso_and_pos_permutations'] in self.yes_values:
        cmdline += " --only-pso-and-pos-permutations"
    if not server_config['use_patterns'] in self.yes_values:
        cmdline += " --no-patterns"
    if server_config['with_text_index'] in \
            ["from_text_records",
             "from_literals",
             "from_text_records_and_literals"]:
        cmdline += " -t"
    cmdline += f" > {self.name}.server-log.txt 2>&1"

    # If we are using Docker, run the command in a docker container.
    if self.config['docker']['use_docker'] in self.yes_values:
        docker_config = self.config['docker']
        cmdline = (f"docker run -d --restart=unless-stopped"
                   f" -u $(id -u):$(id -g)"
                   f" -it -v /etc/localtime:/etc/localtime:ro"
                   f" -v $(pwd):/index"
                   f" -p {server_config['port']}:{server_config['port']}"
                   f" -w /index"
                   f" --entrypoint bash"
                   f" --name {docker_config['container_server']}"
                   f" --init"
                   f" {docker_config['image']}"
                   f" -c {shlex.quote(cmdline)}")
    else:
        cmdline = f"nohup {cmdline} &"

    # Show the command line.
    self.show(cmdline, only_show)
    if only_show:
        return

    # When docker.USE_DOCKER=false, check if the binary for starting the
    # server exists and works.
    if self.config['docker']['use_docker'] not in self.yes_values:
        try:
            check_binary_cmd = f"{self.config['server']['binary']} --help"
            subprocess.run(check_binary_cmd, shell=True, check=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError as e:
            log.error(f"Running \"{check_binary_cmd}\" failed ({e}), "
                      f"set server.BINARY to a different binary or "
                      f"set docker.USE_DOCKER=true")
            abort_script()

    # Check if a QLever server is already running on this port.
    port = server_config['port']
    if self.alive_check(port):
        raise ActionException(
                f"QLever server already running on port {port}")

    # Check if another process is already listening. The port from the
    # config is a string, while psutil reports integers, so compare the
    # string representations. Only sockets in state LISTEN count, so that
    # leftover connections of a server that was just stopped do not block
    # a restart.
    if self.net_connections_enabled:
        port_is_taken = any(
                conn.status == psutil.CONN_LISTEN and conn.laddr
                and str(conn.laddr.port) == port
                for conn in psutil.net_connections())
        if port_is_taken:
            raise ActionException(
                    f"Port {port} is already in use by another process")

    # Execute the command line.
    subprocess.run(cmdline, shell=True,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)

    # Tail the server log until the server is ready (note that the `exec`
    # is important to make sure that the tail process is killed and not
    # just the bash process).
    log.info(f"Follow {self.name}.server-log.txt until the server is ready"
             f" (Ctrl-C stops following the log, but not the server)")
    log.info("")
    tail_cmd = f"exec tail -f {self.name}.server-log.txt"
    tail_proc = subprocess.Popen(tail_cmd, shell=True)
    try:
        while not self.alive_check(port):
            time.sleep(1)

        # Set the descriptions if specified, passing the access token
        # along. All values are quoted, so that quotes or `$` in a
        # description cannot break (or extend) the shell command.
        token_arg = shlex.quote(f"access-token={access_token}")
        for kind in ("index", "text"):
            option = f"{kind}_description"
            if option not in self.config['data']:
                continue
            desc = self.config['data'][option]
            desc_arg = shlex.quote(f"{kind}-description={desc}")
            curl_cmd = (f"curl -Gs http://localhost:{port}/api"
                        f" --data-urlencode {desc_arg}"
                        f" --data-urlencode {token_arg} > /dev/null")
            log.debug(curl_cmd)
            subprocess.run(curl_cmd, shell=True)
    finally:
        # Stop the tail process, also when waiting for the server was
        # interrupted. NOTE: `tail_proc.kill()` does not work.
        tail_proc.terminate()
656
-
657
@track_action_rank
def action_stop(self, only_show=False, fail_if_not_running=True):
    """
    Action that stops the QLever server for this dataset. It first tries to
    stop and remove the Docker container with the configured name, and
    otherwise kills the first process whose command line matches the server
    binary and the name of the dataset. With `only_show`, only the
    description of the action is shown.

    Raises an `ActionException` if a matching process cannot be killed, or
    if nothing was found to stop and `fail_if_not_running` is `True`.
    """

    # Show action description.
    docker_container_name = self.config['docker']['container_server']
    # NOTE(review): the pattern hard-codes "ServerMain" and is applied with
    # `re.match`, so only command lines that start with "ServerMain" are
    # found, regardless of server.BINARY. Confirm that this is intended.
    cmdline_regex = (f"ServerMain.* -i [^ ]*{self.name}")
    self.show(f"Checking for process matching \"{cmdline_regex}\" "
              f"and for Docker container with name "
              f"\"{docker_container_name}\"", only_show)
    if only_show:
        return

    # First check if there is docker container running.
    if self.docker_enabled:
        docker_cmd = (f"docker stop {docker_container_name} && "
                      f"docker rm {docker_container_name}")
        try:
            subprocess.run(docker_cmd, shell=True, check=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
            log.info(f"Docker container with name "
                     f"\"{docker_container_name}\" "
                     f"stopped and removed")
            return
        except Exception as e:
            log.debug(f"Error running \"{docker_cmd}\": {e}")

    # Check if there is a process running on the server port using psutil.
    #
    # NOTE: On MacOS, some of the proc's returned by psutil.process_iter()
    # no longer exist when we try to access them, so we just skip them.
    for proc in psutil.process_iter():
        try:
            pinfo = proc.as_dict(
                    attrs=['pid', 'username', 'create_time',
                           'memory_info', 'cmdline'])
            cmdline = " ".join(pinfo['cmdline'])
        except Exception as err:
            log.debug(f"Error getting process info: {err}")
            # Without this, the check below would run with `cmdline` being
            # undefined or left over from the previous process.
            continue
        if re.match(cmdline_regex, cmdline):
            log.info(f"Found process {pinfo['pid']} from user "
                     f"{pinfo['username']} with command line: {cmdline}")
            print()
            try:
                proc.kill()
                log.info(f"Killed process {pinfo['pid']}")
            except Exception as e:
                raise ActionException(
                        f"Could not kill process with PID "
                        f"{pinfo['pid']}: {e}") from e
            return

    # No matching process found.
    message = "No matching process or Docker container found"
    if fail_if_not_running:
        raise ActionException(message)
    else:
        log.info(f"{message}, so nothing to stop")
720
-
721
@track_action_rank
def action_restart(self, only_show=False):
    """
    Action that stops the QLever server (without complaining if none is
    running) and then starts it again. With `only_show`, only the
    description of the action is shown.
    """

    description = "Stop running server if found, then start new server"
    self.show(description, only_show)
    if not only_show:
        # A server that is not running is fine here, we start one anyway.
        self.action_stop(only_show=only_show, fail_if_not_running=False)
        log.info("")
        self.action_start()
737
-
738
@track_action_rank
def action_log(self, only_show=False):
    """
    Action that shows the last 50 lines of the server log and then keeps
    following it. With `only_show`, the command line is shown but not
    executed.
    """

    server_log = f"{self.name}.server-log.txt"
    log_cmd = f"tail -f -n 50 {server_log}"
    self.show(log_cmd, only_show)
    if not only_show:
        log.info(f"Follow {server_log} (Ctrl-C stops"
                 f" following the log, but not the server)")
        log.info("")
        subprocess.run(log_cmd, shell=True)
755
-
756
@track_action_rank
def action_status(self, only_show=False):
    """
    Action that lists, as a table, all processes on this machine whose
    command line mentions the QLever server or index builder binary. With
    `only_show`, only the description of the action is shown.

    TODO: Also show the QLever-related docker containers.
    """

    # Show action description.
    cmdline_regex = "(ServerMain|IndexBuilderMain)"
    self.show(f"{BLUE}Show all processes on this machine where "
              f"the command line matches {cmdline_regex}"
              f" using Python's psutil library", only_show)
    if only_show:
        return

    # Show one row per matching process. The heading is requested for
    # every process until the first one has actually been shown.
    found_any = False
    for proc in psutil.process_iter():
        if self.show_process_info(proc, cmdline_regex,
                                  show_heading=not found_any):
            found_any = True
    if not found_any:
        print("No processes found")
783
-
784
- @track_action_rank
785
- def action_index_stats(self, only_show=False):
786
- """
787
- Action that provides a breakdown of the time needed for building the
788
- index, based on the log file of th index build.
789
- """
790
-
791
- log_file_name = self.config['data']['name'] + ".index-log.txt"
792
- log.info(f"{BLUE}Breakdown of the time for building the index, "
793
- f"based on the timestamps for key lines in "
794
- f"\"{log_file_name}{NORMAL}\"")
795
- log.info("")
796
- if only_show:
797
- return
798
-
799
- # Read the content of `log_file_name` into a list of lines.
800
- try:
801
- with open(log_file_name, "r") as f:
802
- lines = f.readlines()
803
- except Exception as e:
804
- raise ActionException(f"Could not read log file {log_file_name}: "
805
- f"{e}")
806
- current_line = 0
807
-
808
- # Helper lambda that finds the next line matching the given `regex`,
809
- # starting from `current_line`, and extracts the time. Returns a tuple
810
- # of the time and the regex match object. If a match is found,
811
- # `current_line` is updated to the line after the match. Otherwise,
812
- # `current_line` will be one beyond the last line, unless
813
- # `line_is_optional` is true, in which case it will be the same as when
814
- # the function was entered.
815
- def find_next_line(regex, line_is_optional=False):
816
- nonlocal lines
817
- nonlocal current_line
818
- current_line_backup = current_line
819
- # Find starting from `current_line`.
820
- while current_line < len(lines):
821
- line = lines[current_line]
822
- current_line += 1
823
- timestamp_regex = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
824
- timestamp_format = "%Y-%m-%d %H:%M:%S"
825
- regex_match = re.search(regex, line)
826
- if regex_match:
827
- try:
828
- return datetime.strptime(
829
- re.match(timestamp_regex, line).group(),
830
- timestamp_format), regex_match
831
- except Exception as e:
832
- raise ActionException(
833
- f"Could not parse timestamp of form "
834
- f"\"{timestamp_regex}\" from line "
835
- f" \"{line.rstrip()}\" ({e})")
836
- # If we get here, we did not find a matching line.
837
- if line_is_optional:
838
- current_line = current_line_backup
839
- return None, None
840
-
841
- # Find the lines matching th key_lines_regex and extract the time
842
- # information from them.
843
- overall_begin, _ = find_next_line(r"INFO:\s*Processing")
844
- merge_begin, _ = find_next_line(r"INFO:\s*Merging partial vocab")
845
- convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
846
- perm_begin_and_info = []
847
- while True:
848
- perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True)
849
- if perm_begin is None:
850
- break
851
- _, perm_info = find_next_line(r"INFO:\s*Writing meta data for"
852
- r" ([A-Z]+ and [A-Z]+)", True)
853
- # if perm_info is None:
854
- # break
855
- perm_begin_and_info.append((perm_begin, perm_info))
856
- convert_end = (perm_begin_and_info[0][0] if
857
- len(perm_begin_and_info) > 0 else None)
858
- normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
859
- text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True)
860
- text_end, _ = find_next_line(r"INFO:\s*DocsDB done", True)
861
- # print("DEBUG:", len(perm_begin_and_info), perm_begin_and_info)
862
- # print("DEBUG:", overall_begin)
863
- # print("DEBUG:", normal_end)
864
-
865
- # Check whether at least the first phase is done.
866
- if overall_begin is None:
867
- raise ActionException("Missing line that index build has started")
868
- if overall_begin and not merge_begin:
869
- raise ActionException("According to the log file, the index build "
870
- "has started, but is still in its first "
871
- "phase (parsing the input)")
872
-
873
- # Helper lambda that shows the duration for a phase (if the start and
874
- # end timestamps are available).
875
- def show_duration(heading, start_end_pairs):
876
- nonlocal unit
877
- num_start_end_pairs = 0
878
- diff_seconds = 0
879
- for start, end in start_end_pairs:
880
- if start and end:
881
- diff_seconds += (end - start).total_seconds()
882
- num_start_end_pairs += 1
883
- if num_start_end_pairs > 0:
884
- if unit == "h":
885
- diff = diff_seconds / 3600
886
- elif unit == "min":
887
- diff = diff_seconds / 60
888
- else:
889
- diff = diff_seconds
890
- log.info(f"{heading:<23} : {diff:>5.1f} {unit}")
891
-
892
- # Get the times of the various phases (hours or minutes, depending on
893
- # how long the first phase took).
894
- unit = "h"
895
- if merge_begin and overall_begin:
896
- parse_duration = (merge_begin - overall_begin).total_seconds()
897
- if parse_duration < 200:
898
- unit = "s"
899
- elif parse_duration < 3600:
900
- unit = "min"
901
- show_duration("Parse input", [(overall_begin, merge_begin)])
902
- show_duration("Build vocabularies", [(merge_begin, convert_begin)])
903
- show_duration("Convert to global IDs", [(convert_begin, convert_end)])
904
- for i in range(len(perm_begin_and_info)):
905
- perm_begin, perm_info = perm_begin_and_info[i]
906
- perm_end = perm_begin_and_info[i + 1][0] if i + 1 < len(
907
- perm_begin_and_info) else normal_end
908
- perm_info_text = (perm_info.group(1).replace(" and ", " & ")
909
- if perm_info else f"#{i + 1}")
910
- show_duration(f"Permutation {perm_info_text}",
911
- [(perm_begin, perm_end)])
912
- show_duration("Text index", [(text_begin, text_end)])
913
- if text_begin and text_end:
914
- log.info("")
915
- show_duration("TOTAL index build time",
916
- [(overall_begin, normal_end),
917
- (text_begin, text_end)])
918
- elif normal_end:
919
- log.info("")
920
- show_duration("TOTAL index build time",
921
- [(overall_begin, normal_end)])
922
-
923
@track_action_rank
def action_test_query(self, only_show=False):
    """
    Send a minimal SPARQL test query to the configured server via `curl`.

    With `only_show` set, the command is displayed but not executed.
    """

    # Assemble the individual pieces of the curl command line.
    sparql = "SELECT * WHERE { ?s ?p ?o } LIMIT 10"
    accept_header = "Accept: text/tab-separated-values"
    content_type_header = "Content-Type: application/sparql-query"
    endpoint = self.config['server']['url']
    curl_cmd = " ".join([
        f"curl -s {endpoint}",
        f"-H \"{accept_header}\" -H \"{content_type_header}\"",
        f"--data \"{sparql}\"",
    ])

    # Display the command; run it unless we were asked to only show it.
    self.show(curl_cmd, only_show)
    if not only_show:
        subprocess.run(curl_cmd, shell=True)
944
-
945
@track_action_rank
def action_ui(self, only_show=False):
    """
    Start the QLever UI in a docker container, with the server from the
    Qleverfile as backend.

    Four docker commands are run in sequence: remove a container with the
    configured name, pull the UI image, start the container, and configure
    the UI inside it. With `only_show` set, the commands are only shown.

    Raises `ActionException` if pulling the image, starting the container,
    or configuring the UI fails.
    """

    # Construct commands.
    ui_config = self.config['ui']
    host_name = socket.getfqdn()
    server_url = f"http://{host_name}:{self.config['server']['port']}"
    docker_rm_cmd = f"docker rm -f {ui_config['container']}"
    docker_pull_cmd = f"docker pull {ui_config['image']}"
    docker_run_cmd = (f"docker run -d -p {ui_config['port']}:7000 "
                      f"--name {ui_config['container']} "
                      f"{ui_config['image']} ")
    docker_exec_cmd = (f"docker exec -it "
                       f"{ui_config['container']} "
                       f"bash -c \"python manage.py configure "
                       f"{ui_config['config']} "
                       f"{server_url}\"")

    # Show them.
    self.show("\n".join([docker_rm_cmd, docker_pull_cmd, docker_run_cmd,
                         docker_exec_cmd]), only_show)
    if only_show:
        return

    # Removing the old container is best effort: there may be no container
    # with that name yet, which is not an error for our purposes.
    subprocess.run(docker_rm_cmd, shell=True, stdout=subprocess.DEVNULL)

    # The remaining commands must succeed. `check=True` is what makes
    # `subprocess.run` raise `CalledProcessError` on a non-zero exit code;
    # without it, failures would go unnoticed.
    try:
        for cmd in (docker_pull_cmd, docker_run_cmd, docker_exec_cmd):
            subprocess.run(cmd, shell=True, check=True,
                           stdout=subprocess.DEVNULL)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to start the QLever UI {e}")
    log.info(f"The QLever UI should now be up at "
             f"http://{host_name}:{ui_config['port']}")
    log.info("You can log in as QLever UI admin with username and "
             "password \"demo\"")
988
-
989
@track_action_rank
def action_cache_stats_and_settings(self, only_show=False):
    """
    Show the cache statistics and the settings of the server.

    Both are requested from the server via `curl` as JSON objects and
    printed as aligned `key : value` tables. With `only_show` set, the
    two curl commands are only shown.

    Raises `ActionException` if a request fails or a response cannot be
    processed.
    """

    # Construct the two curl commands.
    server_url = self.config['server']['url']
    cache_stats_cmd = (f"curl -s {server_url} "
                       f"--data-urlencode \"cmd=cache-stats\"")
    cache_settings_cmd = (f"curl -s {server_url} "
                          f"--data-urlencode \"cmd=get-settings\"")

    # Show them.
    self.show("\n".join([cache_stats_cmd, cache_settings_cmd]), only_show)
    if only_show:
        return

    # Format a single JSON value for display: integers (also when given
    # as digit-only strings) get thousands separators, decimal numbers are
    # shown with two digits after the point, everything else is shown
    # as is. Regexes are only applied to strings, so that floats, lists,
    # or null values in the JSON do not raise a `TypeError`.
    def format_value(value):
        if isinstance(value, int):
            return "{:,}".format(int(value))
        if isinstance(value, float):
            return "{:.2f}".format(value)
        if not isinstance(value, str):
            return str(value)
        if re.match(r"^\d+$", value):
            return "{:,}".format(int(value))
        if re.match(r"^\d+\.\d+$", value):
            return "{:.2f}".format(float(value))
        return value

    # Print the key-value pairs of a JSON object in tabular form.
    def print_json_as_tabular(raw_json):
        key_value_pairs = json.loads(raw_json).items()
        # `default=0` keeps an empty JSON object from raising.
        max_key_len = max((len(key) for key, _ in key_value_pairs),
                          default=0)
        for key, value in key_value_pairs:
            log.info(f"{key.ljust(max_key_len)} : {format_value(value)}")

    # Execute them.
    try:
        cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
        cache_settings = subprocess.check_output(cache_settings_cmd,
                                                 shell=True)
        print_json_as_tabular(cache_stats)
        log.info("")
        print_json_as_tabular(cache_settings)
    except Exception as e:
        raise ActionException(f"Failed to get cache stats and settings: "
                              f"{e}")
1028
-
1029
@track_action_rank
def action_clear_cache(self, only_show=False):
    """
    Clear the cache of the server (unpinned entries only) and then show
    the cache statistics and settings.

    With `only_show` set, the curl command is only shown.

    Raises `ActionException` if the curl command fails.
    """

    # Construct the curl command.
    clear_cache_cmd = (f"curl -s {self.config['server']['url']} "
                       f"--data-urlencode \"cmd=clear-cache\"")

    # Show it.
    self.show(clear_cache_cmd, only_show)
    if only_show:
        return

    # Execute it. `check=True` makes a failing curl (for example, because
    # the server is not reachable) raise instead of being reported as a
    # success below.
    try:
        subprocess.run(clear_cache_cmd, shell=True, check=True,
                       stdout=subprocess.DEVNULL)
    except Exception as e:
        raise ActionException(f"Failed to clear the cache: {e}")
    log.info("Cache cleared (only unpinned entries)")
    log.info("")

    # Errors of the follow-up action are reported under its own message.
    self.action_cache_stats_and_settings(only_show)
1053
-
1054
@track_action_rank
def action_clear_cache_complete(self, only_show=False):
    """
    Clear the cache of the server completely (both pinned and unpinned
    entries) and then show the cache statistics and settings.

    The request carries the access token from `server.access_token`.
    With `only_show` set, the curl command is only shown.

    Raises `ActionException` if the curl command fails.
    """

    # Construct the curl command. The access token comes from the
    # configuration, so quote it for the shell.
    access_token_arg = shlex.quote(
        f"access-token={self.config['server']['access_token']}")
    clear_cache_cmd = (f"curl -s {self.config['server']['url']} "
                       f"--data-urlencode \"cmd=clear-cache-complete\" "
                       f"--data-urlencode {access_token_arg}")

    # Show it.
    self.show(clear_cache_cmd, only_show)
    if only_show:
        return

    # Execute it. `check=True` makes a failing curl (for example, because
    # the server is not reachable) raise instead of being reported as a
    # success below.
    try:
        subprocess.run(clear_cache_cmd, shell=True, check=True,
                       stdout=subprocess.DEVNULL)
    except Exception as e:
        raise ActionException(f"Failed to clear the cache: {e}")
    log.info("Cache cleared (both pinned and unpinned entries)")
    log.info("")

    # Errors of the follow-up action are reported under its own message.
    self.action_cache_stats_and_settings(only_show)
1081
-
1082
@track_action_rank
def action_autocompletion_warmup(self, only_show=False):
    """
    Pin the autocompletion queries from `ui.config` to the cache.

    The warmup queries are fetched from the UI as lines of the form
    `<description>\t<query>`. Each query is sent to the server with
    `pinresult=true`; afterwards the unpinned part of the cache is
    cleared. A failing query is logged and does not stop the action.
    With `only_show` set, only the command for fetching the queries is
    shown.

    Raises `ActionException` if the warmup queries cannot be fetched.
    """

    # Construct curl command to obtain the warmup queries.
    #
    # TODO: This is the access token expected by Django in views.py, where
    # it is currently set to dummy value. Find a sound yet simple mechanism
    # for this.
    access_token_ui = "top-secret"
    config_name = self.config["ui"]["config"]
    warmup_url = f"{self.config['ui']['url']}/warmup/{config_name}"
    curl_cmd = (f"curl -s {warmup_url}/queries?token={access_token_ui}")

    # Show it.
    self.show(f"Pin warmup queries obtained via: {curl_cmd}", only_show)
    if only_show:
        return

    # Get the queries.
    try:
        queries = subprocess.check_output(curl_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to get warmup queries ({e})")

    # Iterate over them and pin them to the cache. Give a more generous
    # timeout (which requires an access token).
    header = "Accept: application/qlever-results+json"
    timeout = "300s"
    access_token = self.config["server"]["access_token"]
    server_url = self.config['server']['url']
    # The same for every query, so build it once.
    clear_cache_cmd = (f"curl -s {server_url} "
                       f"--data-urlencode \"cmd=clear-cache\"")
    first = True
    for line in queries.decode("utf-8").splitlines():
        # Skip blank lines (in particular, the one after a trailing
        # newline) and lines without a tab instead of failing on them.
        if not line.strip():
            continue
        description, separator, query = line.partition("\t")
        if not separator:
            log.warning(f"Skipping warmup line without tab: {line}")
            continue
        if first:
            first = False
        else:
            log.info("")
        log.info(f"{BOLD}Pin query: {description}{NORMAL}")
        pin_cmd = (f"curl -s {server_url}/api "
                   f"-H \"{header}\" "
                   f"--data-urlencode query={shlex.quote(query)} "
                   f"--data-urlencode timeout={timeout} "
                   f"--data-urlencode "
                   f"access-token={shlex.quote(access_token)} "
                   f"--data-urlencode pinresult=true "
                   f"--data-urlencode send=0")
        log.info(pin_cmd)
        # Launch query and show the `resultsize` of the JSON response.
        try:
            result = subprocess.check_output(pin_cmd, shell=True)
            json_result = json.loads(result.decode("utf-8"))
            # Check if the JSON has a key "exception".
            if "exception" in json_result:
                raise Exception(json_result["exception"])
            log.info(f"Result size: {json_result['resultsize']:,}")
            log.info(clear_cache_cmd)
            subprocess.check_output(clear_cache_cmd, shell=True,
                                    stderr=subprocess.DEVNULL)
        except Exception as e:
            log.error(f"Query failed: {e}")
1146
-
1147
@track_action_rank
def action_example_queries(self, only_show=False):
    """
    Launch the example queries and show their times and result sizes.

    The queries are fetched from `general.example_queries_url` as lines
    of the form `<description>\t<query>`. Before each query, the cache is
    cleared. For each query, one line with the description, the time, and
    the result size (or the error message) is logged, followed by the
    total and the average over all queries. At most
    `general.example_queries_limit` queries are launched. With
    `only_show` set, only a description of the action is shown.

    Raises `ActionException` if the example queries cannot be fetched.
    """

    # Construct curl command to obtain the example queries.
    config_general = self.config["general"]
    example_queries_url = config_general["example_queries_url"]
    example_queries_limit = int(config_general["example_queries_limit"])
    example_queries_send = int(config_general["example_queries_send"])
    server_url = self.config['server']['url']
    curl_cmd = f"curl -s {example_queries_url}"

    # Show what the action does.
    self.show(f"Launch example queries obtained via: {curl_cmd}\n"
              f"SPARQL endpoint: {server_url}\n"
              f"Clearing the cache before each query\n"
              f"Using send={example_queries_send} and limit="
              f"{example_queries_limit}",
              only_show)
    if only_show:
        return

    # Get the queries.
    try:
        queries = subprocess.check_output(curl_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to get example queries ({e})")

    # Launch the queries one after the other and for each print: the
    # description, the result size, and the query processing time.
    clear_cache_cmd = (f"curl -s {server_url} "
                       f"--data-urlencode cmd=clear-cache")
    count = 0
    total_time_seconds = 0.0
    total_result_size = 0
    for line in queries.decode("utf-8").splitlines():
        # Skip blank lines and lines without a tab instead of failing on
        # them.
        if not line.strip():
            continue
        description, separator, query = line.partition("\t")
        if not separator:
            log.warning(f"Skipping example query line without tab: {line}")
            continue

        # Launch query and show the `resultsize` of the JSON response.
        query_cmd = (f"curl -s {server_url} "
                     f"-H \"Accept: application/qlever-results+json\" "
                     f"--data-urlencode query={shlex.quote(query)} "
                     f"--data-urlencode send={example_queries_send}")
        try:
            subprocess.run(clear_cache_cmd, shell=True,
                           stdout=subprocess.DEVNULL,
                           stderr=subprocess.DEVNULL)
            start_time = time.time()
            result = subprocess.check_output(query_cmd, shell=True)
            time_seconds = time.time() - start_time
            json_result = json.loads(result.decode("utf-8"))
            if "exception" in json_result:
                raise Exception(json_result["exception"])
            result_size = int(json_result["resultsize"])
            result_string = f"{result_size:>14,}"
        except Exception as e:
            # A failed query counts with zero time and zero result size.
            # The marker is padded to the width of the result column.
            time_seconds = 0.0
            result_size = 0
            result_string = (f"{RED}{'FAILED':>14}{NORMAL}"
                             f" {RED}({e}){NORMAL}")

        # Print description, time, result in tabular form.
        log.debug(query)
        if len(description) > 60:
            description = description[:57] + "..."
        log.info(f"{description:<60} {time_seconds:6.2f} s "
                 f"{result_string}")
        count += 1
        total_time_seconds += time_seconds
        total_result_size += result_size
        if count == example_queries_limit:
            break

    # Without any query, there is nothing to sum up (and the average
    # below would divide by zero).
    if count == 0:
        log.info("No example queries found")
        return

    # Print total and average time.
    log.info("")
    description = (f"TOTAL for {count} "
                   f"{'query' if count == 1 else 'queries'}")
    log.info(f"{description:<60} {total_time_seconds:6.2f} s "
             f"{total_result_size:>14,}")
    description = (f"AVERAGE for {count} "
                   f"{'query' if count == 1 else 'queries'}")
    log.info(f"{description:<60} {total_time_seconds / count:6.2f} s "
             f"{round(total_result_size / count):>14,}")
1230
-
1231
@track_action_rank
def action_memory_profile(self, only_show=False):
    """
    Poll the memory usage of a process (specified via `general.PID`) once
    per second and write it to a file `<PID>.memory-usage.tsv`.

    Each line of the file has the form `<timestamp>\t<memory in GB>`; the
    same is logged to the screen. The action runs until the process no
    longer exists. With `only_show` set, only a description of the action
    is shown.

    Raises `ActionException` if no PID is configured or if no process
    information can be obtained for it.
    """

    # Show what the action does.
    self.show("Poll memory usage of the given process every second "
              "and print it to a file", only_show)
    if only_show:
        return

    # Show process information. The raw config value is kept for the
    # error message, because `pid` is not set if the conversion fails.
    if "pid" not in self.config["general"]:
        raise ActionException("PID must be specified via general.PID")
    pid_value = self.config["general"]["pid"]
    try:
        pid = int(pid_value)
        proc = psutil.Process(pid)
    except Exception as e:
        raise ActionException(f"Could not obtain information for process "
                              f"with PID {pid_value} ({e})")
    self.show_process_info(proc, "", show_heading=True)
    log.info("")

    # As long as the process exists, poll memory usage once per second and
    # print it to the screen as well as to a file `<PID>.memory-usage.tsv`.
    # The `with` block closes the file also when polling is interrupted.
    with open(f"{pid}.memory-usage.tsv", "w") as file:
        seconds = 0
        while proc.is_running():
            # The process may end between the check above and this call.
            try:
                rss_bytes = proc.memory_info().rss
            except psutil.NoSuchProcess:
                break
            # Print as <timestamp>\t<size>, with the timestamp in the
            # usual logger format (second precision).
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            memory_usage_gb = f"{rss_bytes / 1e9:.1f}"
            log.info(f"{timestamp}\t{memory_usage_gb}")
            file.write(f"{timestamp}\t{memory_usage_gb}\n")
            time.sleep(1)
            seconds += 1
            if seconds % 60 == 0:
                file.flush()
1272
-
1273
@track_action_rank
def action_memory_profile_show(self, only_show=False):
    """
    Plot the memory profile written by action `memory_profile` with
    gnuplot.

    The data is read from `<PID>.memory-usage.tsv`, where the PID is taken
    from `general.PID`. With `only_show` set, the gnuplot command is only
    shown.

    Raises `ActionException` if no PID is configured or gnuplot fails.
    """

    general_config = self.config["general"]
    if "pid" not in general_config:
        raise ActionException("PID must be specified via general.PID")
    pid = int(general_config["pid"])

    # One gnuplot statement per entry, joined into a single script.
    statements = [
        "set datafile separator \"\t\"",
        "set xdata time",
        "set timefmt \"%Y-%m-%d %H:%M:%S\"",
        "set xlabel \"Time\"",
        "set ylabel \"Memory Usage\"",
        "set grid",
        f"plot \"{pid}.memory-usage.tsv\" using 1:2 with lines",
        "pause -1",
    ]
    gnuplot_script = "; ".join(statements)
    gnuplot_cmd = f"gnuplot -e {shlex.quote(gnuplot_script)}"

    # Display the command; stop here if it should only be shown.
    self.show(gnuplot_cmd, only_show)
    if only_show:
        return

    # Run gnuplot and translate a failure into an `ActionException`.
    try:
        subprocess.check_output(gnuplot_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to launch gnuplot ({e})")
1305
-
1306
-
1307
def setup_autocompletion_cmd():
    """
    Return the shell code that sets up autocompletion for the `qlever`
    script.

    TODO: Currently works for bash only.
    """

    # Names of the `action_...` functions of the `Actions` class, ordered
    # by their `rank` attribute (see the `@track_action_rank` decorator).
    method_names = sorted(
        (name for name, _ in inspect.getmembers(
            Actions, predicate=inspect.isfunction)
         if name.startswith("action_")),
        key=lambda name: getattr(Actions, name).rank)

    # Turn the method names into the names used on the command line.
    completion_words = " ".join(
        name.replace("action_", "").replace("_", "-")
        for name in method_names)

    # Also offer some config settings for autocompletion.
    for setting in ("docker.USE_DOCKER=true", "docker.USE_DOCKER=false",
                    "index.BINARY=IndexBuilderMain",
                    "server.BINARY=ServerMain"):
        completion_words += f" {setting}"

    # Assemble the bash snippet line by line (ends with a newline).
    script_lines = [
        "_qlever_completion() {",
        "  local cur=${COMP_WORDS[COMP_CWORD]}",
        f"  COMPREPLY=( $(compgen -W \"{completion_words}\" -- $cur) )",
        "}",
        "complete -o nosort -F _qlever_completion qlever",
        "",
    ]
    return "\n".join(script_lines)
1337
-
1338
-
1339
# Command-line names of all actions: every attribute of `Actions` that
# starts with "action_", without that marker and with dashes instead of
# underscores (in the alphabetical order provided by `dir`).
action_names = [
    attribute.replace("action_", "").replace("_", "-")
    for attribute in dir(Actions)
    if attribute.startswith("action_")
]
1343
-
1344
-
1345
def main():
    """
    Entry point of the `qlever` script.

    Processes `sys.argv` as follows:
    - without arguments or with `help`, `--help`, or `-h`: show a greeting
      and first steps, then return;
    - with the single argument `setup-autocompletion`: print the shell
      code for autocompletion and exit;
    - a first argument of the form `general.log_level=<LEVEL>` sets the
      log level before anything else happens;
    - `setup-config [<config name>]` runs the `setup-config` action;
    - otherwise, each argument is either a config assignment of the form
      `section.key=value` or the name of an action, processed in order.
      A final argument `show` means that actions only show what they
      would do.
    """
    # Get the version.
    try:
        version = pkg_resources.get_distribution("qlever").version
    except Exception as e:
        log.error(f"Could not determine package version: {e}")
        version = "unknown"

    # If the script is called without argument, say hello and provide some
    # help to get started.
    if len(sys.argv) == 1 or \
            (len(sys.argv) == 2 and sys.argv[1] in ("help", "--help", "-h")):
        log.info("")
        log.info(f"{BOLD}Hello, I am the qlever script"
                 f" (version {version}){NORMAL}")
        log.info("")
        if os.path.exists("Qleverfile"):
            log.info("I see that you already have a \"Qleverfile\" in the "
                     "current directory, so you are ready to start")
            log.info("")
            show_available_action_names()
        else:
            log.info("You need a Qleverfile in the current directory, which "
                     "you can create as follows:")
            log.info("")
            log.info(f"{BLUE}qlever setup-config <config name>{NORMAL}")
            log.info("")
            show_available_config_names()
            log.info("")
            log.info("If you omit <config name>, you get a default Qleverfile")
        log.info("")
        return

    # If the only argument is `setup-autocompletion`, print the result of
    # the function `setup_autocompletion_cmd()` above and exit.
    if len(sys.argv) == 2 and sys.argv[1] == "setup-autocompletion":
        log.setLevel(logging.ERROR)
        print(setup_autocompletion_cmd())
        sys.exit(0)

    # If the first argument sets the log level, deal with that immediately (so
    # that it goes into effect before we do anything else). Otherwise, set the
    # log level to `NOTSET` (which will signal to the Actions class that it can
    # take the log level from the config file).
    log.setLevel(logging.NOTSET)
    if len(sys.argv) > 1:
        set_log_level_match = re.match(r"general\.log_level=(\w+)",
                                       sys.argv[1], re.IGNORECASE)
        if set_log_level_match:
            log_level = set_log_level_match.group(1).upper()
            # Drop one element, so that the log-level argument takes the
            # place of the script name and `sys.argv[1:]` are the
            # remaining arguments.
            sys.argv = sys.argv[1:]
            # Only integer attributes of `logging` are log levels; other
            # attributes (like `BASIC_FORMAT`) must not reach `setLevel`.
            level = getattr(logging, log_level, None)
            if isinstance(level, int):
                log.setLevel(level)
                log.debug("")
                log.debug(f"Log level set to {log_level}")
                log.debug("")
            else:
                log.error(f"Invalid log level: \"{log_level}\"")
                abort_script()

    # Helper function that executes an action.
    def execute_action(actions, action_name, **kwargs):
        log.info("")
        log.info(f"{BOLD}Action \"{action_name}\"{NORMAL}")
        log.info("")
        action = f"action_{action_name.replace('-', '_')}"
        try:
            getattr(actions, action)(**kwargs)
        except ActionException as err:
            print(f"{RED}{err}{NORMAL}")
            abort_script()
        except Exception as err:
            # Unexpected error: say where it happened, then let the
            # exception propagate so that the stack trace is shown.
            line = traceback.extract_tb(err.__traceback__)[-1].lineno
            print(f"{RED}Error in Python script (line {line}: {err})"
                  f", stack trace follows:{NORMAL}")
            print()
            raise

    # If `setup-config` is among the command-line arguments, it must be the
    # first one, followed by at most one more argument.
    if "setup-config" in sys.argv:
        if sys.argv.index("setup-config") > 1:
            log.setLevel(logging.ERROR)
            log.error("Action `setup-config` must be the first argument")
            abort_script()
        if len(sys.argv) > 3:
            log.setLevel(logging.ERROR)
            log.error("Action `setup-config` must be followed by at most one "
                      "argument (the name of the desired configuration)")
            abort_script()
        log.setLevel(logging.INFO)
        config_name = sys.argv[2] if len(sys.argv) == 3 else "default"
        execute_action(Actions, "setup-config", config_name=config_name)
        return

    actions = Actions()

    # Check if the last argument is "show" (if yes, remember it and remove it).
    only_show = len(sys.argv) > 1 and sys.argv[-1] == "show"
    if only_show:
        sys.argv = sys.argv[:-1]

    # Execute the actions specified on the command line.
    for action_name in sys.argv[1:]:
        # If the action is of the form section.key=value, set the config value.
        set_config_match = re.match(r"(\w+)\.(\w+)=(.*)", action_name)
        if set_config_match:
            section, option, value = set_config_match.groups()
            log.info(f"Setting config value: {section}.{option}={value}")
            try:
                actions.set_config(section, option, value)
            except ValueError as err:
                log.error(err)
                abort_script()
            continue
        # If the action name does not exist, exit.
        if action_name not in action_names:
            log.error(f"Action \"{action_name}\" does not exist, available "
                      f"actions are: {', '.join(action_names)}")
            abort_script()
        # Execute the action (or only show what would be executed).
        execute_action(actions, action_name, only_show=only_show)
    log.info("")
1474
-
1475
# Run the command-line interface only when this file is executed as a
# script (and not when it is imported as a module).
if __name__ == "__main__":
    main()