qlever 0.5.19__py3-none-any.whl → 0.5.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

qlever/qlever_old.py DELETED
@@ -1,1715 +0,0 @@
1
- #!/usr/bin/env python3
2
- # PYTHON_ARGCOMPLETE_OK
3
-
4
- # This is the `qlever` script (new version, written in Python). It serves as a
5
- # convenient command-line tool for all things QLever. See the `README.md` file
6
- # for how to use it.
7
-
8
- import glob
9
- import inspect
10
- import json
11
- import logging
12
- import os
13
- import re
14
- import shlex
15
- import shutil
16
- import socket
17
- import subprocess
18
- import sys
19
- import time
20
- import traceback
21
- from configparser import ConfigParser, ExtendedInterpolation
22
- from datetime import date, datetime
23
-
24
- import pkg_resources
25
- import psutil
26
-
27
- from qlever.log import log
28
-
29
- BLUE = "\033[34m"
30
- RED = "\033[31m"
31
- BOLD = "\033[1m"
32
- NORMAL = "\033[0m"
33
-
34
- # # Custom formatter for log messages.
35
- # class CustomFormatter(logging.Formatter):
36
- # def format(self, record):
37
- # message = record.getMessage()
38
- # if record.levelno == logging.DEBUG:
39
- # return colored(message, "magenta")
40
- # elif record.levelno == logging.WARNING:
41
- # return colored(message, "yellow")
42
- # elif record.levelno in [logging.CRITICAL, logging.ERROR]:
43
- # return colored(message, "red")
44
- # else:
45
- # return message
46
- #
47
- #
48
- # # Custom logger.
49
- # log = logging.getLogger("qlever")
50
- # log.setLevel(logging.INFO)
51
- # handler = logging.StreamHandler()
52
- # handler.setFormatter(CustomFormatter())
53
- # log.addHandler(handler)
54
-
55
-
56
- # Helper function for tracking the order of the actions in class `Actions`.
57
def track_action_rank(method):
    """
    Decorator that stamps `method` with the next free rank.

    The rank is stored in `method.rank`; the running counter lives on this
    function object itself (`track_action_rank.counter`). The decorated
    callable is returned unchanged apart from the new attribute.
    """
    rank = track_action_rank.counter
    method.rank = rank
    track_action_rank.counter = rank + 1
    return method


# Next rank to hand out.
track_action_rank.counter = 0  # noqa: E305
64
-
65
-
66
- # Abort the script.
67
def abort_script(error_code=1):
    """
    Log an empty line and terminate the script with exit code `error_code`.
    """
    log.info("")
    # Same effect as `sys.exit(error_code)`, which raises `SystemExit`.
    raise SystemExit(error_code)
70
-
71
-
72
- # Show the available config names.
73
def show_available_config_names():
    """
    Log the names of all configs for which a pre-filled Qleverfile exists.

    The Qleverfiles are expected as `Qleverfile.<config name>` in the
    directory `Qleverfiles` next to this script. Entries that do not follow
    this naming scheme are ignored. If the directory cannot be read or
    contains no Qleverfile, an error is logged and the script is aborted.
    """
    # Resolve symlinks, so that the same directory is used as by the
    # `setup-config` action.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    qleverfiles_dir = os.path.join(script_dir, "Qleverfiles")
    prefix = "Qleverfile."

    problem = None
    config_names = []
    try:
        # The config name is everything after the prefix (it may itself
        # contain dots).
        config_names = [
            entry[len(prefix):]
            for entry in os.listdir(qleverfiles_dir)
            if entry.startswith(prefix) and len(entry) > len(prefix)
        ]
    except OSError as e:
        problem = str(e)
    else:
        if not config_names:
            problem = (
                f'Directory "{qleverfiles_dir}" exists, but '
                f"contains no Qleverfiles"
            )

    if problem is not None:
        log.error(
            f'Could not find any Qleverfiles in "{qleverfiles_dir}" '
            f"({problem})"
        )
        log.info("")
        log.info(
            "Check that you have fully downloaded or cloned "
            "https://github.com/ad-freiburg/qlever-control, and "
            "not just the script itself"
        )
        abort_script()

    # Show available config names.
    log.info(f"Available config names are: {', '.join(sorted(config_names))}")
101
-
102
-
103
- # Show the available action names.
104
def show_available_action_names():
    """
    Log a short usage hint: an example sequence of actions, the list of all
    action names (taken from the module-level `action_names`), and the
    command that enables shell autocompletion.
    """
    emit = log.info
    emit("You can now execute a sequence of actions, for example:")
    emit("")
    emit(f"{BLUE}qlever-old get-data index restart test-query ui {NORMAL}")
    emit("")
    all_actions = ", ".join(action_names)
    emit(f"Available action names are: {all_actions}")
    emit("")
    emit(
        "To get autocompletion for these, run the following or "
        "add it to your `.bashrc`:"
    )
    emit("")
    emit(f'{BLUE}eval "$(qlever-old setup-autocompletion)"{NORMAL}')
117
-
118
-
119
- # We want to distinguish between exception that we throw intentionally and all
120
- # others.
121
class ActionException(Exception):
    """
    Exception that the actions raise intentionally, so that it can be told
    apart from all other (unexpected) exceptions.
    """
123
-
124
-
125
- # This class contains all the action :-)
126
- class Actions:
127
def __init__(self):
    """
    Read the Qleverfile from the current directory, fill in defaults for
    all optional settings, set the log level, and check the installation.

    The script is aborted (after logging an error) if the Qleverfile is
    missing or unreadable, if a default cannot be derived because a
    setting it depends on is missing, or if the log level is invalid.
    """

    self.config = ConfigParser(interpolation=ExtendedInterpolation())
    # Check if the Qleverfile exists.
    if not os.path.isfile("Qleverfile"):
        log.setLevel(logging.INFO)
        log.info("")
        log.error(
            'The qlever script needs a "Qleverfile" '
            "in the current directory, but I could not find it"
        )
        log.info("")
        log.info(
            "Run `qlever-old setup-config <config name>` to create a "
            "pre-filled Qleverfile"
        )
        log.info("")
        show_available_config_names()
        abort_script()
    files_read = self.config.read("Qleverfile")
    if not files_read:
        log.error('ConfigParser could not read "Qleverfile"')
        abort_script()
    self.name = self.config["data"]["name"]
    self.yes_values = ["1", "true", "yes"]

    # Default values for options that are not mandatory in the Qleverfile.
    # A callable is a default that is derived from other settings; it is
    # only evaluated when the option is actually missing, and only after
    # the defaults listed before it have been applied (so that, for
    # example, `server.url` can rely on the default for `server.port`).
    defaults = {
        "general": {
            "log_level": "info",
            "pid": "0",
            "example_queries_url": lambda: (
                f"https://qlever.cs.uni-freiburg.de/"
                f"api/examples/"
                f"{self.config['ui']['config']}"
            ),
            "example_queries_limit": "10",
            "example_queries_send": "0",
        },
        "index": {
            "binary": "IndexBuilderMain",
            "with_text_index": "false",
            "only_pso_and_pos_permutations": "false",
            "use_patterns": "true",
        },
        "server": {
            "port": "7000",
            "binary": "ServerMain",
            "num_threads": "8",
            "cache_max_size": "5G",
            "cache_max_size_single_entry": "1G",
            "cache_max_num_entries": "100",
            "with_text_index": "false",
            "only_pso_and_pos_permutations": "false",
            "timeout": "30s",
            "use_patterns": "true",
            "url": lambda: (
                f"http://localhost:{self.config['server']['port']}"
            ),
        },
        "docker": {
            "image": "adfreiburg/qlever",
            "container_server": f"qlever.server.{self.name}",
            "container_indexer": f"qlever.indexer.{self.name}",
        },
        "ui": {
            "port": "7000",
            "image": "adfreiburg/qlever-ui",
            "container": "qlever-ui",
            "url": "https://qlever.cs.uni-freiburg.de/api",
        },
    }

    # Create the sections that are missing, so that the code below can
    # access every section without a `KeyError`.
    for section in defaults:
        if not self.config.has_section(section):
            self.config.add_section(section)

    # Defaults for [server] that carry over from [index]. This has to
    # happen before the generic defaults for [server] are applied.
    for option in [
        "with_text_index",
        "only_pso_and_pos_permutations",
        "use_patterns",
    ]:
        if (
            option in self.config["index"]
            and option not in self.config["server"]
        ):
            self.config["server"][option] = self.config["index"][option]

    # If an option does not exist (or is empty), set it to its default.
    for section, options in defaults.items():
        for option, default in options.items():
            if self.config[section].get(option):
                continue
            try:
                value = default() if callable(default) else default
            except KeyError as e:
                log.error(
                    f"Cannot derive a default for option "
                    f"{option.upper()} in section [{section}], "
                    f"because {e} is missing in the Qleverfile"
                )
                abort_script()
            self.config[section][option] = value

    # If the log level was not explicitly set by the first command-line
    # argument (see below), set it according to the Qleverfile.
    if log.level == logging.NOTSET:
        log_level = self.config["general"]["log_level"].upper()
        # Only accept names of the integer level constants; `logging` has
        # other upper-case attributes that are not log levels.
        level = getattr(logging, log_level, None)
        if not isinstance(level, int):
            log.error(f'Invalid log level: "{log_level}"')
            abort_script()
        log.setLevel(level)

    # Show some information (for testing purposes only).
    log.debug(
        f"Parsed Qleverfile, sections are: "
        f"{', '.join(self.config.sections())}"
    )

    # Check specifics of the installation.
    self.check_installation()
235
-
236
def check_installation(self):
    """
    Helper function that checks particulars of the installation and
    remembers them so that all actions execute without errors.

    Sets `self.net_connections_enabled` (whether
    `psutil.net_connections()` can be called) and `self.docker_enabled`
    (whether `docker info` succeeds within the timeout).
    """

    # Systems like macOS do not allow psutil.net_connections().
    try:
        psutil.net_connections()
    except Exception as e:
        self.net_connections_enabled = False
        log.debug(
            f"Note: psutil.net_connections() failed ({e}),"
            f" will not scan network connections for action"
            f' "start"'
        )
    else:
        self.net_connections_enabled = True

    # Check whether docker is installed and works (on MacOS 12, docker
    # hangs when installed without GUI, hence the timeout).
    try:
        completed_process = subprocess.run(
            ["docker", "info"],
            timeout=0.5,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        self.docker_enabled = completed_process.returncode == 0
    except (OSError, subprocess.SubprocessError):
        # Binary not found or not executable, or the timeout expired.
        self.docker_enabled = False
    if not self.docker_enabled:
        # Use the logger like the rest of the script, so that the note
        # respects the configured log level.
        log.info(
            "Note: `docker info` failed, therefore"
            " docker.USE_DOCKER=true not supported"
        )
273
-
274
def set_config(self, section, option, value):
    """
    Helper function that overwrites the value of an existing option in the
    parsed Qleverfile. If the section or the option does not exist, an
    error is logged and the script is aborted.
    """

    cfg = self.config
    section_known = cfg.has_section(section)
    if not section_known:
        log.error(f"Section [{section}] does not exist in Qleverfile")
        abort_script()
    option_known = cfg.has_option(section, option)
    if not option_known:
        log.error(
            f"Option {option.upper()} does not exist in section "
            f"[{section}] in Qleverfile"
        )
        abort_script()
    cfg[section][option] = value
290
-
291
def get_total_file_size(self, paths):
    """
    Helper function that returns the total size, in GB (1e9 bytes), of all
    files matched by the glob patterns in `paths`.
    """

    num_bytes = sum(
        os.path.getsize(match)
        for pattern in paths
        for match in glob.glob(pattern)
    )
    return num_bytes / 1e9
302
-
303
def alive_check(self, port):
    """
    Helper function that checks if a QLever server is running on the given
    port, by sending a ping request with `curl`.

    Returns `True` if and only if `curl` exits with code 0. Returns `False`
    also when `curl` cannot be executed at all.
    """

    message = "from the qlever script".replace(" ", "%20")
    url = f"http://localhost:{port}/ping?msg={message}"
    # Pass the arguments as a list (no shell), so that neither the `?` in
    # the URL nor the value of `port` is interpreted by a shell.
    try:
        exit_code = subprocess.call(
            ["curl", "-s", url],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # `curl` is missing or not executable. With a shell, this showed
        # up as a non-zero exit code, that is, as "not alive".
        return False
    return exit_code == 0
318
-
319
def show_process_info(self, psutil_process, cmdline_regex, show_heading=True):
    """
    Helper function that logs one table row with PID, user, start time,
    resident memory and command line of `psutil_process`, provided that
    the command line matches `cmdline_regex`.

    Returns `True` if the row was shown. Returns `False` if the command
    line does not match or if the process information cannot be obtained.
    The table heading is logged before the row only if `show_heading` is
    `True` (and the row is shown).
    """

    row_format = "{:<8} {:<8} {:>5} {:>5} {}"
    try:
        info = psutil_process.as_dict(
            attrs=["pid", "username", "create_time", "memory_info", "cmdline"]
        )
        command = " ".join(info["cmdline"])
        if re.search(cmdline_regex, command) is None:
            return False
        owner = info["username"] or ""
        started = datetime.fromtimestamp(info["create_time"])
        # Time of day for processes started today, month and day otherwise.
        time_format = "%H:%M" if started.date() == date.today() else "%b%d"
        started_text = started.strftime(time_format)
        memory = f"{info['memory_info'].rss / 1e9:.0f}G"
        if show_heading:
            log.info(
                row_format.format("PID", "USER", "START", "RSS", "COMMAND")
            )
        log.info(
            row_format.format(
                info["pid"], owner, started_text, memory, command
            )
        )
        return True
    except Exception as e:
        log.debug(f"Could not get process info: {e}")
        return False
360
-
361
def show(self, action_description, only_show):
    """
    Helper function that logs the command line or description of an
    action (in blue). If `only_show` is true, it also logs the explanation
    that the action is shown but not executed.
    """

    log.info(f"{BLUE}{action_description}{NORMAL}")
    log.info("")
    if not only_show:
        return
    explanation = (
        'You called "qlever-old ... show", therefore the '
        "action is only shown, but not executed (omit the "
        '"show" to execute it)'
    )
    log.info(explanation)
375
-
376
- @staticmethod
377
- @track_action_rank
378
def action_setup_config(config_name="default"):
    """
    Setup a pre-filled Qleverfile in the current directory.

    Copies `Qleverfiles/Qleverfile.<config_name>` (relative to the
    directory of this script) to `Qleverfile`. The script is aborted if a
    Qleverfile already exists, if there is no Qleverfile for the given
    config, or if copying fails.
    """

    target = "Qleverfile"

    # Never overwrite an existing Qleverfile.
    if os.path.isfile(target):
        log.error("Qleverfile already exists in current directory")
        log.info("")
        log.info(
            "If you want to create a new Qleverfile using "
            "`qlever-old setup-config`, delete the existing "
            "Qleverfile first"
        )
        abort_script()

    # Locate the Qleverfile for `config_name` next to this script.
    here = os.path.dirname(os.path.realpath(__file__))
    qleverfile_path = os.path.join(
        here, f"Qleverfiles/Qleverfile.{config_name}"
    )
    if not os.path.isfile(qleverfile_path):
        log.error(f'File "{qleverfile_path}" does not exist')
        log.info("")
        abort_script()

    # Copy it to the current directory.
    try:
        shutil.copy(qleverfile_path, target)
    except Exception as e:
        log.error(
            f'Could not copy "{qleverfile_path}"'
            f" to current directory: {e}"
        )
        abort_script()
    log.info(
        f'Created Qleverfile for config "{config_name}"'
        f" in current directory"
    )
    log.info("")

    # Tell the user what to do next.
    if config_name != "default":
        show_available_action_names()
    else:
        log.info(
            "Since this is the default Qleverfile, you need to "
            "edit it before you can continue"
        )
        log.info("")
        log.info(
            "Afterwards, run `qlever` without arguments to see "
            "which actions are available"
        )
    log.info("")
433
-
434
- @track_action_rank
435
def action_show_config(self, only_show=False):
    """
    Action that shows the current configuration including the default
    values for options that are not set explicitly in the Qleverfile.

    The argument `only_show` is accepted like for all actions, but has no
    effect here, because this action only shows something anyway.
    """

    print(
        f"{BLUE}Showing the current configuration, including default"
        f" values for options that are not set explicitly in the"
        f" Qleverfile{NORMAL}"
    )
    for section in self.config.sections():
        print()
        print(f"[{section}]")
        options = self.config[section]
        # `default=0` because a section may contain no options at all.
        max_option_length = max(
            (len(option) for option in options), default=0
        )
        for option in options:
            print(
                f"{option.upper().ljust(max_option_length)} = "
                f"{options[option]}"
            )

    print()
459
-
460
- @track_action_rank
461
def action_get_data(self, only_show=False):
    """
    Action that gets the data according to GET_DATA_CMD.

    Runs the command from `data.GET_DATA_CMD` in a shell and then prints
    the total size of the files matching `index.FILE_NAMES`. If
    `only_show` is true, the command is only shown. If no command is
    configured, an error is logged and nothing is executed.
    """

    # Get the command line. The option may be missing altogether, which
    # must lead to the error message below and not to a `KeyError`.
    get_data_cmd = self.config["data"].get("get_data_cmd")
    if not get_data_cmd:
        # Reset the color at the end, so that the terminal does not stay
        # red after this message.
        log.error(f"{RED}No GET_DATA_CMD specified in Qleverfile{NORMAL}")
        return

    # Show it.
    self.show(get_data_cmd, only_show)
    if only_show:
        return

    # Execute the command line.
    subprocess.run(get_data_cmd, shell=True)
    total_file_size = self.get_total_file_size(
        self.config["index"]["file_names"].split()
    )
    print(f"Total file size: {total_file_size:.1f} GB")
483
-
484
- @track_action_rank
485
def action_index(self, only_show=False):
    """
    Action that builds a QLever index according to the settings in the
    [index] section of the Qleverfile.

    Writes the value of `index.SETTINGS_JSON` to `<name>.settings.json`
    and runs the index builder, either directly or in a Docker container
    (if `docker.USE_DOCKER` is set to one of the yes values). If
    `only_show` is true, the command line is only shown.

    Raises `ActionException` if index files for this name already exist.
    Aborts the script if the index builder binary cannot be run (checked
    only when Docker is not used).
    """

    index_config = self.config["index"]
    # There is no default for docker.USE_DOCKER, so treat a missing value
    # like "false" instead of failing with a `KeyError`.
    use_docker = (
        self.config["docker"].get("use_docker", "false") in self.yes_values
    )

    # Construct the command line based on the config file.
    cmdline = (
        f"{index_config['cat_files']} | {index_config['binary']}"
        f" -F ttl -f -"
        f" -i {self.name}"
        f" -s {self.name}.settings.json"
    )
    only_pso_and_pos = (
        index_config["only_pso_and_pos_permutations"] in self.yes_values
    )
    use_patterns = index_config["use_patterns"] in self.yes_values
    if only_pso_and_pos:
        cmdline += " --only-pso-and-pos-permutations"
    # Patterns are switched off in both cases; add the flag only once.
    if only_pso_and_pos or not use_patterns:
        cmdline += " --no-patterns"
    text_index = index_config["with_text_index"]
    if text_index in ("from_text_records", "from_text_records_and_literals"):
        cmdline += (
            f" -w {self.name}.wordsfile.tsv"
            f" -d {self.name}.docsfile.tsv"
        )
    if text_index in ("from_literals", "from_text_records_and_literals"):
        cmdline += " --text-words-from-literals"
    if "stxxl_memory" in index_config:
        cmdline += f" --stxxl-memory {index_config['stxxl_memory']}"
    cmdline += f" | tee {self.name}.index-log.txt"

    # If the total file size is larger than 10 GB, set ulimit (such that a
    # large number of open files is allowed).
    total_file_size = self.get_total_file_size(
        index_config["file_names"].split()
    )
    if total_file_size > 10:
        cmdline = f"ulimit -Sn 1048576; {cmdline}"

    # If we are using Docker, run the command in a Docker container.
    if use_docker:
        docker_config = self.config["docker"]
        cmdline = (
            f"docker run -it --rm -u $(id -u):$(id -g)"
            f" -v /etc/localtime:/etc/localtime:ro"
            f" -v $(pwd):/index -w /index"
            f" --entrypoint bash"
            f" --name {docker_config['container_indexer']}"
            f" {docker_config['image']}"
            f" -c {shlex.quote(cmdline)}"
        )

    # Show the command line.
    self.show(
        f"Write value of config variable index.SETTINGS_JSON to "
        f"file {self.name}.settings.json\n"
        f"{cmdline}",
        only_show,
    )
    if only_show:
        return

    # When docker.USE_DOCKER=false, check if the binary for building the
    # index exists and works.
    if not use_docker:
        check_binary_cmd = f"{index_config['binary']} --help"
        try:
            subprocess.run(
                check_binary_cmd,
                shell=True,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            log.error(
                f'Running "{check_binary_cmd}" failed ({e}), '
                f"set index.BINARY to a different binary or "
                f"set docker.USE_DOCKER=true"
            )
            abort_script()

    # Check if index files (name.index.*) already exist.
    if glob.glob(f"{self.name}.index.*"):
        raise ActionException(
            f'Index files "{self.name}.index.*" already exist, '
            f"please delete them if you want to rebuild the index"
        )

    # Write settings.json file and run the command.
    with open(f"{self.name}.settings.json", "w") as f:
        f.write(index_config["settings_json"])
    subprocess.run(cmdline, shell=True)
583
-
584
- @track_action_rank
585
def action_remove_index(self, only_show=False):
    """
    Action that removes the index files.

    Deletes all regular files matching the index file patterns for
    `self.name` and logs which files were removed and their total size.
    If `only_show` is true, only the description is shown.
    """

    # Patterns for all the index files (not all of them need to exist).
    base = self.name
    index_fileglobs = (
        f"{base}.index.*",
        f"{base}.patterns.*",
        f"{base}.prefixes",
        f"{base}.meta-data.json",
        f"{base}.vocabulary.*",
    )

    # Show what will be done.
    self.show(f"Remove index files {', '.join(index_fileglobs)}", only_show)
    if only_show:
        return

    # Remove the files, keeping track of names and sizes.
    removed = []
    freed_bytes = 0
    for pattern in index_fileglobs:
        for candidate in filter(os.path.isfile, glob.glob(pattern)):
            freed_bytes += os.path.getsize(candidate)
            os.remove(candidate)
            removed.append(candidate)

    # Report.
    if not removed:
        log.info("None of the listed index files found, nothing removed")
        return
    log.info(
        f"Removed the following index files of total size "
        f"{freed_bytes / 1e9:.1f} GB:"
    )
    log.info("")
    log.info(", ".join(removed))
624
-
625
- @track_action_rank
626
def action_start(self, only_show=False):
    """
    Action that starts the QLever server according to the settings in the
    [server] section of the Qleverfile, either directly (with `nohup`, in
    the background) or in a Docker container. If `only_show` is true, the
    command line is only shown.

    After starting the server, the server log is followed until the
    server answers a ping. Then the index and text descriptions from the
    [data] section (if any) are sent to the server.

    Raises `ActionException` if a QLever server already answers on the
    port or if another process is already listening on the port. Aborts
    the script if the server binary cannot be run (checked only when
    Docker is not used).
    """

    server_config = self.config["server"]
    port = server_config["port"]
    # These options have no defaults, so treat them as optional instead of
    # failing with a `KeyError` when they are missing.
    access_token = server_config.get("access_token", "")
    use_docker = (
        self.config["docker"].get("use_docker", "false") in self.yes_values
    )

    # Construct the command line based on the config file.
    cmdline = (
        f"{server_config['binary']}"
        f" -i {self.name}"
        f" -j {server_config['num_threads']}"
        f" -p {port}"
        f" -m {server_config['memory_for_queries']}"
        f" -c {server_config['cache_max_size']}"
        f" -e {server_config['cache_max_size_single_entry']}"
        f" -k {server_config['cache_max_num_entries']}"
    )
    timeout = server_config.get("timeout", "")
    if timeout:
        cmdline += f" -s {timeout}"
    if access_token:
        # Quote the token, because the command line is run by a shell.
        cmdline += f" -a {shlex.quote(access_token)}"
    if server_config["only_pso_and_pos_permutations"] in self.yes_values:
        cmdline += " --only-pso-and-pos-permutations"
    if server_config["use_patterns"] not in self.yes_values:
        cmdline += " --no-patterns"
    if server_config["with_text_index"] in (
        "from_text_records",
        "from_literals",
        "from_text_records_and_literals",
    ):
        cmdline += " -t"
    cmdline += f" > {self.name}.server-log.txt 2>&1"

    # If we are using Docker, run the command in a docker container.
    if use_docker:
        docker_config = self.config["docker"]
        cmdline = (
            f"docker run -d --restart=unless-stopped"
            f" -u $(id -u):$(id -g)"
            f" -it -v /etc/localtime:/etc/localtime:ro"
            f" -v $(pwd):/index"
            f" -p {port}:{port}"
            f" -w /index"
            f" --entrypoint bash"
            f" --name {docker_config['container_server']}"
            f" --init"
            f" {docker_config['image']}"
            f" -c {shlex.quote(cmdline)}"
        )
    else:
        cmdline = f"nohup {cmdline} &"

    # Show the command line.
    self.show(cmdline, only_show)
    if only_show:
        return

    # When docker.USE_DOCKER=false, check if the binary for starting the
    # server exists and works.
    if not use_docker:
        check_binary_cmd = f"{server_config['binary']} --help"
        try:
            subprocess.run(
                check_binary_cmd,
                shell=True,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            log.error(
                f'Running "{check_binary_cmd}" failed ({e}), '
                f"set server.BINARY to a different binary or "
                f"set docker.USE_DOCKER=true"
            )
            abort_script()

    # Check if a QLever server is already running on this port.
    if self.alive_check(port):
        raise ActionException(
            f"QLever server already running on port {port}"
        )

    # Check if another process is already listening. The port from the
    # Qleverfile is a string, whereas psutil reports integers, so convert
    # before comparing. Only sockets in state LISTEN count, and entries
    # without a local address are skipped.
    if self.net_connections_enabled:
        try:
            port_number = int(port)
        except ValueError:
            port_number = None
        listening_ports = {
            conn.laddr.port
            for conn in psutil.net_connections()
            if conn.laddr and conn.status == psutil.CONN_LISTEN
        }
        if port_number in listening_ports:
            raise ActionException(
                f"Port {port} is already in use by another process"
            )

    # Execute the command line.
    subprocess.run(
        cmdline,
        shell=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # Tail the server log until the server is ready (note that the `exec`
    # is important to make sure that the tail process is killed and not
    # just the bash process).
    log.info(
        f"Follow {self.name}.server-log.txt until the server is ready"
        f" (Ctrl-C stops following the log, but not the server)"
    )
    log.info("")
    tail_cmd = f"exec tail -f {self.name}.server-log.txt"
    tail_proc = subprocess.Popen(tail_cmd, shell=True)
    try:
        while not self.alive_check(port):
            time.sleep(1)

        # Send the descriptions (together with the access token) to the
        # server. The arguments are passed as a list (no shell), so that
        # quotes or `$` in a description or in the token are sent as is.
        for option, parameter in (
            ("index_description", "index-description"),
            ("text_description", "text-description"),
        ):
            if option not in self.config["data"]:
                continue
            desc = self.config["data"][option]
            curl_args = [
                "curl",
                "-Gs",
                f"http://localhost:{port}/api",
                "--data-urlencode",
                f"{parameter}={desc}",
                "--data-urlencode",
                f"access-token={access_token}",
            ]
            log.debug(" ".join(shlex.quote(arg) for arg in curl_args))
            subprocess.run(curl_args, stdout=subprocess.DEVNULL)
    finally:
        # Stop the tail process in any case, also when the wait is
        # interrupted. NOTE: `tail_proc.kill()` does not work.
        tail_proc.terminate()
764
-
765
- @track_action_rank
766
def action_stop(self, only_show=False, fail_if_not_running=True):
    """
    Action that stops the QLever server for the current Qleverfile.

    First tries to stop and remove the Docker container with the
    configured name (if Docker is available). Otherwise kills the first
    process whose command line matches the regex for this index name. If
    `only_show` is true, only the description is shown.

    If neither a container nor a process is found, an `ActionException` is
    raised if `fail_if_not_running` is true, otherwise this is only
    logged. An `ActionException` is also raised if a matching process
    cannot be killed.
    """

    # Show action description.
    docker_container_name = self.config["docker"]["container_server"]
    cmdline_regex = f"ServerMain.* -i [^ ]*{self.name}"
    self.show(
        f'Checking for process matching "{cmdline_regex}" '
        f"and for Docker container with name "
        f'"{docker_container_name}"',
        only_show,
    )
    if only_show:
        return

    # First check if there is docker container running.
    if self.docker_enabled:
        docker_cmd = (
            f"docker stop {docker_container_name} && "
            f"docker rm {docker_container_name}"
        )
        try:
            subprocess.run(
                docker_cmd,
                shell=True,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            log.info(
                f"Docker container with name "
                f'"{docker_container_name}" '
                f"stopped and removed"
            )
            return
        except Exception as e:
            log.debug(f'Error running "{docker_cmd}": {e}')

    # Check if there is a matching process using psutil.
    #
    # NOTE: On MacOS, some of the proc's returned by psutil.process_iter()
    # no longer exist when we try to access them, so we just skip them.
    #
    # NOTE(review): `re.match` only matches at the beginning of the
    # command line, so a server started via a path to the binary is not
    # found here; confirm whether `re.search` is wanted instead.
    for proc in psutil.process_iter():
        try:
            pinfo = proc.as_dict(
                attrs=[
                    "pid",
                    "username",
                    "create_time",
                    "memory_info",
                    "cmdline",
                ]
            )
            cmdline = " ".join(pinfo["cmdline"])
        except Exception as err:
            log.debug(f"Error getting process info: {err}")
            # Without this `continue`, the check below would use the
            # command line of a previous process (or an unbound variable).
            continue
        if not re.match(cmdline_regex, cmdline):
            continue
        log.info(
            f"Found process {pinfo['pid']} from user "
            f"{pinfo['username']} with command line: {cmdline}"
        )
        print()
        try:
            proc.kill()
            log.info(f"Killed process {pinfo['pid']}")
        except Exception as e:
            raise ActionException(
                f"Could not kill process with PID "
                f"{pinfo['pid']}: {e}"
            ) from e
        return

    # No matching process found.
    message = "No matching process or Docker container found"
    if fail_if_not_running:
        raise ActionException(message)
    else:
        log.info(f"{message}, so nothing to stop")
848
-
849
- @track_action_rank
850
def action_restart(self, only_show=False):
    """
    Action that restarts the QLever server: stop the running server (it is
    not an error if none is running), then start a new one. If `only_show`
    is true, only the description is shown.
    """

    # Show action description.
    description = "Stop running server if found, then start new server"
    self.show(description, only_show)

    # Do it.
    if not only_show:
        self.action_stop(only_show=only_show, fail_if_not_running=False)
        log.info("")
        self.action_start()
866
-
867
- @track_action_rank
868
def action_log(self, only_show=False):
    """
    Action that shows the server log, by running `tail -f` on the last 50
    lines of `<name>.server-log.txt`. If `only_show` is true, the command
    is only shown.
    """

    # Show action description.
    log_file = f"{self.name}.server-log.txt"
    log_cmd = f"tail -f -n 50 {log_file}"
    self.show(log_cmd, only_show)
    if only_show:
        return

    # Do it.
    log.info(
        f"Follow {log_file} (Ctrl-C stops following the log,"
        f" but not the server)"
    )
    log.info("")
    subprocess.run(log_cmd, shell=True)
886
-
887
- @track_action_rank
888
def action_status(self, only_show=False):
    """
    Action that shows all QLever processes running on this machine, that
    is, all processes where the command line matches `ServerMain` or
    `IndexBuilderMain`. If `only_show` is true, only the description is
    shown.

    TODO: Also show the QLever-related docker containers.
    """

    # Show action description.
    cmdline_regex = "(ServerMain|IndexBuilderMain)"
    self.show(
        f"{BLUE}Show all processes on this machine where "
        f"the command line matches {cmdline_regex}"
        f" using Python's psutil library",
        only_show,
    )
    if only_show:
        return

    # Show the results as a table; the heading goes with the first row.
    any_shown = False
    for proc in psutil.process_iter():
        if self.show_process_info(
            proc, cmdline_regex, show_heading=not any_shown
        ):
            any_shown = True
    if not any_shown:
        print("No processes found")
918
-
919
- @track_action_rank
920
def action_index_stats(self, only_show=False):
    """
    Action that provides a breakdown of the time needed for building the
    index, based on the log file of the index build.

    The log file `<data.name>.index-log.txt` is scanned once, front to
    back, for the key lines that mark the beginning of each phase. The
    duration of each phase is then logged in seconds, minutes, or hours,
    depending on how long the first phase (parsing the input) took.

    If `only_show` is true, only the heading is logged.

    Raises `ActionException` if the log file cannot be read, if a key line
    does not start with a parsable timestamp, or if the index build has
    not yet completed its first phase.
    """

    log_file_name = self.config["data"]["name"] + ".index-log.txt"
    log.info(
        f"{BLUE}Breakdown of the time for building the index, "
        f"based on the timestamps for key lines in "
        f'"{log_file_name}{NORMAL}"'
    )
    log.info("")
    if only_show:
        return

    # Read the content of `log_file_name` into a list of lines.
    try:
        with open(log_file_name, "r") as f:
            lines = f.readlines()
    except Exception as e:
        raise ActionException(
            f"Could not read log file {log_file_name}: {e}"
        ) from e
    current_line = 0

    # Every key line is expected to start with a timestamp of this form.
    timestamp_regex = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
    timestamp_format = "%Y-%m-%d %H:%M:%S"

    # Helper function that finds the next line matching the given `regex`,
    # starting from `current_line`, and extracts the time. Returns a tuple
    # of the time and the regex match object. If a match is found,
    # `current_line` is updated to the line after the match. Otherwise,
    # `current_line` will be one beyond the last line, unless
    # `line_is_optional` is true, in which case it will be the same as when
    # the function was entered.
    def find_next_line(regex, line_is_optional=False):
        nonlocal current_line
        current_line_backup = current_line
        while current_line < len(lines):
            line = lines[current_line]
            current_line += 1
            regex_match = re.search(regex, line)
            if not regex_match:
                continue
            try:
                timestamp = datetime.strptime(
                    re.match(timestamp_regex, line).group(),
                    timestamp_format,
                )
            except Exception as e:
                raise ActionException(
                    f"Could not parse timestamp of form "
                    f'"{timestamp_regex}" from line '
                    f' "{line.rstrip()}" ({e})'
                ) from e
            return timestamp, regex_match
        # If we get here, we did not find a matching line.
        if line_is_optional:
            current_line = current_line_backup
        return None, None

    # Find the key lines (in the order in which they occur in the log) and
    # extract the time information from them.
    overall_begin, _ = find_next_line(r"INFO:\s*Processing")
    merge_begin, _ = find_next_line(r"INFO:\s*Merging partial vocab")
    convert_begin, _ = find_next_line(r"INFO:\s*Converting triples")
    perm_begin_and_info = []
    while True:
        perm_begin, _ = find_next_line(r"INFO:\s*Creating a pair", True)
        if perm_begin is None:
            break
        # The meta data line is optional; without it, the permutation is
        # shown by its number instead of its name.
        _, perm_info = find_next_line(
            r"INFO:\s*Writing meta data for ([A-Z]+ and [A-Z]+)", True
        )
        perm_begin_and_info.append((perm_begin, perm_info))
    # Converting to global IDs ends when the first permutation begins.
    convert_end = perm_begin_and_info[0][0] if perm_begin_and_info else None
    normal_end, _ = find_next_line(r"INFO:\s*Index build completed")
    text_begin, _ = find_next_line(r"INFO:\s*Adding text index", True)
    text_end, _ = find_next_line(r"INFO:\s*DocsDB done", True)

    # Check whether at least the first phase is done.
    if overall_begin is None:
        raise ActionException("Missing line that index build has started")
    if overall_begin and not merge_begin:
        raise ActionException(
            "According to the log file, the index build "
            "has started, but is still in its first "
            "phase (parsing the input)"
        )

    # Choose the unit for all durations (hours, minutes, or seconds),
    # depending on how long the first phase took.
    unit = "h"
    if merge_begin and overall_begin:
        parse_duration = (merge_begin - overall_begin).total_seconds()
        if parse_duration < 200:
            unit = "s"
        elif parse_duration < 3600:
            unit = "min"
    seconds_per_unit = {"h": 3600, "min": 60, "s": 1}[unit]

    # Helper function that shows the total duration of the given
    # (start, end) pairs; pairs with a missing timestamp are ignored, and
    # nothing is shown if no pair is complete.
    def show_duration(heading, start_end_pairs):
        durations = [
            (end - start).total_seconds()
            for start, end in start_end_pairs
            if start and end
        ]
        if durations:
            diff = sum(durations) / seconds_per_unit
            log.info(f"{heading:<23} : {diff:>5.1f} {unit}")

    show_duration("Parse input", [(overall_begin, merge_begin)])
    show_duration("Build vocabularies", [(merge_begin, convert_begin)])
    show_duration("Convert to global IDs", [(convert_begin, convert_end)])
    for i, (perm_begin, perm_info) in enumerate(perm_begin_and_info):
        # A permutation ends when the next one begins; the last one ends
        # when the index build completes.
        if i + 1 < len(perm_begin_and_info):
            perm_end = perm_begin_and_info[i + 1][0]
        else:
            perm_end = normal_end
        if perm_info:
            perm_info_text = perm_info.group(1).replace(" and ", " & ")
        else:
            perm_info_text = f"#{i + 1}"
        show_duration(
            f"Permutation {perm_info_text}", [(perm_begin, perm_end)]
        )
    show_duration("Text index", [(text_begin, text_end)])
    if text_begin and text_end:
        log.info("")
        show_duration(
            "TOTAL index build time",
            [(overall_begin, normal_end), (text_begin, text_end)],
        )
    elif normal_end:
        log.info("")
        show_duration(
            "TOTAL index build time", [(overall_begin, normal_end)]
        )
1075
-
1076
@track_action_rank
def action_test_query(self, only_show=False):
    """
    Action that sends a simple test SPARQL query to the server.

    The query is sent via `curl` to the URL from `server.url`, asking for
    a tab-separated result. If `only_show` is true, the command is shown
    but not executed.
    """

    # Assemble the pieces of the curl command.
    sparql = "SELECT * WHERE { ?s ?p ?o } LIMIT 10"
    accept_header = "Accept: text/tab-separated-values"
    content_type_header = "Content-Type: application/sparql-query"
    curl_cmd = " ".join(
        [
            f"curl -s {self.config['server']['url']}",
            f'-H "{accept_header}"',
            f'-H "{content_type_header}"',
            f'--data "{sparql}"',
        ]
    )

    # Show the command and, unless we should only show it, run it.
    self.show(curl_cmd, only_show)
    if not only_show:
        subprocess.run(curl_cmd, shell=True)
1101
-
1102
@track_action_rank
def action_ui(self, only_show=False):
    """
    Action that starts the QLever UI with the server according to the
    Qleverfile as backend.

    Removes any container with the configured name, pulls the configured
    image, starts a new container, and configures the UI inside the
    container to use this machine's server as backend. If `only_show` is
    true, the commands are shown but not executed.

    Raises `ActionException` if starting or configuring the container
    fails.
    """

    # Construct commands.
    host_name = socket.getfqdn()
    server_url = f"http://{host_name}:{self.config['server']['port']}"
    ui_config = self.config["ui"]
    docker_rm_cmd = f"docker rm -f {ui_config['container']}"
    docker_pull_cmd = f"docker pull {ui_config['image']}"
    docker_run_cmd = (
        f"docker run -d -p {ui_config['port']}:7000 "
        f"--name {ui_config['container']} "
        f"{ui_config['image']} "
    )
    docker_exec_cmd = (
        f"docker exec -it "
        f"{ui_config['container']} "
        f'bash -c "python manage.py configure '
        f"{ui_config['config']} "
        f'{server_url}"'
    )

    # Show them.
    self.show(
        "\n".join(
            [
                docker_rm_cmd,
                docker_pull_cmd,
                docker_run_cmd,
                docker_exec_cmd,
            ]
        ),
        only_show,
    )
    if only_show:
        return

    # Removing a possibly non-existent container and pulling the image
    # are best effort: the image may only exist locally, or there may be
    # no network.
    for best_effort_cmd in (docker_rm_cmd, docker_pull_cmd):
        subprocess.run(
            best_effort_cmd, shell=True, stdout=subprocess.DEVNULL
        )

    # Starting and configuring the container must succeed. Without
    # `check=True`, `subprocess.run` never raises `CalledProcessError`,
    # and a failure would be reported as success.
    try:
        for required_cmd in (docker_run_cmd, docker_exec_cmd):
            subprocess.run(
                required_cmd,
                shell=True,
                stdout=subprocess.DEVNULL,
                check=True,
            )
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to start the QLever UI {e}") from e
    log.info(
        f"The QLever UI should now be up at "
        f"http://{host_name}:{ui_config['port']}"
    )
    log.info(
        "You can log in as QLever UI admin with username and "
        'password "demo"'
    )
1166
-
1167
@track_action_rank
def action_cache_stats_and_settings(self, only_show=False):
    """
    Action that shows the cache statistics and settings.

    Both are obtained from the server at `server.url` via `curl` and are
    logged as aligned `key : value` lines. If `only_show` is true, the
    commands are shown but not executed.

    Raises `ActionException` if a command fails or if its output cannot be
    processed.
    """

    # Construct the two curl commands.
    server_url = self.config["server"]["url"]
    cache_stats_cmd = (
        f"curl -s {server_url} " f'--data-urlencode "cmd=cache-stats"'
    )
    cache_settings_cmd = (
        f"curl -s {server_url} " f'--data-urlencode "cmd=get-settings"'
    )

    # Show them.
    self.show("\n".join([cache_stats_cmd, cache_settings_cmd]), only_show)
    if only_show:
        return

    # Format a JSON value for display: integers (or strings of digits)
    # with thousands separators, decimal numbers with two digits after the
    # point, everything else unchanged. The `isinstance` checks ensure
    # that `re.match` is only ever called on strings.
    def format_value(value):
        is_str = isinstance(value, str)
        if isinstance(value, int) or (is_str and re.match(r"^\d+$", value)):
            return "{:,}".format(int(value))
        if isinstance(value, float) or (
            is_str and re.match(r"^\d+\.\d+$", value)
        ):
            return "{:.2f}".format(float(value))
        return value

    # Print the key-value pairs of the given JSON object in tabular form.
    def print_json_as_tabular(raw_json):
        key_value_pairs = json.loads(raw_json)
        # `default=0` so that an empty JSON object simply prints nothing.
        max_key_len = max((len(key) for key in key_value_pairs), default=0)
        for key, value in key_value_pairs.items():
            log.info(f"{key.ljust(max_key_len)} : {format_value(value)}")

    # Execute them.
    try:
        cache_stats = subprocess.check_output(cache_stats_cmd, shell=True)
        cache_settings = subprocess.check_output(
            cache_settings_cmd, shell=True
        )
        print_json_as_tabular(cache_stats)
        log.info("")
        print_json_as_tabular(cache_settings)
    except Exception as e:
        raise ActionException(
            f"Failed to get cache stats and settings: {e}"
        ) from e
1213
-
1214
@track_action_rank
def action_clear_cache(self, only_show=False):
    """
    Action that clears the cache (unpinned entries only).

    Sends `cmd=clear-cache` to the server at `server.url` via `curl` and
    then shows the cache statistics and settings. If `only_show` is true,
    the command is shown but not executed.

    Raises `ActionException` if the curl command fails.
    """

    # Construct the curl command.
    clear_cache_cmd = (
        f"curl -s {self.config['server']['url']} "
        f'--data-urlencode "cmd=clear-cache"'
    )

    # Show it.
    self.show(clear_cache_cmd, only_show)
    if only_show:
        return

    # Execute it. With `check=True`, a failing curl (for example, because
    # the server is not reachable) raises instead of being reported as a
    # cleared cache.
    try:
        subprocess.run(
            clear_cache_cmd,
            shell=True,
            stdout=subprocess.DEVNULL,
            check=True,
        )
    except Exception as e:
        raise ActionException(f"Failed to clear the cache: {e}") from e
    print("Cache cleared (only unpinned entries)")
    print()

    # Outside of the `try` above, so that a failure of this action is
    # reported with its own message and not as a failure to clear the
    # cache.
    self.action_cache_stats_and_settings(only_show)
1241
-
1242
@track_action_rank
def action_clear_cache_complete(self, only_show=False):
    """
    Action that clears the cache completely (both pinned and unpinned
    entries).

    Sends `cmd=clear-cache-complete` together with the access token from
    `server.access_token` to the server at `server.url` via `curl` and
    then shows the cache statistics and settings. If `only_show` is true,
    the command is shown but not executed.

    Raises `ActionException` if the curl command fails.
    """

    # Construct the curl command.
    access_token = self.config["server"]["access_token"]
    clear_cache_cmd = (
        f"curl -s {self.config['server']['url']} "
        f'--data-urlencode "cmd=clear-cache-complete" '
        f'--data-urlencode "access-token={access_token}"'
    )

    # Show it.
    self.show(clear_cache_cmd, only_show)
    if only_show:
        return

    # Execute it. With `check=True`, a failing curl (for example, because
    # the server is not reachable) raises instead of being reported as a
    # cleared cache.
    try:
        subprocess.run(
            clear_cache_cmd,
            shell=True,
            stdout=subprocess.DEVNULL,
            check=True,
        )
    except Exception as e:
        raise ActionException(f"Failed to clear the cache: {e}") from e
    print("Cache cleared (both pinned and unpinned entries)")
    print()

    # Outside of the `try` above, so that a failure of this action is
    # reported with its own message and not as a failure to clear the
    # cache.
    self.action_cache_stats_and_settings(only_show)
1272
-
1273
@track_action_rank
def action_autocompletion_warmup(self, only_show=False):
    """
    Action that pins the autocompletion queries from `ui.config` to the
    cache.

    The warmup queries are obtained from the QLever UI at `ui.url`, one
    per line in the form `<description>\\t<query>`. Each query is sent to
    the server with `pinresult=true`; after each successful query, the
    unpinned entries of the cache are cleared. A failing query is logged
    and does not stop the remaining queries. If `only_show` is true, only
    the command for obtaining the queries is shown.

    Raises `ActionException` if the warmup queries cannot be obtained.
    """

    # Construct curl command to obtain the warmup queries.
    #
    # TODO: This is the access token expected by Django in views.py, where
    # it is currently set to dummy value. Find a sound yet simple mechanism
    # for this.
    access_token_ui = "top-secret"
    config_name = self.config["ui"]["config"]
    warmup_url = f"{self.config['ui']['url']}/warmup/{config_name}"
    curl_cmd = f"curl -s {warmup_url}/queries?token={access_token_ui}"

    # Show it.
    self.show(f"Pin warmup queries obtained via: {curl_cmd}", only_show)
    if only_show:
        return

    # Get the queries.
    try:
        queries = subprocess.check_output(curl_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to get warmup queries ({e})") from e

    # Iterate over them and pin them to the cache. Give a more generous
    # timeout (which requires an access token).
    header = "Accept: application/qlever-results+json"
    timeout = "300s"
    access_token = self.config["server"]["access_token"]
    server_url = self.config["server"]["url"]
    # The same for every query, hence constructed only once.
    clear_cache_cmd = (
        f"curl -s {server_url} " f'--data-urlencode "cmd=clear-cache"'
    )
    first = True
    for line in queries.decode("utf-8").split("\n"):
        # Skip empty lines; in particular, the output typically ends with
        # a newline, which yields an empty last line.
        if not line.strip():
            continue
        # Split at the first tab only, so that a tab inside the query does
        # not break the parsing.
        description, tab, query = line.partition("\t")
        if not tab:
            log.error(f"Skipping line without tab separator: {line}")
            continue
        if first:
            first = False
        else:
            log.info("")
        log.info(f"{BOLD}Pin query: {description}{NORMAL}")
        pin_cmd = (
            f"curl -s {server_url}/api "
            f'-H "{header}" '
            f"--data-urlencode query={shlex.quote(query)} "
            f"--data-urlencode timeout={timeout} "
            f"--data-urlencode access-token={access_token} "
            f"--data-urlencode pinresult=true "
            f"--data-urlencode send=0"
        )
        log.info(pin_cmd)
        # Launch query and show the `resultsize` of the JSON response.
        try:
            result = subprocess.check_output(pin_cmd, shell=True)
            json_result = json.loads(result.decode("utf-8"))
            # Check if the JSON has a key "exception".
            if "exception" in json_result:
                raise Exception(json_result["exception"])
            log.info(f"Result size: {json_result['resultsize']:,}")
            log.info(clear_cache_cmd)
            subprocess.check_output(
                clear_cache_cmd, shell=True, stderr=subprocess.DEVNULL
            )
        except Exception as e:
            log.error(f"Query failed: {e}")
1343
-
1344
@track_action_rank
def action_example_queries(self, only_show=False):
    """
    Action that launches the example queries from
    `general.example_queries_url` and shows their processing times and
    result sizes.

    The example queries are expected one per line in the form
    `<description>\\t<query>`. The cache is cleared before each query. At
    most `general.example_queries_limit` queries are launched, and
    `general.example_queries_send` is passed as `send` parameter. At the
    end, the total and average time and result size are shown. If
    `only_show` is true, only a description of the action is shown.

    Raises `ActionException` if the example queries cannot be obtained or
    if there are none.
    """

    # Construct curl command to obtain the example queries.
    config_general = self.config["general"]
    example_queries_url = config_general["example_queries_url"]
    example_queries_limit = int(config_general["example_queries_limit"])
    example_queries_send = int(config_general["example_queries_send"])
    curl_cmd = f"curl -s {example_queries_url}"

    # Show what the action does.
    self.show(
        f"Launch example queries obtained via: {curl_cmd}\n"
        f"SPARQL endpoint: {self.config['server']['url']}\n"
        f"Clearing the cache before each query\n"
        f"Using send={example_queries_send} and limit="
        f"{example_queries_limit}",
        only_show,
    )
    if only_show:
        return

    # Get the queries.
    try:
        queries = subprocess.check_output(curl_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to get example queries ({e})") from e

    # Launch the queries one after the other and for each print: the
    # description, the result size, and the query processing time.
    server_url = self.config["server"]["url"]
    # The same for every query, hence constructed only once.
    clear_cache_cmd = (
        f"curl -s {server_url} " f"--data-urlencode cmd=clear-cache"
    )
    count = 0
    total_time_seconds = 0.0
    total_result_size = 0
    for line in queries.decode("utf-8").splitlines():
        # Skip empty lines and lines that are not of the expected form.
        # Split at the first tab only, so that a tab inside the query does
        # not break the parsing.
        if not line.strip():
            continue
        description, tab, query = line.partition("\t")
        if not tab:
            log.error(f"Skipping line without tab separator: {line}")
            continue

        # Launch query and show the `resultsize` of the JSON response.
        query_cmd = (
            f"curl -s {server_url} "
            f'-H "Accept: application/qlever-results+json" '
            f"--data-urlencode query={shlex.quote(query)} "
            f"--data-urlencode send={example_queries_send}"
        )
        try:
            subprocess.run(
                clear_cache_cmd,
                shell=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            start_time = time.time()
            result = subprocess.check_output(query_cmd, shell=True)
            time_seconds = time.time() - start_time
            json_result = json.loads(result.decode("utf-8"))
            if "exception" in json_result:
                raise Exception(json_result["exception"])
            result_size = int(json_result["resultsize"])
            result_string = f"{result_size:>14,}"
        except Exception as e:
            # A failed query counts with zero time and zero result size.
            time_seconds = 0.0
            result_size = 0
            result_string = (
                f"{RED} FAILED{NORMAL}" f" {RED}({e}){NORMAL}"
            )

        # Print description, time, result in tabular form.
        log.debug(query)
        if len(description) > 60:
            description = description[:57] + "..."
        log.info(
            f"{description:<60} {time_seconds:6.2f} s "
            f"{result_string}"
        )
        count += 1
        total_time_seconds += time_seconds
        total_result_size += result_size
        if count == example_queries_limit:
            break

    # Without any query, there is nothing to summarize (and the average
    # below would be a division by zero).
    if count == 0:
        raise ActionException(
            f"No example queries found at {example_queries_url}"
        )

    # Print total and average time and result size.
    queries_text = "query" if count == 1 else "queries"
    log.info("")
    description = f"TOTAL for {count} {queries_text}"
    log.info(
        f"{description:<60} {total_time_seconds:6.2f} s "
        f"{total_result_size:>14,}"
    )
    description = f"AVERAGE for {count} {queries_text}"
    log.info(
        f"{description:<60} {total_time_seconds / count:6.2f} s "
        f"{round(total_result_size / count):>14,}"
    )
1446
-
1447
@track_action_rank
def action_memory_profile(self, only_show=False):
    """
    Action that prints the memory usage of a process (specified via
    `general.PID`) to a file `<PID>.memory-usage.tsv`.

    As long as the process is running, its resident set size (in GB, with
    one digit after the point) is logged and written to the file once per
    second, as `<timestamp>\\t<size>`. If `only_show` is true, only a
    description of the action is shown.

    Raises `ActionException` if no PID is specified or if no information
    can be obtained for the process with that PID.
    """

    # Show what the action does.
    self.show(
        "Poll memory usage of the given process every second "
        "and print it to a file",
        only_show,
    )
    if only_show:
        return

    # Show process information. The error message uses the PID as given
    # in the config because `pid` is not yet bound if `int` fails.
    if "pid" not in self.config["general"]:
        raise ActionException("PID must be specified via general.PID")
    pid_from_config = self.config["general"]["pid"]
    try:
        pid = int(pid_from_config)
        proc = psutil.Process(pid)
    except Exception as e:
        raise ActionException(
            f"Could not obtain information for process "
            f"with PID {pid_from_config} ({e})"
        ) from e
    self.show_process_info(proc, "", show_heading=True)
    log.info("")

    # As long as the process exists, poll memory usage once per second and
    # print it to the screen as well as to a file `<PID>.memory-usage.tsv`.
    # The `with` makes sure that the file is closed (and hence flushed)
    # also when the polling is interrupted.
    with open(f"{pid}.memory-usage.tsv", "w") as file:
        seconds = 0
        while proc.is_running():
            # Get memory usage and print as <timestamp>\t<size>, with the
            # timestamp in the usual logger format (second precision).
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            try:
                memory_usage_gb = f"{proc.memory_info().rss / 1e9:.1f}"
            except psutil.NoSuchProcess:
                # The process ended after the `is_running` check above.
                break
            log.info(f"{timestamp}\t{memory_usage_gb}")
            file.write(f"{timestamp}\t{memory_usage_gb}\n")
            time.sleep(1)
            seconds += 1
            # Flush once per minute, so that the file can be inspected
            # while the profiling is still running.
            if seconds % 60 == 0:
                file.flush()
1493
-
1494
@track_action_rank
def action_memory_profile_show(self, only_show=False):
    """
    Action that shows a plot of the memory profile produced with action
    `memory_profile`.

    The plot is made with `gnuplot` from the file `<PID>.memory-usage.tsv`,
    with the PID taken from `general.PID`. If `only_show` is true, the
    gnuplot command is shown but not executed.

    Raises `ActionException` if no PID is specified or if gnuplot fails.
    """

    # The PID determines the name of the file to plot.
    general_config = self.config["general"]
    if "pid" not in general_config:
        raise ActionException("PID must be specified via general.PID")
    pid = int(general_config["pid"])

    # Construct gnuplot command from the individual gnuplot statements.
    gnuplot_statements = [
        'set datafile separator "\t"',
        "set xdata time",
        'set timefmt "%Y-%m-%d %H:%M:%S"',
        'set xlabel "Time"',
        'set ylabel "Memory Usage"',
        "set grid",
        f'plot "{pid}.memory-usage.tsv" using 1:2 with lines',
        "pause -1",
    ]
    gnuplot_script = "; ".join(gnuplot_statements)
    gnuplot_cmd = f"gnuplot -e {shlex.quote(gnuplot_script)}"

    # Show it.
    self.show(gnuplot_cmd, only_show)
    if only_show:
        return

    # Launch gnuplot.
    try:
        subprocess.check_output(gnuplot_cmd, shell=True)
    except subprocess.CalledProcessError as e:
        raise ActionException(f"Failed to launch gnuplot ({e})")
1528
-
1529
-
1530
def setup_autocompletion_cmd():
    """
    Return the shell commands for setting up autocompletion for the
    `qlever-old` script.

    The completion candidates are the names of all actions (in the order
    given by the `rank` attribute of the action methods), followed by a
    few frequently used config settings.

    TODO: Currently works for bash only.
    """

    # Names of the `action_...` methods of the `Actions` class, sorted by
    # their rank (see the `@track_action_rank` decorator).
    method_names = [
        name
        for name, _ in inspect.getmembers(
            Actions, predicate=inspect.isfunction
        )
        if name.startswith("action_")
    ]
    method_names.sort(key=lambda name: getattr(Actions, name).rank)

    # Turn the method names into action names as used on the command line.
    candidates = " ".join(
        name.replace("action_", "").replace("_", "-")
        for name in method_names
    )

    # Add config settings to the list of candidates for autocompletion.
    candidates += (
        " docker.USE_DOCKER=true docker.USE_DOCKER=false"
        " index.BINARY=IndexBuilderMain"
        " server.BINARY=ServerMain"
    )

    # Assemble the bash function and register it for `qlever-old`.
    # NOTE(review): the indentation of the two lines inside the bash
    # function is assumed (it has no effect in bash) - confirm.
    script_lines = [
        "_qlever_old_completion() {",
        "  local cur=${COMP_WORDS[COMP_CWORD]}",
        f'  COMPREPLY=( $(compgen -W "{candidates}" -- $cur) )',
        "}",
        "complete -o nosort -F _qlever_old_completion qlever-old",
    ]
    return "\n".join(script_lines) + "\n"
1561
-
1562
-
1563
# Names of all actions as used on the command line: the names of the
# `action_...` attributes of the `Actions` class, without the prefix and
# with hyphens instead of underscores.
action_names = [
    attribute.replace("action_", "").replace("_", "-")
    for attribute in dir(Actions)
    if attribute.startswith("action_")
]
1567
-
1568
-
1569
def main():
    """
    Entry point of the (old) `qlever` script.

    Depending on the command-line arguments, this does one of the
    following:

    - without arguments or with `help`, `--help`, or `-h`: say hello and
      show how to get started;
    - with the single argument `setup-autocompletion`: print the shell
      commands for setting up autocompletion and exit;
    - with `setup-config [<config name>]`: execute that action;
    - otherwise: process the arguments from left to right, where an
      argument of the form `section.key=value` sets a config value and
      every other argument is the name of an action to execute. If the
      last argument is `show`, the actions only show what they would do.

    An initial argument of the form `general.log_level=<level>` sets the
    log level before anything else happens.
    """

    # Get the version.
    try:
        version = pkg_resources.get_distribution("qlever").version
    except Exception as e:
        log.error(f"Could not determine package version: {e}")
        version = "unknown"

    # If the script is called without argument, say hello and provide some
    # help to get started.
    asks_for_help = len(sys.argv) == 2 and sys.argv[1] in (
        "help",
        "--help",
        "-h",
    )
    if len(sys.argv) == 1 or asks_for_help:
        log.info("")
        log.info(
            f"{BOLD}Hello, I am the OLD qlever script"
            f" (version {version}){NORMAL}"
        )
        log.info("")
        if os.path.exists("Qleverfile"):
            log.info(
                'I see that you already have a "Qleverfile" in the '
                "current directory, so you are ready to start"
            )
            log.info("")
            show_available_action_names()
        else:
            log.info(
                "You need a Qleverfile in the current directory, which "
                "you can create as follows:"
            )
            log.info("")
            log.info(f"{BLUE}qlever-old setup-config <config name>{NORMAL}")
            log.info("")
            show_available_config_names()
            log.info("")
            log.info("If you omit <config name>, you get a default Qleverfile")
        log.info("")
        return

    # If there is only argument `setup-autocompletion`, print the result
    # of `setup_autocompletion_cmd()` above and exit.
    if len(sys.argv) == 2 and sys.argv[1] == "setup-autocompletion":
        log.setLevel(logging.ERROR)
        print(setup_autocompletion_cmd())
        sys.exit(0)

    # If the first argument sets the log level, deal with that immediately (so
    # that it goes into effect before we do anything else). Otherwise, set the
    # log level to `NOTSET` (which will signal to the Actions class that it can
    # take the log level from the config file).
    log.setLevel(logging.NOTSET)
    if len(sys.argv) > 1:
        set_log_level_match = re.match(
            r"general\.log_level=(\w+)", sys.argv[1], re.IGNORECASE
        )
        if set_log_level_match:
            log_level = set_log_level_match.group(1).upper()
            # Drop the argument by shifting the argument list (the
            # log-level argument takes the place of the script name).
            sys.argv = sys.argv[1:]
            # Only accept names that `logging` defines as integer levels
            # (the module also has upper-case attributes that are not
            # levels).
            level = getattr(logging, log_level, None)
            if isinstance(level, int):
                log.setLevel(level)
                log.debug("")
                log.debug(f"Log level set to {log_level}")
                log.debug("")
            else:
                log.error(f'Invalid log level: "{log_level}"')
                abort_script()

    # Helper function that executes an action.
    def execute_action(actions, action_name, **kwargs):
        log.info("")
        log.info(f'{BOLD}Action "{action_name}"{NORMAL}')
        log.info("")
        action = f"action_{action_name.replace('-', '_')}"
        try:
            getattr(actions, action)(**kwargs)
        except ActionException as err:
            print(f"{RED}{err}{NORMAL}")
            abort_script()
        except Exception as err:
            line = traceback.extract_tb(err.__traceback__)[-1].lineno
            print(
                f"{RED}Error in Python script (line {line}: {err})"
                f", stack trace follows:{NORMAL}"
            )
            print()
            raise

    # If `setup-config` is among the command-line arguments, it must be the
    # first one, followed by at most one more argument.
    if "setup-config" in sys.argv:
        if sys.argv.index("setup-config") > 1:
            log.setLevel(logging.ERROR)
            log.error("Action `setup-config` must be the first argument")
            abort_script()
        if len(sys.argv) > 3:
            log.setLevel(logging.ERROR)
            log.error(
                "Action `setup-config` must be followed by at most one "
                "argument (the name of the desired configuration)"
            )
            abort_script()
        log.setLevel(logging.INFO)
        config_name = sys.argv[2] if len(sys.argv) == 3 else "default"
        # This action is called on the class, not on an instance.
        execute_action(Actions, "setup-config", config_name=config_name)
        return

    # Initialize actions.
    actions = Actions()

    # Check if the last argument is "show" (if yes, remember it and remove it).
    only_show = len(sys.argv) > 1 and sys.argv[-1] == "show"
    if only_show:
        sys.argv = sys.argv[:-1]

    # Execute the actions specified on the command line.
    for action_name in sys.argv[1:]:
        # If the action is of the form section.key=value, set the config value.
        set_config_match = re.match(r"(\w+)\.(\w+)=(.*)", action_name)
        if set_config_match:
            section, option, value = set_config_match.groups()
            log.info(f"Setting config value: {section}.{option}={value}")
            try:
                actions.set_config(section, option, value)
            except ValueError as err:
                log.error(err)
                abort_script()
            continue
        # If the action name does not exist, exit.
        if action_name not in action_names:
            log.error(
                f'Action "{action_name}" does not exist, available '
                f"actions are: {', '.join(action_names)}"
            )
            abort_script()
        # Execute the action (or only show what would be executed).
        execute_action(actions, action_name, only_show=only_show)
    log.info("")


if __name__ == "__main__":
    main()