qlever 0.5.2__py3-none-any.whl → 0.5.4__py3-none-any.whl
This diff shows the content of publicly released package versions as they appear in their public registries; it is provided for informational purposes only.
- qlever/Qleverfiles/Qleverfile.dblp +4 -4
- qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
- qlever/Qleverfiles/Qleverfile.default +35 -31
- qlever/Qleverfiles/Qleverfile.dnb +3 -3
- qlever/Qleverfiles/Qleverfile.imdb +5 -5
- qlever/Qleverfiles/Qleverfile.pubchem +1 -1
- qlever/Qleverfiles/Qleverfile.uniprot +16 -15
- qlever/Qleverfiles/Qleverfile.wikidata +14 -11
- qlever/Qleverfiles/Qleverfile.wikipathways +6 -6
- qlever/commands/example_queries.py +4 -2
- qlever/commands/ui.py +6 -1
- qlever/qlever_old.py +1 -1
- qlever/qleverfile.py +2 -2
- qlever/util.py +20 -0
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/METADATA +1 -1
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/RECORD +20 -19
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/WHEEL +1 -1
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/LICENSE +0 -0
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/top_level.txt +0 -0

qlever/Qleverfiles/Qleverfile.dblp CHANGED

@@ -1,13 +1,13 @@
 # Qleverfile for DBLP, use with https://github.com/ad-freiburg/qlever-control
 #
 # qlever get-data # takes ~3 mins (downloads .ttl.gz file of size ~3 GB)
-# qlever index # takes ~
+# qlever index # takes ~4 mins (on an AMD Ryzen 9 5900X)
 # qlever start # takes a few seconds
 
 [data]
 NAME = dblp
-GET_DATA_URL = https://dblp.org/rdf
-GET_DATA_CMD = curl -LRC - O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
+GET_DATA_URL = https://dblp.org/rdf/dblp.ttl.gz
+GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
 VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
 DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL} (version ${VERSION})
 
@@ -19,7 +19,7 @@ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 10000
 [server]
 PORT = 7015
 ACCESS_TOKEN = ${data:NAME}
-MEMORY_FOR_QUERIES =
+MEMORY_FOR_QUERIES = 10G
 CACHE_MAX_SIZE = 5G
 
 [runtime]

qlever/Qleverfiles/Qleverfile.dbpedia ADDED

@@ -0,0 +1,30 @@
+# Qleverfile for DBpedia, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # ~14 GB, ~850 M triples (as of 30.07.2024)
+# qlever index # ~20 min (on an AMD Ryzen 9 5900X)
+# qlever start # ~3 sec
+
+[data]
+NAME = dbpedia
+DATABUS_URL = https://databus.dbpedia.org/dbpedia/collections/latest-core
+GET_DATA_CMD = curl -X POST -H "Accept: text/csv" --data-urlencode "query=$$(curl -s -H "Accept:text/sparql" https://databus.dbpedia.org/dbpedia/collections/latest-core)" https://databus.dbpedia.org/sparql | tail -n+2 | sed 's/\r$$//' | sed 's/"//g' | while read -r file; do wget -P rdf-input $$file; done
+DESCRIPTION = RDF data from ${DATABUS_URL}
+
+[index]
+INPUT_FILES = rdf-input/*
+CAT_INPUT_FILES = (cat rdf-input/*.nt; lbzcat -n2 rdf-input/*.bzip2 rdf-input/*.bz2)
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+WITH_TEXT_INDEX = false
+
+[server]
+PORT = 7012
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dbpedia
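
The GET_DATA_CMD above is a dense pipeline: it fetches the collection's defining SPARQL query from the Databus collection URL, posts that query to the Databus SPARQL endpoint to obtain a CSV of download URLs, and then fetches each file into rdf-input/. For readers who find the shell one-liner hard to follow, here is a rough Python sketch of the same flow (illustrative only, not part of the package; it assumes the endpoints respond as the shell command implies):

# Illustrative Python equivalent of the Qleverfile.dbpedia GET_DATA_CMD.
import requests

COLLECTION = "https://databus.dbpedia.org/dbpedia/collections/latest-core"
SPARQL_ENDPOINT = "https://databus.dbpedia.org/sparql"

# Asking the collection URL for text/sparql returns its defining SPARQL query.
query = requests.get(COLLECTION, headers={"Accept": "text/sparql"}).text

# Posting that query to the endpoint yields one download URL per CSV row
# (the first row is the CSV header, hence the [1:]).
rows = requests.post(SPARQL_ENDPOINT, data={"query": query},
                     headers={"Accept": "text/csv"}).text.splitlines()[1:]

for url in (row.strip().strip('"') for row in rows):
    print("download:", url)  # the Qleverfile uses `wget -P rdf-input` here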

qlever/Qleverfiles/Qleverfile.default CHANGED

@@ -1,47 +1,51 @@
-#
+# Default Qleverfile, use with https://github.com/ad-freiburg/qlever-control
 #
-#
-#
-#
-#
-# pre-filled Qleverfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
-# Qleverfiles first to get some inspiration. Or execute `qlever setup-config
-# <config name>` with a config name of your choice.
+# If you have never seen a Qleverfile before, we recommend that you first look
+# at the example Qleverfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
+# src/qlever/Qleverfiles . Or execute `qlever setup-config <dataset>` on the
+# command line to obtain the example Qleverfiles for <dataset>.
 
 # As a minimum, each dataset needs a name. If you want `qlever get-data` to do
-# something meaningful, you need to define GET_DATA_CMD.
-#
-#
+# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to
+# generate (or download or copy from somewhere) the input files yourself. Each
+# dataset should have a short DESCRIPTION, ideally with a date.
 [data]
-NAME
-
-
-# TEXT_DESCRIPTION =
+NAME =
+GET_DATA_CMD =
+DESCRIPTION =
 
-#
-#
-#
+# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all
+# input files. CAT_INPUT_FILES should write a concatenation of all input files
+# to stdout. For example, if your input files are gzipped, you can write `zcat
+# ${INPUT_FILES}`. Regarding SETTINGS_JSON, look at the other Qleverfiles for
+# examples. Several batches of size `num-triples-per-batch` are kept in RAM at
+# the same time; increasing this, increases the memory usage but speeds up the
+# loading process.
 [index]
-
-
-
+INPUT_FILES = *.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
 
-#
-#
-#
+# The server listens on PORT. If you want to send privileged commands to the
+# server, you need to specify an ACCESS_TOKEN, which you then have to set via a
+# URL parameter `access_token`. It should not be easily guessable, unless you
+# don't mind others to get privileged access to your server.
 [server]
-PORT
-
+PORT =
+ACCESS_TOKEN =
 
-#
-#
-#
+# Use SYSTEM = docker to run QLever inside a docker container; the Docker image
+# will be downloaded automatically. Use SYSTEM = native to use self-compiled
+# binaries `IndexBuilderMain` and `ServerMain` (which should be in you PATH).
 [runtime]
-SYSTEM =
+SYSTEM = docker
 IMAGE = docker.io/adfreiburg/qlever:latest
 
+# UI_PORT specifies the port of the QLever UI web app, when you run `qlever ui`.
 # The UI_CONFIG must be one of the slugs from http://qlever.cs.uni-freiburg.de
 # (see the dropdown menu on the top right, the slug is the last part of the URL).
-#
+# It determines the example queries and which SPARQL queries are launched to
+# obtain suggestions as you type a query.
 [ui]
+UI_PORT = 8176
 UI_CONFIG = default
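
The `${data:NAME}`-style references and the doubled `$$` used throughout these Qleverfiles match the syntax of Python's `configparser.ExtendedInterpolation`, where `${section:key}` is substituted when a value is read and `$$` escapes a literal `$` (which is why shell command substitutions are written as `$$(...)`). A minimal sketch of that behavior, on the assumption that the qlever script parses Qleverfiles this way:

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_string("""
[data]
NAME = dblp
VERSION = $$(date -r dblp.ttl.gz +%d.%m.%Y)

[server]
ACCESS_TOKEN = ${data:NAME}
""")

print(config["server"]["ACCESS_TOKEN"])  # -> dblp
print(config["data"]["VERSION"])         # -> $(date -r dblp.ttl.gz +%d.%m.%Y)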

qlever/Qleverfiles/Qleverfile.dnb CHANGED

@@ -17,14 +17,14 @@
 [data]
 NAME = dnb
 BASE_URL = https://data.dnb.de/opendata
-GET_DATA_CMD = curl -L -C - --remote-name-all ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz 2>&1 | tee ${data:NAME}.getdata-log.txt
+GET_DATA_CMD = curl -L -C - --remote-name-all --remote-time ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz 2>&1 | tee ${data:NAME}.getdata-log.txt
 VERSION = $$(date -r dnb-all_lds.nt.gz +%d.%m.%Y || echo "NO_DATE")
 DESCRIPTION = DNB data from ${BASE_URL} (authoritities-gnd_lds, dnb_all_lds, dnb-all_ldsprov, zdb_lds), version ${VERSION}
 
 [index]
 INPUT_FILES = *.nt.gz
-CAT_INPUT_FILES = zcat ${INPUT_FILES}
-SETTINGS_JSON = { "ascii-prefixes-only":
+CAT_INPUT_FILES = zcat ${INPUT_FILES} | sed '/"\$$R0"/d;/"0\.03013\$$D"/d'
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 
 [server]
 PORT = 7035

qlever/Qleverfiles/Qleverfile.imdb CHANGED

@@ -9,8 +9,8 @@
 [data]
 NAME = imdb
 IMDB_DATA_URL = https://datasets.imdbws.com
-GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/>
-GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> ."
+GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
 GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
 GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
 DESCRIPTION = RDF data derived from ${IMDB_DATA_URL}

@@ -18,17 +18,17 @@ TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
 
 [index]
 INPUT_FILES = ${data:NAME}.ttl
-CAT_INPUT_FILES = cat ${
+CAT_INPUT_FILES = cat ${INPUT_FILES}
 SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 TEXT_INDEX = from_literals
 
 [server]
 PORT = 7029
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 5G
 
 [runtime]
-SYSTEM =
+SYSTEM = native
 IMAGE = docker.io/adfreiburg/qlever:latest
 
 [ui]

qlever/Qleverfiles/Qleverfile.pubchem CHANGED

@@ -48,7 +48,7 @@ DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folde
 
 [index]
 INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
-CAT_INPUT_FILES = zcat ${
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 STXXL_MEMORY = 10G
 

qlever/Qleverfiles/Qleverfile.uniprot CHANGED

@@ -1,37 +1,38 @@
 # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data #
-# qlever index # takes ~
-# qlever start # starts the server (takes a few
+# qlever get-data # takes ~ 30 hours and ~ 2 TB of disk (for the NT files)
+# qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (takes a few seconds)
 #
 # Install packages: sudo apt install -y libxml2-utils parallel xz-utils pv
 # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
 #
-# Set DATE to the date of the latest release
-#
-#
-# the uniprot.index.???.meta files can be on HDD.
+# Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
+# during build, ~ 3 TB after build). The uniprot.index.???.meta files can be on
+# HDD without significant performance loss (when running the server).
 
 [data]
 NAME = uniprot
-DATE = 2024-
+DATE = 2024-05-29
 DOWNLOAD_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
 GET_RDFXML_CMD = mkdir -p rdf.${DATE} && curl -s ${DOWNLOAD_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" - | while read URL; do wget --no-verbose -P rdf.${DATE} $$URL 2>&1 | tee -a uniprot.download-log; done
-RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null |
+RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | gzip -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
 GET_DATA_CMD = rdfxml --help && date > ${NAME}.get-data.begin-date && ${GET_RDFXML_CMD} && ${RDFXML2NT_CMD} && date > ${NAME}.get-data.end-date
 DESCRIPTION = Complete UniProt data from ${DOWNLOAD_URL}, version ${DATE}
 
 [index]
-INPUT_FILES = nt.${data:DATE}/*.nt.
-CAT_INPUT_FILES = parallel --tmpdir . -j 4 '
+INPUT_FILES = nt.${data:DATE}/*.nt.gz
+CAT_INPUT_FILES = parallel --tmpdir . -j 4 'zcat -f {}' ::: ${INPUT_FILES} | pv -q -B 5G
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
 STXXL_MEMORY = 60G
 
 [server]
-PORT
-ACCESS_TOKEN
-MEMORY_FOR_QUERIES
-CACHE_MAX_SIZE
+PORT = 7018
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+TIMEOUT = 300s
 
 [runtime]
 SYSTEM = docker

qlever/Qleverfiles/Qleverfile.wikidata CHANGED

@@ -1,24 +1,27 @@
-# Qleverfile for Wikidata, use with
+# Qleverfile for Wikidata, use with qlever script (`pip install qlever`)
 #
-# qlever get-data
-# qlever index
-# qlever start
+# qlever get-data # downloads two .bz2 files of total size ~100 GB
+# qlever index # takes ~4.5 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (takes a few seconds)
+
+[DEFAULT]
+NAME = wikidata
 
 [data]
-NAME = wikidata
 GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
-GET_DATA_CMD = curl -
-
+GET_DATA_CMD = curl -LRC - --remote-name-all ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1
+VERSION = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${VERSION})
 
 [index]
-INPUT_FILES = latest-
-CAT_INPUT_FILES =
-SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only":
+INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2
+CAT_INPUT_FILES = lbzcat -n 4 -f ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G
 
 [server]
 PORT = 7001
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 20G
 CACHE_MAX_SIZE = 10G
 
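
Moving `NAME = wikidata` from `[data]` into a new `[DEFAULT]` section relies on configparser's fallback rule: keys defined in `[DEFAULT]` are visible from every section, so references like `${data:NAME}` still resolve even though `[data]` no longer defines NAME itself. A small sketch (again assuming configparser semantics for Qleverfiles):

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_string("""
[DEFAULT]
NAME = wikidata

[data]
GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities

[server]
ACCESS_TOKEN = ${data:NAME}
""")

# [data] defines no NAME of its own; the lookup falls back to [DEFAULT].
print(config["data"]["NAME"])            # -> wikidata
print(config["server"]["ACCESS_TOKEN"])  # -> wikidata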

qlever/Qleverfiles/Qleverfile.wikipathways CHANGED

@@ -1,16 +1,16 @@
 # Qleverfile for WikiPathways, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data #
+# qlever get-data # takes ~3 seconds, generates TTL of size ~600 MB
 # qlever index # takes ~20 seconds and little RAM (on an AMD Ryzen 9 5900X)
-# qlever start #
+# qlever start # instant
 #
 # Limitations: does not include the ontologies (WP, GPML, ChEBI, PW, CLO, ...) yet
 
 [data]
 NAME = wikipathways
-RELEASE =
+RELEASE = 20240810
 GET_DATA_URL = https://data.wikipathways.org/${RELEASE}/rdf
-GET_DATA_CMD = wget -O wikipathways-rdf-void.ttl ${GET_DATA_URL}/wikipathways
+GET_DATA_CMD = wget -O wikipathways-rdf-void.ttl ${GET_DATA_URL}/wikipathways-rdf-void.ttl && \
     wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-wp.zip && \
     unzip -qq -c wikipathways-${RELEASE}-rdf-wp.zip -x wp/wpOntology.ttl > wikipathways-rdf-wp.ttl && \
     wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-gpml.zip &&

@@ -23,13 +23,13 @@ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
 
 [index]
 INPUT_FILES = ${data:NAME}.prefix-definitions wikipathways-rdf-wp.ttl wikipathways-rdf-gpml.ttl wikipathways-rdf-void.ttl wikipathways-rdf-authors.ttl
-CAT_INPUT_FILES = cat ${
+CAT_INPUT_FILES = cat ${INPUT_FILES}
 SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
 TEXT_INDEX = from_literals
 
 [server]
 PORT = 7040
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 5G
 
 [runtime]

qlever/commands/example_queries.py CHANGED

@@ -227,6 +227,10 @@ class ExampleQueriesCommand(QleverCommand):
             except Exception as e:
                 error_msg = str(e)
 
+            # Remove the result file (unless in debug mode).
+            if args.log_level != "DEBUG":
+                Path(result_file).unlink(missing_ok=True)
+
             # Print description, time, result in tabular form.
             if (len(description) > 60):
                 description = description[:57] + "..."

@@ -262,6 +266,4 @@ class ExampleQueriesCommand(QleverCommand):
             log.info(colored("All queries failed", "red"))
 
         # Return success (has nothing to do with how many queries failed).
-        if args.log_level != "DEBUG":
-            Path(result_file).unlink(missing_ok=True)
         return True
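
The cleanup that moved into the per-query loop uses `Path.unlink(missing_ok=True)`, which since Python 3.8 is a no-op when the file does not exist; without the flag, a missing result file would raise FileNotFoundError. A one-line illustration with a hypothetical file name:

from pathlib import Path

result_file = Path("query.result.tmp")  # hypothetical name, for illustration only
result_file.unlink(missing_ok=True)     # silently does nothing if the file is absent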
qlever/commands/ui.py CHANGED

@@ -5,6 +5,7 @@ import subprocess
 from qlever.command import QleverCommand
 from qlever.containerize import Containerize
 from qlever.log import log
+from qlever.util import is_port_used
 
 
 class UiCommand(QleverCommand):

@@ -53,6 +54,10 @@ class UiCommand(QleverCommand):
         Containerize.stop_and_remove_container(
             container_system, args.ui_container)
 
+        # Check if the UI port is already being used.
+        if is_port_used(args.ui_port):
+            log.warning(f"It looks like the specified port for the UI ({args.ui_port}) is already in use. You can set another port in the Qleverfile in the [ui] section with the UI_PORT variable.")
+
         # Try to start the QLever UI.
         try:
             subprocess.run(pull_cmd, shell=True, stdout=subprocess.DEVNULL)

@@ -65,5 +70,5 @@ class UiCommand(QleverCommand):
         # Success.
         log.info(f"The QLever UI should now be up at {ui_url} ..."
                  f"You can log in as QLever UI admin with username and "
-                 f"
+                 f"password \"demo\"")
         return True
qlever/qlever_old.py CHANGED

@@ -985,7 +985,7 @@ class Actions:
         log.info(f"The QLever UI should now be up at "
                  f"http://{host_name}:{self.config['ui']['port']}")
         log.info("You can log in as QLever UI admin with username and "
-                 "
+                 "password \"demo\"")
 
     @track_action_rank
     def action_cache_stats_and_settings(self, only_show=False):
qlever/qleverfile.py CHANGED

@@ -51,7 +51,7 @@ class Qleverfile:
             help="A concise description of the dataset")
         data_args["text_description"] = arg(
             "--text-description", type=str, default=None,
-            help="A
+            help="A concise description of the additional text data"
                  " if any")
 
         index_args["input_files"] = arg(

@@ -173,7 +173,7 @@ class Qleverfile:
             help="The name of the container used by `qlever start`")
 
         ui_args["ui_port"] = arg(
-            "--
+            "--ui-port", type=int, default=8176,
            help="The port of the Qlever UI when running `qlever ui`")
         ui_args["ui_config"] = arg(
             "--ui-config", type=str, default="default",
qlever/util.py CHANGED

@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import errno
 import re
 import secrets
+import socket
 import shlex
 import shutil
 import string

@@ -180,3 +182,21 @@ def get_random_string(length: int) -> str:
     """
     characters = string.ascii_letters + string.digits
     return "".join(secrets.choice(characters) for _ in range(length))
+
+
+def is_port_used(port: int) -> bool:
+    """
+    Try to bind to the port on all interfaces to check if the port is already in use.
+    If the port is already in use, `socket.bind` will raise an `OSError` with errno EADDRINUSE.
+    """
+    try:
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        # Ensure that the port is not blocked after the check.
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        sock.bind(('', port))
+        sock.close()
+        return False
+    except OSError as err:
+        if err.errno != errno.EADDRINUSE:
+            log.warning(f"Failed to determine if port is used: {err}")
+        return True
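
The new `is_port_used` helper probes availability by binding a TCP socket on all interfaces; setting `SO_REUSEADDR` keeps the probe itself from leaving the port unusable right after the check. Note that this is a point-in-time check: the port can still be claimed by another process between the probe and the actual bind. A minimal usage sketch, mirroring the call added in qlever/commands/ui.py:

from qlever.util import is_port_used

ui_port = 8176  # the default UI_PORT
if is_port_used(ui_port):
    print(f"Port {ui_port} is already in use; set a different UI_PORT "
          f"in the [ui] section of the Qleverfile.")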

{qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/RECORD CHANGED

@@ -5,32 +5,33 @@ qlever/config.py,sha256=qYPy-MQ7BwGrvKSazQWhs0lnlOFqm-d47mpZhc3fptc,10254
 qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
 qlever/log.py,sha256=2O_RvFymnu_dB10ErBTAOsI8bgjORfdD0tE3USH-siM,1315
 qlever/qlever_main.py,sha256=tA_xqOs_FjvqlDIvKTprwuysfTwzsUjE7at26gRhCVA,2336
-qlever/qlever_old.py,sha256=
-qlever/qleverfile.py,sha256=
-qlever/util.py,sha256=
-qlever/Qleverfiles/Qleverfile.dblp,sha256=
+qlever/qlever_old.py,sha256=X-JxmepFKYeFgSLLp0TRDNqXSxDwIbc8_0Xstiems8c,62026
+qlever/qleverfile.py,sha256=NjY3SFyRTm_igI8Rv87TOvZBiLwn1TgHmRh1jVA51DM,12935
+qlever/util.py,sha256=20NQJquSk_mSqvlK4k0OrSBqWrxKs5SgVshm5ucus5o,7847
+qlever/Qleverfiles/Qleverfile.dblp,sha256=Y6BqAG1GZg-OmEs0HM00yAQuY2TGnSzsOO1LLmGVn2Y,994
 qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
-qlever/Qleverfiles/Qleverfile.
-qlever/Qleverfiles/Qleverfile.
+qlever/Qleverfiles/Qleverfile.dbpedia,sha256=aaNZZayE-zVePGSwPzXemkX__Ns8-kP_E7DNNKZPnqg,1160
+qlever/Qleverfiles/Qleverfile.default,sha256=UWDy9ohhbKjE4n5xEYqbOPZOiYDFzsnJ_aNNBMCOlaI,2402
+qlever/Qleverfiles/Qleverfile.dnb,sha256=43w_CVi00yf7FHdDvBtHHQR3yU1d-JCNnD_uxYZJOvk,1803
 qlever/Qleverfiles/Qleverfile.fbeasy,sha256=jeztW4gFpWL_w1nCH5qGHeZyZv2lz_kG6f1G3r3DkJ4,974
 qlever/Qleverfiles/Qleverfile.freebase,sha256=k6PqYrtHTBr0EydObm1Hg9QWyAAM9fXkdcjhReDg0fM,1035
-qlever/Qleverfiles/Qleverfile.imdb,sha256=
+qlever/Qleverfiles/Qleverfile.imdb,sha256=8F6tpEi0uXsMm6lofwjNkLq1X9IEUSoHK8YK_5YCf_g,1638
 qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=Y_yUxdpWpUOSDo_zmVKj3caa8X-Wv-1KmacBMks_4QA,2650
 qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
 qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
 qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
-qlever/Qleverfiles/Qleverfile.pubchem,sha256=
+qlever/Qleverfiles/Qleverfile.pubchem,sha256=a6EAP8mOfC0V6NnVCLarvRagyoQSQDItR7AnrZqL9iE,3899
 qlever/Qleverfiles/Qleverfile.scientists,sha256=9eZ2c6P9a3E3VHa3RR7LdOQbF4k3oyyrn56Z3u4LZYs,1164
-qlever/Qleverfiles/Qleverfile.uniprot,sha256=
+qlever/Qleverfiles/Qleverfile.uniprot,sha256=9kAKseomdUnIt7EAZge39g1MTuaLVaSW9JYLHzIMolM,2338
 qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
-qlever/Qleverfiles/Qleverfile.wikidata,sha256=
-qlever/Qleverfiles/Qleverfile.wikipathways,sha256=
+qlever/Qleverfiles/Qleverfile.wikidata,sha256=vDkTY3mPSx2C8MvFWfB72zZoc4d-TMJSw3f_-FqnEqs,1275
+qlever/Qleverfiles/Qleverfile.wikipathways,sha256=UFEVLrtOBiSQfibBN9xc2wDXrnWcnx5f8PY9khcE6bc,1983
 qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
 qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 qlever/commands/add_text_index.py,sha256=dkqYtwgOhgnXiei_eyhBWYCtdAiQUEmjWoa3JMlMb4c,3641
 qlever/commands/cache_stats.py,sha256=6JjueQstAqc8dNfgY8TP2EitFMxdUvCwrcyd7KUEb2o,4157
 qlever/commands/clear_cache.py,sha256=AnE1MOoj1ZexxrRT8FGeBLlv8rtQIVV4DP8VBn5-X-s,2843
-qlever/commands/example_queries.py,sha256=
+qlever/commands/example_queries.py,sha256=L32gVKdvb7MwZNqphF1K_gK6WARiwno6oiNDBgZuj1Y,12396
 qlever/commands/get_data.py,sha256=f9kjZI3TKad6JHSuXWNkeoajmW8h0Sx8ShvjauDCtNo,1412
 qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
 qlever/commands/index_stats.py,sha256=_BiUNBhmbYd9RPxrlm4HF0oENO6JmqnRiAkwkyOdN4U,11722

@@ -40,11 +41,11 @@ qlever/commands/setup_config.py,sha256=6T0rXrIdejKMKhDbOMEMBKyMF_hAqO5nJaRFb57QP
 qlever/commands/start.py,sha256=2rOtk3NmhEs28D5csL_a1BdjSWU9VkcH6AqYT0vdww0,9285
 qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,1631
 qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
-qlever/commands/ui.py,sha256=
+qlever/commands/ui.py,sha256=b7g7Mp6ZWevn8f1kwFr-WR4ZWMq42KEV4cGl2QS7M1E,2828
 qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
+qlever-0.5.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+qlever-0.5.4.dist-info/METADATA,sha256=sL8oC3NhgnRmUMEMIqfqozI_RTcHkaFYUWeailPrB8g,4146
+qlever-0.5.4.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
+qlever-0.5.4.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
+qlever-0.5.4.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
+qlever-0.5.4.dist-info/RECORD,,

{qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/LICENSE: file without changes
{qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/entry_points.txt: file without changes
{qlever-0.5.2.dist-info → qlever-0.5.4.dist-info}/top_level.txt: file without changes