qlever 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of qlever might be problematic.
- qlever/Qleverfiles/Qleverfile.dblp +34 -0
- qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
- qlever/Qleverfiles/Qleverfile.default +47 -0
- qlever/Qleverfiles/Qleverfile.dnb +37 -0
- qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
- qlever/Qleverfiles/Qleverfile.freebase +28 -0
- qlever/Qleverfiles/Qleverfile.imdb +35 -0
- qlever/Qleverfiles/Qleverfile.olympics +31 -0
- qlever/Qleverfiles/Qleverfile.osm-country +42 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +36 -0
- qlever/Qleverfiles/Qleverfile.pubchem +66 -0
- qlever/Qleverfiles/Qleverfile.scientists +39 -0
- qlever/Qleverfiles/Qleverfile.uniprot +41 -0
- qlever/Qleverfiles/Qleverfile.vvz +31 -0
- qlever/Qleverfiles/Qleverfile.wikidata +30 -0
- qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
- qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
- qlever/config.py +3 -0
- qlever/util.py +7 -7
- {qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/METADATA +2 -1
- qlever-0.4.1.dist-info/RECORD +47 -0
- qlever-0.4.0.dist-info/RECORD +0 -30
- {qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/LICENSE +0 -0
- {qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/WHEEL +0 -0
- {qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/entry_points.txt +0 -0
- {qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/top_level.txt +0 -0
qlever/Qleverfiles/Qleverfile.dblp
ADDED
@@ -0,0 +1,34 @@
+# Qleverfile for DBLP, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index     # takes ~30 minutes and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (takes around 2 minutes)
+#
+# Also builds a text index for fast keyword search in literals. Without that
+# (WITH_TEXT_INDEX = false), the index build takes only ~10 minutes.
+
+[data]
+NAME = dblp
+GET_DATA_URL = https://dblp.org/rdf/${index:INPUT_FILES}
+GET_DATA_CMD = curl -LO -C - ${GET_DATA_URL}
+DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL}
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+[index]
+INPUT_FILES = dblp.ttl.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7015
+ACCESS_TOKEN = ${data:NAME}_7643543846
+MEMORY_FOR_QUERIES = 30G
+CACHE_MAX_SIZE = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dblp
qlever/Qleverfiles/Qleverfile.dblp-plus
ADDED
@@ -0,0 +1,33 @@
+# Qleverfile for DBLP Plus, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index     # takes ~30 minutes and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+#
+# Also builds a text index for fast keyword search in literals.
+
+[data]
+NAME = dblp-plus
+GET_DATA_CMD = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
+INDEX_DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+[index]
+INPUT_FILES = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
+CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7027
+ACCESS_TOKEN = ${data:NAME}_169238202
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 2G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dblp-plus
qlever/Qleverfiles/Qleverfile.default
ADDED
@@ -0,0 +1,47 @@
+# Automatically created by the "qlever" script
+#
+# Modify as you see fit. Beware that some of the values below are executed as
+# commands by the script.
+#
+# If you have never seen a Qleverfile before, we recommend that you look at the
+# pre-filled Qleverfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
+# Qleverfiles first to get some inspiration. Or execute `qlever setup-config
+# <config name>` with a config name of your choice.
+
+# As a minimum, each dataset needs a name. If you want `qlever get-data` to do
+# something meaningful, you need to define GET_DATA_CMD. If you want to use the
+# QLever UI, you should define DESCRIPTION (and if you have a text index,
+# also TEXT_DESCRIPTION).
+[data]
+NAME =
+# GET_DATA_CMD =
+# DESCRIPTION =
+# TEXT_DESCRIPTION =
+
+# CAT_INPUT_FILES produces the data that is piped into QLever's index builder.
+# Use SETTINGS_JSON for more advanced configuration settings (see the other
+# Qleverfiles for examples).
+[index]
+# INPUT_FILES =
+# CAT_INPUT_FILES = cat ${INPUT_FILES}
+# SETTINGS_JSON = {}
+
+# As a minimum, you need to specify the PORT on which QLever will listen for
+# SPARQL queries. If you want to send privileged commands to the server, you
+# need to specify an ACCESS_TOKEN (modify the random number below).
+[server]
+PORT = 7001
+# ACCESS_TOKEN = ${data:NAME}_1234567890
+
+# With SYSTEM = docker, the qlever script will download the docker image for
+# you and run QLever inside docker containers. Otherwise, you need the QLever
+# binaries in the PATH of your shell.
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+# The UI_CONFIG must be one of the slugs from http://qlever.cs.uni-freiburg.de
+# (see the dropdown menu on the top right, the slug is the last part of the URL).
+# In particular, this determines the example queries.
+[ui]
+UI_CONFIG = default
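The ${VAR} and ${section:VAR} references used throughout these Qleverfiles follow the syntax of Python's configparser with ExtendedInterpolation, which is presumably how the qlever script resolves them. A minimal stand-alone sketch with illustrative values (not taken from any file in this diff):

# Sketch: resolve Qleverfile-style ${...} references with configparser.
import configparser

config = configparser.ConfigParser(
    interpolation=configparser.ExtendedInterpolation())
config.read_string("""
[data]
NAME = olympics

[server]
ACCESS_TOKEN = ${data:NAME}_1234567890
""")
print(config["server"]["ACCESS_TOKEN"])  # prints: olympics_1234567890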
qlever/Qleverfiles/Qleverfile.dnb
ADDED
@@ -0,0 +1,37 @@
+# Qleverfile for DNB, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # takes ~ 10 min to download .nt.gz files of size ~ 8 GB
+# qlever index     # takes ~ 20 min and ~ 5 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+#
+# NOTE: https://data.dnb.de/opendata/ is rather confusing because of the many
+# files. This Qleverfile downloads all the datasets named "Gesamtabzug", except
+# bib_lds.nt.gz, which contains incorrectly formatted IRIs. The file
+# dnb-all_ldsprov.nt.gz contains invalid floating point literals; to ignore
+# them, compile QLever with TurtleParserBase::invalidLiteralsAreSkipped_ = true
+
+[data]
+NAME = dnb
+BASE_URL = https://data.dnb.de/opendata
+GET_DATA_CMD = curl -L -C - --remote-name-all ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz
+DESCRIPTION = DNB data from ${BASE_URL} (authorities-gnd_lds, dnb-all_lds, dnb-all_ldsprov, zdb_lds)
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?var, "...")
+
+[index]
+INPUT_FILES = *.nt.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000 }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7035
+ACCESS_TOKEN = ${data:NAME}_284732743
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 2G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dnb
qlever/Qleverfiles/Qleverfile.fbeasy
ADDED
@@ -0,0 +1,29 @@
+# Qleverfile for Fbeasy, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index     # takes ~10 minutes and ~10 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+
+[data]
+NAME = fbeasy
+DATA_URL = https://freebase-easy.cs.uni-freiburg.de
+GET_DATA_CMD = wget -nc ${DATA_URL}/dump/fbeasy.nt
+DESCRIPTION = RDF data from ${DATA_URL}, latest version from 18.07.2019
+TEXT_DESCRIPTION = Sentences from Wikipedia that mention at least one Freebase entity
+
+[index]
+INPUT_FILES = fbeasy.nt
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 10000000 }
+
+[server]
+PORT = 7003
+ACCESS_TOKEN = ${data:NAME}_12631403
+MEMORY_FOR_QUERIES = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = fbeasy
qlever/Qleverfiles/Qleverfile.freebase
ADDED
@@ -0,0 +1,28 @@
+# Qleverfile for Freebase, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index     # takes ~4 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+
+[data]
+NAME = freebase
+DATA_URL = http://commondatastorage.googleapis.com/freebase-public/rdf/freebase-rdf-latest.gz
+GET_DATA_CMD = wget -nc ${DATA_URL}
+DESCRIPTION = RDF data from ${DATA_URL}, latest (and final) version from 09.08.2015
+
+[index]
+INPUT_FILES = freebase-rdf-latest.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }
+
+[server]
+PORT = 7002
+ACCESS_TOKEN = ${data:NAME}_12631403
+MEMORY_FOR_QUERIES = 10G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = freebase
qlever/Qleverfiles/Qleverfile.imdb
ADDED
@@ -0,0 +1,35 @@
+# Qleverfile for IMDB, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads "basics" and "ratings" of size ~1 GB
+# qlever index     # takes ~5 minutes and ~5 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (takes a few seconds)
+#
+# Supports fast keyword search in literals (WITH_TEXT_INDEX = from_literals).
+
+[data]
+NAME = imdb
+IMDB_DATA_URL = https://datasets.imdbws.com
+GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> .\n"
+GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
+DESCRIPTION = RDF data derived from ${IMDB_DATA_URL}
+TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7029
+ACCESS_TOKEN = ${data:NAME}_1234567890
+MEMORY_FOR_QUERIES = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = imdb
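For readers who find the awk one-liners above hard to follow, here is a rough Python rendering of the GET_IMDB_RATINGS step; it assumes title.ratings.tsv.gz has the three columns tconst, averageRating and numVotes (the Qleverfile itself uses curl, zcat and awk):

# Sketch: emit the same Turtle triples as the GET_IMDB_RATINGS awk pipeline.
import csv
import gzip

with gzip.open("title.ratings.tsv.gz", "rt", newline="") as f:
    reader = csv.reader(f, delimiter="\t")
    next(reader)  # skip the header row (the awk version uses `sed 1d`)
    for tconst, rating, votes in reader:
        print(f"imdb:{tconst} imdb:averageRating {rating} ; "
              f"imdb:numVotes {votes} .")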
qlever/Qleverfiles/Qleverfile.olympics
ADDED
@@ -0,0 +1,31 @@
+# Qleverfile for Olympics, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .zip file of size 13 MB, uncompressed to 323 MB
+# qlever index     # takes ~10 seconds and ~1 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (instant)
+
+[data]
+NAME = olympics
+BASE_URL = https://github.com/wallscope/olympics-rdf
+GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip
+DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL}
+TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
+
+[index]
+INPUT_FILES = olympics.nt
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }
+
+[server]
+PORT = 7019
+ACCESS_TOKEN = ${data:NAME}_7643543846
+MEMORY_FOR_QUERIES = 5G
+CACHE_MAX_SIZE = 2G
+TIMEOUT = 30s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = olympics
qlever/Qleverfiles/Qleverfile.osm-country
ADDED
@@ -0,0 +1,42 @@
+# Qleverfile for OSM of some country, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .pbf file from Geofabrik and builds .ttl.bz2 using osm2rdf
+# qlever index     # for example, Germany takes ~30 minutes and ~10 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+#
+# Make sure that osm2rdf is in your path. Set CONTINENT and COUNTRY such that
+# the link under GET_DATA_CMD exists (the names are usually the canonical
+# names). The time for osm2rdf is around the same as that for "qlever index".
+
+# Indexer settings
+CONTINENT = europe
+COUNTRY = switzerland
+DB = osm-${COUNTRY}
+PBF = ${DB}.pbf
+RDF_FILES = "${DB}.ttl.bz2"
+CAT_FILES = "bzcat ${RDF_FILES}"
+WITH_TEXT = false
+STXXL_MEMORY = 10
+SETTINGS_JSON = '{ "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }'
+GET_DATA_CMD = "wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${DB}.*.bz2; ( time /local/data/osm2rdf/build/apps/osm2rdf ${PBF} -o ${DB}.ttl --cache . --write-geometric-relation-statistics ) 2>&1 | tee ${DB}.osm2rdf-log.txt; rm -f spatial-*"
+DESCRIPTION = "OSM ${COUNTRY^}, dump from $(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6) with ogc:contains"
+
+# Server settings
+HOSTNAME = $(hostname -f)
+SERVER_PORT = 7025
+ACCESS_TOKEN = ${DB}_%RANDOM%
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+CACHE_MAX_NUM_ENTRIES = 100
+
+# QLever binaries
+QLEVER_BIN_DIR = %QLEVER_BIN_DIR%
+USE_DOCKER = true
+QLEVER_DOCKER_IMAGE = adfreiburg/qlever
+QLEVER_DOCKER_CONTAINER = qlever.${DB}
+
+# QLever UI
+QLEVERUI_PORT = 7000
+QLEVERUI_DIR = qlever-ui
+QLEVERUI_CONFIG = osm
qlever/Qleverfiles/Qleverfile.osm-planet
ADDED
@@ -0,0 +1,36 @@
+# Qleverfile for OSM Planet, use with the qlever script (pip install qlever)
+#
+# qlever get-data  # takes ~50 mins to download .ttl.bz2 file of ~ 300 GB
+# qlever index     # takes ~12 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # takes a few seconds
+#
+# For the OSM data of a single country, do `qlever setup-config osm-country`
+# and edit the Qleverfile to specify the country.
+
+[data]
+NAME = osm-planet
+DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
+GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
+VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y")
+DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl.bz2
+CAT_INPUT_FILES = lbzcat -f -n 2 ${INPUT_FILES}
+STXXL_MEMORY = 20G
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+
+[server]
+PORT = 7007
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 90G
+CACHE_MAX_SIZE = 40G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 30G
+TIMEOUT = 300s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = osm-planet
qlever/Qleverfiles/Qleverfile.pubchem
ADDED
@@ -0,0 +1,66 @@
+# Qleverfile for PubChem, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz files of total size 114 GB; see NOTES 2, 3, 4
+# qlever index     # takes ~5 hours and ~20 GB RAM on an AMD Ryzen 9 5900X
+# qlever start     # starts the server (a few seconds)
+#
+# IMPORTANT NOTES:
+#
+# NOTE 1: The SPARQL endpoint at https://qlever.cs.uni-freiburg.de/pubchem also
+# contains data from the following ontologies, which are very useful for
+# resolving names of IRIs like `sio:SIO_000008` or `obo:IAO_0000412`, but which
+# are not part of the PubChem RDF data. For the corresponding URLs, see
+# https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1200479401 .
+#
+# bao bfo biopax-level3 chebi cheminf cito dublin_core_terms fabio go iao ncit
+# obi pr ro sio skos so uo
+#
+# NOTE 2: The robots.txt file from https://ftp.ncbi.nlm.nih.gov currently
+# disallows downloading the PubChem RDF data using `wget --recursive` as in the
+# GET_DATA_CMD below. As a workaround, you can write a simple Python script
+# (using `BeautifulSoup` and `urllib.parse`) to scrape the URLs from the HTML
+# pages and download the files individually. This was done for the latest
+# version of https://qlever.cs.uni-freiburg.de/pubchem .
+#
+# NOTE 3: Many of the TTL files have generic prefix definitions in the middle
+# of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
+# See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
+# This is allowed by the standard, but VERY unusual. For use with QLever,
+# convert the TTL files to NT before indexing, see GET_DATA_CMD below.
+#
+# NOTE 4: Many of the files (TTL as well as NT) contain invalid IRIs because
+# spaces and braces are not properly escaped. Here is a simple awk-based script
+# to percent-encode spaces and braces in all IRIs in the NT files:
+#
+# for NTGZ in nt.${DATE}/*.nt.gz; do echo "zcat $NTGZ | sed 's/> />\t/1; s/> />\t/1; s/ \.\$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$i ~ /^<.*>\$/) { gsub(/ /, \"%20\", \$i); gsub(/\[/, \"%5B\", \$i); gsub(/\]/, \"%5D\", \$i); gsub(/{/, \"%7B\", \$i); gsub(/}/, \"%7D\", \$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}.FIXED/$(basename $NTGZ)"; done > fix-nt.commands.txt
+# cat fix-nt.commands.txt | parallel
+
+
+[DEFAULT]
+NAME = pubchem
+DATE = 2024-02-03
+
+[data]
+GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+MAKE_GET_DATA_CMD = curl -s ${GET_DATA_URL}/void.ttl | grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' | grep -v "nbr[23]d" | while read URL; do echo "echo \"Processing $$URL ...\"; curl --silent --remote-time --output ttl.${DATE}/$$(basename $$URL) $$URL && docker run --rm -v $$(pwd)/ttl.${DATE}:/data stain/jena turtle --output=NT /data/$$(basename $$URL) | sed 's/> />\t/1; s/> />\t/1; s/ \.\$$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$$i ~ /^<.*>\$$/) { gsub(/ /, \"%20\", \$$i); gsub(/\[/, \"%5B\", \$$i); gsub(/\]/, \"%5D\", \$$i); gsub(/{/, \"%7B\", \$$i); gsub(/}/, \"%7D\", \$$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}/$$(basename -s .ttl.gz $$URL).nt.gz"; done > pubchem.get-data-cmds.txt
+GET_DATA_CMD = mkdir -p ttl.${DATE} && mkdir -p nt.${DATE} && ${MAKE_GET_DATA_CMD} && cat pubchem.get-data-cmds.txt | parallel --line-buffer
+DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folders except nbr2d and nbr3d)
+
+[index]
+INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+STXXL_MEMORY = 10G
+
+[server]
+PORT = 7023
+ACCESS_TOKEN = ${NAME}_310129823
+MEMORY_FOR_QUERIES = 20G
+TIMEOUT = 120s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = pubchem
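NOTE 2 above suggests scraping the file URLs with BeautifulSoup and urllib.parse instead of `wget --recursive`. A minimal sketch of such a scraper, assuming the listing pages are plain HTML directory indexes with <a href> links; the HTTPS base URL and the .ttl.gz filter are illustrative:

# Sketch: collect and download all .ttl.gz links from a directory listing.
from urllib.parse import urljoin
from urllib.request import urlopen, urlretrieve

from bs4 import BeautifulSoup

BASE_URL = "https://ftp.ncbi.nlm.nih.gov/pubchem/RDF/"  # illustrative

def list_files(url: str) -> list[str]:
    """Return absolute URLs of all .ttl.gz files linked from `url`."""
    soup = BeautifulSoup(urlopen(url).read(), "html.parser")
    return [urljoin(url, a["href"])
            for a in soup.find_all("a", href=True)
            if a["href"].endswith(".ttl.gz")]

for file_url in list_files(BASE_URL):
    name = file_url.rsplit("/", 1)[-1]
    print(f"Downloading {name} ...")
    urlretrieve(file_url, name)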
qlever/Qleverfiles/Qleverfile.scientists
ADDED
@@ -0,0 +1,39 @@
+# Qleverfile for Scientists, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .zip file of size 79 MB, uncompressed to 318 MB
+# qlever index     # takes ~20 seconds and ~1 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (instant)
+#
+# Also builds a text index for keyword search on the literals AND keyword search
+# in Wikipedia sentences linked to the RDF data; see TEXT_DESCRIPTION below.
+
+# Indexer settings
+DB = scientists
+RDF_FILES = "${DB}.nt"
+CAT_FILES = "cat ${RDF_FILES}"
+WITH_TEXT_INDEX = from_text_records_and_literals
+STXXL_MEMORY = 1G
+SETTINGS_JSON = '{ "ascii-prefixes-only": true, "num-triples-per-batch": 100000 }'
+GET_DATA_CMD = "wget https://github.com/ad-freiburg/qlever/raw/master/e2e/scientist-collection.zip && unzip -j scientist-collection.zip && rm -f scientist-collection.zip"
+INDEX_DESCRIPTION = "Scientist collection from QLever's end-to-end test, see https://github.com/ad-freiburg/qlever/tree/master/e2e"
+TEXT_DESCRIPTION = "Literals (use FILTER CONTAINS) and Wikipedia articles (use ql:contains-entity and ql:contains-word)"
+
+# Server settings
+HOSTNAME = $(hostname -f)
+SERVER_PORT = 7020
+ACCESS_TOKEN = ${DB}_%RANDOM%
+MEMORY_FOR_QUERIES = 5G
+CACHE_MAX_SIZE = 2G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 1G
+CACHE_MAX_NUM_ENTRIES = 100
+
+# QLever binaries
+QLEVER_BIN_DIR = %QLEVER_BIN_DIR%
+USE_DOCKER = true
+QLEVER_DOCKER_IMAGE = adfreiburg/qlever
+QLEVER_DOCKER_CONTAINER = qlever.scientists
+
+# QLever UI
+QLEVERUI_PORT = 7000
+QLEVERUI_DIR = qlever-ui
+QLEVERUI_CONFIG = default
qlever/Qleverfiles/Qleverfile.uniprot
ADDED
@@ -0,0 +1,41 @@
+# Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads RDFXML and converts to NT (around 1 TB each)
+# qlever index     # takes ~ 1.5 days and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (takes a few seconds)
+#
+# Install packages: sudo apt install -y libxml2-utils parallel xz-utils pv
+# Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
+#
+# Set DATE to the date of the latest release
+#
+# IMPORTANT: Build on SSD, disk space required: ~ 10 TB. For running the server,
+# the uniprot.index.???.meta files can be on HDD.
+
+[data]
+NAME = uniprot
+DATE = 2024-01-24
+DOWNLOAD_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
+GET_RDFXML_CMD = mkdir -p rdf.${DATE} && curl -s ${DOWNLOAD_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" - | while read URL; do wget --no-verbose -P rdf.${DATE} $$URL 2>&1 | tee -a uniprot.download-log; done
+RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | xz -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.xz/') && echo 'DONE converting $$RDFXML'"; done | parallel
+GET_DATA_CMD = rdfxml --help && date > ${NAME}.get-data.begin-date && ${GET_RDFXML_CMD} && ${RDFXML2NT_CMD} && date > ${NAME}.get-data.end-date
+DESCRIPTION = Complete UniProt data from ${DOWNLOAD_URL}, version ${DATE}
+
+[index]
+INPUT_FILES = nt.${data:DATE}/*.nt.xz
+CAT_INPUT_FILES = parallel --tmpdir . -j 4 'xzcat -f {}' ::: nt.${data:DATE}/*.nt.xz | pv -q -B 5G
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
+STXXL_MEMORY = 60G
+
+[server]
+PORT = 7018
+ACCESS_TOKEN = ${data:NAME}_1369924040
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = uniprot
qlever/Qleverfiles/Qleverfile.vvz
ADDED
@@ -0,0 +1,31 @@
+# Qleverfile for VVZ, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # this requires a separate internal tool
+# qlever index     # builds the index (takes a few seconds)
+# qlever start     # starts the server (takes a few seconds)
+#
+# Also builds a text index for fast keyword search in literals.
+
+[data]
+NAME = vvz
+GET_DATA_CMD = echo "This requires a separate tool"
+DESCRIPTION = VVZ Uni Freiburg, selected faculties
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+[index]
+INPUT_FILES = vvz.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000 }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7041
+ACCESS_TOKEN = ${data:NAME}_8736426534
+MEMORY_FOR_QUERIES = 10G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = vvz
qlever/Qleverfiles/Qleverfile.wikidata
ADDED
@@ -0,0 +1,30 @@
+# Qleverfile for Wikidata, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads two .bz2 files of total size ~100 GB
+# qlever index     # takes ~7 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (takes around 30 seconds)
+
+[data]
+NAME = wikidata
+GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
+GET_DATA_CMD = curl -LO -C - ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2
+DESCRIPTION = "Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2)"
+
+[index]
+INPUT_FILES = latest-lexemes.ttl.bz2 latest-all.ttl.bz2
+CAT_INPUT_FILES = bzcat ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+STXXL_MEMORY = 10G
+
+[server]
+PORT = 7001
+ACCESS_TOKEN = ${data:NAME}_372483264
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = wikidata
qlever/Qleverfiles/Qleverfile.wikipathways
ADDED
@@ -0,0 +1,40 @@
+# Qleverfile for WikiPathways, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads .gz file of size ~100 MB (as of 24.02.2024)
+# qlever index     # takes ~20 seconds and little RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server (takes around 2 minutes)
+#
+# Limitations: does not include the ontologies (WP, GPML, ChEBI, PW, CLO, ...) yet
+
+[data]
+NAME = wikipathways
+RELEASE = 20231210
+GET_DATA_URL = https://data.wikipathways.org/${RELEASE}/rdf
+GET_DATA_CMD = wget -O wikipathways-rdf-void.ttl ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-void.ttl && \
+               wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-wp.zip && \
+               unzip -qq -c wikipathways-${RELEASE}-rdf-wp.zip -x wp/wpOntology.ttl > wikipathways-rdf-wp.ttl && \
+               wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-gpml.zip && \
+               unzip -qq -c wikipathways-${RELEASE}-rdf-gpml.zip -x gpml/gpmlOntology.ttl > wikipathways-rdf-gpml.ttl && \
+               wget ${GET_DATA_URL}/wikipathways-${RELEASE}-rdf-authors.zip && \
+               unzip -qq -c wikipathways-${RELEASE}-rdf-authors.zip > wikipathways-rdf-authors.ttl && \
+               cat wikipathways-rdf-*.ttl | grep ^@prefix | tr -s ' ' | sort -u > ${NAME}.prefix-definitions
+DESCRIPTION = WikiPathways RDF, from ${GET_DATA_URL}
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+[index]
+INPUT_FILES = ${data:NAME}.prefix-definitions wikipathways-rdf-wp.ttl wikipathways-rdf-gpml.ttl wikipathways-rdf-void.ttl wikipathways-rdf-authors.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7040
+ACCESS_TOKEN = ${data:NAME}_7643543846
+MEMORY_FOR_QUERIES = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = wikipathways
qlever/Qleverfiles/Qleverfile.yago-4
ADDED
@@ -0,0 +1,33 @@
+# Qleverfile for YAGO 4, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data  # downloads 8 nt.gz files of size ~60 GB (as of 12.03.2020)
+# qlever index     # takes ~4 hours and ~10 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start     # starts the server
+
+# NOTE concerning GET_DATA_CMD: The triples from wd-annotated-facts are
+# contained in wd-facts. The "full types" are the YAGO types, the "simple
+# types" are the schema.org types. They don't interfere with each other because
+# they have distinct prefixes.
+
+[data]
+NAME = yago-4
+GET_DATA_CMD = curl --location --continue-at - --remote-name-all https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-class.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-facts.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-full-types.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-labels.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-sameAs.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-schema.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-shapes.nt.gz https://yago-knowledge.org/data/yago4/full/2020-02-24/yago-wd-simple-types.nt.gz
+DESCRIPTION = "Full dump from https://yago-knowledge.org/downloads/yago-4, version 12.03.2020"
+
+[index]
+INPUT_FILES = yago-wd-*.nt.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": ["en"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
+STXXL_MEMORY = 10G
+
+[server]
+PORT = 9004
+ACCESS_TOKEN = ${data:NAME}_2347348732
+MEMORY_FOR_QUERIES = 30G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = yago-4
qlever/config.py
CHANGED
@@ -4,6 +4,7 @@ import argparse
 import os
 import traceback
 from pathlib import Path
+from importlib.metadata import version
 
 import argcomplete
 
@@ -180,6 +181,8 @@ class QleverConfig:
         # are defined in the modules in `qlever/commands`. In `__init__.py`
         # an object of each class is created and stored in `command_objects`.
         parser = argparse.ArgumentParser()
+        parser.add_argument("--version", action="version",
+                            version=f"%(prog)s {version('qlever')}")
         add_qleverfile_option(parser)
         subparsers = parser.add_subparsers(dest='command')
         subparsers.required = True
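The new --version flag combines two standard-library pieces: argparse's built-in "version" action, which prints the given string and exits, and importlib.metadata.version(), which reads the version of the installed distribution so the number never has to be hard-coded. A stand-alone sketch of the same pattern (it requires an installed qlever package):

# Sketch: version flag backed by the installed package metadata.
import argparse
from importlib.metadata import version

parser = argparse.ArgumentParser(prog="qlever")
parser.add_argument("--version", action="version",
                    version=f"%(prog)s {version('qlever')}")
parser.parse_args(["--version"])  # prints e.g. "qlever 0.4.1" and exits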
qlever/util.py
CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import random
+import secrets
 import re
 import shlex
 import shutil
@@ -93,7 +93,7 @@ def get_curl_cmd_for_sparql_query(
     """
     Get curl command for given SPARQL query.
     """
-    curl_cmd = (f"curl -s http
+    curl_cmd = (f"curl -s http://{host}:{port}"
                 f" -H \"Accept: {media_type}\" "
                 f" --data-urlencode query={shlex.quote(query)}")
     if pinresult and access_token is not None:
@@ -137,8 +137,9 @@ def show_process_info(psutil_process, cmdline_regex, show_heading=True):
     pinfo = psutil_process.as_dict(
             attrs=['pid', 'username', 'create_time',
                    'memory_info', 'cmdline'])
-
-
+    # Note: pinfo[`cmdline`] is `None` if the process is a zombie.
+    cmdline = " ".join(pinfo['cmdline'] or [])
+    if len(cmdline) == 0 or not re.search(cmdline_regex, cmdline):
         return False
     pid = pinfo['pid']
     user = pinfo['username'] if pinfo['username'] else ""
@@ -162,6 +163,5 @@ def get_random_string(length: int) -> str:
     Helper function that returns a randomly chosen string of the given
     length. Take the current time as seed.
     """
-
-    return "".join(
-        k=length))
+    characters = string.ascii_letters + string.digits
+    return "".join(secrets.choice(characters) for _ in range(length))
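The new get_random_string draws from the secrets module, so the generated ACCESS_TOKEN suffixes are no longer predictable from the start time (the old variant was seeded with the current time, as the docstring notes). In isolation, mirroring the lines added above:

# Sketch: the new implementation, runnable on its own.
import secrets
import string

def get_random_string(length: int) -> str:
    characters = string.ascii_letters + string.digits
    return "".join(secrets.choice(characters) for _ in range(length))

print(get_random_string(12))  # e.g. "t3J9qXl0BzKa"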
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: qlever
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Script for using the QLever SPARQL engine.
|
|
5
5
|
Author-email: Hannah Bast <bast@cs.uni-freiburg.de>
|
|
6
6
|
License: Apache License
|
|
@@ -214,6 +214,7 @@ Description-Content-Type: text/markdown
|
|
|
214
214
|
License-File: LICENSE
|
|
215
215
|
Requires-Dist: psutil
|
|
216
216
|
Requires-Dist: termcolor
|
|
217
|
+
Requires-Dist: argcomplete
|
|
217
218
|
|
|
218
219
|
# QLever
|
|
219
220
|
|
|
qlever-0.4.1.dist-info/RECORD
ADDED
@@ -0,0 +1,47 @@
+qlever/__init__.py,sha256=IyfS1OhlVE7-rjtv6FPlL0R56VxcNsS6KS7NJQhTDIM,1367
+qlever/__main__.py,sha256=MqM37bEzQeJEGUXZvuLcilIvnObZiG2eTGIkfKGpdnw,62016
+qlever/command.py,sha256=yOr0Uc8D8-AM7EjwDsVzbc3KNYjPH-FVOZhIHkqO588,2749
+qlever/config.py,sha256=-jjHAL8jdp25v53SqXKP4gWip6Qw9OdlDvFN6X7uk_4,10184
+qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
+qlever/log.py,sha256=k9Mq4hxQ_d2k0e-5ZVgcB2XIRhOsGMO9I3rIR7YQyDA,1376
+qlever/qlever_main.py,sha256=k8vIQYK7zqObFNet11iLf--nrLdPooL5amprmlySi4k,2300
+qlever/qleverfile.py,sha256=6Ll81xkzel_s2Ju9ZfBXUGlRfikaAzZM6Do-dTrdo3k,12934
+qlever/util.py,sha256=dwqtpY14P3ds_PYx5bgqus_nsx_BhPQzUSa0Z86ONdo,6236
+qlever/Qleverfiles/Qleverfile.dblp,sha256=SFjBD20aOSWod4mEQnxHSDWdInoE_EFp2nyMw7ev7ZA,1167
+qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
+qlever/Qleverfiles/Qleverfile.default,sha256=mljl6I1RCkpIWOqMQwjzPZIsarYQx1R0mIlc583KuqU,1869
+qlever/Qleverfiles/Qleverfile.dnb,sha256=yw4MmLsDPP3P5JWPgJwgPJh66TqwkyUXbQR5lSf5oHc,1511
+qlever/Qleverfiles/Qleverfile.fbeasy,sha256=jeztW4gFpWL_w1nCH5qGHeZyZv2lz_kG6f1G3r3DkJ4,974
+qlever/Qleverfiles/Qleverfile.freebase,sha256=k6PqYrtHTBr0EydObm1Hg9QWyAAM9fXkdcjhReDg0fM,1035
+qlever/Qleverfiles/Qleverfile.imdb,sha256=uL5XlPwX01AmH-j6_Bc-PRm2fuPxGSIu8NaDflY525U,1623
+qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
+qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
+qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
+qlever/Qleverfiles/Qleverfile.pubchem,sha256=bOhiJKUxzDiAm1UyXFPDQLYTqGc9jM8240fhobYLij0,3898
+qlever/Qleverfiles/Qleverfile.scientists,sha256=oFhzURcRFciA27GZ-ux_hsDe0esBLobWHC6h_Vf2xy8,1735
+qlever/Qleverfiles/Qleverfile.uniprot,sha256=FS8QLHvujbjUYyU2Ma0PRgfCWlulviaGLc_1csxpuic,2201
+qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
+qlever/Qleverfiles/Qleverfile.wikidata,sha256=fhWSChZTH3c2y14kgP1P5Duq1SsewTOK3wETf6RRmI8,1172
+qlever/Qleverfiles/Qleverfile.wikipathways,sha256=qWjfT-CVQCgRfN6fXPwBORMbjzXS_xsJ2DoCamQI7Rs,2045
+qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
+qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+qlever/commands/add_text_index.py,sha256=dkqYtwgOhgnXiei_eyhBWYCtdAiQUEmjWoa3JMlMb4c,3641
+qlever/commands/cache_stats.py,sha256=6JjueQstAqc8dNfgY8TP2EitFMxdUvCwrcyd7KUEb2o,4157
+qlever/commands/clear_cache.py,sha256=AnE1MOoj1ZexxrRT8FGeBLlv8rtQIVV4DP8VBn5-X-s,2843
+qlever/commands/example_queries.py,sha256=3jlfHyL7pw1OSTuu3fY-23XaRAPIuEdNGW8QnIY2Va8,8644
+qlever/commands/get_data.py,sha256=0fGuRLDB7YofHtpqk0ctq9_de_xeuliSmSZafGXAo1A,1470
+qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
+qlever/commands/index_stats.py,sha256=ao7_ySyz8MAjUvCbEp3Kj30PsR5x3MBM3ohgEUWdALM,11083
+qlever/commands/log.py,sha256=8Krt3MsTUDapYqVw1zUu5X15SF8mV97Uj0qKOWK8jXk,1861
+qlever/commands/setup_config.py,sha256=mFkEtCPZ6oeVfehjVLrcLttYcPDgtwXHrNIWWzvHOfo,2928
+qlever/commands/start.py,sha256=2rOtk3NmhEs28D5csL_a1BdjSWU9VkcH6AqYT0vdww0,9285
+qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,1631
+qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
+qlever/commands/ui.py,sha256=rV8u017WLbfz0zVT_c9GC4d9v1WWwrTM3kfGONbeCvQ,2499
+qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
+qlever-0.4.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+qlever-0.4.1.dist-info/METADATA,sha256=GkXf_oneu0Oe02UOPR8OvqVzxDNA-ljS6yPGLi2x_Bk,17076
+qlever-0.4.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+qlever-0.4.1.dist-info/entry_points.txt,sha256=s0iWBHKRUzsJ7B6nVGiyMdOJtiOS84IJMSSxgbNU6LU,85
+qlever-0.4.1.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
+qlever-0.4.1.dist-info/RECORD,,
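Each RECORD row has the wheel format path,sha256=<digest>,size, where the digest is an unpadded URL-safe base64 encoding of the file's SHA-256. A small sketch that recomputes an entry for verification (the path is illustrative):

# Sketch: rebuild one RECORD line from a file on disk.
import base64
import hashlib
import os

def record_entry(path: str) -> str:
    digest = hashlib.sha256(open(path, "rb").read()).digest()
    b64 = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
    return f"{path},sha256={b64},{os.path.getsize(path)}"

print(record_entry("qlever/util.py"))  # should match the RECORD line above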
qlever-0.4.0.dist-info/RECORD
DELETED
@@ -1,30 +0,0 @@
-qlever/__init__.py,sha256=IyfS1OhlVE7-rjtv6FPlL0R56VxcNsS6KS7NJQhTDIM,1367
-qlever/__main__.py,sha256=MqM37bEzQeJEGUXZvuLcilIvnObZiG2eTGIkfKGpdnw,62016
-qlever/command.py,sha256=yOr0Uc8D8-AM7EjwDsVzbc3KNYjPH-FVOZhIHkqO588,2749
-qlever/config.py,sha256=LOVW8alFCVgZz_GAWm7vnjZVMVE7m3QTecy34lHgjGE,10017
-qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
-qlever/log.py,sha256=k9Mq4hxQ_d2k0e-5ZVgcB2XIRhOsGMO9I3rIR7YQyDA,1376
-qlever/qlever_main.py,sha256=k8vIQYK7zqObFNet11iLf--nrLdPooL5amprmlySi4k,2300
-qlever/qleverfile.py,sha256=6Ll81xkzel_s2Ju9ZfBXUGlRfikaAzZM6Do-dTrdo3k,12934
-qlever/util.py,sha256=WM09PMRffUoPpEse4VwK9BzUavFkaB2Bm8KfVWxC3sQ,6161
-qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-qlever/commands/add_text_index.py,sha256=dkqYtwgOhgnXiei_eyhBWYCtdAiQUEmjWoa3JMlMb4c,3641
-qlever/commands/cache_stats.py,sha256=6JjueQstAqc8dNfgY8TP2EitFMxdUvCwrcyd7KUEb2o,4157
-qlever/commands/clear_cache.py,sha256=AnE1MOoj1ZexxrRT8FGeBLlv8rtQIVV4DP8VBn5-X-s,2843
-qlever/commands/example_queries.py,sha256=3jlfHyL7pw1OSTuu3fY-23XaRAPIuEdNGW8QnIY2Va8,8644
-qlever/commands/get_data.py,sha256=0fGuRLDB7YofHtpqk0ctq9_de_xeuliSmSZafGXAo1A,1470
-qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
-qlever/commands/index_stats.py,sha256=ao7_ySyz8MAjUvCbEp3Kj30PsR5x3MBM3ohgEUWdALM,11083
-qlever/commands/log.py,sha256=8Krt3MsTUDapYqVw1zUu5X15SF8mV97Uj0qKOWK8jXk,1861
-qlever/commands/setup_config.py,sha256=mFkEtCPZ6oeVfehjVLrcLttYcPDgtwXHrNIWWzvHOfo,2928
-qlever/commands/start.py,sha256=2rOtk3NmhEs28D5csL_a1BdjSWU9VkcH6AqYT0vdww0,9285
-qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,1631
-qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
-qlever/commands/ui.py,sha256=rV8u017WLbfz0zVT_c9GC4d9v1WWwrTM3kfGONbeCvQ,2499
-qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
-qlever-0.4.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-qlever-0.4.0.dist-info/METADATA,sha256=DuPh4u9Ukjt3-z31WK0mb_zj2OUV6bHnVLn1ESY7Gc0,17049
-qlever-0.4.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-qlever-0.4.0.dist-info/entry_points.txt,sha256=s0iWBHKRUzsJ7B6nVGiyMdOJtiOS84IJMSSxgbNU6LU,85
-qlever-0.4.0.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
-qlever-0.4.0.dist-info/RECORD,,
{qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/LICENSE: file without changes
{qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/WHEEL: file without changes
{qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/entry_points.txt: file without changes
{qlever-0.4.0.dist-info → qlever-0.4.1.dist-info}/top_level.txt: file without changes