qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl
- qlever/Qleverfiles/Qleverfile.dblp +36 -0
- qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
- qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
- qlever/Qleverfiles/Qleverfile.default +51 -0
- qlever/Qleverfiles/Qleverfile.dnb +40 -0
- qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
- qlever/Qleverfiles/Qleverfile.freebase +28 -0
- qlever/Qleverfiles/Qleverfile.imdb +36 -0
- qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
- qlever/Qleverfiles/Qleverfile.olympics +31 -0
- qlever/Qleverfiles/Qleverfile.orkg +30 -0
- qlever/Qleverfiles/Qleverfile.osm-country +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
- qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
- qlever/Qleverfiles/Qleverfile.pubchem +131 -0
- qlever/Qleverfiles/Qleverfile.scientists +29 -0
- qlever/Qleverfiles/Qleverfile.uniprot +74 -0
- qlever/Qleverfiles/Qleverfile.vvz +31 -0
- qlever/Qleverfiles/Qleverfile.wikidata +42 -0
- qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
- qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
- qlever/__init__.py +44 -1380
- qlever/command.py +87 -0
- qlever/commands/__init__.py +0 -0
- qlever/commands/add_text_index.py +115 -0
- qlever/commands/benchmark_queries.py +1019 -0
- qlever/commands/cache_stats.py +125 -0
- qlever/commands/clear_cache.py +88 -0
- qlever/commands/extract_queries.py +120 -0
- qlever/commands/get_data.py +48 -0
- qlever/commands/index.py +333 -0
- qlever/commands/index_stats.py +306 -0
- qlever/commands/log.py +66 -0
- qlever/commands/materialized_view.py +110 -0
- qlever/commands/query.py +142 -0
- qlever/commands/rebuild_index.py +176 -0
- qlever/commands/reset_updates.py +59 -0
- qlever/commands/settings.py +115 -0
- qlever/commands/setup_config.py +97 -0
- qlever/commands/start.py +336 -0
- qlever/commands/status.py +50 -0
- qlever/commands/stop.py +90 -0
- qlever/commands/system_info.py +130 -0
- qlever/commands/ui.py +271 -0
- qlever/commands/update.py +90 -0
- qlever/commands/update_wikidata.py +1204 -0
- qlever/commands/warmup.py +41 -0
- qlever/config.py +223 -0
- qlever/containerize.py +167 -0
- qlever/log.py +55 -0
- qlever/qlever_main.py +79 -0
- qlever/qleverfile.py +530 -0
- qlever/util.py +330 -0
- qlever-0.5.41.dist-info/METADATA +127 -0
- qlever-0.5.41.dist-info/RECORD +59 -0
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
- qlever-0.5.41.dist-info/entry_points.txt +2 -0
- qlever-0.5.41.dist-info/top_level.txt +1 -0
- build/lib/qlever/__init__.py +0 -1383
- build/lib/qlever/__main__.py +0 -4
- qlever/__main__.py +0 -4
- qlever-0.2.5.dist-info/METADATA +0 -277
- qlever-0.2.5.dist-info/RECORD +0 -12
- qlever-0.2.5.dist-info/entry_points.txt +0 -2
- qlever-0.2.5.dist-info/top_level.txt +0 -4
- src/qlever/__init__.py +0 -1383
- src/qlever/__main__.py +0 -4
- {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0

qlever/Qleverfiles/Qleverfile.dblp
@@ -0,0 +1,36 @@
+# Qleverfile for DBLP, use with QLever CLI (`pip install qlever`)
+#
+# qlever get-data # ~1 min, ~5 GB compressed, 1.3 B triples
+# qlever index # ~30 min, ~20 GB RAM, ~25 GB index size on disk
+# qlever start # ~3 s, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 5950X with 128 GB RAM and NVMe SSD (25.10.2024)
+
+[data]
+NAME = dblp
+DATA_TARFILE = dblp_KG_with_associated_data.tar
+GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
+GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
+VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL}, version ${VERSION}
+FORMAT = ttl
+
+[index]
+INPUT_FILES = *.gz
+MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
+SETTINGS_JSON = { "num-triples-per-batch": 5000000 }
+STXXL_MEMORY = 5G
+
+[server]
+PORT = 7015
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+TIMEOUT = 300s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dblp
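
The `${KEY}` and `${section:KEY}` references above, and the `$$` escape in VERSION, match the syntax of Python's configparser with ExtendedInterpolation; given the qlever/qleverfile.py module in this wheel that is a plausible reading, though the exact loader is an assumption here. A minimal sketch of how the values resolve:

    # Sketch of Qleverfile-style interpolation, assuming configparser
    # ExtendedInterpolation semantics: ${KEY} within a section,
    # ${section:KEY} across sections, $$ for a literal $.
    import textwrap
    from configparser import ConfigParser, ExtendedInterpolation

    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read_string(textwrap.dedent("""
        [data]
        NAME = dblp
        DATA_TARFILE = dblp_KG_with_associated_data.tar
        GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}

        [server]
        ACCESS_TOKEN = ${data:NAME}
    """))

    print(config["data"]["GET_DATA_URL"])
    # https://sparql.dblp.org/download/dblp_KG_with_associated_data.tar
    print(config["server"]["ACCESS_TOKEN"])  # dblp

Under this reading, the `$$(date ...)` in VERSION is an escape: configparser collapses `$$` to `$`, so the shell that eventually runs the value sees a plain `$(date ...)` command substitution.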

qlever/Qleverfiles/Qleverfile.dblp-plus
@@ -0,0 +1,33 @@
+# Qleverfile for DBLP Plus, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index takes ~30 minutes and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start starts the server
+#
+# Also builds a text index for fast keyword search in literals.
+
+[data]
+NAME = dblp-plus
+GET_DATA_CMD = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
+DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
+TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+[index]
+INPUT_FILES = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
+CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7027
+ACCESS_TOKEN = ${data:NAME}_169238202
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 2G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dblp-plus
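
`zcat -f` is what lets CAT_INPUT_FILES mix compressed and plain inputs: it decompresses the .gz file and passes the uncompressed .nt files straight through, writing one concatenated stream to stdout. A rough Python equivalent (hypothetical helper, keyed on the file extension, whereas `zcat -f` sniffs the content):

    # Concatenate mixed gzipped/plain inputs to stdout, like `zcat -f`.
    import gzip, shutil, sys

    def cat_input_files(paths):
        for path in paths:
            # Decompress .gz inputs, pass everything else through unchanged.
            opener = gzip.open if path.endswith(".gz") else open
            with opener(path, "rb") as f:
                shutil.copyfileobj(f, sys.stdout.buffer)

    cat_input_files(["dblp.ttl.gz", "affiliations.nt",
                     "affiliations.additions.nt", "citations.nt"])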

qlever/Qleverfiles/Qleverfile.dbpedia
@@ -0,0 +1,30 @@
+# Qleverfile for DBpedia, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # ~14 GB, ~850 M triples (as of 30.07.2024)
+# qlever index # ~20 min (on an AMD Ryzen 9 5900X)
+# qlever start # ~3 sec
+
+[data]
+NAME = dbpedia
+DATABUS_URL = https://databus.dbpedia.org/dbpedia/collections/latest-core
+GET_DATA_CMD = curl -X POST -H "Accept: text/csv" --data-urlencode "query=$$(curl -s -H "Accept:text/sparql" https://databus.dbpedia.org/dbpedia/collections/latest-core)" https://databus.dbpedia.org/sparql | tail -n+2 | sed 's/\r$$//' | sed 's/"//g' | while read -r file; do wget -P rdf-input $$file; done
+DESCRIPTION = RDF data from ${DATABUS_URL}
+
+[index]
+INPUT_FILES = rdf-input/*
+CAT_INPUT_FILES = (cat rdf-input/*.nt; lbzcat -n2 rdf-input/*.bzip2 rdf-input/*.bz2)
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+WITH_TEXT_INDEX = false
+
+[server]
+PORT = 7012
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dbpedia
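
The GET_DATA_CMD above is a four-stage shell pipeline: fetch the collection's SPARQL query from the Databus collection URL, POST it to the Databus SPARQL endpoint asking for CSV, strip the header row and quoting, then wget every resulting file URL into rdf-input/. A standard-library Python sketch of the same flow (URLs from the Qleverfile; error handling omitted):

    # 1. fetch the collection's SPARQL query, 2. POST it to the Databus
    # endpoint asking for CSV, 3. download every file URL into rdf-input/.
    import csv, io, os, urllib.parse, urllib.request

    COLLECTION = "https://databus.dbpedia.org/dbpedia/collections/latest-core"
    ENDPOINT = "https://databus.dbpedia.org/sparql"

    req = urllib.request.Request(COLLECTION, headers={"Accept": "text/sparql"})
    query = urllib.request.urlopen(req).read().decode()

    data = urllib.parse.urlencode({"query": query}).encode()
    req = urllib.request.Request(ENDPOINT, data=data, headers={"Accept": "text/csv"})
    rows = csv.reader(io.StringIO(urllib.request.urlopen(req).read().decode()))
    next(rows)  # skip the CSV header (the `tail -n+2` of the shell version)

    os.makedirs("rdf-input", exist_ok=True)
    for row in rows:
        if row:  # one column per row: the file URL
            url = row[0]
            target = os.path.join("rdf-input", url.rsplit("/", 1)[-1])
            urllib.request.urlretrieve(url, target)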

qlever/Qleverfiles/Qleverfile.default
@@ -0,0 +1,51 @@
+# Default Qleverfile, use with https://github.com/ad-freiburg/qlever-control
+#
+# If you have never seen a Qleverfile before, we recommend that you first look
+# at the example Qleverfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
+# src/qlever/Qleverfiles . Or execute `qlever setup-config <dataset>` on the
+# command line to obtain the example Qleverfiles for <dataset>.
+
+# As a minimum, each dataset needs a name. If you want `qlever get-data` to do
+# something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to
+# generate (or download or copy from somewhere) the input files yourself. Each
+# dataset should have a short DESCRIPTION, ideally with a date.
+[data]
+NAME =
+GET_DATA_CMD =
+DESCRIPTION =
+
+# The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all
+# input files. CAT_INPUT_FILES should write a concatenation of all input files
+# to stdout. For example, if your input files are gzipped, you can write `zcat
+# ${INPUT_FILES}`. Regarding SETTINGS_JSON, look at the other Qleverfiles for
+# examples. Several batches of size `num-triples-per-batch` are kept in RAM at
+# the same time; increasing this increases the memory usage but speeds up the
+# loading process.
+[index]
+INPUT_FILES = *.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
+
+# The server listens on PORT. If you want to send privileged commands to the
+# server, you need to specify an ACCESS_TOKEN, which you then have to set via a
+# URL parameter `access_token`. It should not be easily guessable, unless you
+# don't mind others getting privileged access to your server.
+[server]
+PORT = 8888
+ACCESS_TOKEN =
+
+# Use SYSTEM = docker to run QLever inside a docker container; the Docker image
+# will be downloaded automatically. Use SYSTEM = native to use self-compiled
+# binaries `IndexBuilderMain` and `ServerMain` (which should be in your PATH).
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+# UI_PORT specifies the port of the QLever UI web app when you run `qlever ui`.
+# The UI_CONFIG must be one of the slugs from http://qlever.cs.uni-freiburg.de
+# (see the dropdown menu on the top right, the slug is the last part of the URL).
+# It determines the example queries and which SPARQL queries are launched to
+# obtain suggestions as you type a query.
+[ui]
+UI_PORT = 8176
+UI_CONFIG = default
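
The INPUT_FILES / CAT_INPUT_FILES contract stated in the comments above is easy to check mechanically. A small sketch (hypothetical check, not part of qlever-control; values as in this default Qleverfile):

    # `ls ${INPUT_FILES}` must list the inputs, and CAT_INPUT_FILES must
    # stream their concatenation to stdout.
    import glob, subprocess

    input_files = "*.ttl"
    cat_input_files = f"cat {input_files}"

    files = sorted(glob.glob(input_files))
    assert files, f"INPUT_FILES = {input_files} matches no file"

    # For plain `cat`, stdout is exactly as long as the inputs combined
    # (for `zcat` it would be longer, since the inputs are compressed).
    out = subprocess.run(cat_input_files, shell=True, capture_output=True).stdout
    print(f"{len(files)} input files, {len(out)} bytes on stdout")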

qlever/Qleverfiles/Qleverfile.dnb
@@ -0,0 +1,40 @@
+# Qleverfile for DNB, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # takes ~ 10 mins to download .nt.gz file of size ~ 8 GB
+# qlever index # takes ~ 5 min and ~ 5 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (instantaneous)
+#
+# IMPORTANT: The current files contain invalid floating point literals. To make
+# QLever ignore them, compile QLever with `invalidLiteralsAreSkipped_ = true`
+# in `src/parser/TurtleParserBase.h:55`.
+#
+# NOTE: https://data.dnb.de/opendata/ is rather confusing because of the many
+# files. This Qleverfile downloads all the datasets named "Gesamtabzug", except
+# bib_lds.nt.gz, which contains incorrectly formatted IRIs. The file
+# dnb-all_ldsprov.nt.gz contains invalid floating point literals; to ignore
+# them, compile QLever with TurtleParserBase::invalidLiteralsAreSkipped_ = true
+
+[data]
+NAME = dnb
+BASE_URL = https://data.dnb.de/opendata
+GET_DATA_CMD = curl -L -C - --remote-name-all --remote-time ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz 2>&1 | tee ${data:NAME}.getdata-log.txt
+VERSION = $$(date -r dnb-all_lds.nt.gz +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = DNB data from ${BASE_URL} (authorities-gnd_lds, dnb-all_lds, dnb-all_ldsprov, zdb_lds), version ${VERSION}
+
+[index]
+INPUT_FILES = *.nt.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES} | sed '/"\$$R0"/d;/"0\.03013\$$D"/d'
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+
+[server]
+PORT = 7035
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
+CACHE_MAX_SIZE = 2G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = dnb
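
In CAT_INPUT_FILES above, the `$$` unescaping (assuming the configparser-style loader noted earlier) turns the value into `zcat *.nt.gz | sed '/"\$R0"/d;/"0\.03013\$D"/d'`, i.e. the sed deletes exactly the lines containing the two invalid literals `"$R0"` and `"0.03013$D"`. The same filter in Python:

    # Stream all *.nt.gz files to stdout, dropping the lines that contain
    # one of the two malformed literals named above.
    import gzip, sys
    from glob import glob

    BAD = ('"$R0"', '"0.03013$D"')

    for path in sorted(glob("*.nt.gz")):
        with gzip.open(path, "rt") as f:
            for line in f:
                if not any(bad in line for bad in BAD):
                    sys.stdout.write(line)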

qlever/Qleverfiles/Qleverfile.fbeasy
@@ -0,0 +1,29 @@
+# Qleverfile for Fbeasy, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index # takes ~10 minutes and ~10 GB RAM (on an AMD Ryzen 7 5900X)
+# qlever start # starts the server
+
+[data]
+NAME = fbeasy
+DATA_URL = https://freebase-easy.cs.uni-freiburg.de
+GET_DATA_CMD = wget -nc ${DATA_URL}/dump/fbeasy.nt
+DESCRIPTION = Freebase Easy, RDF NT from ${DATA_URL}, latest version from 18.07.2019
+TEXT_DESCRIPTION = Sentences from the English Wikipedia that mention at least one Freebase entity
+
+[index]
+INPUT_FILES = fbeasy.nt
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+
+[server]
+PORT = 7003
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = fbeasy

qlever/Qleverfiles/Qleverfile.freebase
@@ -0,0 +1,28 @@
+# Qleverfile for Freebase, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # downloads .gz file of size ~3 GB (as of 31.07.2022)
+# qlever index # takes ~4 hours and ~20 GB RAM (on an AMD Ryzen 7 5900X)
+# qlever start # starts the server
+
+[data]
+NAME = freebase
+DATA_URL = http://commondatastorage.googleapis.com/freebase-public/rdf/freebase-rdf-latest.gz
+GET_DATA_CMD = wget -nc ${DATA_URL}
+DESCRIPTION = Freebase, RDF NT from ${DATA_URL}, latest (and final) version from 09.08.2015
+
+[index]
+INPUT_FILES = freebase-rdf-latest.gz
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }
+
+[server]
+PORT = 7002
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = freebase

qlever/Qleverfiles/Qleverfile.imdb
@@ -0,0 +1,36 @@
+# Qleverfile for IMDB, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # downloads "basics" and "ratings" of size ~1 GB
+# qlever index # takes ~5 minutes and ~5 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (takes a few seconds)
+#
+# Supports fast keyword search in literals (WITH_TEXT_INDEX = from_literals).
+
+[data]
+NAME = imdb
+IMDB_DATA_URL = https://datasets.imdbws.com
+GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> ."
+GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
+GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
+VERSION = $$(date -r imdb.ttl +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+DESCRIPTION = IMDb, TSV from ${IMDB_DATA_URL}, converted to RDF TTL using awk, version ${VERSION}
+TEXT_DESCRIPTION = All literals, search with [ ql:contains-word "..."; ql:contains-entity ?literal ]
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
+TEXT_INDEX = from_literals
+
+[server]
+PORT = 7029
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = imdb
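
GET_IMDB_BASICS and GET_IMDB_RATINGS convert IMDb's TSV dumps to Turtle on the fly: drop the header line, then emit one statement per row under the imdb: prefix (with backslashes and quotes escaped in titles). A Python sketch of the ratings conversion, assuming the three-column layout of title.ratings.tsv:

    # TSV -> Turtle, one statement per row (what the awk one-liner does).
    import gzip

    with open("imdb.ttl", "w") as out:
        out.write("@prefix imdb: <https://www.imdb.com/> .\n")  # GET_PREFIXES
        with gzip.open("title.ratings.tsv.gz", "rt") as f:
            next(f)  # header row (the `sed 1d` of the shell version)
            for line in f:
                tconst, rating, votes = line.rstrip("\n").split("\t")
                out.write(f"imdb:{tconst} imdb:averageRating {rating} ; "
                          f"imdb:numVotes {votes} .\n")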

qlever/Qleverfiles/Qleverfile.ohm-planet
@@ -0,0 +1,41 @@
+# Qleverfile for OpenHistoricalMap, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data # ~1 hour, ~14 GB (ttl.gz), ~3.4 B triples (with osm2rdf)
+# qlever index # ~1 hour, ~10 GB RAM, ~60 GB index size on disk
+# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 5900X with 128 GB RAM and 1 x 4 TB NVMe (04.01.2025)
+
+[data]
+NAME = ohm-planet
+GET_DATA_URL = https://planet.openhistoricalmap.org/planet
+CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
+GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
+GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --iri-prefix-for-untagged-nodes http://www.openhistoricalmap.org/node/ 2>&1 | tee ${NAME}.osm2rdf-log.txt
+GET_DATA_CMD = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
+VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = OHM from ${GET_DATA_URL} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects), version ${VERSION}
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl.gz
+MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+STXXL_MEMORY = 5G
+PARSER_BUFFER_SIZE = 50M
+SETTINGS_JSON = { "num-triples-per-batch": 5000000 }
+ENCODE_AS_ID = https://www.openhistoricalmap.org/node/ http://www.openhistoricalmap.org/node/ https://www.openhistoricalmap.org/way/ https://www.openhistoricalmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmrel_ https://www.openstreetmap.org/changeset/
+
+[server]
+PORT = 7037
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+TIMEOUT = 600s
+CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
+WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/petrimaps/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
+
+[runtime]
+SYSTEM = docker
+IMAGE = adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = ohm-planet
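
WARMUP_CMD fires one tiny GeoSPARQL query at the petrimaps instance so that it pre-loads the dataset's geometries for the given backend. The same request in Python (endpoint, query, and backend URL taken verbatim from WARMUP_CMD above; the response body is discarded):

    # Form-encoded POST, equivalent to curl --data-urlencode.
    import urllib.parse, urllib.request

    QUERY = """PREFIX geo: <http://www.opengis.net/ont/geosparql#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX osm: <https://www.openstreetmap.org/>
    SELECT ?osm_id ?geometry WHERE {
      ?osm_id geo:hasGeometry/geo:asWKT ?geometry .
      ?osm_id rdf:type osm:node
    } LIMIT 1"""

    data = urllib.parse.urlencode({
        "query": QUERY,
        "backend": "https://qlever.cs.uni-freiburg.de/api/ohm-planet",
    }).encode()
    urllib.request.urlopen(
        "https://qlever.cs.uni-freiburg.de/petrimaps/query", data=data)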

qlever/Qleverfiles/Qleverfile.olympics
@@ -0,0 +1,31 @@
+# Qleverfile for Olympics, use with https://github.com/ad-freiburg/qlever-control
+#
+# qlever get-data # downloads .zip file of size 13 MB, uncompressed to 323 MB
+# qlever index # takes ~10 seconds and ~1 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (instant)
+
+[data]
+NAME = olympics
+BASE_URL = https://github.com/wallscope/olympics-rdf
+GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip
+DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL}
+TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
+
+[index]
+INPUT_FILES = olympics.nt
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }
+
+[server]
+PORT = 7019
+ACCESS_TOKEN = ${data:NAME}_7643543846
+MEMORY_FOR_QUERIES = 5G
+CACHE_MAX_SIZE = 2G
+TIMEOUT = 30s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = olympics

qlever/Qleverfiles/Qleverfile.orkg
@@ -0,0 +1,30 @@
+# Qleverfile for ORKG, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data # Get the dataset
+# qlever index # Build index data structures
+# qlever start # Start the server
+
+[data]
+NAME = orkg
+GET_DATA_URL = https://orkg.org/api/rdf/dump
+GET_DATA_CMD = curl -LR -o ${NAME}.ttl ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt
+VERSION = $$(date -r ${NAME}.ttl +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = The Open Research Knowledge Graph (ORKG) (data from ${GET_DATA_URL}, version ${VERSION})
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+
+[server]
+PORT = 7053
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = orkg
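
The VERSION idiom used here (and in the dblp, dnb, imdb, and osm-planet Qleverfiles) just stamps the download's modification time: after `$$` unescaping, the shell runs `$(date -r orkg.ttl +%d.%m.%Y || echo "NO_DATE")`. The Python equivalent of that fallback (hypothetical helper):

    # Format the file's mtime as dd.mm.yyyy, or NO_DATE if it is missing.
    import datetime, os

    def version_of(path):
        try:
            mtime = os.path.getmtime(path)
            return datetime.datetime.fromtimestamp(mtime).strftime("%d.%m.%Y")
        except OSError:
            return "NO_DATE"

    print(version_of("orkg.ttl"))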

qlever/Qleverfiles/Qleverfile.osm-country
@@ -0,0 +1,39 @@
+# Qleverfile for OSM of some country, use with `qlever` CLI
+#
+# Make sure that `osm2rdf` is in your path. Set CONTINENT and COUNTRY
+# such that the link under GET_DATA_CMD exists (the names are usually
+# the canonical names).
+
+[data]
+CONTINENT = europe
+COUNTRY = switzerland
+NAME = osm-${COUNTRY}
+GET_DATA_URL = https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf
+GET_DATA_CMD = wget -nc -O ${NAME}.pbf ${GET_DATA_URL}; (time osm2rdf ${NAME}.pbf -o ${NAME}.ttl --output-compression gz --cache . --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/) 2>&1 | tee ${NAME}.osm2rdf-log.txt
+VERSION = $$(ls -l --time-style=+%d.%m.%Y ${NAME}.pbf 2> /dev/null | cut -d' ' -f6)
+DESCRIPTION = OSM ${COUNTRY}, PBF from ${GET_DATA_URL}, converted to RDF with osm2rdf, version ${VERSION}
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl.gz
+CAT_INPUT_FILES = zcat ${data:NAME}.ttl.gz
+PARALLEL_PARSING = true
+VOCABULARY_TYPE = on-disk-compressed-geo-split
+STXXL_MEMORY = 10G
+SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+[server]
+PORT = 7025
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+TIMEOUT = 100s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_PORT = 7000
+UI_CONFIG = osm-planet

qlever/Qleverfiles/Qleverfile.osm-planet
@@ -0,0 +1,39 @@
+# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data # downloads ~400 GB (ttl.bz2), ~100 B triples
+# qlever index # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
+# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)
+
+[data]
+NAME = osm-planet
+GET_DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
+GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${GET_DATA_URL} | tee ${NAME}.download-log.txt
+VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
+DESCRIPTION = OpenStreetMap, RDF TTL from ${GET_DATA_URL} including DE-9IM triples, version ${VERSION}
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl.bz2
+MULTI_INPUT_JSON = { "cmd": "lbzcat -n 2 ${INPUT_FILES}", "parallel": "true" }
+VOCABULARY_TYPE = on-disk-compressed-geo-split
+PARSER_BUFFER_SIZE = 100M
+STXXL_MEMORY = 60G
+SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ULIMIT = 50000
+ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+[server]
+PORT = 7007
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 40G
+CACHE_MAX_SIZE = 20G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
+TIMEOUT = 600s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = osm-planet

qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf
@@ -0,0 +1,42 @@
+# Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data # download ~100 GB (pbf), convert with osm2rdf, ~200 B triples
+# qlever index # ~40 hours, ~60 GB RAM, ~2.5 TB index size on disk
+# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 9950X with 128 GB RAM and 4 x 8 TB NVMe (02.10.2025)
+
+[data]
+NAME = osm-planet
+PLANET_PBF = planet-250929.osm.pbf
+GET_DATA_URL = https://planet.openstreetmap.org/pbf/${PLANET_PBF}
+GET_PBF_CMD = unbuffer wget -O ${PLANET_PBF} ${GET_DATA_URL}
+OSM2RDF_CMD = unbuffer osm2rdf ${PLANET_PBF} -o ${NAME}.ttl --num-threads 20 --output-compression gz --cache . --store-locations disk-dense --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/ --split-tag-key-by-semicolon ref --split-tag-key-by-semicolon service
+GET_DATA_CMD = ${GET_PBF_CMD} && ${OSM2RDF_CMD} 2>&1 | tee ${NAME}.osm2rdf-log.txt
+VERSION = $$(date -r ${PLANET_PBF} +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = OpenStreetMap, PBF from ${GET_DATA_URL}, converted to RDF TTL and enhanced by DE-9IM triples using osm2rdf
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl.gz
+MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+VOCABULARY_TYPE = on-disk-compressed-geo-split
+PARSER_BUFFER_SIZE = 100M
+STXXL_MEMORY = 60G
+SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ULIMIT = 50000
+ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+[server]
+PORT = 7007
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 40G
+CACHE_MAX_SIZE = 20G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 10G
+TIMEOUT = 600s
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = osm-planet
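
A note on MULTI_INPUT_JSON, which several of the Qleverfiles above use instead of CAT_INPUT_FILES: it describes how the input stream(s) are produced — a command template plus either a "for-each" glob (one stream per matching file, as in the dblp Qleverfile) or "parallel": "true" (parse one stream with multiple threads). A sketch of how the "for-each" form plausibly expands; this illustrates the pattern, not necessarily qlever-control's exact expansion:

    # Expand a "for-each" MULTI_INPUT_JSON into one command per file.
    import glob, json

    multi_input_json = '{ "cmd": "zcat {}", "for-each": "*.gz" }'
    spec = json.loads(multi_input_json)

    for path in sorted(glob.glob(spec["for-each"])):
        print(spec["cmd"].format(path))  # e.g. "zcat dblp.ttl.gz"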