qlever 0.2.5__py3-none-any.whl → 0.5.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. qlever/Qleverfiles/Qleverfile.dblp +36 -0
  2. qlever/Qleverfiles/Qleverfile.dblp-plus +33 -0
  3. qlever/Qleverfiles/Qleverfile.dbpedia +30 -0
  4. qlever/Qleverfiles/Qleverfile.default +51 -0
  5. qlever/Qleverfiles/Qleverfile.dnb +40 -0
  6. qlever/Qleverfiles/Qleverfile.fbeasy +29 -0
  7. qlever/Qleverfiles/Qleverfile.freebase +28 -0
  8. qlever/Qleverfiles/Qleverfile.imdb +36 -0
  9. qlever/Qleverfiles/Qleverfile.ohm-planet +41 -0
  10. qlever/Qleverfiles/Qleverfile.olympics +31 -0
  11. qlever/Qleverfiles/Qleverfile.orkg +30 -0
  12. qlever/Qleverfiles/Qleverfile.osm-country +39 -0
  13. qlever/Qleverfiles/Qleverfile.osm-planet +39 -0
  14. qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf +42 -0
  15. qlever/Qleverfiles/Qleverfile.pubchem +131 -0
  16. qlever/Qleverfiles/Qleverfile.scientists +29 -0
  17. qlever/Qleverfiles/Qleverfile.uniprot +74 -0
  18. qlever/Qleverfiles/Qleverfile.vvz +31 -0
  19. qlever/Qleverfiles/Qleverfile.wikidata +42 -0
  20. qlever/Qleverfiles/Qleverfile.wikipathways +40 -0
  21. qlever/Qleverfiles/Qleverfile.yago-4 +33 -0
  22. qlever/__init__.py +44 -1380
  23. qlever/command.py +87 -0
  24. qlever/commands/__init__.py +0 -0
  25. qlever/commands/add_text_index.py +115 -0
  26. qlever/commands/benchmark_queries.py +1019 -0
  27. qlever/commands/cache_stats.py +125 -0
  28. qlever/commands/clear_cache.py +88 -0
  29. qlever/commands/extract_queries.py +120 -0
  30. qlever/commands/get_data.py +48 -0
  31. qlever/commands/index.py +333 -0
  32. qlever/commands/index_stats.py +306 -0
  33. qlever/commands/log.py +66 -0
  34. qlever/commands/materialized_view.py +110 -0
  35. qlever/commands/query.py +142 -0
  36. qlever/commands/rebuild_index.py +176 -0
  37. qlever/commands/reset_updates.py +59 -0
  38. qlever/commands/settings.py +115 -0
  39. qlever/commands/setup_config.py +97 -0
  40. qlever/commands/start.py +336 -0
  41. qlever/commands/status.py +50 -0
  42. qlever/commands/stop.py +90 -0
  43. qlever/commands/system_info.py +130 -0
  44. qlever/commands/ui.py +271 -0
  45. qlever/commands/update.py +90 -0
  46. qlever/commands/update_wikidata.py +1204 -0
  47. qlever/commands/warmup.py +41 -0
  48. qlever/config.py +223 -0
  49. qlever/containerize.py +167 -0
  50. qlever/log.py +55 -0
  51. qlever/qlever_main.py +79 -0
  52. qlever/qleverfile.py +530 -0
  53. qlever/util.py +330 -0
  54. qlever-0.5.41.dist-info/METADATA +127 -0
  55. qlever-0.5.41.dist-info/RECORD +59 -0
  56. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info}/WHEEL +1 -1
  57. qlever-0.5.41.dist-info/entry_points.txt +2 -0
  58. qlever-0.5.41.dist-info/top_level.txt +1 -0
  59. build/lib/qlever/__init__.py +0 -1383
  60. build/lib/qlever/__main__.py +0 -4
  61. qlever/__main__.py +0 -4
  62. qlever-0.2.5.dist-info/METADATA +0 -277
  63. qlever-0.2.5.dist-info/RECORD +0 -12
  64. qlever-0.2.5.dist-info/entry_points.txt +0 -2
  65. qlever-0.2.5.dist-info/top_level.txt +0 -4
  66. src/qlever/__init__.py +0 -1383
  67. src/qlever/__main__.py +0 -4
  68. {qlever-0.2.5.dist-info → qlever-0.5.41.dist-info/licenses}/LICENSE +0 -0
qlever/Qleverfiles/Qleverfile.dblp
@@ -0,0 +1,36 @@
+ # Qleverfile for DBLP, use with QLever CLI (`pip install qlever`)
+ #
+ # qlever get-data # ~1 min, ~5 GB compressed, 1.3 B triples
+ # qlever index # ~30 min, ~20 GB RAM, ~25 GB index size on disk
+ # qlever start # ~3 s, adjust MEMORY_FOR_QUERIES as needed
+ #
+ # Measured on an AMD Ryzen 9 5950X with 128 GB RAM and an NVMe SSD (25.10.2024)
+
+ [data]
+ NAME = dblp
+ DATA_TARFILE = dblp_KG_with_associated_data.tar
+ GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
+ GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
+ VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+ DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL}, version ${VERSION}
+ FORMAT = ttl
+
+ [index]
+ INPUT_FILES = *.gz
+ MULTI_INPUT_JSON = { "cmd": "zcat {}", "for-each": "*.gz" }
+ SETTINGS_JSON = { "num-triples-per-batch": 5000000 }
+ STXXL_MEMORY = 5G
+
+ [server]
+ PORT = 7015
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+ CACHE_MAX_SIZE = 5G
+ TIMEOUT = 300s
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = dblp
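
A note on the Qleverfile syntax used throughout: `${VAR}` and `${section:VAR}` are configparser-style interpolations resolved by the QLever CLI, and `$$` escapes a literal `$` for the shell. Expanded by hand, the GET_DATA_CMD above comes out to roughly the following (a sketch; the URL and tar file name are taken from the [data] section and may change between releases):

    # Download the DBLP tarball, unpack it, log the output, then remove the tarball
    (curl -LROC - https://sparql.dblp.org/download/dblp_KG_with_associated_data.tar \
      && tar -xf dblp_KG_with_associated_data.tar) 2>&1 | tee dblp.download-log.txt \
      && rm -f dblp_KG_with_associated_data.tar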
qlever/Qleverfiles/Qleverfile.dblp-plus
@@ -0,0 +1,33 @@
+ # Qleverfile for DBLP Plus, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data downloads .gz file of size ~3 GB (as of 31.07.2022)
+ # qlever index takes ~30 minutes and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start starts the server
+ #
+ # Also builds a text index for fast keyword search in literals.
+
+ [data]
+ NAME = dblp-plus
+ GET_DATA_CMD = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
+ DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
+ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
+
+ [index]
+ INPUT_FILES = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
+ CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
+ TEXT_INDEX = from_literals
+
+ [server]
+ PORT = 7027
+ ACCESS_TOKEN = ${data:NAME}_169238202
+ MEMORY_FOR_QUERIES = 20G
+ CACHE_MAX_SIZE = 10G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 2G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = dblp-plus
qlever/Qleverfiles/Qleverfile.dbpedia
@@ -0,0 +1,30 @@
+ # Qleverfile for DBpedia, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # ~14 GB, ~850 M triples (as of 30.07.2024)
+ # qlever index # ~20 min (on an AMD Ryzen 9 5900X)
+ # qlever start # ~3 sec
+
+ [data]
+ NAME = dbpedia
+ DATABUS_URL = https://databus.dbpedia.org/dbpedia/collections/latest-core
+ GET_DATA_CMD = curl -X POST -H "Accept: text/csv" --data-urlencode "query=$$(curl -s -H "Accept:text/sparql" https://databus.dbpedia.org/dbpedia/collections/latest-core)" https://databus.dbpedia.org/sparql | tail -n+2 | sed 's/\r$$//' | sed 's/"//g' | while read -r file; do wget -P rdf-input $$file; done
+ DESCRIPTION = RDF data from ${DATABUS_URL}
+
+ [index]
+ INPUT_FILES = rdf-input/*
+ CAT_INPUT_FILES = (cat rdf-input/*.nt; lbzcat -n2 rdf-input/*.bzip2 rdf-input/*.bz2)
+ SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+ WITH_TEXT_INDEX = false
+
+ [server]
+ PORT = 7012
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+ CACHE_MAX_SIZE = 5G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = dbpedia
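
Unescaped, the DBpedia GET_DATA_CMD is a three-step pipeline: fetch the SPARQL query that defines the latest-core collection, run it against the Databus endpoint to get a CSV of file URLs, then download each file into rdf-input/. Roughly (a sketch, with `$$` unescaped to `$`):

    # 1. Fetch the SPARQL query that defines the collection
    QUERY=$(curl -s -H "Accept:text/sparql" https://databus.dbpedia.org/dbpedia/collections/latest-core)
    # 2. Run it against the Databus SPARQL endpoint, requesting CSV output
    curl -X POST -H "Accept: text/csv" --data-urlencode "query=${QUERY}" https://databus.dbpedia.org/sparql \
      | tail -n+2 | sed 's/\r$//' | sed 's/"//g' \
      | while read -r file; do wget -P rdf-input "$file"; done  # 3. Download each listed file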
qlever/Qleverfiles/Qleverfile.default
@@ -0,0 +1,51 @@
+ # Default Qleverfile, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # If you have never seen a Qleverfile before, we recommend that you first look
+ # at the example Qleverfiles on http://qlever.cs.uni-freiburg.de/qlever-control/
+ # src/qlever/Qleverfiles . Or execute `qlever setup-config <dataset>` on the
+ # command line to obtain the example Qleverfile for <dataset>.
+
+ # As a minimum, each dataset needs a name. If you want `qlever get-data` to do
+ # something meaningful, you need to define GET_DATA_CMD. Otherwise, you need to
+ # generate (or download or copy from somewhere) the input files yourself. Each
+ # dataset should have a short DESCRIPTION, ideally with a date.
+ [data]
+ NAME =
+ GET_DATA_CMD =
+ DESCRIPTION =
+
+ # The format for INPUT_FILES should be such that `ls ${INPUT_FILES}` lists all
+ # input files. CAT_INPUT_FILES should write a concatenation of all input files
+ # to stdout. For example, if your input files are gzipped, you can write `zcat
+ # ${INPUT_FILES}`. Regarding SETTINGS_JSON, look at the other Qleverfiles for
+ # examples. Several batches of size `num-triples-per-batch` are kept in RAM at
+ # the same time; increasing it increases the memory usage but speeds up the
+ # loading process.
+ [index]
+ INPUT_FILES = *.ttl
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
+ SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
+
+ # The server listens on PORT. If you want to send privileged commands to the
+ # server, you need to specify an ACCESS_TOKEN, which you then have to set via a
+ # URL parameter `access_token`. It should not be easily guessable, unless you
+ # don't mind others getting privileged access to your server.
+ [server]
+ PORT = 8888
+ ACCESS_TOKEN =
+
+ # Use SYSTEM = docker to run QLever inside a docker container; the Docker image
+ # will be downloaded automatically. Use SYSTEM = native to use self-compiled
+ # binaries `IndexBuilderMain` and `ServerMain` (which should be in your PATH).
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ # UI_PORT specifies the port of the QLever UI web app, when you run `qlever ui`.
+ # The UI_CONFIG must be one of the slugs from http://qlever.cs.uni-freiburg.de
+ # (see the dropdown menu on the top right, the slug is the last part of the URL).
+ # It determines the example queries and which SPARQL queries are launched to
+ # obtain suggestions as you type a query.
+ [ui]
+ UI_PORT = 8176
+ UI_CONFIG = default
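
The comments above describe the intended workflow end to end. Assuming the CLI is installed via pip, a minimal session with one of the bundled configs (here `olympics`, one of the Qleverfiles listed above) would look like:

    pip install qlever            # install the QLever CLI
    qlever setup-config olympics  # copy the example Qleverfile into the current directory
    qlever get-data               # run GET_DATA_CMD from the [data] section
    qlever index                  # build the index from INPUT_FILES
    qlever start                  # start the server on the [server] PORT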
qlever/Qleverfiles/Qleverfile.dnb
@@ -0,0 +1,40 @@
+ # Qleverfile for DNB, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # takes ~ 10 mins to download .nt.gz files of size ~ 8 GB
+ # qlever index # takes ~ 5 min and ~ 5 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (instantaneous)
+ #
+ # IMPORTANT: The current files contain invalid floating point literals. To make
+ # QLever ignore them, compile QLever with `invalidLiteralsAreSkipped_ = true`
+ # in `src/parser/TurtleParserBase.h:55`.
+ #
+ # NOTE: https://data.dnb.de/opendata/ is rather confusing because of the many
+ # files. This Qleverfile downloads all the datasets named "Gesamtabzug", except
+ # bib_lds.nt.gz, which contains incorrectly formatted IRIs. The file
+ # dnb-all_ldsprov.nt.gz contains invalid floating point literals; to ignore
+ # them, compile QLever with TurtleParserBase::invalidLiteralsAreSkipped_ = true
+
+ [data]
+ NAME = dnb
+ BASE_URL = https://data.dnb.de/opendata
+ GET_DATA_CMD = curl -L -C - --remote-name-all --remote-time ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz 2>&1 | tee ${data:NAME}.getdata-log.txt
+ VERSION = $$(date -r dnb-all_lds.nt.gz +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = DNB data from ${BASE_URL} (authorities-gnd_lds, dnb-all_lds, dnb-all_ldsprov, zdb_lds), version ${VERSION}
+
+ [index]
+ INPUT_FILES = *.nt.gz
+ CAT_INPUT_FILES = zcat ${INPUT_FILES} | sed '/"\$$R0"/d;/"0\.03013\$$D"/d'
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+
+ [server]
+ PORT = 7035
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 5G
+ CACHE_MAX_SIZE = 2G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = dnb
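
The `sed` in CAT_INPUT_FILES is what works around the invalid literals called out in the header comment: after interpolation (`$$` becomes a literal `$`), it deletes every input line containing one of the two malformed literals before the parser sees them. A sketch of the expanded command:

    # Drop lines containing the malformed literals "$R0" and "0.03013$D"
    zcat *.nt.gz | sed '/"\$R0"/d;/"0\.03013\$D"/d'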
qlever/Qleverfiles/Qleverfile.fbeasy
@@ -0,0 +1,29 @@
+ # Qleverfile for Fbeasy, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # downloads .nt file of size ~3 GB (as of 31.07.2022)
+ # qlever index # takes ~10 minutes and ~10 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server
+
+ [data]
+ NAME = fbeasy
+ DATA_URL = https://freebase-easy.cs.uni-freiburg.de
+ GET_DATA_CMD = wget -nc ${DATA_URL}/dump/fbeasy.nt
+ DESCRIPTION = Freebase Easy, RDF NT from ${DATA_URL}, latest version from 18.07.2019
+ TEXT_DESCRIPTION = Sentences from the English Wikipedia that mention at least one Freebase entity
+
+ [index]
+ INPUT_FILES = fbeasy.nt
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
+ SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+
+ [server]
+ PORT = 7003
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 5G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = fbeasy
qlever/Qleverfiles/Qleverfile.freebase
@@ -0,0 +1,28 @@
+ # Qleverfile for Freebase, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # downloads .gz file of size ~3 GB (as of 31.07.2022)
+ # qlever index # takes ~4 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server
+
+ [data]
+ NAME = freebase
+ DATA_URL = http://commondatastorage.googleapis.com/freebase-public/rdf/freebase-rdf-latest.gz
+ GET_DATA_CMD = wget -nc ${DATA_URL}
+ DESCRIPTION = Freebase, RDF NT from ${DATA_URL}, latest (and final) version from 09.08.2015
+
+ [index]
+ INPUT_FILES = freebase-rdf-latest.gz
+ CAT_INPUT_FILES = zcat ${INPUT_FILES}
+ SETTINGS_JSON = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }
+
+ [server]
+ PORT = 7002
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = freebase
qlever/Qleverfiles/Qleverfile.imdb
@@ -0,0 +1,36 @@
+ # Qleverfile for IMDB, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # downloads "basics" and "ratings" of size ~1 GB
+ # qlever index # takes ~5 minutes and ~5 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (takes a few seconds)
+ #
+ # Supports fast keyword search in literals (TEXT_INDEX = from_literals).
+
+ [data]
+ NAME = imdb
+ IMDB_DATA_URL = https://datasets.imdbws.com
+ GET_PREFIXES = echo "@prefix imdb: <https://www.imdb.com/> ."
+ GET_IMDB_BASICS = FILE=title.basics.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ gsub("\\\\", "\\\\", $$3); gsub("\"", "\\\"", $$3); printf "imdb:%s imdb:id \"%s\" ; imdb:type \"%s\" ; imdb:title \"%s\" .\n", $$1, $$1, $$2, $$3 }'; rm -f $${FILE}
+ GET_IMDB_RATINGS = FILE=title.ratings.tsv.gz; curl -sLO -C - ${IMDB_DATA_URL}/$${FILE}; zcat $${FILE} | sed 1d | awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $$1, $$2, $$3 }'; rm -f $${FILE}
+ GET_DATA_CMD = (${GET_PREFIXES}; ${GET_IMDB_BASICS}; ${GET_IMDB_RATINGS}) > ${NAME}.ttl
+ VERSION = $$(date -r imdb.ttl +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+ DESCRIPTION = IMDb, TSV from ${IMDB_DATA_URL}, converted to RDF TTL using awk, version ${VERSION}
+ TEXT_DESCRIPTION = All literals, search with [ ql:contains-word "..."; ql:contains-entity ?literal ]
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
+ SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
+ TEXT_INDEX = from_literals
+
+ [server]
+ PORT = 7029
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 5G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = imdb
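
The TSV-to-Turtle conversion is plain shell and awk once the interpolation is undone (`$${FILE}` becomes `${FILE}`, `$$1` becomes `$1`). The ratings half of the pipeline, expanded (a sketch):

    FILE=title.ratings.tsv.gz
    curl -sLO -C - https://datasets.imdbws.com/${FILE}  # download, resuming if interrupted
    # Drop the TSV header row, then emit one Turtle statement group per title
    zcat ${FILE} | sed 1d |
      awk -F'\t' '{ printf "imdb:%s imdb:averageRating %s ; imdb:numVotes %s .\n", $1, $2, $3 }'
    rm -f ${FILE}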
qlever/Qleverfiles/Qleverfile.ohm-planet
@@ -0,0 +1,41 @@
+ # Qleverfile for OpenHistoricalMap, use with the QLever CLI (`pip install qlever`)
+ #
+ # qlever get-data # ~1 hour, ~14 GB (ttl.gz), ~3.4 B triples (with osm2rdf)
+ # qlever index # ~1 hour, ~10 GB RAM, ~60 GB index size on disk
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+ #
+ # Measured on an AMD Ryzen 9 5900X with 128 GB RAM and 1 x 4 TB NVMe (04.01.2025)
+
+ [data]
+ NAME = ohm-planet
+ GET_DATA_URL = https://planet.openhistoricalmap.org/planet
+ CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
+ GET_DATA_CMD_1 = unbuffer wget -O ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
+ GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --output-compression gz --store-locations=disk-dense --cache . --num-threads 12 --iri-prefix-for-untagged-nodes http://www.openhistoricalmap.org/node/ 2>&1 | tee ${NAME}.osm2rdf-log.txt
+ GET_DATA_CMD = ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
+ VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = OHM from ${GET_DATA_URL} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects), version ${VERSION}
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl.gz
+ MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+ STXXL_MEMORY = 5G
+ PARSER_BUFFER_SIZE = 50M
+ SETTINGS_JSON = { "num-triples-per-batch": 5000000 }
+ ENCODE_AS_ID = https://www.openhistoricalmap.org/node/ http://www.openhistoricalmap.org/node/ https://www.openhistoricalmap.org/way/ https://www.openhistoricalmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#ohmrel_ https://www.openstreetmap.org/changeset/
+
+ [server]
+ PORT = 7037
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+ CACHE_MAX_SIZE = 5G
+ TIMEOUT = 600s
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
+ WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/petrimaps/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = adfreiburg/qlever:latest
+
+ [ui]
+ CONFIG = ohm-planet
qlever/Qleverfiles/Qleverfile.olympics
@@ -0,0 +1,31 @@
+ # Qleverfile for Olympics, use with https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # downloads .zip file of size 13 MB, uncompressed to 323 MB
+ # qlever index # takes ~10 seconds and ~1 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (instant)
+
+ [data]
+ NAME = olympics
+ BASE_URL = https://github.com/wallscope/olympics-rdf
+ GET_DATA_CMD = curl -sLo olympics.zip -C - ${BASE_URL}/raw/master/data/olympics-nt-nodup.zip && unzip -q -o olympics.zip && rm olympics.zip
+ DESCRIPTION = 120 Years of Olympics, data from ${BASE_URL}
+ TEXT_DESCRIPTION = All literals, search with FILTER CONTAINS(?var, "...")
+
+ [index]
+ INPUT_FILES = olympics.nt
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 100000 }
+
+ [server]
+ PORT = 7019
+ ACCESS_TOKEN = ${data:NAME}_7643543846
+ MEMORY_FOR_QUERIES = 5G
+ CACHE_MAX_SIZE = 2G
+ TIMEOUT = 30s
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = olympics
qlever/Qleverfiles/Qleverfile.orkg
@@ -0,0 +1,30 @@
+ # Qleverfile for ORKG, use with the QLever CLI (`pip install qlever`)
+ #
+ # qlever get-data # Get the dataset
+ # qlever index # Build index data structures
+ # qlever start # Start the server
+
+ [data]
+ NAME = orkg
+ GET_DATA_URL = https://orkg.org/api/rdf/dump
+ GET_DATA_CMD = curl -LR -o ${NAME}.ttl ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt
+ VERSION = $$(date -r ${NAME}.ttl +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = The Open Research Knowledge Graph (ORKG) (data from ${GET_DATA_URL}, version ${VERSION})
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl
+ CAT_INPUT_FILES = cat ${INPUT_FILES}
+ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+
+ [server]
+ PORT = 7053
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+ CACHE_MAX_SIZE = 5G
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = orkg
qlever/Qleverfiles/Qleverfile.osm-country
@@ -0,0 +1,39 @@
+ # Qleverfile for OSM of some country, use with `qlever` CLI
+ #
+ # Make sure that `osm2rdf` is in your PATH. Set CONTINENT and COUNTRY
+ # such that the link under GET_DATA_CMD exists (the names are usually
+ # the canonical names).
+
+ [data]
+ CONTINENT = europe
+ COUNTRY = switzerland
+ NAME = osm-${COUNTRY}
+ GET_DATA_URL = https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf
+ GET_DATA_CMD = wget -nc -O ${NAME}.pbf ${GET_DATA_URL}; (time osm2rdf ${NAME}.pbf -o ${NAME}.ttl --output-compression gz --cache . --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/) 2>&1 | tee ${NAME}.osm2rdf-log.txt
+ VERSION = $$(ls -l --time-style=+%d.%m.%Y ${NAME}.pbf 2> /dev/null | cut -d' ' -f6)
+ DESCRIPTION = OSM ${COUNTRY}, PBF from ${GET_DATA_URL}, converted to RDF with osm2rdf, version ${VERSION}
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl.gz
+ CAT_INPUT_FILES = zcat ${data:NAME}.ttl.gz
+ PARALLEL_PARSING = true
+ VOCABULARY_TYPE = on-disk-compressed-geo-split
+ STXXL_MEMORY = 10G
+ SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+ [server]
+ PORT = 7025
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 20G
+ CACHE_MAX_SIZE = 10G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+ TIMEOUT = 100s
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_PORT = 7000
+ UI_CONFIG = osm-planet
qlever/Qleverfiles/Qleverfile.osm-planet
@@ -0,0 +1,39 @@
+ # Qleverfile for OSM Planet, use with the QLever CLI (`pip install qlever`)
+ #
+ # qlever get-data # downloads ~400 GB (ttl.bz2), ~100 B triples
+ # qlever index # ~20 hours, ~60 GB RAM, ~1.5 TB index size on disk
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+ #
+ # Measured on an AMD Ryzen 9 7950X with 128 GB RAM and 2 x 8 TB NVMe (04.01.2025)
+
+ [data]
+ NAME = osm-planet
+ GET_DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
+ GET_DATA_CMD = unbuffer wget -O ${NAME}.ttl.bz2 ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt
+ VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
+ DESCRIPTION = OpenStreetMap, RDF TTL from ${GET_DATA_URL} including DE-9IM triples, version ${VERSION}
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl.bz2
+ MULTI_INPUT_JSON = { "cmd": "lbzcat -n 2 ${INPUT_FILES}", "parallel": "true" }
+ VOCABULARY_TYPE = on-disk-compressed-geo-split
+ PARSER_BUFFER_SIZE = 100M
+ STXXL_MEMORY = 60G
+ SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ ULIMIT = 50000
+ ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+ [server]
+ PORT = 7007
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 40G
+ CACHE_MAX_SIZE = 20G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 20G
+ TIMEOUT = 600s
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = osm-planet
qlever/Qleverfiles/Qleverfile.osm-planet-from-pbf
@@ -0,0 +1,42 @@
+ # Qleverfile for OSM Planet (from PBF), use with the QLever CLI (`pip install qlever`)
+ #
+ # qlever get-data # download ~100 GB (pbf), convert with osm2rdf, ~200 B triples
+ # qlever index # ~40 hours, ~60 GB RAM, ~2.5 TB index size on disk
+ # qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+ #
+ # Measured on an AMD Ryzen 9 9950X with 128 GB RAM and 4 x 8 TB NVMe (02.10.2025)
+
+ [data]
+ NAME = osm-planet
+ PLANET_PBF = planet-250929.osm.pbf
+ GET_DATA_URL = https://planet.openstreetmap.org/pbf/${PLANET_PBF}
+ GET_PBF_CMD = unbuffer wget -O ${PLANET_PBF} ${GET_DATA_URL}
+ OSM2RDF_CMD = unbuffer osm2rdf ${PLANET_PBF} -o ${NAME}.ttl --num-threads 20 --output-compression gz --cache . --store-locations disk-dense --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/ --split-tag-key-by-semicolon ref --split-tag-key-by-semicolon service
+ GET_DATA_CMD = ${GET_PBF_CMD} && ${OSM2RDF_CMD} 2>&1 | tee ${NAME}.osm2rdf-log.txt
+ VERSION = $$(date -r ${PLANET_PBF} +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = OpenStreetMap, PBF from ${GET_DATA_URL}, converted to RDF TTL and enhanced by DE-9IM triples using osm2rdf
+
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl.gz
+ MULTI_INPUT_JSON = { "cmd": "zcat ${INPUT_FILES}", "parallel": "true" }
+ VOCABULARY_TYPE = on-disk-compressed-geo-split
+ PARSER_BUFFER_SIZE = 100M
+ STXXL_MEMORY = 60G
+ SETTINGS_JSON = { "num-triples-per-batch": 10000000 }
+ ULIMIT = 50000
+ ENCODE_AS_ID = https://www.openstreetmap.org/node/ http://www.openstreetmap.org/node/ https://www.openstreetmap.org/way/ https://www.openstreetmap.org/relation/ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_tagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmnode_untagged_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmway_ https://osm2rdf.cs.uni-freiburg.de/rdf/geom#osmrel_ https://www.openstreetmap.org/changeset/
+
+ [server]
+ PORT = 7007
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 40G
+ CACHE_MAX_SIZE = 20G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 10G
+ TIMEOUT = 600s
+
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ UI_CONFIG = osm-planet
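
One subtlety in GET_DATA_CMD above: the pipe binds tighter than `&&` in shell, so only osm2rdf's output reaches the tee'd log file (the wget step prints its own progress). Fully expanded, get-data runs roughly (a sketch):

    unbuffer wget -O planet-250929.osm.pbf https://planet.openstreetmap.org/pbf/planet-250929.osm.pbf \
      && unbuffer osm2rdf planet-250929.osm.pbf -o osm-planet.ttl --num-threads 20 \
           --output-compression gz --cache . --store-locations disk-dense \
           --iri-prefix-for-untagged-nodes http://www.openstreetmap.org/node/ \
           --split-tag-key-by-semicolon ref --split-tag-key-by-semicolon service \
           2>&1 | tee osm-planet.osm2rdf-log.txt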