qlever-0.5.5-py3-none-any.whl → qlever-0.5.7-py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of qlever might be problematic. Click here for more details.
- qlever/Qleverfiles/Qleverfile.ohm-planet +14 -14
- qlever/Qleverfiles/Qleverfile.osm-country +27 -21
- qlever/Qleverfiles/Qleverfile.wikimedia-commons +37 -0
- qlever/__main__.py +1476 -0
- qlever/commands/index.py +8 -5
- qlever/containerize.py +12 -0
- qlever/qleverfile.py +1 -1
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/METADATA +1 -1
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/RECORD +13 -11
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/WHEEL +1 -1
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/LICENSE +0 -0
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.5.dist-info → qlever-0.5.7.dist-info}/top_level.txt +0 -0
|
@@ -1,37 +1,37 @@
|
|
|
1
1
|
# Qleverfile for OHM Planet, use with https://github.com/ad-freiburg/qlever-control
|
|
2
2
|
#
|
|
3
3
|
# qlever get-data # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples)
|
|
4
|
-
# qlever index # ~
|
|
4
|
+
# qlever index # ~20 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
|
|
5
5
|
# qlever start # ~1 sec
|
|
6
6
|
#
|
|
7
|
-
# For `qlever get-data` to work, `osm2rdf`
|
|
8
|
-
# and included in the `PATH`.
|
|
7
|
+
# For `qlever get-data` to work, `osm2rdf` must be installed and in the `PATH`.
|
|
9
8
|
|
|
10
9
|
[data]
|
|
11
10
|
NAME = ohm-planet
|
|
12
11
|
GET_DATA_URL = https://planet.openhistoricalmap.org/planet
|
|
12
|
+
CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
|
|
13
13
|
GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
|
|
14
|
-
GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --
|
|
15
|
-
|
|
16
|
-
GET_DATA_CMD = set -o pipefail && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2} && echo && ${GET_DATA_CMD_3} && head -100 <(bzcat ${NAME}.ttl.bz2) | sed '/^@prefix/!d' > ${NAME}.prefix-definitions
|
|
14
|
+
GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --cache . --add-hascentroid 2>&1 | tee ${NAME}.osm2rdf-log.txt
|
|
15
|
+
GET_DATA_CMD = set -o pipefail && ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
|
|
17
16
|
VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
|
|
18
17
|
DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
|
|
19
18
|
|
|
20
19
|
[index]
|
|
21
|
-
INPUT_FILES = ${data:NAME}.
|
|
20
|
+
INPUT_FILES = ${data:NAME}.ttl.bz2
|
|
22
21
|
CAT_INPUT_FILES = bzcat -f ${INPUT_FILES}
|
|
23
22
|
SETTINGS_JSON = { "prefixes-external": [""], "ascii-prefixes-only": false, "parallel-parsing": true, "num-triples-per-batch": 5000000 }
|
|
24
23
|
|
|
25
24
|
[server]
|
|
26
|
-
PORT
|
|
27
|
-
ACCESS_TOKEN
|
|
28
|
-
MEMORY_FOR_QUERIES
|
|
29
|
-
CACHE_MAX_SIZE
|
|
30
|
-
|
|
31
|
-
|
|
25
|
+
PORT = 7037
|
|
26
|
+
ACCESS_TOKEN = ${data:NAME}
|
|
27
|
+
MEMORY_FOR_QUERIES = 10G
|
|
28
|
+
CACHE_MAX_SIZE = 5G
|
|
29
|
+
CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
|
|
30
|
+
WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/mapui-petri/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null
|
|
32
31
|
|
|
33
32
|
[runtime]
|
|
34
|
-
SYSTEM =
|
|
33
|
+
SYSTEM = docker
|
|
34
|
+
IMAGE = adfreiburg/qlever:latest
|
|
35
35
|
|
|
36
36
|
[ui]
|
|
37
37
|
CONFIG = ohm-planet
|
|
@@ -8,35 +8,41 @@
|
|
|
8
8
|
# the link under GET_DATA_CMD exists (the names are usually the canonical
|
|
9
9
|
# names). The time for osm2rdf is around the same as that for "qlever index".
|
|
10
10
|
|
|
11
|
-
#
|
|
11
|
+
# Dataset settings
|
|
12
|
+
[data]
|
|
12
13
|
CONTINENT = europe
|
|
13
14
|
COUNTRY = switzerland
|
|
14
|
-
|
|
15
|
-
PBF = ${
|
|
16
|
-
RDF_FILES = "${DB}.ttl.bz2"
|
|
17
|
-
CAT_FILES = "bzcat ${RDF_FILES}"
|
|
15
|
+
NAME = osm-${COUNTRY}
|
|
16
|
+
PBF = ${NAME}.pbf
|
|
18
17
|
WITH_TEXT = false
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
18
|
+
VERSION = $$(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6)
|
|
19
|
+
GET_DATA_CMD = wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${NAME}.*.bz2; ( time osm2rdf ${PBF} -o ${NAME}.ttl --cache . ) 2>&1 | tee ${NAME}.osm2rdf-log.txt; rm -f spatial-*
|
|
20
|
+
DESCRIPTION = OSM ${COUNTRY}, dump from ${VERSION} with ogc:sfContains
|
|
21
|
+
|
|
22
|
+
# Indexer settings
|
|
23
|
+
[index]
|
|
24
|
+
INPUT_FILES = ${data:NAME}.ttl.bz2
|
|
25
|
+
CAT_INPUT_FILES = bzcat ${data:NAME}.ttl.bz2
|
|
26
|
+
STXXL_MEMORY = 10G
|
|
27
|
+
SETTINGS_JSON = { "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
|
|
23
28
|
|
|
24
29
|
# Server settings
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
30
|
+
[server]
|
|
31
|
+
HOSTNAME = localhost
|
|
32
|
+
PORT = 7025
|
|
33
|
+
ACCESS_TOKEN = ${data:NAME}_%RANDOM%
|
|
28
34
|
MEMORY_FOR_QUERIES = 20G
|
|
29
35
|
CACHE_MAX_SIZE = 10G
|
|
30
36
|
CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
|
|
31
37
|
CACHE_MAX_NUM_ENTRIES = 100
|
|
38
|
+
TIMEOUT = 100s
|
|
32
39
|
|
|
33
|
-
#
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
QLEVER_DOCKER_CONTAINER = qlever.${DB}
|
|
40
|
+
# Runtime to use
|
|
41
|
+
[runtime]
|
|
42
|
+
SYSTEM = docker
|
|
43
|
+
IMAGE = docker.io/adfreiburg/qlever:latest
|
|
38
44
|
|
|
39
|
-
#
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
45
|
+
# Qlever UI
|
|
46
|
+
[ui]
|
|
47
|
+
UI_PORT = 7000
|
|
48
|
+
UI_CONFIG = osm
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Qleverfile for Wikimedia Commons, TODO: add to https://github.com/ad-freiburg/qlever-control
|
|
2
|
+
#
|
|
3
|
+
# qlever get-data # takes ~3 hours to download .bz2 file of size ~40 GB
|
|
4
|
+
# qlever index # takes ~2 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
|
|
5
|
+
# qlever start # starts the server (takes around 15 seconds)
|
|
6
|
+
|
|
7
|
+
[data]
|
|
8
|
+
NAME = wikimedia-commons
|
|
9
|
+
MAIN_RDF_FILE = latest-mediainfo.ttl.gz
|
|
10
|
+
DATA_URL_BASE = https://dumps.wikimedia.org/other/wikibase/commonswiki
|
|
11
|
+
GET_TTL_CMD = wget -nc ${DATA_URL_BASE}/${MAIN_RDF_FILE}
|
|
12
|
+
GET_PROPS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX wikibase: <http://wikiba.se/ontology#> CONSTRUCT { ?s ?p ?o } WHERE { VALUES ?p { wikibase:claim wikibase:directClaim wikibase:novalue wikibase:propertyType wikibase:qualifier wikibase:qualifierValue wikibase:reference wikibase:referenceValue wikibase:statementProperty wikibase:statementValue } ?s ?p ?o }" > properties.nt
|
|
13
|
+
GET_LABELS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> CONSTRUCT { ?subject rdfs:label ?label } WHERE { ?subject @en@rdfs:label ?label }" > labels.nt
|
|
14
|
+
GET_DATA_CMD = ${GET_TTL_CMD} && ${GET_PROPS_CMD} && ${GET_LABELS_CMD}
|
|
15
|
+
INDEX_DESCRIPTION = Wikimedia Commons from ${DATA_URL_BASE}, version 09.11.2023 + Wikidata triples for rdfs:label and wikibase:claim etc.
|
|
16
|
+
|
|
17
|
+
[index]
|
|
18
|
+
INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt
|
|
19
|
+
CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
|
|
20
|
+
WITH_TEXT_INDEX = from_literals
|
|
21
|
+
STXXL_MEMORY_GB = 5
|
|
22
|
+
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
|
|
23
|
+
|
|
24
|
+
[server]
|
|
25
|
+
PORT = 7033
|
|
26
|
+
ACCESS_TOKEN = ${data:NAME}_2511328747
|
|
27
|
+
MEMORY_FOR_QUERIES_GB = 20
|
|
28
|
+
CACHE_MAX_SIZE_GB = 10
|
|
29
|
+
CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
|
|
30
|
+
|
|
31
|
+
[runtime]
|
|
32
|
+
SYSTEM = native
|
|
33
|
+
IMAGE = docker.io/adfreiburg/qlever:latest
|
|
34
|
+
|
|
35
|
+
[ui]
|
|
36
|
+
PORT = 7000
|
|
37
|
+
CONFIG = wikimedia-commons
|