qlever 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.


@@ -1,37 +1,37 @@
  # Qleverfile for OHM Planet, use with https://github.com/ad-freiburg/qlever-control
  #
  # qlever get-data # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples)
- # qlever index # ~10 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever index # ~20 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
  # qlever start # ~1 sec
  #
- # For `qlever get-data` to work, `osm2rdf` and `spatialjoin` must be installed
- # and included in the `PATH`.
+ # For `qlever get-data` to work, `osm2rdf` must be installed and in the `PATH`.

  [data]
  NAME = ohm-planet
  GET_DATA_URL = https://planet.openhistoricalmap.org/planet
+ CHECK_BINARIES = osm2rdf -h > /dev/null || (echo "osm2rdf not found, make sure that it's installed and in your PATH" && exit 1)
  GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
- GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --simplify-wkt 0 --write-ogc-geo-triples none 2>&1 | tee ${NAME}.osm2rdf-log.txt
- GET_DATA_CMD_3 = bzcat ${NAME}.ttl.bz2 | \grep "^osm2rdf" | sed -En 's/^osm2rdf(geom)?:(ohm_)?(node|rel|way)[a-z]*_([0-9]+) geo:asWKT "([^\"]+)".*/ohm\3:\4\t\5/p' | tee ${NAME}.spatialjoin-input.tsv | spatialjoin --contains " ogc:sfContains " --covers " ogc:sfCovers " --intersects " ogc:sfIntersects " --equals " ogc:sfEquals " --touches " ogc:sfTouches " --crosses " ogc:sfCrosses " --overlaps " ogc:sfOverlaps " --suffix $$' .\n' -o ${NAME}.spatialjoin-triples.ttl.bz2 2>&1 | tee ${NAME}.spatialjoin-log.txt && rm -f areas events lines points simplelines
- GET_DATA_CMD = set -o pipefail && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2} && echo && ${GET_DATA_CMD_3} && head -100 <(bzcat ${NAME}.ttl.bz2) | sed '/^@prefix/!d' > ${NAME}.prefix-definitions
+ GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --cache . --add-hascentroid 2>&1 | tee ${NAME}.osm2rdf-log.txt
+ GET_DATA_CMD = set -o pipefail && ${CHECK_BINARIES} && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2}
  VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
  DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

  [index]
- INPUT_FILES = ${data:NAME}.prefix-definitions ${data:NAME}.spatialjoin-triples.ttl.bz2 ${data:NAME}.ttl.bz2
+ INPUT_FILES = ${data:NAME}.ttl.bz2
  CAT_INPUT_FILES = bzcat -f ${INPUT_FILES}
  SETTINGS_JSON = { "prefixes-external": [""], "ascii-prefixes-only": false, "parallel-parsing": true, "num-triples-per-batch": 5000000 }

  [server]
- PORT = 7037
- ACCESS_TOKEN = ${data:NAME}
- MEMORY_FOR_QUERIES = 10G
- CACHE_MAX_SIZE = 5G
- WARMUP_CMD = curl -s http://localhost:${PORT} -H "Accept: application/qlever-results+json" --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> SELECT ?subject ?geometry WHERE { ?subject geo:hasGeometry ?m . ?m geo:asWKT ?geometry } INTERNAL SORT BY ?subject" --data-urlencode "access-token=${server:ACCESS_TOKEN}" --data-urlencode "pinresult=true" --data-urlencode "send=0" | jq .resultsize | xargs printf "Result size: %'d\n"
-
+ PORT = 7037
+ ACCESS_TOKEN = ${data:NAME}
+ MEMORY_FOR_QUERIES = 10G
+ CACHE_MAX_SIZE = 5G
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 4G
+ WARMUP_CMD = curl -s https://qlever.cs.uni-freiburg.de/mapui-petri/query --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX osm: <https://www.openstreetmap.org/> SELECT ?osm_id ?geometry WHERE { ?osm_id geo:hasGeometry/geo:asWKT ?geometry . ?osm_id rdf:type osm:node } LIMIT 1" --data-urlencode "backend=https://qlever.cs.uni-freiburg.de/api/${data:NAME}" > /dev/null

  [runtime]
- SYSTEM = native
+ SYSTEM = docker
+ IMAGE = adfreiburg/qlever:latest

  [ui]
  CONFIG = ohm-planet
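
In short, this hunk drops the separate `spatialjoin` post-processing step (GET_DATA_CMD_3 and the extra index input files), makes `osm2rdf` the only required external binary and checks for it up front via CHECK_BINARIES, and switches the runtime from native to Docker. A minimal session with the updated Qleverfile might look as follows (a sketch: it assumes qlever-control 0.5.7 and osm2rdf are installed and that the file is fetched with `qlever setup-config`; the timings are the ones quoted in the file's comments):

    mkdir ohm-planet && cd ohm-planet
    qlever setup-config ohm-planet   # fetch the Qleverfile shown above
    qlever get-data                  # ~20 mins: download PBF, run osm2rdf
    qlever index                     # ~20 mins, ~5 GB RAM
    qlever start                     # ~1 sec, serves on PORT 7037
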
@@ -8,35 +8,41 @@
  # the link under GET_DATA_CMD exists (the names are usually the canonical
  # names). The time for osm2rdf is around the same as that for "qlever index".

- # Indexer settings
+ # Dataset settings
+ [data]
  CONTINENT = europe
  COUNTRY = switzerland
- DB = osm-${COUNTRY}
- PBF = ${DB}.pbf
- RDF_FILES = "${DB}.ttl.bz2"
- CAT_FILES = "bzcat ${RDF_FILES}"
+ NAME = osm-${COUNTRY}
+ PBF = ${NAME}.pbf
  WITH_TEXT = false
- STXXL_MEMORY = 10
- SETTINGS_JSON = '{ "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }'
- GET_DATA_CMD = "wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${DB}.*.bz2; ( time /local/data/osm2rdf/build/apps/osm2rdf ${PBF} -o ${DB}.ttl --cache . --write-geometric-relation-statistics ) 2>&1 | tee ${DB}.osm2rdf-log.txt; rm -f spatial-*"
- DESCRIPTION = "OSM ${COUNTRY^}, dump from $(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6) with ogc:contains"
+ VERSION = $$(ls -l --time-style=+%d.%m.%Y ${PBF} 2> /dev/null | cut -d' ' -f6)
+ GET_DATA_CMD = wget -nc -O ${PBF} https://download.geofabrik.de/${CONTINENT}/${COUNTRY}-latest.osm.pbf; rm -f ${NAME}.*.bz2; ( time osm2rdf ${PBF} -o ${NAME}.ttl --cache . ) 2>&1 | tee ${NAME}.osm2rdf-log.txt; rm -f spatial-*
+ DESCRIPTION = OSM ${COUNTRY}, dump from ${VERSION} with ogc:sfContains
+
+ # Indexer settings
+ [index]
+ INPUT_FILES = ${data:NAME}.ttl.bz2
+ CAT_INPUT_FILES = bzcat ${data:NAME}.ttl.bz2
+ STXXL_MEMORY = 10G
+ SETTINGS_JSON = { "prefixes-external": [ "\"LINESTRING(", "\"MULTIPOLYGON(", "\"POLYGON(" ], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }

  # Server settings
- HOSTNAME = $(hostname -f)
- SERVER_PORT = 7025
- ACCESS_TOKEN = ${DB}_%RANDOM%
+ [server]
+ HOSTNAME = localhost
+ PORT = 7025
+ ACCESS_TOKEN = ${data:NAME}_%RANDOM%
  MEMORY_FOR_QUERIES = 20G
  CACHE_MAX_SIZE = 10G
  CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
  CACHE_MAX_NUM_ENTRIES = 100
+ TIMEOUT = 100s

- # QLever binaries
- QLEVER_BIN_DIR = %QLEVER_BIN_DIR%
- USE_DOCKER = true
- QLEVER_DOCKER_IMAGE = adfreiburg/qlever
- QLEVER_DOCKER_CONTAINER = qlever.${DB}
+ # Runtime to use
+ [runtime]
+ SYSTEM = docker
+ IMAGE = docker.io/adfreiburg/qlever:latest

- # QLever UI
- QLEVERUI_PORT = 7000
- QLEVERUI_DIR = qlever-ui
- QLEVERUI_CONFIG = osm
+ # Qlever UI
+ [ui]
+ UI_PORT = 7000
+ UI_CONFIG = osm
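
This hunk migrates the old flat, shell-style Qleverfile (quoted values, $(...) substitutions, %PLACEHOLDER% variables) to the sectioned INI format used by qlever-control 0.5.x, where keys live under [data], [index], [server], [runtime], and [ui] and are cross-referenced as ${data:NAME}. Once the server is started with these settings, it answers SPARQL over HTTP on the configured port; a smoke test might look like this (a hypothetical query, mirroring the curl --data-urlencode pattern of the OHM WARMUP_CMD above; the Accept header requests the standard SPARQL JSON results format):

    curl -s http://localhost:7025 \
      -H "Accept: application/sparql-results+json" \
      --data-urlencode "query=SELECT * WHERE { ?s ?p ?o } LIMIT 5"
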
@@ -0,0 +1,37 @@
+ # Qleverfile for Wikimedia Commons, TODO: add to https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # takes ~3 hours to download .bz2 file of size ~40 GB
+ # qlever index # takes ~2 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (takes around 15 seconds)
+
+ [data]
+ NAME = wikimedia-commons
+ MAIN_RDF_FILE = latest-mediainfo.ttl.gz
+ DATA_URL_BASE = https://dumps.wikimedia.org/other/wikibase/commonswiki
+ GET_TTL_CMD = wget -nc ${DATA_URL_BASE}/${MAIN_RDF_FILE}
+ GET_PROPS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX wikibase: <http://wikiba.se/ontology#> CONSTRUCT { ?s ?p ?o } WHERE { VALUES ?p { wikibase:claim wikibase:directClaim wikibase:novalue wikibase:propertyType wikibase:qualifier wikibase:qualifierValue wikibase:reference wikibase:referenceValue wikibase:statementProperty wikibase:statementValue } ?s ?p ?o }" > properties.nt
+ GET_LABELS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> CONSTRUCT { ?subject rdfs:label ?label } WHERE { ?subject @en@rdfs:label ?label }" > labels.nt
+ GET_DATA_CMD = ${GET_TTL_CMD} && ${GET_PROPS_CMD} && ${GET_LABELS_CMD}
+ INDEX_DESCRIPTION = Wikimedia Commons from ${DATA_URL_BASE}, version 09.11.2023 + Wikidata triples for rdfs:label and wikibase:claim etc.
+
+ [index]
+ INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt
+ CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+ WITH_TEXT_INDEX = from_literals
+ STXXL_MEMORY_GB = 5
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
+
+ [server]
+ PORT = 7033
+ ACCESS_TOKEN = ${data:NAME}_2511328747
+ MEMORY_FOR_QUERIES_GB = 20
+ CACHE_MAX_SIZE_GB = 10
+ CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
+
+ [runtime]
+ SYSTEM = native
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ PORT = 7000
+ CONFIG = wikimedia-commons
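
The added Qleverfile assembles its input from three sources: the Commons mediainfo dump plus two N-Triples files produced by CONSTRUCT queries against the public QLever Wikidata endpoint. Since `zcat -f` passes non-gzipped files through unchanged, the single CAT_INPUT_FILES pipe can mix the .gz dump with the plain .nt files; expanding the configured variables by hand gives a quick sanity check after `qlever get-data` (a sketch, assuming the three files sit in the current directory):

    # What CAT_INPUT_FILES expands to; prints the first few triples
    zcat -f latest-mediainfo.ttl.gz labels.nt properties.nt | head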