qlever 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of qlever might be problematic.
- qlever/Qleverfiles/Qleverfile.dblp +14 -10
- qlever/Qleverfiles/Qleverfile.dblp-plus +2 -2
- qlever/Qleverfiles/Qleverfile.default +1 -1
- qlever/Qleverfiles/Qleverfile.fbeasy +4 -4
- qlever/Qleverfiles/Qleverfile.freebase +2 -2
- qlever/Qleverfiles/Qleverfile.imdb +1 -1
- qlever/Qleverfiles/Qleverfile.orkg +30 -0
- qlever/Qleverfiles/Qleverfile.osm-planet +1 -1
- qlever/Qleverfiles/Qleverfile.vvz +3 -3
- qlever/Qleverfiles/Qleverfile.wikidata +29 -17
- qlever/Qleverfiles/Qleverfile.yago-4 +4 -4
- qlever/commands/example_queries.py +250 -150
- qlever/commands/index.py +96 -8
- qlever/commands/setup_config.py +47 -31
- qlever/commands/system_info.py +126 -0
- qlever/commands/ui.py +50 -23
- qlever/containerize.py +67 -33
- qlever/qleverfile.py +10 -3
- qlever/util.py +55 -30
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/METADATA +1 -1
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/RECORD +25 -25
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/WHEEL +1 -1
- qlever/Qleverfiles/Qleverfile.wikimedia-commons +0 -37
- qlever/__main__.py +0 -1476
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/LICENSE +0 -0
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.7.dist-info → qlever-0.5.9.dist-info}/top_level.txt +0 -0
qlever/Qleverfiles/Qleverfile.dblp
@@ -1,20 +1,24 @@
-# Qleverfile for DBLP, use with
+# Qleverfile for DBLP, use with QLever CLI (`pip install qlever`)
 #
-# qlever get-data #
-# qlever index #
-# qlever start #
+# qlever get-data # ~1 min, ~5 GB compressed, 1.3 B triples
+# qlever index # ~30 min, ~20 GB RAM, ~25 GB index size on disk
+# qlever start # ~3 s, adjust MEMORY_FOR_QUERIES as needed
+#
+# Measured on an AMD Ryzen 9 5950X with 128 GB RAM, and NVMe SSD (25.10.2024)
 
 [data]
 NAME = dblp
-
-
+DATA_TARFILE = dblp_KG_with_associated_data.tar
+GET_DATA_URL = https://sparql.dblp.org/download/${DATA_TARFILE}
+GET_DATA_CMD = (curl -LROC - ${GET_DATA_URL} && tar -xf ${DATA_TARFILE}) 2>&1 | tee ${NAME}.download-log.txt && rm -f ${DATA_TARFILE}
 VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
-DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL} (version ${VERSION})
+DESCRIPTION = DBLP computer science bibliography + citations from OpenCitations, data from ${GET_DATA_URL} (version ${VERSION})
+FORMAT = ttl
 
 [index]
-INPUT_FILES
-
-SETTINGS_JSON
+INPUT_FILES = *.gz
+MULTI_INPUT_JSON = $$(ls *.gz | awk 'BEGIN { printf "[ " } NR > 1 { printf ", " } { printf "{\"cmd\": \"zcat " $$0 "\"}" } END { printf "]" }')
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 5000000, "prefixes-external": [""] }
 
 [server]
 PORT = 7015
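The new MULTI_INPUT_JSON for DBLP shell-expands an awk one-liner into a JSON array with one {"cmd": "zcat <file>"} object per .gz input. A minimal Python sketch of the equivalent construction (the file names are whatever *.gz matches):

    import glob
    import json

    # Mirror the awk one-liner above: one {"cmd": "zcat <file>"} object per .gz input.
    multi_input = [{"cmd": f"zcat {name}"} for name in sorted(glob.glob("*.gz"))]
    print(json.dumps(multi_input))
    # e.g. with dblp.ttl.gz in the working directory: [{"cmd": "zcat dblp.ttl.gz"}]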
qlever/Qleverfiles/Qleverfile.dblp-plus
@@ -9,12 +9,12 @@
 [data]
 NAME = dblp-plus
 GET_DATA_CMD = wget -nc -O dblp.ttl.gz https://dblp.org/rdf/dblp.ttl.gz
-
+DESCRIPTION = Publication data from https://dblp.org, with affiliations from https://www.wikidata.org and citations from https://opencitations.net
 TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
 
 [index]
 INPUT_FILES = dblp.ttl.gz affiliations.nt affiliations.additions.nt citations.nt
-CAT_INPUT_FILES = zcat -f ${
+CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
 SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [ "<https://w3id.org", "<https://doi.org", "<http://dx.doi.org" ] }
 TEXT_INDEX = from_literals
 
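Qleverfiles resolve ${VAR} within a section and ${section:VAR} across sections, as in CAT_INPUT_FILES = zcat -f ${INPUT_FILES} above. A minimal sketch of this resolution using Python's configparser with ExtendedInterpolation; how the qlever CLI itself parses these files may differ in detail:

    from configparser import ConfigParser, ExtendedInterpolation

    # Illustrative only: resolve ${INPUT_FILES} (same section) and
    # ${data:NAME} (cross-section) the way ExtendedInterpolation does.
    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read_string("""
    [data]
    NAME = dblp-plus

    [index]
    INPUT_FILES = dblp.ttl.gz affiliations.nt citations.nt
    CAT_INPUT_FILES = zcat -f ${INPUT_FILES}

    [server]
    ACCESS_TOKEN = ${data:NAME}
    """)
    print(config["index"]["CAT_INPUT_FILES"])  # zcat -f dblp.ttl.gz affiliations.nt citations.nt
    print(config["server"]["ACCESS_TOKEN"])    # dblp-plus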
qlever/Qleverfiles/Qleverfile.default
@@ -31,7 +31,7 @@ SETTINGS_JSON = { "num-triples-per-batch": 1000000 }
 # URL parameter `access_token`. It should not be easily guessable, unless you
 # don't mind others to get privileged access to your server.
 [server]
-PORT =
+PORT = 8888
 ACCESS_TOKEN =
 
 # Use SYSTEM = docker to run QLever inside a docker container; the Docker image
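The default Qleverfile leaves ACCESS_TOKEN empty and warns that it should not be easily guessable. One way to generate such a token, shown here as a hypothetical helper rather than what the qlever CLI actually does:

    import secrets

    # Hypothetical helper: a URL-safe token that is hard to guess, suitable
    # for passing as the `access_token` URL parameter mentioned above.
    def make_access_token(name: str) -> str:
        return f"{name}_{secrets.token_urlsafe(16)}"

    print(f"ACCESS_TOKEN = {make_access_token('qlever')}")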
qlever/Qleverfiles/Qleverfile.fbeasy
@@ -13,13 +13,13 @@ TEXT_DESCRIPTION = Sentences from Wikipedia that mention at least one Freebase
 
 [index]
 INPUT_FILES = fbeasy.nt
-CAT_INPUT_FILES = cat ${
+CAT_INPUT_FILES = cat ${INPUT_FILES}
 SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 10000000 }
 
 [server]
-PORT
-ACCESS_TOKEN
-MEMORY_FOR_QUERIES
+PORT = 7003
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
 
 [runtime]
 SYSTEM = docker
qlever/Qleverfiles/Qleverfile.freebase
@@ -12,12 +12,12 @@ DESCRIPTION = RDF data from ${DATA_URL}, latest (and final) version from 09.08.
 
 [index]
 INPUT_FILES = freebase-rdf-latest.gz
-CAT_INPUT_FILES = zcat ${
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON = { "languages-internal": [ "en" ], "prefixes-external": ["<"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 10000000 }
 
 [server]
 PORT = 7002
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 10G
 
 [runtime]
qlever/Qleverfiles/Qleverfile.orkg
@@ -0,0 +1,30 @@
+# Qleverfile for ORKG, use with the QLever CLI (`pip install qlever`)
+#
+# qlever get-data # Get the dataset
+# qlever index # Build index data structures
+# qlever start # Start the server
+
+[data]
+NAME = orkg
+GET_DATA_URL = https://orkg.org/api/rdf/dump
+GET_DATA_CMD = curl -LR -o ${NAME}.ttl ${GET_DATA_URL} 2>&1 | tee ${NAME}.download-log.txt
+VERSION = $$(date -r ${NAME}.ttl +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = The Open Research Knowledge Graph (ORKG) (data from ${GET_DATA_URL}, version ${VERSION})
+
+[index]
+INPUT_FILES = ${data:NAME}.ttl
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }
+
+[server]
+PORT = 7053
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
+CACHE_MAX_SIZE = 5G
+
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest
+
+[ui]
+UI_CONFIG = orkg
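In values like VERSION = $$(date -r ${NAME}.ttl ...), the doubled $$ is an interpolation escape: it collapses to a single $ after parsing, so the shell later receives an intact $(...) command substitution. A short sketch with configparser's ExtendedInterpolation, which uses the same escape (again illustrative of the mechanism, not of the CLI's internals):

    from configparser import ConfigParser, ExtendedInterpolation

    # `$$` collapses to a literal `$`, while ${NAME} is interpolated, so the
    # shell later sees an intact $(date ...) command substitution.
    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read_string("""
    [data]
    NAME = orkg
    VERSION = $$(date -r ${NAME}.ttl +%d.%m.%Y || echo "NO_DATE")
    """)
    print(config["data"]["VERSION"])
    # $(date -r orkg.ttl +%d.%m.%Y || echo "NO_DATE")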
qlever/Qleverfiles/Qleverfile.osm-planet
@@ -11,7 +11,7 @@
 NAME = osm-planet
 DATA_URL = https://osm2rdf.cs.uni-freiburg.de/ttl/planet.osm.ttl.bz2
 GET_DATA_CMD = curl --location --fail --continue-at - --remote-time --output ${NAME}.ttl.bz2 ${DATA_URL}
-VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y")
+VERSION = $$(date -r ${NAME}.ttl.bz2 +"%d.%m.%Y" || echo "NO_DATE")
 DESCRIPTION = OSM Planet, data from ${DATA_URL} version ${VERSION} (complete OSM data, with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)
 
 [index]
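The osm-planet fix appends || echo "NO_DATE" so that a missing download no longer breaks the VERSION string. The same fallback expressed in Python, as an illustrative helper (the CLI itself just hands the shell expression to the shell):

    import os
    from datetime import datetime

    def version_string(path: str) -> str:
        """Like `date -r FILE +"%d.%m.%Y" || echo "NO_DATE"`: mtime, or a fallback."""
        try:
            return datetime.fromtimestamp(os.path.getmtime(path)).strftime("%d.%m.%Y")
        except OSError:
            return "NO_DATE"

    print(version_string("osm-planet.ttl.bz2"))  # NO_DATE until the dump is downloaded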
qlever/Qleverfiles/Qleverfile.vvz
@@ -14,13 +14,13 @@ TEXT_DESCRIPTION = All literals, search with FILTER KEYWORDS(?text, "...")
 
 [index]
 INPUT_FILES = vvz.ttl
-CAT_INPUT_FILES = cat ${
-SETTINGS_JSON = { "ascii-prefixes-only":
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 TEXT_INDEX = from_literals
 
 [server]
 PORT = 7041
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 10G
 
 [runtime]
qlever/Qleverfiles/Qleverfile.wikidata
@@ -1,33 +1,45 @@
-# Qleverfile for Wikidata, use with
+# Qleverfile for Wikidata, use with the QLever CLI (`pip install qlever`)
 #
-# qlever get-data #
-# qlever index #
-# qlever start #
+# qlever get-data # ~7 hours, ~110 GB (compressed), ~20 billion triples
+# qlever index # ~5 hours, ~20 GB RAM, ~500 GB index size on disk
+# qlever start # a few seconds, adjust MEMORY_FOR_QUERIES as needed
+#
+# Adding a text index takes an additional ~2 hours and ~50 GB of disk space
+#
+# Measured on an AMD Ryzen 9 5950X with 128 GB RAM, and NVMe SSD (18.10.2024)
 
 [DEFAULT]
 NAME = wikidata
 
 [data]
-GET_DATA_URL
-GET_DATA_CMD
-
-
+GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
+GET_DATA_CMD = curl -LROC - ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1 | tee wikidata.download-log.txt && curl -sL ${GET_DATA_URL}/dcatap.rdf | docker run -i --rm -v $$(pwd):/data stain/jena riot --syntax=RDF/XML --output=NT /dev/stdin > dcatap.nt
+DATE_WIKIDATA = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
+DATE_WIKIPEDIA = $$(date -r wikipedia-abstracts.nt +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${DATE_WIKIDATA}) + English Wikipeda abstracts (version ${DATE_WIKIPEDIA}, available via schema:description)
+TEXT_DESCRIPTION = All English and German literals + all sentences from the English Wikipedia (version ${DATE_WIKIPEDIA}), use with FILTER KEYWORDS(...)
 
 [index]
-INPUT_FILES
-
-
-
+INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2 wikipedia-abstracts.nt dcatap.nt
+MULTI_INPUT_JSON = [{ "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl", "parallel": "true" },
+                    { "cmd": "lbzcat -n 1 latest-lexemes.ttl.bz2", "format": "ttl", "parallel": "false" },
+                    { "cmd": "cat wikipedia-abstracts.nt", "format": "nt", "parallel": "false" },
+                    { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }]
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
+STXXL_MEMORY = 10G
+TEXT_INDEX = from_text_records
 
 [server]
-PORT
-ACCESS_TOKEN
-MEMORY_FOR_QUERIES
-CACHE_MAX_SIZE
+PORT = 7001
+ACCESS_TOKEN = ${data:NAME}_3fz47hfzrbf64b
+MEMORY_FOR_QUERIES = 40G
+CACHE_MAX_SIZE = 30G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+TIMEOUT = 300s
 
 [runtime]
 SYSTEM = docker
-IMAGE =
+IMAGE = adfreiburg/qlever
 
 [ui]
 UI_CONFIG = wikidata
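Unlike the awk-generated variant for DBLP, the Wikidata MULTI_INPUT_JSON is written out literally: one shell command per input, each tagged with its serialization format and whether it may be parsed in parallel. A small sketch that loads and sanity-checks such a specification (the required keys follow the entries above; the check itself is illustrative):

    import json

    multi_input_json = """[
      { "cmd": "lbzcat -n 4 latest-all.ttl.bz2", "format": "ttl", "parallel": "true" },
      { "cmd": "lbzcat -n 1 latest-lexemes.ttl.bz2", "format": "ttl", "parallel": "false" },
      { "cmd": "cat wikipedia-abstracts.nt", "format": "nt", "parallel": "false" },
      { "cmd": "cat dcatap.nt", "format": "nt", "parallel": "false" }
    ]"""

    # Illustrative sanity check: every input spec names a command and a format.
    for spec in json.loads(multi_input_json):
        missing = {"cmd", "format"} - spec.keys()
        assert not missing, f"input spec {spec} is missing {missing}"
        print(f"{spec['format']}  parallel={spec['parallel']}  {spec['cmd']}")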
qlever/Qleverfiles/Qleverfile.yago-4
@@ -16,14 +16,14 @@ DESCRIPTION = "Full dump from https://yago-knowledge.org/downloads/yago-4, vers
 
 [index]
 INPUT_FILES = yago-wd-*.nt.gz
-CAT_INPUT_FILES = zcat ${
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON = { "languages-internal": ["en"], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G
 
 [server]
-PORT
-ACCESS_TOKEN
-MEMORY_FOR_QUERIES
+PORT = 9004
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 30G
 
 [runtime]
 SYSTEM = docker