qlever 0.5.0__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of qlever might be problematic.
- qlever/Qleverfiles/Qleverfile.dblp +11 -15
- qlever/Qleverfiles/Qleverfile.dnb +14 -11
- qlever/Qleverfiles/Qleverfile.ohm-planet +6 -6
- qlever/Qleverfiles/Qleverfile.pubchem +1 -1
- qlever/Qleverfiles/Qleverfile.scientists +24 -34
- qlever/Qleverfiles/Qleverfile.uniprot +16 -15
- qlever/Qleverfiles/Qleverfile.wikidata +14 -11
- qlever/commands/query.py +73 -0
- qlever/log.py +2 -1
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/METADATA +1 -1
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/RECORD +15 -14
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/WHEEL +1 -1
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/LICENSE +0 -0
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/entry_points.txt +0 -0
- {qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/top_level.txt +0 -0
qlever/Qleverfiles/Qleverfile.dblp
CHANGED
@@ -1,29 +1,25 @@
 # Qleverfile for DBLP, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data # downloads .gz file of size ~3 GB
-# qlever index # takes ~
-# qlever start #
-#
-# Also builds a text index for fast kewyword search in literals. Without that
-# (WITH_TEXT_INDEX = false), the index build takes only ~10 minutes.
+# qlever get-data # takes ~3 mins (downloads .ttl.gz file of size ~3 GB)
+# qlever index # takes ~3 mins (on an AMD Ryzen 9 5900X)
+# qlever start # takes a few seconds

 [data]
-NAME
-GET_DATA_URL
-GET_DATA_CMD
-
-
+NAME = dblp
+GET_DATA_URL = https://dblp.org/rdf/dblp.ttl.gz
+GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
+VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
+DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL} (version ${VERSION})

 [index]
 INPUT_FILES = dblp.ttl.gz
 CAT_INPUT_FILES = zcat ${INPUT_FILES}
-SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
-TEXT_INDEX = from_literals
+SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 1000000, "prefixes-external": [""] }

 [server]
 PORT = 7015
-ACCESS_TOKEN = ${data:NAME}
-MEMORY_FOR_QUERIES =
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 10G
 CACHE_MAX_SIZE = 5G

 [runtime]
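The ${GET_DATA_URL}, ${data:NAME}, and $$(...) forms above follow Python configparser extended-interpolation syntax. A minimal standalone sketch (not taken from the package; the qlever script's actual parsing lives in qlever/qleverfile.py) of how such values resolve, assuming standard configparser semantics:

from configparser import ConfigParser, ExtendedInterpolation

qleverfile = """
[data]
NAME = dblp
GET_DATA_URL = https://dblp.org/rdf/dblp.ttl.gz
GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")

[server]
ACCESS_TOKEN = ${data:NAME}
"""

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_string(qleverfile)

print(config["data"]["GET_DATA_CMD"])    # ${GET_DATA_URL} and ${data:NAME} are expanded
print(config["data"]["VERSION"])         # $$ collapses to a literal $
print(config["server"]["ACCESS_TOKEN"])  # cross-section reference -> dblp

The doubled $$ survives interpolation as a single literal $, so it is the shell, not configparser, that evaluates $(date ...) when GET_DATA_CMD eventually runs.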
qlever/Qleverfiles/Qleverfile.dnb
CHANGED
@@ -1,8 +1,12 @@
 # Qleverfile for Olympics, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data # takes ~ 10
-# qlever index # takes ~
-# qlever start # starts the server
+# qlever get-data # takes ~ 10 mins to download .nt.gz file of size ~ 8 GB
+# qlever index # takes ~ 5 min and ~ 5 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (instantaneous)
+#
+# IMPORTANT: The current files contain invalid floating point literals. To make
+# QLever ignore them, compile QLever with `invalidLiteralsAreSkipped_ = true`
+# in `src/parser/TurtleParserBase.h:55`.
 #
 # NOTE: https://data.dnb.de/opendata/ is rather confusing becase of the many
 # files. This Qleverfile downloads all the datasets named "Gesamtabzug", except
@@ -13,24 +17,23 @@
 [data]
 NAME = dnb
 BASE_URL = https://data.dnb.de/opendata
-GET_DATA_CMD = curl -L -C - --remote-name-all ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz
-
-
+GET_DATA_CMD = curl -L -C - --remote-name-all ${BASE_URL}/authorities-gnd_lds.nt.gz ${BASE_URL}/dnb-all_lds.nt.gz ${BASE_URL}/dnb-all_ldsprov.nt.gz ${BASE_URL}/zdb_lds.nt.gz 2>&1 | tee ${data:NAME}.getdata-log.txt
+VERSION = $$(date -r dnb-all_lds.nt.gz +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = DNB data from ${BASE_URL} (authoritities-gnd_lds, dnb_all_lds, dnb-all_ldsprov, zdb_lds), version ${VERSION}

 [index]
 INPUT_FILES = *.nt.gz
-CAT_INPUT_FILES = zcat ${
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 1000000 }
-TEXT_INDEX = from_literals

 [server]
 PORT = 7035
-ACCESS_TOKEN = ${data:NAME}
-MEMORY_FOR_QUERIES =
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G
 CACHE_MAX_SIZE = 2G

 [runtime]
-SYSTEM =
+SYSTEM = docker
 IMAGE = docker.io/adfreiburg/qlever:latest

 [ui]
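The VERSION idiom added here (and in the other Qleverfiles) stamps the dataset with the modification time of the downloaded dump, falling back to NO_DATE. Re-stated in Python for clarity (a sketch only; the qlever script simply hands the shell expression to the shell):

from datetime import datetime
from pathlib import Path

def version_of(dump: str) -> str:
    # Equivalent of: date -r <dump> +%d.%m.%Y || echo "NO_DATE"
    p = Path(dump)
    if not p.exists():
        return "NO_DATE"
    return datetime.fromtimestamp(p.stat().st_mtime).strftime("%d.%m.%Y")

print(version_of("dnb-all_lds.nt.gz"))  # e.g. "29.05.2024", or "NO_DATE"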
qlever/Qleverfiles/Qleverfile.ohm-planet
CHANGED
@@ -2,18 +2,18 @@
 #
 # qlever get-data # ~20 mins (download PBF, convert to TTL, add GeoSPARQL triples)
 # qlever index # ~10 mins and ~5 GB RAM (on an AMD Ryzen 9 5900X)
-# qlever start #
+# qlever start # ~1 sec
 #
 # For `qlever get-data` to work, `osm2rdf` and `spatialjoin` must be installed
 # and included in the `PATH`.

 [data]
 NAME = ohm-planet
-GET_DATA_URL =
-GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf ${GET_DATA_URL}
+GET_DATA_URL = https://planet.openhistoricalmap.org/planet
+GET_DATA_CMD_1 = curl -LRfC - -o ${NAME}.pbf $$(curl -s ${GET_DATA_URL}/state.txt) 2>&1 | tee ${NAME}.download-log.txt
 GET_DATA_CMD_2 = osm2rdf ${NAME}.pbf -o ${NAME}.ttl --source-dataset OHM --simplify-wkt 0 --write-ogc-geo-triples none 2>&1 | tee ${NAME}.osm2rdf-log.txt
-GET_DATA_CMD_3 = bzcat ${NAME}.ttl.bz2 | \grep "^osm2rdf" | sed -En 's/^osm2rdf(geom)?:(ohm_)?(node|rel|way)[a-z]*_([0-9]+) geo:asWKT "([^\"]+)".*/ohm\3:\4\t\5/p' | tee ${NAME}.spatialjoin-input.tsv | spatialjoin --contains
-GET_DATA_CMD = ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2} && echo && ${GET_DATA_CMD_3} && bzcat ${NAME}.ttl.bz2 |
+GET_DATA_CMD_3 = bzcat ${NAME}.ttl.bz2 | \grep "^osm2rdf" | sed -En 's/^osm2rdf(geom)?:(ohm_)?(node|rel|way)[a-z]*_([0-9]+) geo:asWKT "([^\"]+)".*/ohm\3:\4\t\5/p' | tee ${NAME}.spatialjoin-input.tsv | spatialjoin --contains " ogc:sfContains " --covers " ogc:sfCovers " --intersects " ogc:sfIntersects " --equals " ogc:sfEquals " --touches " ogc:sfTouches " --crosses " ogc:sfCrosses " --overlaps " ogc:sfOverlaps " --suffix $$' .\n' -o ${NAME}.spatialjoin-triples.ttl.bz2 2>&1 | tee ${NAME}.spatialjoin-log.txt && rm -f areas events lines points simplelines
+GET_DATA_CMD = set -o pipefail && ${GET_DATA_CMD_1} && echo && ${GET_DATA_CMD_2} && echo && ${GET_DATA_CMD_3} && head -100 <(bzcat ${NAME}.ttl.bz2) | sed '/^@prefix/!d' > ${NAME}.prefix-definitions
 VERSION = $$(date -r ${NAME}.pbf +%d.%m.%Y || echo "NO_DATE")
 DESCRIPTION = OHM Planet, data from ${GET_DATA_URL} version ${VERSION} (with GeoSPARQL predicates ogc:sfContains and ogc:sfIntersects)

@@ -24,7 +24,7 @@ SETTINGS_JSON = { "prefixes-external": [""], "ascii-prefixes-only": false, "p

 [server]
 PORT = 7037
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 10G
 CACHE_MAX_SIZE = 5G
 WARMUP_CMD = curl -s http://localhost:${PORT} -H "Accept: application/qlever-results+json" --data-urlencode "query=PREFIX geo: <http://www.opengis.net/ont/geosparql#> SELECT ?subject ?geometry WHERE { ?subject geo:hasGeometry ?m . ?m geo:asWKT ?geometry } INTERNAL SORT BY ?subject" --data-urlencode "access-token=${server:ACCESS_TOKEN}" --data-urlencode "pinresult=true" --data-urlencode "send=0" | jq .resultsize | xargs printf "Result size: %'d\n"
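The sed expression in GET_DATA_CMD_3 turns each osm2rdf geometry triple into a tab-separated "ohm<type>:<id> <WKT>" pair for spatialjoin. The same transformation re-rendered in Python for readability (the sample input line is made up for illustration):

import re

# The five capture groups of the sed pattern, as a Python regex.
pattern = re.compile(
    r'^osm2rdf(geom)?:(ohm_)?(node|rel|way)[a-z]*_([0-9]+) geo:asWKT "([^"]+)".*')

# Made-up sample line of the shape osm2rdf emits.
line = 'osm2rdfgeom:ohm_node_42 geo:asWKT "POINT(7.8 48.0)"^^geo:wktLiteral .'

m = pattern.match(line)
if m:
    kind, osm_id, wkt = m.group(3), m.group(4), m.group(5)
    # Same output shape as the sed replacement ohm\3:\4\t\5
    print(f"ohm{kind}:{osm_id}\t{wkt}")  # -> ohmnode:42<TAB>POINT(7.8 48.0)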
qlever/Qleverfiles/Qleverfile.pubchem
CHANGED
@@ -48,7 +48,7 @@ DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folde

 [index]
 INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
-CAT_INPUT_FILES = zcat ${
+CAT_INPUT_FILES = zcat ${INPUT_FILES}
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
 STXXL_MEMORY = 10G

qlever/Qleverfiles/Qleverfile.scientists
CHANGED
@@ -1,39 +1,29 @@
-# Qleverfile for
+# Qleverfile for "scientists", use with qlever script (pip install qlever)
 #
-# qlever get-data #
-# qlever index #
-# qlever start #
-
-
-
+# qlever get-data # get "scientists" dataset (370M triples, 2.2 M texts records)
+# qlever index # build index, including text index (takes ~20 seconds)
+# qlever start # start the server (instant)
+
+[data]
+NAME = scientists
+GET_DATA_CMD = curl -LRC - -O https://github.com/ad-freiburg/qlever/raw/master/e2e/scientist-collection.zip && unzip -j scientist-collection.zip && rm -f scientist-collection.zip
+DESCRIPTION = Test collection from https://github.com/ad-freiburg/qlever/tree/master/e2e (triples and text about scientists)
+TEXT_DESCRIPTION = Text from all literals and Wikipedia articles on scientists (use ql:contains-entity and ql:contains-word)

-
-
-
-
-
-STXXL_MEMORY = 1G
-SETTINGS_JSON = '{ "ascii-prefixes-only": true, "num-triples-per-batch": 100000 }'
-GET_DATA_CMD = "wget https://github.com/ad-freiburg/qlever/raw/master/e2e/scientist-collection.zip && unzip -j scientist-collection.zip && rm -f scientist-collection.zip"
-INDEX_DESCRIPTION = "Scientist collection from QLever's end-to-end test, see https://github.com/ad-freiburg/qlever/tree/master/e2e"
-TEXT_DESCRIPTION = "Literals (use FILTER CONTAINS) and Wikipedia articles (use ql:contains-entity and ql:contains-word)"
+[index]
+INPUT_FILES = ${data:NAME}.nt
+CAT_INPUT_FILES = cat ${INPUT_FILES}
+SETTINGS_JSON = { "ascii-prefixes-only": true, "num-triples-per-batch": 100000 }
+TEXT_INDEX = from_text_records_and_literals

-
-
-
-
-MEMORY_FOR_QUERIES = 5G
-CACHE_MAX_SIZE = 2G
-CACHE_MAX_SIZE_SINGLE_ENTRY = 1G
-CACHE_MAX_NUM_ENTRIES = 100
+[server]
+PORT = 7020
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 5G

-
-
-
-QLEVER_DOCKER_IMAGE = adfreiburg/qlever
-QLEVER_DOCKER_CONTAINER = qlever.scientists
+[runtime]
+SYSTEM = docker
+IMAGE = docker.io/adfreiburg/qlever:latest

-
-
-QLEVERUI_DIR = qlever-ui
-QLEVERUI_CONFIG = default
+[ui]
+UI_CONFIG = scientists
qlever/Qleverfiles/Qleverfile.uniprot
CHANGED
@@ -1,37 +1,38 @@
 # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
 #
-# qlever get-data #
-# qlever index # takes ~
-# qlever start # starts the server (takes a few
+# qlever get-data # takes ~ 30 hours and ~ 2 TB of disk (for the NT files)
+# qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (takes a few seconds)
 #
 # Install packages: sudo apt install -y libxml2-utils parallel xz-utils pv
 # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
 #
-# Set DATE to the date of the latest release
-#
-#
-# the uniprot.index.???.meta files can be on HDD.
+# Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
+# during build, ~ 3 TB after build). The uniprot.index.???.meta files can be on
+# HDD without significant performance loss (when running the server).

 [data]
 NAME = uniprot
-DATE = 2024-
+DATE = 2024-05-29
 DOWNLOAD_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
 GET_RDFXML_CMD = mkdir -p rdf.${DATE} && curl -s ${DOWNLOAD_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" - | while read URL; do wget --no-verbose -P rdf.${DATE} $$URL 2>&1 | tee -a uniprot.download-log; done
-RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null |
+RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | gzip -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
 GET_DATA_CMD = rdfxml --help && date > ${NAME}.get-data.begin-date && ${GET_RDFXML_CMD} && ${RDFXML2NT_CMD} && date > ${NAME}.get-data.end-date
 DESCRIPTION = Complete UniProt data from ${DOWNLOAD_URL}, version ${DATE}

 [index]
-INPUT_FILES = nt.${data:DATE}/*.nt.
-CAT_INPUT_FILES = parallel --tmpdir . -j 4 '
+INPUT_FILES = nt.${data:DATE}/*.nt.gz
+CAT_INPUT_FILES = parallel --tmpdir . -j 4 'zcat -f {}' ::: ${INPUT_FILES} | pv -q -B 5G
 SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
 STXXL_MEMORY = 60G

 [server]
-PORT
-ACCESS_TOKEN
-MEMORY_FOR_QUERIES
-CACHE_MAX_SIZE
+PORT = 7018
+ACCESS_TOKEN = ${data:NAME}
+MEMORY_FOR_QUERIES = 20G
+CACHE_MAX_SIZE = 10G
+CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
+TIMEOUT = 300s

 [runtime]
 SYSTEM = docker
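The new CAT_INPUT_FILES fans the compressed NT files out to four decompression jobs and merges them into one stream (buffered through pv). A rough Python analogue of that fan-out/concatenate structure, as a sketch only (GNU parallel streams its output, whereas this toy version holds each file in memory):

import gzip
import sys
from concurrent.futures import ThreadPoolExecutor

def decompress(path: str) -> bytes:
    # zcat -f falls back to plain cat for non-gzip input; this toy version
    # assumes gzip throughout.
    with gzip.open(path, "rb") as f:
        return f.read()

def cat_input_files(paths: list[str]) -> None:
    # Four workers, like -j 4; pool.map preserves input order when merging.
    with ThreadPoolExecutor(max_workers=4) as pool:
        for chunk in pool.map(decompress, paths):
            sys.stdout.buffer.write(chunk)

if __name__ == "__main__":
    cat_input_files(sys.argv[1:])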
qlever/Qleverfiles/Qleverfile.wikidata
CHANGED
@@ -1,24 +1,27 @@
-# Qleverfile for Wikidata, use with
+# Qleverfile for Wikidata, use with qlever script (`pip install qlever`)
 #
-# qlever get-data
-# qlever index
-# qlever start
+# qlever get-data # downloads two .bz2 files of total size ~100 GB
+# qlever index # takes ~4.5 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
+# qlever start # starts the server (takes a few seconds)
+
+[DEFAULT]
+NAME = wikidata

 [data]
-NAME = wikidata
 GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
-GET_DATA_CMD = curl -
-
+GET_DATA_CMD = curl -LRC - --remote-name-all ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1
+VERSION = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
+DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${VERSION})

 [index]
-INPUT_FILES = latest-
-CAT_INPUT_FILES =
-SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only":
+INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2
+CAT_INPUT_FILES = lbzcat -n 4 -f ${INPUT_FILES}
+SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
 STXXL_MEMORY = 10G

 [server]
 PORT = 7001
-ACCESS_TOKEN = ${data:NAME}
+ACCESS_TOKEN = ${data:NAME}
 MEMORY_FOR_QUERIES = 20G
 CACHE_MAX_SIZE = 10G

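Moving NAME into [DEFAULT] makes it visible in every section: under standard configparser semantics (a sketch, not code from the package), [DEFAULT] keys are inherited everywhere, so both a plain ${NAME} and the explicit ${data:NAME} form resolve:

from configparser import ConfigParser, ExtendedInterpolation

config = ConfigParser(interpolation=ExtendedInterpolation())
config.read_string("""
[DEFAULT]
NAME = wikidata

[data]
GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities

[server]
ACCESS_TOKEN = ${data:NAME}
""")

# [DEFAULT] keys are inherited by every section ...
print(config["data"]["NAME"])            # -> wikidata
# ... so the cross-section reference ${data:NAME} keeps working unchanged.
print(config["server"]["ACCESS_TOKEN"])  # -> wikidata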
qlever/commands/query.py
ADDED
@@ -0,0 +1,73 @@
+from __future__ import annotations
+
+import shlex
+import time
+import traceback
+
+from qlever.command import QleverCommand
+from qlever.log import log
+from qlever.util import run_command
+
+
+class QueryCommand(QleverCommand):
+    """
+    Class for executing the `query` command.
+    """
+
+    def __init__(self):
+        pass
+
+    def description(self) -> str:
+        return ("Send a query to a SPARQL endpoint")
+
+    def should_have_qleverfile(self) -> bool:
+        return False
+
+    def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
+        return {"server": ["port"]}
+
+    def additional_arguments(self, subparser) -> None:
+        subparser.add_argument("--query", type=str,
+                               default="SELECT * WHERE { ?s ?p ?o } LIMIT 10",
+                               help="SPARQL query to send")
+        subparser.add_argument("--sparql-endpoint", type=str,
+                               help="URL of the SPARQL endpoint")
+        subparser.add_argument("--accept", type=str,
+                               choices=["text/tab-separated-values",
+                                        "text/csv",
+                                        "application/sparql-results+json",
+                                        "application/sparql-results+xml",
+                                        "application/qlever-results+json"],
+                               default="text/tab-separated-values",
+                               help="Accept header for the SPARQL query")
+        subparser.add_argument("--no-time", action="store_true",
+                               default=False,
+                               help="Do not print the (end-to-end) time taken")
+
+    def execute(self, args) -> bool:
+        # Show what the command will do.
+        sparql_endpoint = (args.sparql_endpoint if args.sparql_endpoint
+                           else f"localhost:{args.port}")
+        curl_cmd = (f"curl -s {sparql_endpoint}"
+                    f" -H \"Accept: {args.accept}\""
+                    f" --data-urlencode query={shlex.quote(args.query)}")
+        self.show(curl_cmd, only_show=args.show)
+        if args.show:
+            return False
+
+        # Launch query.
+        try:
+            start_time = time.time()
+            run_command(curl_cmd, show_output=True)
+            time_msecs = round(1000 * (time.time() - start_time))
+            if not args.no_time and args.log_level != "NO_LOG":
+                log.info("")
+                log.info(f"Query processing time (end-to-end):"
+                         f" {time_msecs:,d} ms")
+        except Exception as e:
+            if args.log_level == "DEBUG":
+                traceback.print_exc()
+            log.error(e)
+            return False
+
+        return True
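With the defaults above, execute() ends up shelling out to a curl command like the following (reconstructed by hand from the code; the port value is a hypothetical example, taken here from the scientists Qleverfile):

import shlex

# Default values from additional_arguments above; port 7020 is just an example.
sparql_endpoint = "localhost:7020"
accept = "text/tab-separated-values"
query = "SELECT * WHERE { ?s ?p ?o } LIMIT 10"

curl_cmd = (f"curl -s {sparql_endpoint}"
            f" -H \"Accept: {accept}\""
            f" --data-urlencode query={shlex.quote(query)}")
print(curl_cmd)
# curl -s localhost:7020 -H "Accept: text/tab-separated-values"
#     --data-urlencode query='SELECT * WHERE { ?s ?p ?o } LIMIT 10'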
qlever/log.py
CHANGED

{qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/RECORD
CHANGED
@@ -3,27 +3,27 @@ qlever/__main__.py,sha256=MqM37bEzQeJEGUXZvuLcilIvnObZiG2eTGIkfKGpdnw,62016
 qlever/command.py,sha256=yOr0Uc8D8-AM7EjwDsVzbc3KNYjPH-FVOZhIHkqO588,2749
 qlever/config.py,sha256=qYPy-MQ7BwGrvKSazQWhs0lnlOFqm-d47mpZhc3fptc,10254
 qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
-qlever/log.py,sha256=
+qlever/log.py,sha256=2O_RvFymnu_dB10ErBTAOsI8bgjORfdD0tE3USH-siM,1315
 qlever/qlever_main.py,sha256=tA_xqOs_FjvqlDIvKTprwuysfTwzsUjE7at26gRhCVA,2336
 qlever/qlever_old.py,sha256=6sHosOfJzkURpdK4wXLdGl4SUtPnlsNEUwAqUeJiRYA,62026
 qlever/qleverfile.py,sha256=6Ll81xkzel_s2Ju9ZfBXUGlRfikaAzZM6Do-dTrdo3k,12934
 qlever/util.py,sha256=eepj0SY9JJOUQq5kvtoPnWfoLLV9fbw_sTEWKHet66E,7147
-qlever/Qleverfiles/Qleverfile.dblp,sha256=
+qlever/Qleverfiles/Qleverfile.dblp,sha256=oNT-O1a2I1ELPxgPxgB1F8T9Td3Iea7EyYizFpAHfXw,994
 qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
 qlever/Qleverfiles/Qleverfile.default,sha256=mljl6I1RCkpIWOqMQwjzPZIsarYQx1R0mIlc583KuqU,1869
-qlever/Qleverfiles/Qleverfile.dnb,sha256=
+qlever/Qleverfiles/Qleverfile.dnb,sha256=GgnsbtRUl__yMCqUX2EN5x1oDWpW93bUalMfUxN8534,1751
 qlever/Qleverfiles/Qleverfile.fbeasy,sha256=jeztW4gFpWL_w1nCH5qGHeZyZv2lz_kG6f1G3r3DkJ4,974
 qlever/Qleverfiles/Qleverfile.freebase,sha256=k6PqYrtHTBr0EydObm1Hg9QWyAAM9fXkdcjhReDg0fM,1035
 qlever/Qleverfiles/Qleverfile.imdb,sha256=uL5XlPwX01AmH-j6_Bc-PRm2fuPxGSIu8NaDflY525U,1623
-qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=
+qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=Y_yUxdpWpUOSDo_zmVKj3caa8X-Wv-1KmacBMks_4QA,2650
 qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
 qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
 qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
-qlever/Qleverfiles/Qleverfile.pubchem,sha256=
-qlever/Qleverfiles/Qleverfile.scientists,sha256=
-qlever/Qleverfiles/Qleverfile.uniprot,sha256=
+qlever/Qleverfiles/Qleverfile.pubchem,sha256=a6EAP8mOfC0V6NnVCLarvRagyoQSQDItR7AnrZqL9iE,3899
+qlever/Qleverfiles/Qleverfile.scientists,sha256=9eZ2c6P9a3E3VHa3RR7LdOQbF4k3oyyrn56Z3u4LZYs,1164
+qlever/Qleverfiles/Qleverfile.uniprot,sha256=9kAKseomdUnIt7EAZge39g1MTuaLVaSW9JYLHzIMolM,2338
 qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
-qlever/Qleverfiles/Qleverfile.wikidata,sha256=
+qlever/Qleverfiles/Qleverfile.wikidata,sha256=vDkTY3mPSx2C8MvFWfB72zZoc4d-TMJSw3f_-FqnEqs,1275
 qlever/Qleverfiles/Qleverfile.wikipathways,sha256=qWjfT-CVQCgRfN6fXPwBORMbjzXS_xsJ2DoCamQI7Rs,2045
 qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
 qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -35,15 +35,16 @@ qlever/commands/get_data.py,sha256=f9kjZI3TKad6JHSuXWNkeoajmW8h0Sx8ShvjauDCtNo,1
 qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
 qlever/commands/index_stats.py,sha256=_BiUNBhmbYd9RPxrlm4HF0oENO6JmqnRiAkwkyOdN4U,11722
 qlever/commands/log.py,sha256=8Krt3MsTUDapYqVw1zUu5X15SF8mV97Uj0qKOWK8jXk,1861
+qlever/commands/query.py,sha256=_IDH-M8gKL_f1i5wzu0X452pZSUD0_qXl6bPXC85wX0,2750
 qlever/commands/setup_config.py,sha256=6T0rXrIdejKMKhDbOMEMBKyMF_hAqO5nJaRFb57QPQU,2964
 qlever/commands/start.py,sha256=2rOtk3NmhEs28D5csL_a1BdjSWU9VkcH6AqYT0vdww0,9285
 qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,1631
 qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
 qlever/commands/ui.py,sha256=rV8u017WLbfz0zVT_c9GC4d9v1WWwrTM3kfGONbeCvQ,2499
 qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
-qlever-0.5.
+qlever-0.5.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+qlever-0.5.3.dist-info/METADATA,sha256=cBURUr5Og7ysQJOuCIZN5pKp_DGi4lm-c87CVxQhmtY,4146
+qlever-0.5.3.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
+qlever-0.5.3.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
+qlever-0.5.3.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
+qlever-0.5.3.dist-info/RECORD,,
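Each RECORD row is CSV: path, "sha256=" followed by the urlsafe-base64 digest with padding stripped, and size in bytes (empty for RECORD itself, per the wheel spec). A quick sketch for checking one entry against an unpacked wheel (the file path is assumed to exist locally):

import base64
import csv
import hashlib
import io

# One row from the RECORD above.
row = "qlever/commands/query.py,sha256=_IDH-M8gKL_f1i5wzu0X452pZSUD0_qXl6bPXC85wX0,2750"
path, hash_spec, size = next(csv.reader(io.StringIO(row)))

algo, _, expected = hash_spec.partition("=")
with open(path, "rb") as f:  # assumes the wheel is unpacked here
    data = f.read()
digest = base64.urlsafe_b64encode(
    hashlib.new(algo, data).digest()).rstrip(b"=").decode()
print(digest == expected and len(data) == int(size))  # True if intact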
{qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/LICENSE
File without changes

{qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/entry_points.txt
File without changes

{qlever-0.5.0.dist-info → qlever-0.5.3.dist-info}/top_level.txt
File without changes