qlever 0.5.2__py3-none-any.whl → 0.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qlever might be problematic. Click here for more details.

@@ -6,8 +6,8 @@
6
6
 
7
7
  [data]
8
8
  NAME = dblp
9
- GET_DATA_URL = https://dblp.org/rdf/${index:INPUT_FILES}
10
- GET_DATA_CMD = curl -LRC - O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
9
+ GET_DATA_URL = https://dblp.org/rdf/dblp.ttl.gz
10
+ GET_DATA_CMD = curl -LRC - -O ${GET_DATA_URL} 2>&1 | tee ${data:NAME}.download-log.txt
11
11
  VERSION = $$(date -r dblp.ttl.gz +"%d.%m.%Y %H:%M" || echo "NO_DATE")
12
12
  DESCRIPTION = DBLP computer science bibliography, data from ${GET_DATA_URL} (version ${VERSION})
13
13
 
@@ -19,7 +19,7 @@ SETTINGS_JSON = { "ascii-prefixes-only": false, "num-triples-per-batch": 10000
19
19
  [server]
20
20
  PORT = 7015
21
21
  ACCESS_TOKEN = ${data:NAME}
22
- MEMORY_FOR_QUERIES = 30G
22
+ MEMORY_FOR_QUERIES = 10G
23
23
  CACHE_MAX_SIZE = 5G
24
24
 
25
25
  [runtime]
@@ -48,7 +48,7 @@ DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folde
48
48
 
49
49
  [index]
50
50
  INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
51
- CAT_INPUT_FILES = zcat ${FILE_NAMES}
51
+ CAT_INPUT_FILES = zcat ${INPUT_FILES}
52
52
  SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
53
53
  STXXL_MEMORY = 10G
54
54
 
@@ -1,37 +1,38 @@
1
1
  # Qleverfile for UniProt, use with https://github.com/ad-freiburg/qlever-control
2
2
  #
3
- # qlever get-data # download RDFXML and convert to NT (around 1 TB each)
4
- # qlever index # takes ~ 1.5 days and ~40 GB RAM (on an AMD Ryzen 9 5900X)
5
- # qlever start # starts the server (takes a few second)
3
+ # qlever get-data # takes ~ 30 hours and ~ 2 TB of disk (for the NT files)
4
+ # qlever index # takes ~ 40 hours and ~ 60 GB RAM (on an AMD Ryzen 9 5900X)
5
+ # qlever start # starts the server (takes a few seconds)
6
6
  #
7
7
  # Install packages: sudo apt install -y libxml2-utils parallel xz-utils pv
8
8
  # Install manually: Apache Jena binaries (https://dlcdn.apache.org/jena/binaries)
9
9
  #
10
- # Set DATE to the date of the latest release
11
- #
12
- # IMPORTANT: Build on SSD, disk space required: ~ 10 T. For running the server,
13
- # the uniprot.index.???.meta files can be on HDD.
10
+ # Set DATE to the date of the latest release. Build on SSD (requires ~ 7 TB
11
+ # during build, ~ 3 TB after build). The uniprot.index.???.meta files can be on
12
+ # HDD without significant performance loss (when running the server).
14
13
 
15
14
  [data]
16
15
  NAME = uniprot
17
- DATE = 2024-01-24
16
+ DATE = 2024-05-29
18
17
  DOWNLOAD_URL = https://ftp.uniprot.org/pub/databases/uniprot/current_release/rdf
19
18
  GET_RDFXML_CMD = mkdir -p rdf.${DATE} && curl -s ${DOWNLOAD_URL}/RELEASE.meta4 | sed "s/<metalink.*/<metalink>/" | xmllint --xpath "/metalink/files/file/url[@location=\"ch\"]/text()" - | while read URL; do wget --no-verbose -P rdf.${DATE} $$URL 2>&1 | tee -a uniprot.download-log; done
20
- RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | xz -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.xz/') && echo 'DONE converting $$RDFXML'"; done | parallel
19
+ RDFXML2NT_CMD = mkdir -p nt.${DATE} && for RDFXML in rdf.${DATE}/*.{owl,owl.xz,rdf,rdf.xz}; do echo "xzcat -f $$RDFXML | rdfxml --output=nt 2> /dev/null | gzip -c > nt.${DATE}/$$(basename $$RDFXML | sed 's/\(rdf\|rdf.xz\|owl\|owl.xz\)$$/nt.gz/') && echo 'DONE converting $$RDFXML'"; done | parallel
21
20
  GET_DATA_CMD = rdfxml --help && date > ${NAME}.get-data.begin-date && ${GET_RDFXML_CMD} && ${RDFXML2NT_CMD} && date > ${NAME}.get-data.end-date
22
21
  DESCRIPTION = Complete UniProt data from ${DOWNLOAD_URL}, version ${DATE}
23
22
 
24
23
  [index]
25
- INPUT_FILES = nt.${data:DATE}/*.nt.xz
26
- CAT_INPUT_FILES = parallel --tmpdir . -j 4 'xzcat -f {}' ::: nt.${data:DATE}/*.nt.xz | pv -q -B 5G
24
+ INPUT_FILES = nt.${data:DATE}/*.nt.gz
25
+ CAT_INPUT_FILES = parallel --tmpdir . -j 4 'zcat -f {}' ::: ${INPUT_FILES} | pv -q -B 5G
27
26
  SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 25000000 }
28
27
  STXXL_MEMORY = 60G
29
28
 
30
29
  [server]
31
- PORT = 7018
32
- ACCESS_TOKEN = ${data:NAME}_1369924040
33
- MEMORY_FOR_QUERIES = 20G
34
- CACHE_MAX_SIZE = 10G
30
+ PORT = 7018
31
+ ACCESS_TOKEN = ${data:NAME}
32
+ MEMORY_FOR_QUERIES = 20G
33
+ CACHE_MAX_SIZE = 10G
34
+ CACHE_MAX_SIZE_SINGLE_ENTRY = 5G
35
+ TIMEOUT = 300s
35
36
 
36
37
  [runtime]
37
38
  SYSTEM = docker
@@ -1,24 +1,27 @@
1
- # Qleverfile for Wikidata, use with https://github.com/ad-freiburg/qlever-control
1
+ # Qleverfile for Wikidata, use with qlever script (`pip install qlever`)
2
2
  #
3
- # qlever get-data downloads two .bz2 files of total size ~100 GB
4
- # qlever index takes ~7 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
5
- # qlever start starts the server (takes around 30 seconds)
3
+ # qlever get-data # downloads two .bz2 files of total size ~100 GB
4
+ # qlever index # takes ~4.5 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
5
+ # qlever start # starts the server (takes a few seconds)
6
+
7
+ [DEFAULT]
8
+ NAME = wikidata
6
9
 
7
10
  [data]
8
- NAME = wikidata
9
11
  GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
10
- GET_DATA_CMD = curl -LO -C - ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2
11
- DESCRIPTION = "Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2)"
12
+ GET_DATA_CMD = curl -LRC - --remote-name-all ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1
13
+ VERSION = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
14
+ DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${VERSION})
12
15
 
13
16
  [index]
14
- INPUT_FILES = latest-lexemes.ttl.bz2 latest-all.ttl.bz2
15
- CAT_INPUT_FILES = bzcat ${FILE_NAMES}
16
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
17
+ INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2
18
+ CAT_INPUT_FILES = lbzcat -n 4 -f ${INPUT_FILES}
19
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
17
20
  STXXL_MEMORY = 10G
18
21
 
19
22
  [server]
20
23
  PORT = 7001
21
- ACCESS_TOKEN = ${data:NAME}_372483264
24
+ ACCESS_TOKEN = ${data:NAME}
22
25
  MEMORY_FOR_QUERIES = 20G
23
26
  CACHE_MAX_SIZE = 10G
24
27
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: qlever
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: Script for using the QLever SPARQL engine.
5
5
  Author-email: Hannah Bast <bast@cs.uni-freiburg.de>
6
6
  License: Apache-2.0
@@ -8,7 +8,7 @@ qlever/qlever_main.py,sha256=tA_xqOs_FjvqlDIvKTprwuysfTwzsUjE7at26gRhCVA,2336
8
8
  qlever/qlever_old.py,sha256=6sHosOfJzkURpdK4wXLdGl4SUtPnlsNEUwAqUeJiRYA,62026
9
9
  qlever/qleverfile.py,sha256=6Ll81xkzel_s2Ju9ZfBXUGlRfikaAzZM6Do-dTrdo3k,12934
10
10
  qlever/util.py,sha256=eepj0SY9JJOUQq5kvtoPnWfoLLV9fbw_sTEWKHet66E,7147
11
- qlever/Qleverfiles/Qleverfile.dblp,sha256=DjeyY0MGNrqEY-_8nRkjSsofGBBSz_wIgQ048KoRQBM,1002
11
+ qlever/Qleverfiles/Qleverfile.dblp,sha256=oNT-O1a2I1ELPxgPxgB1F8T9Td3Iea7EyYizFpAHfXw,994
12
12
  qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
13
13
  qlever/Qleverfiles/Qleverfile.default,sha256=mljl6I1RCkpIWOqMQwjzPZIsarYQx1R0mIlc583KuqU,1869
14
14
  qlever/Qleverfiles/Qleverfile.dnb,sha256=GgnsbtRUl__yMCqUX2EN5x1oDWpW93bUalMfUxN8534,1751
@@ -19,11 +19,11 @@ qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=Y_yUxdpWpUOSDo_zmVKj3caa8X-Wv-1K
19
19
  qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
20
20
  qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
21
21
  qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
22
- qlever/Qleverfiles/Qleverfile.pubchem,sha256=bOhiJKUxzDiAm1UyXFPDQLYTqGc9jM8240fhobYLij0,3898
22
+ qlever/Qleverfiles/Qleverfile.pubchem,sha256=a6EAP8mOfC0V6NnVCLarvRagyoQSQDItR7AnrZqL9iE,3899
23
23
  qlever/Qleverfiles/Qleverfile.scientists,sha256=9eZ2c6P9a3E3VHa3RR7LdOQbF4k3oyyrn56Z3u4LZYs,1164
24
- qlever/Qleverfiles/Qleverfile.uniprot,sha256=FS8QLHvujbjUYyU2Ma0PRgfCWlulviaGLc_1csxpuic,2201
24
+ qlever/Qleverfiles/Qleverfile.uniprot,sha256=9kAKseomdUnIt7EAZge39g1MTuaLVaSW9JYLHzIMolM,2338
25
25
  qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
26
- qlever/Qleverfiles/Qleverfile.wikidata,sha256=fhWSChZTH3c2y14kgP1P5Duq1SsewTOK3wETf6RRmI8,1172
26
+ qlever/Qleverfiles/Qleverfile.wikidata,sha256=vDkTY3mPSx2C8MvFWfB72zZoc4d-TMJSw3f_-FqnEqs,1275
27
27
  qlever/Qleverfiles/Qleverfile.wikipathways,sha256=qWjfT-CVQCgRfN6fXPwBORMbjzXS_xsJ2DoCamQI7Rs,2045
28
28
  qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
29
29
  qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -42,9 +42,9 @@ qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,163
42
42
  qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
43
43
  qlever/commands/ui.py,sha256=rV8u017WLbfz0zVT_c9GC4d9v1WWwrTM3kfGONbeCvQ,2499
44
44
  qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
45
- qlever-0.5.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
46
- qlever-0.5.2.dist-info/METADATA,sha256=MHlNH1mplzq3hJ0ETmjUqM4Zw6zLqvends9IT60M4Io,4146
47
- qlever-0.5.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
48
- qlever-0.5.2.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
49
- qlever-0.5.2.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
50
- qlever-0.5.2.dist-info/RECORD,,
45
+ qlever-0.5.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
46
+ qlever-0.5.3.dist-info/METADATA,sha256=cBURUr5Og7ysQJOuCIZN5pKp_DGi4lm-c87CVxQhmtY,4146
47
+ qlever-0.5.3.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
48
+ qlever-0.5.3.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
49
+ qlever-0.5.3.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
50
+ qlever-0.5.3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.43.0)
2
+ Generator: setuptools (70.1.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5