qlever 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


qlever/Qleverfiles/Qleverfile.pubchem CHANGED
@@ -1,60 +1,49 @@
  # Qleverfile for PubChem, use with https://github.com/ad-freiburg/qlever-control
  #
- # qlever get-data # downloads .gz files of total size 114 GB; see NOTES 2, 3, 4
- # qlever index # takes ~5 hours and ~20 GB RAM on an AMD Ryzen 9 5900X
- # qlever start # starts the server (a few seconds)
+ # Resource requirements (as of 18.08.2024, on an AMD Ryzen 9 5900X):
  #
- # IMPORTANT NOTES:
+ # qlever get-data # ~2 hours, ~150 GB, ~19 billion triples
+ # qlever index # ~7 hours, ~20 GB RAM, ~400 GB disk space
+ # qlever start # a few seconds
  #
- # NOTE 1: The SPARQL endpoint at https://qlever.cs.uni-freiburg.de/pubchem also
- # contains data from the following ontologies, which are very useful for
- # resolving names of IRIs like `sio:SIO_000008` or `obo:IAO_0000412`, but which
- # are not part of the PubChem RDF data. For the corresponding URLs, see
- # https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1200479401 .
+ # NOTE 1: `get-data` does not only download the PubChem RDF data, but also
+ # a number of ontologies. These are very useful to obtain names for IRIs like
+ # `sio:SIO_000008` or `obo:IAO_0000412` (otherwise very hard to understand).
+ # The ontologies BAO and NDF-RT are infrequently updated, for latest versions,
+ # see the download links at https://bioportal.bioontology.org/ontologies/BAO
+ # and https://bioportal.bioontology.org/ontologies/NDF-RT .
  #
- # bao bfo biopax-level3 chebi cheminf cito dublin_core_terms fabio go iao ncit
- # obi pr ro sio skos so uo
- #
- # NOTE 2: The robots.txt file from https://ftp.ncbi.nlm.nih.gov currently
- # disallows downloading the PubChem RDF data using `wget --recursive` as in the
- # GET_DATA_CMD below. As a workaround, you can write a simple Python script
- # (using `BeautifulSoup` and `urllib.parse`) to scrape the URLs from the HTML
- # pages and download the files individually. This was done for the latest
- # version of https://qlever.cs.uni-freiburg.de/pubchem .
- #
- # NOTE 3: Many of the TTL files have generic prefix definitions in the middle
+ # NOTE 2: Many of the TTL files have generic prefix definitions in the middle
  # of the file, like @prefix ns23: <http://identifiers.org/biocyc/ARACYC:> .
  # See https://github.com/ad-freiburg/qlever/issues/711#issuecomment-1197113953
- # This is allowed by the standard, but VERY unusual. For use with QLever,
- # convert the TTL files to NT before indexing, see GET_DATA_CMD below.
- #
- # NOTE 4: Many of the files (TTL as well as NT) contain invalid IRIs because
- # spaces and braces are not properly escaped. Here is a simple awk-based script
- # to percent-encode spaces and braces in all IRIs in the NT files:
+ # This is allowed by the standard, but unusual. For use with QLever, we
+ # therefore convert the TTL files to NT when downloading them.
  #
- # for NTGZ in nt.${DATE}/*.nt.gz; do echo "zcat $NTGZ | sed 's/> />\t/1; s/> />\t/1; s/ \.\$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$i ~ /^<.*>\$/) { gsub(/ /, \"%20\", \$i); gsub(/\[/, \"%5B\", \$i); gsub(/\]/, \"%5D\", \$i); gsub(/{/, \"%7B\", \$i); gsub(/}/, \"%7D\", \$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}.FIXED/$(basename $NTGZ)"; done > fix-nt.commands.txt
- # cat fix-nt.commands.txt | parallel
-
-
- [DEFAULT]
- NAME = pubchem
- DATE = 2024-02-03
+ # NOTE 3: The PubChem data contains several invalid IRIs, in particular,
+ # containing spaces. The previous version of this Qleverfile used a combination
+ # of `sed` and `awk` to fix this. In the meantime, QLever's default is to warn
+ # about such IRIs while indexing, but accept them anyway.

  [data]
- GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
- MAKE_GET_DATA_CMD = curl -s ${GET_DATA_URL}/void.ttl | grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' | grep -v "nbr[23]d" | while read URL; do echo "echo \"Processing $$URL ...\"; curl --silent --remote-time --output ttl.${DATE}/$$(basename $$URL) $$URL && docker run --rm -v $$(pwd)/ttl.${DATE}:/data stain/jena turtle --output=NT /data/$$(basename $$URL) | sed 's/> />\t/1; s/> />\t/1; s/ \.\$$/\t./' | awk 'BEGIN{FS=OFS=\"\t\"} {for (i = 1; i <= 3; i++) if (\$$i ~ /^<.*>\$$/) { gsub(/ /, \"%20\", \$$i); gsub(/\[/, \"%5B\", \$$i); gsub(/\]/, \"%5D\", \$$i); gsub(/{/, \"%7B\", \$$i); gsub(/}/, \"%7D\", \$$i); } print }' | sed 's/\t/ /g' | gzip -c > nt.${DATE}/$$(basename -s .ttl.gz $$URL).nt.gz"; done > pubchem.get-data-cmds.txt
- GET_DATA_CMD = mkdir -p ttl.${DATE} && mkdir -p nt.${DATE} && ${MAKE_GET_DATA_CMD} && cat pubchem.get-data-cmds.txt | parallel --line-buffer
- DESCRIPTION = PubChem RDF from ${GET_DATA_URL}, version ${DATE} (all folders except nbr2d and nbr3d)
+ NAME = pubchem
+ GET_DATA_URL = ftp://ftp.ncbi.nlm.nih.gov/pubchem/RDF
+ CHECK_REQUIREMENTS = for CMD in docker parallel; do $$CMD --version >/dev/null 2>&1 || (echo "Requires \"$$CMD\", please install it"; false); done
+ MAKE_GET_DATA_CMD_1 = DIR=DATA.ontologies && mkdir -p $$DIR && cat $$DIR/ontologies.csv | while IFS=',' read -r DESC FILE URL; do ERRFILE=$${FILE%.*}.jena-stderr; echo "echo \"Processing $$URL ($$FILE) ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run --rm -v $$(pwd):/data stain/jena riot --output=NT /data/$$DIR/$$FILE 2> $$DIR/$$ERRFILE | gzip -c > $$DIR/$${FILE%.*}.nt.gz && rm -f $$DIR/$$FILE && if [ ! -s $$DIR/$$ERRFILE ]; then rm -f $$DIR/$$ERRFILE; fi || echo \"ERROR processing $$URL ($$FILE)\""; done > pubchem.get-data-cmds.txt
+ MAKE_GET_DATA_CMD_2 = DIR=DATA.pubchem && mkdir -p $$DIR && curl -LRO ${GET_DATA_URL}/void.ttl && grep -oP '${GET_DATA_URL}/.*?\.ttl\.gz' void.ttl | while read URL; do FILE=$$(basename $$URL); echo "echo \"Processing $$URL ...\" && curl -sLRo $$DIR/$$FILE \"$$URL\" && docker run -i --rm -v $$(pwd):/data stain/jena turtle --output=NT /data/$$DIR/$$FILE | gzip -c > $$DIR/$${FILE%%.*}.nt.gz && rm -f $$DIR/$$FILE || echo \"ERROR processing $$URL\""; done >> pubchem.get-data-cmds.txt
+ GET_DATA_CMD = ${CHECK_REQUIREMENTS} && ${MAKE_GET_DATA_CMD_1} && ${MAKE_GET_DATA_CMD_2} && cat pubchem.get-data-cmds.txt | parallel --line-buffer 2>&1 | tee pubchem.get-data-log.txt
+ VERSION = $$(date -r void.ttl +%d.%m.%Y || echo "NO_DATE")
+ DESCRIPTION = PubChem RDF from ${GET_DATA_URL} (version ${VERSION}) + associated ontologies (bao, bfo, biopax-level3, chebi, cheminf, cito, dublin_core_terms, fabio, go, iao, ncit, obi, pr, ro, sio, skos, so, uo)
+ MAKE_ONTOLOGIES_CSV = $$(mkdir -p DATA.ontologies && echo "BAO - BioAssay Ontology,bao.owl,https://data.bioontology.org/ontologies/BAO/submissions/56/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\nBFO - Basic Formal Ontology,bfo.owl,http://purl.obolibrary.org/obo/bfo.owl\n BioPAX - biological pathway data,bp.owl,http://www.biopax.org/release/biopax-level3.owl\n CHEMINF - Chemical Information Ontology,cheminf.owl,http://purl.obolibrary.org/obo/cheminf.owl\n ChEBI - Chemical Entities of Biological Interest,chebi.owl,http://purl.obolibrary.org/obo/chebi.owl\n CiTO,cito.nt,http://purl.org/spar/cito.nt\n DCMI Terms,dcterms.nt,https://www.dublincore.org/specifications/dublin-core/dcmi-terms/dublin_core_terms.nt\n FaBiO,fabio.nt,http://purl.org/spar/fabio.nt\n GO - Gene Ontology,go.owl,http://purl.obolibrary.org/obo/go.owl\n IAO - Information Artifact Ontology,iao.owl,http://purl.obolibrary.org/obo/iao.owl\n NCIt,ncit.owl,http://purl.obolibrary.org/obo/ncit.owl\n NDF-RT,ndfrt.owl,https://data.bioontology.org/ontologies/NDF-RT/submissions/1/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb\n OBI - Ontology for Biomedical Investigations,obi.owl,http://purl.obolibrary.org/obo/obi.owl\n OWL,owl.ttl,http://www.w3.org/2002/07/owl.ttl\n PDBo,pdbo.owl,http://rdf.wwpdb.org/schema/pdbx-v40.owl\n PR - PRotein Ontology (PRO),pr.owl,http://purl.obolibrary.org/obo/pr.owl\n RDF Schema,rdfs.ttl,https://www.w3.org/2000/01/rdf-schema.ttl\n RDF,rdf.ttl,http://www.w3.org/1999/02/22-rdf-syntax-ns.ttl\n RO - Relation Ontology,ro.owl,http://purl.obolibrary.org/obo/ro.owl\n SIO - Semanticscience Integrated Ontology,sio.owl,http://semanticscience.org/ontology/sio.owl\n SKOS,skos.rdf,http://www.w3.org/TR/skos-reference/skos.rdf\n SO - Sequence types and features ontology,so.owl,http://purl.obolibrary.org/obo/so.owl\n UO - Units of measurement ontology,uo.owl,http://purl.obolibrary.org/obo/uo.owl" > DATA.ontologies/ontologies.csv)

  [index]
- INPUT_FILES = pubchem.additional-ontologies.nt.gz nt.${DATE}/*.nt.gz
+ INPUT_FILES = DATA.ontologies/*.nt.gz DATA.pubchem/*.nt.gz
  CAT_INPUT_FILES = zcat ${INPUT_FILES}
- SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 1000000 }
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "ascii-prefixes-only": false, "num-triples-per-batch": 5000000 }
  STXXL_MEMORY = 10G

  [server]
  PORT = 7023
- ACCESS_TOKEN = ${NAME}_310129823
+ ACCESS_TOKEN = ${data:NAME}
  MEMORY_FOR_QUERIES = 20G
  TIMEOUT = 120s

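For reference, the percent-encoding that the dropped sed/awk pipeline performed can be sketched in a few lines of Python (an illustrative stand-in, not part of the package; as NOTE 3 says, QLever's indexer now warns about such IRIs but accepts them):

    import re

    def fix_iris(nt_line: str) -> str:
        # Percent-encode spaces and brackets inside <...> IRIs, as the
        # old awk script did for the PubChem NT files (sketch only).
        def encode(match: re.Match) -> str:
            iri = match.group(0)
            for char, code in ((" ", "%20"), ("[", "%5B"), ("]", "%5D"),
                               ("{", "%7B"), ("}", "%7D")):
                iri = iri.replace(char, code)
            return iri
        return re.sub(r"<[^>]*>", encode, nt_line)

    print(fix_iris('<http://x.org/a b> <http://x.org/p> "o" .'))
    # <http://x.org/a%20b> <http://x.org/p> "o" .
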
qlever/Qleverfiles/Qleverfile.wikimedia-commons ADDED
@@ -0,0 +1,37 @@
+ # Qleverfile for Wikimedia Commons, TODO: add to https://github.com/ad-freiburg/qlever-control
+ #
+ # qlever get-data # takes ~3 hours to download .bz2 file of size ~40 GB
+ # qlever index # takes ~2 hours and ~40 GB RAM (on an AMD Ryzen 9 5900X)
+ # qlever start # starts the server (takes around 15 seconds)
+
+ [data]
+ NAME = wikimedia-commons
+ MAIN_RDF_FILE = latest-mediainfo.ttl.gz
+ DATA_URL_BASE = https://dumps.wikimedia.org/other/wikibase/commonswiki
+ GET_TTL_CMD = wget -nc ${DATA_URL_BASE}/${MAIN_RDF_FILE}
+ GET_PROPS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX wikibase: <http://wikiba.se/ontology#> CONSTRUCT { ?s ?p ?o } WHERE { VALUES ?p { wikibase:claim wikibase:directClaim wikibase:novalue wikibase:propertyType wikibase:qualifier wikibase:qualifierValue wikibase:reference wikibase:referenceValue wikibase:statementProperty wikibase:statementValue } ?s ?p ?o }" > properties.nt
+ GET_LABELS_CMD = curl -s https://qlever.cs.uni-freiburg.de/api/wikidata -H "Accept: text/turtle" -H "Content-type: application/sparql-query" --data "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> CONSTRUCT { ?subject rdfs:label ?label } WHERE { ?subject @en@rdfs:label ?label }" > labels.nt
+ GET_DATA_CMD = ${GET_TTL_CMD} && ${GET_PROPS_CMD} && ${GET_LABELS_CMD}
+ INDEX_DESCRIPTION = Wikimedia Commons from ${DATA_URL_BASE}, version 09.11.2023 + Wikidata triples for rdfs:label and wikibase:claim etc.
+
+ [index]
+ INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt
+ CAT_INPUT_FILES = zcat -f ${INPUT_FILES}
+ WITH_TEXT_INDEX = from_literals
+ STXXL_MEMORY_GB = 5
+ SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
+
+ [server]
+ PORT = 7033
+ ACCESS_TOKEN = ${data:NAME}_2511328747
+ MEMORY_FOR_QUERIES_GB = 20
+ CACHE_MAX_SIZE_GB = 10
+ CACHE_MAX_SIZE_GB_SINGLE_ENTRY = 5
+
+ [runtime]
+ SYSTEM = native
+ IMAGE = docker.io/adfreiburg/qlever:latest
+
+ [ui]
+ PORT = 7000
+ CONFIG = wikimedia-commons
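
The `${data:NAME}` and `${data:MAIN_RDF_FILE}` references above are cross-section lookups in configparser's extended interpolation syntax (a literal `$` is escaped as `$$`, as in the PubChem Qleverfile), which is presumably how the qlever script resolves them. A minimal sketch:

    import textwrap
    from configparser import ConfigParser, ExtendedInterpolation

    config = ConfigParser(interpolation=ExtendedInterpolation())
    config.read_string(textwrap.dedent("""\
        [data]
        NAME = wikimedia-commons
        MAIN_RDF_FILE = latest-mediainfo.ttl.gz

        [index]
        INPUT_FILES = ${data:MAIN_RDF_FILE} labels.nt properties.nt

        [server]
        ACCESS_TOKEN = ${data:NAME}_2511328747
        """))

    print(config["index"]["INPUT_FILES"])    # latest-mediainfo.ttl.gz labels.nt properties.nt
    print(config["server"]["ACCESS_TOKEN"])  # wikimedia-commons_2511328747
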
qlever/commands/example_queries.py CHANGED
@@ -59,17 +59,37 @@ class ExampleQueriesCommand(QleverCommand):
  "or just compute the size of the result")
  subparser.add_argument("--limit", type=int,
  help="Limit on the number of results")
+ subparser.add_argument("--remove-offset-and-limit",
+ action="store_true", default=False,
+ help="Remove OFFSET and LIMIT from the query")
  subparser.add_argument("--accept", type=str,
  choices=["text/tab-separated-values",
- "application/sparql-results+json"],
+ "text/csv",
+ "application/sparql-results+json",
+ "text/turtle"],
  default="text/tab-separated-values",
  help="Accept header for the SPARQL query")
  subparser.add_argument("--clear-cache",
  choices=["yes", "no"],
  default="yes",
  help="Clear the cache before each query")
+ subparser.add_argument("--width-query-description", type=int,
+ default=40,
+ help="Width for printing the query description")
+ subparser.add_argument("--width-error-message", type=int,
+ default=80,
+ help="Width for printing the error message "
+ "(0 = no limit)")
+ subparser.add_argument("--width-result-size", type=int,
+ default=14,
+ help="Width for printing the result size")

  def execute(self, args) -> bool:
+ # We can't have both `--remove-offset-and-limit` and `--limit`.
+ if args.remove_offset_and_limit and args.limit:
+ log.error("Cannot have both --remove-offset-and-limit and --limit")
+ return False
+
  # If `args.accept` is `application/sparql-results+json`, we need `jq`.
  if args.accept == "application/sparql-results+json":
  try:
@@ -153,26 +173,41 @@ class ExampleQueriesCommand(QleverCommand):
  with mute_log():
  ClearCacheCommand().execute(args)

- # Count query.
- if args.download_or_count == "count":
- # Find first string matching ?[a-zA-Z0-9_]+ in query.
- match = re.search(r"\?[a-zA-Z0-9_]+", query)
- if not match:
- log.error("Could not find a variable in this query:")
- log.info("")
- log.info(query)
- return False
- first_var = match.group(0)
- query = query.replace(
- "SELECT ",
- f"SELECT (COUNT({first_var}) AS {first_var}_count_) "
- f"WHERE {{ SELECT ", 1) + " }"
+ # Remove OFFSET and LIMIT (after the last closing bracket).
+ if args.remove_offset_and_limit or args.limit:
+ closing_bracket_idx = query.rfind("}")
+ regexes = [re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
+ re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)]
+ for regex in regexes:
+ match = re.search(regex, query[closing_bracket_idx:])
+ if match:
+ query = query[:closing_bracket_idx + match.start()] + \
+ query[closing_bracket_idx + match.end():]

  # Limit query.
  if args.limit:
- query = query.replace(
- "SELECT ", "SELECT * WHERE { SELECT ", 1) \
- + f" }} LIMIT {args.limit}"
+ query += f" LIMIT {args.limit}"
+
+ # Count query.
+ if args.download_or_count == "count":
+ # First find out if there is a FROM clause.
+ regex_from_clause = re.compile(r"\s*FROM\s+<[^>]+>\s*",
+ re.IGNORECASE)
+ match_from_clause = re.search(regex_from_clause, query)
+ from_clause = " "
+ if match_from_clause:
+ from_clause = match_from_clause.group(0)
+ query = (query[:match_from_clause.start()] + " " +
+ query[match_from_clause.end():])
+ # Now we can add the outer SELECT COUNT(*).
+ query = re.sub(r"SELECT ",
+ "SELECT (COUNT(*) AS ?qlever_count_)"
+ + from_clause + "WHERE { SELECT ",
+ query, count=1, flags=re.IGNORECASE) + " }"
+
+ # A bit of pretty-printing.
+ query = re.sub(r"\s+", " ", query)
+ query = re.sub(r"\s*\.\s*\}", " }", query)

  # Launch query.
  try:
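
The new rewriting can be traced on a toy query. This standalone sketch (illustrative, not part of the package) mirrors the OFFSET/LIMIT stripping and the COUNT(*) wrapping from the hunk above:

    import re

    query = "SELECT ?s WHERE { ?s ?p ?o } LIMIT 100"

    # Remove OFFSET and LIMIT after the last closing bracket.
    closing = query.rfind("}")
    for regex in (re.compile(r"OFFSET\s+\d+\s*", re.IGNORECASE),
                  re.compile(r"LIMIT\s+\d+\s*", re.IGNORECASE)):
        match = regex.search(query[closing:])
        if match:
            query = query[:closing + match.start()] + query[closing + match.end():]

    # Wrap in an outer COUNT(*), as done for `--download-or-count count`.
    query = re.sub(r"SELECT ",
                   "SELECT (COUNT(*) AS ?qlever_count_) WHERE { SELECT ",
                   query, count=1, flags=re.IGNORECASE) + " }"
    print(query)
    # SELECT (COUNT(*) AS ?qlever_count_) WHERE { SELECT ?s WHERE { ?s ?p ?o }  }
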
@@ -214,10 +249,16 @@ class ExampleQueriesCommand(QleverCommand):
  f" | tonumber\" {result_file}",
  return_output=True)
  else:
- if args.accept == "text/tab-separated-values":
+ if (args.accept == "text/tab-separated-values"
+ or args.accept == "text/csv"):
  result_size = run_command(
  f"sed 1d {result_file} | wc -l",
  return_output=True)
+ elif args.accept == "text/turtle":
+ result_size = run_command(
+ f"sed '1d;/^@prefix/d;/^\\s*$/d' "
+ f"{result_file} | wc -l",
+ return_output=True)
  else:
  result_size = run_command(
  f"jq -r \".results.bindings | length\""
@@ -232,19 +273,25 @@ class ExampleQueriesCommand(QleverCommand):
  Path(result_file).unlink(missing_ok=True)

  # Print description, time, result in tabular form.
- if (len(description) > 60):
- description = description[:57] + "..."
+ if len(description) > args.width_query_description:
+ description = description[:args.width_query_description - 3]
+ description += "..."
  if error_msg is None:
- log.info(f"{description:<60} {time_seconds:6.2f} s "
- f"{result_size:14,}")
+ log.info(f"{description:<{args.width_query_description}} "
+ f"{time_seconds:6.2f} s "
+ f"{result_size:>{args.width_result_size},}")
  count_succeeded += 1
  total_time_seconds += time_seconds
  total_result_size += result_size
  else:
  count_failed += 1
- if (len(error_msg) > 60) and args.log_level != "DEBUG":
- error_msg = error_msg[:57] + "..."
- log.error(f"{description:<60} failed "
+ if (args.width_error_message > 0
+ and len(error_msg) > args.width_error_message
+ and args.log_level != "DEBUG"):
+ error_msg = error_msg[:args.width_error_message - 3]
+ error_msg += "..."
+ log.error(f"{description:<{args.width_query_description}} "
+ f"failed "
  f"{colored(error_msg, 'red')}")

  # Print total time.
@@ -252,11 +299,11 @@ class ExampleQueriesCommand(QleverCommand):
  if count_succeeded > 0:
  query_or_queries = "query" if count_succeeded == 1 else "queries"
  description = (f"TOTAL for {count_succeeded} {query_or_queries}")
- log.info(f"{description:<60} "
+ log.info(f"{description:<{args.width_query_description}} "
  f"{total_time_seconds:6.2f} s "
  f"{total_result_size:>14,}")
  description = (f"AVERAGE for {count_succeeded} {query_or_queries}")
- log.info(f"{description:<60} "
+ log.info(f"{description:<{args.width_query_description}} "
  f"{total_time_seconds / count_succeeded:6.2f} s "
  f"{round(total_result_size / count_succeeded):>14,}")
  else:
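
The new `--width-*` options work via nested f-string format specs: the inner `{...}` is evaluated first and its value becomes the field width. A tiny sketch with illustrative values:

    description, width = "TOTAL for 18 queries", 40
    result_size, size_width = 1234567, 14
    print(f"{description:<{width}} {result_size:>{size_width},}")
    # prints the description left-justified to 40 columns, then the result
    # size right-justified to 14 columns with thousands separators
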
qlever/commands/index.py CHANGED
@@ -25,7 +25,7 @@ class IndexCommand(QleverCommand):
  return True

  def relevant_qleverfile_arguments(self) -> dict[str: list[str]]:
- return {"data": ["name"],
+ return {"data": ["name", "format"],
  "index": ["input_files", "cat_input_files", "settings_json",
  "index_binary",
  "only_pso_and_pos_permutations", "use_patterns",
@@ -41,7 +41,7 @@
  def execute(self, args) -> bool:
  # Construct the command line.
  index_cmd = (f"{args.cat_input_files} | {args.index_binary}"
- f" -F ttl -f -"
+ f" -F {args.format} -"
  f" -i {args.name}"
  f" -s {args.name}.settings.json")
  if args.only_pso_and_pos_permutations:
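
With the new `format` setting, the assembled indexer invocation for, say, N-Triples input looks as follows (a sketch with illustrative values; `IndexBuilderMain` is assumed here as the QLever indexer binary):

    cat_input_files = "zcat DATA.pubchem/*.nt.gz"
    index_binary, name, data_format = "IndexBuilderMain", "pubchem", "nt"
    index_cmd = (f"{cat_input_files} | {index_binary}"
                 f" -F {data_format} -"
                 f" -i {name}"
                 f" -s {name}.settings.json")
    print(index_cmd)
    # zcat DATA.pubchem/*.nt.gz | IndexBuilderMain -F nt - -i pubchem -s pubchem.settings.json
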
qlever/qleverfile.py CHANGED
@@ -53,6 +53,10 @@ class Qleverfile:
  "--text-description", type=str, default=None,
  help="A concise description of the additional text data"
  " if any")
+ data_args["format"] = arg(
+ "--format", type=str, default="ttl",
+ choices=["ttl", "nt", "nq"],
+ help="The format of the data")

  index_args["input_files"] = arg(
  "--input-files", type=str, required=True,
@@ -102,7 +106,7 @@ class Qleverfile:
  help="The binary for starting the server (this requires "
  "that you have compiled QLever on your machine)")
  server_args["host_name"] = arg(
- "--host-name", type=str, default=f"{socket.getfqdn()}",
+ "--host-name", type=str, default=f"localhost",
  help="The name of the host on which the server listens for "
  "requests")
  server_args["port"] = arg(
qlever/util.py CHANGED
@@ -186,8 +186,9 @@ def get_random_string(length: int) -> str:

  def is_port_used(port: int) -> bool:
  """
- Try to bind to the port on all interfaces to check if the port is already in use.
- If the port is already in use, `socket.bind` will raise an `OSError` with errno EADDRINUSE.
+ Try to bind to the port on all interfaces to check if the port is already
+ in use. If the port is already in use, `socket.bind` will raise an
+ `OSError` with errno EADDRINUSE.
  """
  try:
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -200,3 +201,16 @@ def is_port_used(port: int) -> bool:
  if err.errno != errno.EADDRINUSE:
  log.warning(f"Failed to determine if port is used: {err}")
  return True
+
+
+ def check_if_installed(name: str, check_cmd: str) -> bool:
+ """
+ Helper function that checks if a given program is installed by running
+ the given command.
+ """
+ try:
+ run_command(check_cmd)
+ return True
+ except Exception as e:
+ log.error(f"{name} is not installed: {e}")
+ return False
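
A minimal usage sketch for the new helper (the check commands are illustrative and mirror the CHECK_REQUIREMENTS line in the PubChem Qleverfile above):

    from qlever.util import check_if_installed

    for name, check_cmd in [("docker", "docker --version"),
                            ("parallel", "parallel --version")]:
        if not check_if_installed(name, check_cmd):
            raise RuntimeError(f"please install {name} first")
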
qlever-0.5.4.dist-info/METADATA → qlever-0.5.6.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: qlever
- Version: 0.5.4
+ Version: 0.5.6
  Summary: Script for using the QLever SPARQL engine.
  Author-email: Hannah Bast <bast@cs.uni-freiburg.de>
  License: Apache-2.0
@@ -77,6 +77,14 @@ There are many more commands and options, see `qlever --help` for general help,
  `qlever <command> --help` for help on a specific command, or just the
  autocompletion.

+ # Use with your own dataset
+
+ To use QLever with your own dataset, you should also write a `Qleverfile`, like
+ in the example above. The easiest way to write a `Qleverfile` is to get one of
+ the existing ones (using `qlever setup-config ...` as explained above) and then
+ change it according to your needs (the variable names should be self-explanatory).
+ Pick one for a dataset that is similar to yours and when in doubt, pick `olympics`.
+
  # For developers
  The (Python) code for the script is in the `*.py` files in `src/qlever`. The
  The (Python) code for the script is in the `*.py` files in `src/qlever`. The
qlever-0.5.4.dist-info/RECORD → qlever-0.5.6.dist-info/RECORD CHANGED
@@ -6,8 +6,8 @@ qlever/containerize.py,sha256=p8g3O3G8a_0XLzSTzl_e5t9dqjbCQ-ippoA8vI2Z9pI,4193
  qlever/log.py,sha256=2O_RvFymnu_dB10ErBTAOsI8bgjORfdD0tE3USH-siM,1315
  qlever/qlever_main.py,sha256=tA_xqOs_FjvqlDIvKTprwuysfTwzsUjE7at26gRhCVA,2336
  qlever/qlever_old.py,sha256=X-JxmepFKYeFgSLLp0TRDNqXSxDwIbc8_0Xstiems8c,62026
- qlever/qleverfile.py,sha256=NjY3SFyRTm_igI8Rv87TOvZBiLwn1TgHmRh1jVA51DM,12935
- qlever/util.py,sha256=20NQJquSk_mSqvlK4k0OrSBqWrxKs5SgVshm5ucus5o,7847
+ qlever/qleverfile.py,sha256=D321zDnWi-ScCefbFGBydKKI7lzzr1CkohHW6KuwVw0,13106
+ qlever/util.py,sha256=xNXxXTDfoDqTV0DKo5rKQpkdIwvi7JwfW7ySelvJaZ0,8185
  qlever/Qleverfiles/Qleverfile.dblp,sha256=Y6BqAG1GZg-OmEs0HM00yAQuY2TGnSzsOO1LLmGVn2Y,994
  qlever/Qleverfiles/Qleverfile.dblp-plus,sha256=Dwd9pK1vPcelKfw6sA-IuyhbZ6yIxOh6_84JgPYnB9Q,1332
  qlever/Qleverfiles/Qleverfile.dbpedia,sha256=aaNZZayE-zVePGSwPzXemkX__Ns8-kP_E7DNNKZPnqg,1160
@@ -20,20 +20,21 @@ qlever/Qleverfiles/Qleverfile.ohm-planet,sha256=Y_yUxdpWpUOSDo_zmVKj3caa8X-Wv-1K
  qlever/Qleverfiles/Qleverfile.olympics,sha256=5w9BOFwEBhdSzPz-0LRxwhv-7Gj6xbF539HOXr3cqD0,1088
  qlever/Qleverfiles/Qleverfile.osm-country,sha256=UnlkckSXJDrknZORlU-Hdj_J82U4kStl1aRctCc5n6M,1953
  qlever/Qleverfiles/Qleverfile.osm-planet,sha256=2RilNix0fplN3GsNNyOu3GzmUss1Pq7586WKOFAQnSs,1400
- qlever/Qleverfiles/Qleverfile.pubchem,sha256=a6EAP8mOfC0V6NnVCLarvRagyoQSQDItR7AnrZqL9iE,3899
+ qlever/Qleverfiles/Qleverfile.pubchem,sha256=YuDzWQmukSvL1opu7cf1KX9407_P21lmecYZ9cdbuvA,5611
  qlever/Qleverfiles/Qleverfile.scientists,sha256=9eZ2c6P9a3E3VHa3RR7LdOQbF4k3oyyrn56Z3u4LZYs,1164
  qlever/Qleverfiles/Qleverfile.uniprot,sha256=9kAKseomdUnIt7EAZge39g1MTuaLVaSW9JYLHzIMolM,2338
  qlever/Qleverfiles/Qleverfile.vvz,sha256=ftdMj5dCC9jAlFtNt2WR7kP30w0itT_iYtj5HoUVyWU,931
  qlever/Qleverfiles/Qleverfile.wikidata,sha256=vDkTY3mPSx2C8MvFWfB72zZoc4d-TMJSw3f_-FqnEqs,1275
+ qlever/Qleverfiles/Qleverfile.wikimedia-commons,sha256=5JJ1MIp6LoM-ROCDFFIRvLREepCF4i4PnjOT9AFihzQ,2247
  qlever/Qleverfiles/Qleverfile.wikipathways,sha256=UFEVLrtOBiSQfibBN9xc2wDXrnWcnx5f8PY9khcE6bc,1983
  qlever/Qleverfiles/Qleverfile.yago-4,sha256=GikYPqChCtbAyZOVqszmVUwgQxSePTcgM8xw2b_21e4,1849
  qlever/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  qlever/commands/add_text_index.py,sha256=dkqYtwgOhgnXiei_eyhBWYCtdAiQUEmjWoa3JMlMb4c,3641
  qlever/commands/cache_stats.py,sha256=6JjueQstAqc8dNfgY8TP2EitFMxdUvCwrcyd7KUEb2o,4157
  qlever/commands/clear_cache.py,sha256=AnE1MOoj1ZexxrRT8FGeBLlv8rtQIVV4DP8VBn5-X-s,2843
- qlever/commands/example_queries.py,sha256=L32gVKdvb7MwZNqphF1K_gK6WARiwno6oiNDBgZuj1Y,12396
+ qlever/commands/example_queries.py,sha256=5-0ln5EkuDcQYPqKKAOcLaTIStMzFhkAogaNedfRc_I,15271
  qlever/commands/get_data.py,sha256=f9kjZI3TKad6JHSuXWNkeoajmW8h0Sx8ShvjauDCtNo,1412
- qlever/commands/index.py,sha256=lJhDnweknFZQm1czqPzNyz33EvbjIvOrS4j0wDaJ98o,5663
+ qlever/commands/index.py,sha256=iJ1wM7qtlAuRP_x0CupLWIndLRub1GqHvlCbB9ZlyPw,5680
  qlever/commands/index_stats.py,sha256=_BiUNBhmbYd9RPxrlm4HF0oENO6JmqnRiAkwkyOdN4U,11722
  qlever/commands/log.py,sha256=8Krt3MsTUDapYqVw1zUu5X15SF8mV97Uj0qKOWK8jXk,1861
  qlever/commands/query.py,sha256=_IDH-M8gKL_f1i5wzu0X452pZSUD0_qXl6bPXC85wX0,2750
@@ -43,9 +44,9 @@ qlever/commands/status.py,sha256=5S6EdapZEwFKV9cQZtNYcZhMbAXAY-FP6ggjIhfX8ek,163
  qlever/commands/stop.py,sha256=TZs4bxKHvujlZAU8BZmFjA5eXSZNAa6EeNzvPpEZsuI,4139
  qlever/commands/ui.py,sha256=b7g7Mp6ZWevn8f1kwFr-WR4ZWMq42KEV4cGl2QS7M1E,2828
  qlever/commands/warmup.py,sha256=WOZSxeV8U_F6pEEnAb6YybXLQMxZFTRJXs4BPHUhsmc,1030
- qlever-0.5.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- qlever-0.5.4.dist-info/METADATA,sha256=sL8oC3NhgnRmUMEMIqfqozI_RTcHkaFYUWeailPrB8g,4146
- qlever-0.5.4.dist-info/WHEEL,sha256=HiCZjzuy6Dw0hdX5R3LCFPDmFS4BWl8H-8W39XfmgX4,91
- qlever-0.5.4.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
- qlever-0.5.4.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
- qlever-0.5.4.dist-info/RECORD,,
+ qlever-0.5.6.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ qlever-0.5.6.dist-info/METADATA,sha256=FRJKEH385p07cxSLLKRHSgiND-PFwFtPnIYWQjVBv3M,4582
+ qlever-0.5.6.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+ qlever-0.5.6.dist-info/entry_points.txt,sha256=U_gbYYi0wwdsn884eb0XoOXfvhACOsxhlO330dZ9bi0,87
+ qlever-0.5.6.dist-info/top_level.txt,sha256=kd3zsYqiFd0--Czh5XTVkfEq6XR-XgRFW35X0v0GT-c,7
+ qlever-0.5.6.dist-info/RECORD,,
qlever-0.5.4.dist-info/WHEEL → qlever-0.5.6.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (72.2.0)
+ Generator: setuptools (74.0.0)
  Root-Is-Purelib: true
  Tag: py3-none-any