OneStop4All-Indexer 2.7.1__tar.gz → 2.8.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1/OneStop4All_Indexer.egg-info}/PKG-INFO +1 -1
  2. {onestop4all_indexer-2.7.1/OneStop4All_Indexer.egg-info → onestop4all_indexer-2.8.0.dev1}/PKG-INFO +1 -1
  3. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_dataset.py +25 -10
  4. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_repository.py +21 -9
  5. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/setup.py +1 -1
  6. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/configs.py +13 -2
  7. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/LICENSE +0 -0
  8. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/OneStop4All_Indexer.egg-info/SOURCES.txt +0 -0
  9. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/OneStop4All_Indexer.egg-info/dependency_links.txt +0 -0
  10. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/OneStop4All_Indexer.egg-info/entry_points.txt +0 -0
  11. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/OneStop4All_Indexer.egg-info/requires.txt +0 -0
  12. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/OneStop4All_Indexer.egg-info/top_level.txt +0 -0
  13. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/__init__.py +0 -0
  14. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/repository_base.py +0 -0
  15. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/repository_n4eorganization.py +0 -0
  16. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/repository_person.py +0 -0
  17. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/repository_resource_links.py +0 -0
  18. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/data_repositories/repository_theme.py +0 -0
  19. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/__init__.py +0 -0
  20. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_article.py +0 -0
  21. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_base.py +0 -0
  22. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_dataservice.py +0 -0
  23. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_document.py +0 -0
  24. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_learningresource.py +0 -0
  25. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_metadatastandards.py +0 -0
  26. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_organization.py +0 -0
  27. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_service.py +0 -0
  28. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/harvesters/harvester_softwaresourcecode.py +0 -0
  29. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/pyproject.toml +0 -0
  30. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/setup.cfg +0 -0
  31. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/__init__.py +0 -0
  32. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/cli.py +0 -0
  33. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/harvest.py +0 -0
  34. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/solr.py +0 -0
  35. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/sparql.py +0 -0
  36. {onestop4all_indexer-2.7.1 → onestop4all_indexer-2.8.0.dev1}/utils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.7.1
3
+ Version: 2.8.0.dev1
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.7.1
3
+ Version: 2.8.0.dev1
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -24,9 +24,9 @@ class Dataset_Harvester(HarvesterCordra):
24
24
  iteration_end=None,
25
25
  page_size=50000,
26
26
  query='type:"Dataset"',
27
- #query='id:"n4e/dthb-oai-pangaea.de-doi-10.1594-PANGAEA.981078"', #downloadURL: https://cordra.knowledgehub.test.n4e.geo.tu-dresden.de/objects/n4e/dthb-oai-pangaea.de-doi-10.1594-PANGAEA.981078
28
- #query='id:"n4e/dthb-GB_NERC_BAS_PDC_01994"', #accessURL: https://cordra.knowledgehub.test.n4e.geo.tu-dresden.de/objects/n4e/dthb-GB_NERC_BAS_PDC_01994
29
- #query='id:"n4e/dthb-6A0D8B9D-1BBD-441B-BA5C-6159EE41EE71"', #multiple accessURLs: https://cordra.knowledgehub.nfdi4earth.de/objects/n4e/dthb-6A0D8B9D-1BBD-441B-BA5C-6159EE41EE71,
27
+ # query='id:"n4e/dthb-oai-pangaea.de-doi-10.1594-PANGAEA.981078"', #downloadURL: https://cordra.knowledgehub.test.n4e.geo.tu-dresden.de/objects/n4e/dthb-oai-pangaea.de-doi-10.1594-PANGAEA.981078
28
+ # query='id:"n4e/dthb-GB_NERC_BAS_PDC_01994"', #accessURL: https://cordra.knowledgehub.test.n4e.geo.tu-dresden.de/objects/n4e/dthb-GB_NERC_BAS_PDC_01994
29
+ # query='id:"n4e/dthb-6A0D8B9D-1BBD-441B-BA5C-6159EE41EE71"', #multiple accessURLs: https://cordra.knowledgehub.nfdi4earth.de/objects/n4e/dthb-6A0D8B9D-1BBD-441B-BA5C-6159EE41EE71,
30
30
  solr_validation=True,
31
31
  **kw,
32
32
  ):
@@ -35,9 +35,15 @@ class Dataset_Harvester(HarvesterCordra):
35
35
  self.links_repo = links_repo
36
36
  self.removed_geometries = []
37
37
  self.solr_validator = SolrValidator()
38
- self.iteration_start = int(iteration_start)
39
- self.iteration_end = int(iteration_end)
40
- self.page_size = int(page_size)
38
+ self.iteration_start = (
39
+ int(iteration_start)
40
+ if iteration_start is not None
41
+ else iteration_start
42
+ )
43
+ self.iteration_end = (
44
+ int(iteration_end) if iteration_end is not None else iteration_end
45
+ )
46
+ self.page_size = int(page_size) if page_size is not None else page_size
41
47
  self.query = query
42
48
  self.solr_validation = solr_validation
43
49
  log.info("#" * 20)
@@ -277,7 +283,9 @@ class Dataset_Harvester(HarvesterCordra):
277
283
  if val:
278
284
  self.addValue(
279
285
  dict=datasets[subject],
280
- attribute="distribution" + self.flatten_separator + "accessURL",
286
+ attribute="distribution"
287
+ + self.flatten_separator
288
+ + "accessURL",
281
289
  value=val,
282
290
  )
283
291
  if "title" in distribution:
@@ -288,16 +296,23 @@ class Dataset_Harvester(HarvesterCordra):
288
296
  if val:
289
297
  self.addValue(
290
298
  dict=datasets[subject],
291
- attribute="distribution" + self.flatten_separator + "title",
299
+ attribute="distribution"
300
+ + self.flatten_separator
301
+ + "title",
292
302
  value=val,
293
303
  )
294
304
  if "downloadURL" in distribution:
305
+ print(distribution["downloadURL"])
295
306
  for download_url in distribution["downloadURL"]:
296
- val = self.get_string_from_jsonld(download_url, subject)
307
+ val = self.get_string_from_jsonld(
308
+ download_url, subject
309
+ )
297
310
  if val:
298
311
  self.addValue(
299
312
  dict=datasets[subject],
300
- attribute="distribution" + self.flatten_separator + "downloadURL",
313
+ attribute="distribution"
314
+ + self.flatten_separator
315
+ + "downloadURL",
301
316
  value=val,
302
317
  )
303
318
  elif key == "spatialCoverage":
@@ -27,7 +27,7 @@ class Repository_Harvester(Harvester):
27
27
  {
28
28
  ?subject rdf:type ?type.
29
29
  ?subject ?predicate ?object.
30
- FILTER (?predicate NOT IN (dct:publisher, dcat:distribution, dcat:contactPoint))
30
+ FILTER (?predicate NOT IN (dct:publisher, dcat:distribution, dcat:contactPoint, n4e:hasCertificate))
31
31
  }
32
32
  UNION{
33
33
  VALUES ?predicate { dct:publisher }
@@ -44,6 +44,13 @@ class Repository_Harvester(Harvester):
44
44
  ?metadataStandard dct:title ?object.
45
45
  optional {?metadataStandard n4e:hasWebsite ?metadataStandard_homepage.}
46
46
  }
47
+ UNION {
48
+ VALUES ?predicate { n4e:hasCertificate }
49
+ ?subject rdf:type ?type;
50
+ n4e:hasCertificate ?certificate.
51
+ optional { ?certificate n4e:certificateName ?certificate_name }
52
+ bind(coalesce(?certificate_name, ?certificate) as ?object)
53
+ }
47
54
  UNION {
48
55
  VALUES ?predicate { dcat:contactPoint }
49
56
  ?subject rdf:type ?type;
@@ -157,9 +164,8 @@ class Repository_Harvester(Harvester):
157
164
 
158
165
  #execute additional sparql query to determine if (some) datasets in this repository are harvested by n4e
159
166
  #key = subject
160
- has_harvested_datasets = self.request_has_harvested_datasets(repo_subject=key)
161
- repo["isHarvestedByNFDI4Earth"] = has_harvested_datasets
162
-
167
+ has_harvested_datasets = self.request_has_harvested_datasets(repo_subject=key, repo=repo)
168
+ repo["isHarvestedByNFDI4Earth"] = has_harvested_datasets
163
169
 
164
170
  return repos_list
165
171
 
@@ -536,7 +542,7 @@ class Repository_Harvester(Harvester):
536
542
  r["object"]["value"],
537
543
  )
538
544
 
539
- def request_has_harvested_datasets(self, repo_subject):
545
+ def request_has_harvested_datasets(self, repo_subject, repo=None):
540
546
  sparql_query = """
541
547
  PREFIX n4e: <http://nfdi4earth.de/ontology/>
542
548
  select COUNT(?s) AS ?datasetCount
@@ -552,7 +558,13 @@ class Repository_Harvester(Harvester):
552
558
 
553
559
  if not hits or not hits[0]:
554
560
  return False
555
- else:
556
- count = int(hits[0]["datasetCount"]["value"])
557
- has_harvested_datasets = count > 0
558
- return has_harvested_datasets
561
+
562
+ # Check for Earth Data Portal repo
563
+ if repo is not None and (
564
+ repo["id"] == "n4e-service0044"
565
+ or "Earth Data Portal" in repo["title"]):
566
+ return True
567
+
568
+ count = int(hits[0]["datasetCount"]["value"])
569
+ has_harvested_datasets = count > 0
570
+ return has_harvested_datasets
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="OneStop4All-Indexer",
5
- version="2.7.1",
5
+ version="2.8.0.dev1",
6
6
  description="Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index",
7
7
  author="Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer",
8
8
  author_email="m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de",
@@ -1,10 +1,21 @@
1
+ import logging
1
2
  import os
2
3
  from jproperties import Properties
3
4
 
5
+ log = logging.getLogger(__name__)
6
+
7
+
4
8
  app_properties = Properties()
5
9
 
6
- with open("./application.properties", "rb") as config_file:
7
- app_properties.load(config_file)
10
+ try:
11
+ with open("./application.properties", "rb") as config_file:
12
+ app_properties.load(config_file)
13
+ except FileNotFoundError:
14
+ app_properties = Properties()
15
+ log.warning(
16
+ "Warnung: application.properties not found! "
17
+ "Using default values and environment variables only."
18
+ )
8
19
 
9
20
 
10
21
  def parse_bool_config(value):