OneStop4All-Indexer 2.8.0.dev12__tar.gz → 2.8.0.dev14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14/OneStop4All_Indexer.egg-info}/PKG-INFO +1 -1
  2. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/OneStop4All_Indexer.egg-info/SOURCES.txt +1 -1
  3. {onestop4all_indexer-2.8.0.dev12/OneStop4All_Indexer.egg-info → onestop4all_indexer-2.8.0.dev14}/PKG-INFO +1 -1
  4. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/__init__.py +1 -1
  5. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_organization.py +63 -105
  6. onestop4all_indexer-2.8.0.dev14/harvesters/harvester_service.py +551 -0
  7. onestop4all_indexer-2.8.0.dev12/harvesters/harvester_softwaresourcecode.py → onestop4all_indexer-2.8.0.dev14/harvesters/harvester_software.py +5 -5
  8. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/setup.py +1 -1
  9. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/cli.py +1 -1
  10. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/harvest.py +2 -2
  11. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/solr.py +14 -25
  12. onestop4all_indexer-2.8.0.dev12/harvesters/harvester_service.py +0 -224
  13. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/LICENSE +0 -0
  14. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/OneStop4All_Indexer.egg-info/dependency_links.txt +0 -0
  15. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/OneStop4All_Indexer.egg-info/entry_points.txt +0 -0
  16. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/OneStop4All_Indexer.egg-info/requires.txt +0 -0
  17. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/OneStop4All_Indexer.egg-info/top_level.txt +0 -0
  18. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/__init__.py +0 -0
  19. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/repository_base.py +0 -0
  20. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/repository_n4eorganization.py +0 -0
  21. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/repository_person.py +0 -0
  22. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/repository_resource_links.py +0 -0
  23. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/data_repositories/repository_theme.py +0 -0
  24. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_article.py +0 -0
  25. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_base.py +0 -0
  26. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_dataservice.py +0 -0
  27. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_dataset.py +0 -0
  28. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_document.py +0 -0
  29. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_learningresource.py +0 -0
  30. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_metadatastandards.py +0 -0
  31. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/harvesters/harvester_repository.py +0 -0
  32. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/pyproject.toml +0 -0
  33. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/setup.cfg +0 -0
  34. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/__init__.py +0 -0
  35. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/configs.py +0 -0
  36. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/sparql.py +0 -0
  37. {onestop4all_indexer-2.8.0.dev12 → onestop4all_indexer-2.8.0.dev14}/utils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.8.0.dev12
3
+ Version: 2.8.0.dev14
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -24,7 +24,7 @@ harvesters/harvester_metadatastandards.py
24
24
  harvesters/harvester_organization.py
25
25
  harvesters/harvester_repository.py
26
26
  harvesters/harvester_service.py
27
- harvesters/harvester_softwaresourcecode.py
27
+ harvesters/harvester_software.py
28
28
  utils/__init__.py
29
29
  utils/cli.py
30
30
  utils/configs.py
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.8.0.dev12
3
+ Version: 2.8.0.dev14
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -1,7 +1,7 @@
1
1
  from .harvester_repository import *
2
2
  from .harvester_organization import *
3
3
  from .harvester_article import *
4
- from .harvester_softwaresourcecode import *
4
+ from .harvester_software import *
5
5
  from .harvester_learningresource import *
6
6
  from .harvester_metadatastandards import *
7
7
  from .harvester_document import *
@@ -88,11 +88,34 @@ class Organization_Harvester(Harvester):
88
88
  )
89
89
 
90
90
  organization_list = []
91
- for (
92
- key,
93
- value,
94
- ) in organizations.items(): # transform orga dict to list for indexing
91
+ for key, value in organizations.items(): # transform orga dict to list for indexing
95
92
  organization = value
93
+
94
+ # clean geometry: if more than one geometry is present,
95
+ # keep only the first one (should not happen, but just in case)
96
+ geom = organization.get("geometry", [])
97
+ if len(geom) > 1:
98
+ organization["geometry"] = geom[:1]
99
+
100
+ # clean rorId: if more than one rorId is present,
101
+ # keep only unique ones (should not happen, but just in case)
102
+ rorid = organization.get("rorId", [])
103
+ if len(rorid) > 1:
104
+ organization["rorId"] = list(set(rorid))
105
+
106
+ # Transform locations
107
+ localities = organization.get("locality", [])
108
+ countries = organization.get("countryName", [])
109
+ complete_locations = []
110
+
111
+ for loc, country in zip(localities, countries):
112
+ complete_locations.append(f"{loc}, {country}")
113
+
114
+ # Filter duplication
115
+ organization["location"] = list(dict.fromkeys(complete_locations))
116
+ organization["locality"] = list(dict.fromkeys(localities))
117
+ organization["countryName"] = list(dict.fromkeys(countries))
118
+
96
119
  # ensure mainTitle
97
120
  if (
98
121
  "mainTitle" not in organization
@@ -137,9 +160,23 @@ class Organization_Harvester(Harvester):
137
160
  assignto_dict[subject[0]][attribute] = []
138
161
  assignto_dict[subject[0]][attribute].extend(organization_name)
139
162
 
163
+ def does_object_exist(self, value, attribute: str, data: dict):
164
+ return attribute in data and value in data[attribute]
165
+
140
166
  def parse_response(
141
167
  self, hits, organizations, issuborganization, hasN4Econtact
142
168
  ):
169
+ PREDICATES = {
170
+ "http://xmlns.com/foaf/0.1/homepage": "homepage",
171
+ "http://www.w3.org/2002/07/owl#sameAs": "sameAs",
172
+ "http://w3id.org/nfdi4ing/metadata4ing#hasRorId": "rorId",
173
+ "http://www.w3.org/2004/02/skos/core#altLabel": "altLabel",
174
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "type",
175
+ "http://nfdi4earth.de/ontology/sourceSystemURL": "sourceSystemURL",
176
+ "http://nfdi4earth.de/ontology/hasSignedCommitment": "hasSignedCommitment",
177
+ "http://nfdi4earth.de/ontology/sourceSystemID": "sourceSystem" + self.flatten_separator + "id"
178
+ }
179
+
143
180
  for hit in hits:
144
181
  subject = hit["subject"]["value"]
145
182
  predicate = hit["predicate"]["value"]
@@ -151,6 +188,8 @@ class Organization_Harvester(Harvester):
151
188
  organizations[subject]["id"] = self.getID(
152
189
  subject
153
190
  ) # use ID from triple store also in Solr to ensure stable IDs
191
+ organizations[subject]["locality"] = []
192
+ organizations[subject]["countryName"] = []
154
193
 
155
194
  # set geometry if available and not already set
156
195
  if (
@@ -179,8 +218,9 @@ class Organization_Harvester(Harvester):
179
218
 
180
219
  if predicate == "http://schema.org/name": # name
181
220
  if (
182
- "xml:lang" not in hit["object"]
183
- or hit["object"]["xml:lang"] == "en"
221
+ ("xml:lang" not in hit["object"]
222
+ or hit["object"]["xml:lang"] == "en")
223
+ and not self.does_object_exist(object, "name", organizations[subject])
184
224
  ): # use international name for orga name
185
225
  self.addValue(
186
226
  dict=organizations[subject],
@@ -189,60 +229,30 @@ class Organization_Harvester(Harvester):
189
229
  )
190
230
  organizations[subject]["mainTitle"] = object # mainTitle
191
231
  if (
192
- "name_alt" not in organizations[subject]
193
- or object not in organizations[subject]["name_alt"]
232
+ not self.does_object_exist(object, "name_alt", organizations[subject])
194
233
  ): # prevent duplicates
195
234
  self.addValue(
196
235
  dict=organizations[subject],
197
236
  attribute="name_alt",
198
237
  value=object,
199
238
  )
200
- elif predicate == "http://xmlns.com/foaf/0.1/homepage": # homepage
201
- self.addValue(
202
- dict=organizations[subject],
203
- attribute="homepage",
204
- value=object,
205
- )
206
- elif (
207
- predicate == "http://www.w3.org/2006/vcard/ns#locality"
208
- ): # locality
209
- self.addValue(
210
- dict=organizations[subject],
211
- attribute="locality",
212
- value=object,
213
- )
214
- elif (
215
- predicate == "http://www.w3.org/2006/vcard/ns#country-name"
216
- ): # countryName
217
- self.addValue(
218
- dict=organizations[subject],
219
- attribute="countryName",
220
- value=object,
221
- )
222
- elif predicate == "http://www.w3.org/2002/07/owl#sameAs": # sameAs
223
- self.addValue(
224
- dict=organizations[subject],
225
- attribute="sameAs",
226
- value=object,
227
- )
228
- elif (
229
- predicate == "http://w3id.org/nfdi4ing/metadata4ing#hasRorId"
230
- ): # rorId
231
- self.addValue(
232
- dict=organizations[subject],
233
- attribute="rorId",
234
- value=object,
235
- )
236
- elif (
237
- predicate == "http://www.w3.org/2004/02/skos/core#altLabel"
238
- ): # altLabel
239
- self.addValue(
240
- dict=organizations[subject],
241
- attribute="altLabel",
242
- value=object,
243
- )
239
+
240
+ elif predicate == "http://www.w3.org/2006/vcard/ns#locality":
241
+ organizations[subject]["locality"].append(object)
242
+
243
+ elif predicate == "http://www.w3.org/2006/vcard/ns#country-name":
244
+ organizations[subject]["countryName"].append(object)
245
+
246
+ elif predicate in PREDICATES:
247
+ attribute = PREDICATES[predicate]
248
+ if not self.does_object_exist(object, attribute, organizations[subject]):
249
+ self.addValue(
250
+ dict=organizations[subject],
251
+ attribute=attribute,
252
+ value=object)
244
253
  elif (
245
254
  predicate == "http://www.w3.org/ns/org#subOrganizationOf"
255
+ and (subject, object) not in issuborganization
246
256
  ): # subOrganizationOf
247
257
  issuborganization.append(
248
258
  (subject, object)
@@ -250,60 +260,8 @@ class Organization_Harvester(Harvester):
250
260
  elif (
251
261
  predicate
252
262
  == "http://nfdi4earth.de/ontology/hasNFDI4EarthContactPerson"
263
+ and (subject, object) not in hasN4Econtact
253
264
  ): # NFDI4EarthContactPerson
254
265
  hasN4Econtact.append(
255
266
  (subject, object)
256
267
  ) # store, resolve contact info later
257
- elif (
258
- predicate == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
259
- ): # type
260
- self.addValue(
261
- dict=organizations[subject], attribute="type", value=object
262
- )
263
- elif (
264
- predicate == "http://nfdi4earth.de/ontology/sourceSystem"
265
- ): # sourceSystem
266
- if "sourceSystem_homepage" in hit:
267
- self.addValue(
268
- organizations[subject],
269
- "sourceSystem" + self.flatten_separator + "homepage",
270
- hit["sourceSystem_homepage"]["value"],
271
- )
272
- if "sourceSystem_title" in hit:
273
- self.addValue(
274
- organizations[subject],
275
- "sourceSystem" + self.flatten_separator + "title",
276
- hit["sourceSystem_title"]["value"],
277
- )
278
- elif (
279
- predicate == "http://nfdi4earth.de/ontology/sourceSystemID"
280
- ): # sourceSystemID
281
- if (
282
- "sourceSystem" + self.flatten_separator + "id"
283
- not in organizations[subject]
284
- ):
285
- # only set if not already present
286
- self.addValue(
287
- dict=organizations[subject],
288
- attribute="sourceSystem"
289
- + self.flatten_separator
290
- + "id",
291
- value=object,
292
- )
293
- elif (
294
- predicate == "http://nfdi4earth.de/ontology/sourceSystemURL"
295
- ): # sourceSystemURL
296
- self.addValue(
297
- dict=organizations[subject],
298
- attribute="sourceSystemURL",
299
- value=object,
300
- )
301
- elif (
302
- predicate
303
- == "http://nfdi4earth.de/ontology/hasSignedCommitment"
304
- ): # hasSignedCommitment
305
- self.addValue(
306
- dict=organizations[subject],
307
- attribute="hasSignedCommitment",
308
- value=object,
309
- )
@@ -0,0 +1,551 @@
1
+ import logging
2
+ from .harvester_base import Harvester
3
+ from utils import sparql
4
+ from data_repositories.repository_n4eorganization import (
5
+ RepositoryN4EOrganization,
6
+ )
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+
11
+ # harvester for Services https://nfdi4earth.pages.rwth-aachen.de/knowledgehub/nfdi4earth-kh-schema/Service/
12
+ class Service_Harvester(Harvester):
13
+ sparql_query = """
14
+ PREFIX fo: <http://www.w3.org/1999/XSL/Format#>
15
+ PREFIX foaf: <http://xmlns.com/foaf/0.1/>
16
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
17
+ PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
18
+ prefix dcat: <http://www.w3.org/ns/dcat#>
19
+ prefix dct: <http://purl.org/dc/terms/>
20
+ prefix n4e: <http://nfdi4earth.de/ontology/>
21
+ PREFIX m4i: <http://w3id.org/nfdi4ing/metadata4ing#>
22
+ PREFIX geo: <http://www.opengis.net/ont/geosparql#>
23
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
24
+ PREFIX schema: <http://schema.org/>
25
+
26
+ SELECT ?subject ?predicate ?object
27
+ ?contactpoint_email
28
+ ?contactpoint_url
29
+ ?contact_fn
30
+ ?contact_email
31
+ ?serviceProvider_homepage ?serviceProvider_imprint ?serviceProvider_rorId ?serviceProvider_name
32
+ ?serviceLocationPoint
33
+ ?tangibleKPI_kpiType ?tangibleKPI_kpiValue ?tangibleKPI_kpiNotes
34
+ WHERE {
35
+ {
36
+ # Page over distinct subjects first — avoids ORDER BY + OFFSET on the full
37
+ # UNION result set which forces full materialization and causes timeouts.
38
+ SELECT DISTINCT ?subject
39
+ WHERE {
40
+ ?subject rdf:type n4e:Service .
41
+ }
42
+ OFFSET %d
43
+ LIMIT %d
44
+ }
45
+
46
+ {
47
+ ?subject ?predicate ?object .
48
+ FILTER (?predicate NOT IN (dcat:contactPoint, n4e:sourceSystem))
49
+ }
50
+ UNION {
51
+ VALUES ?predicate { dcat:contactPoint }
52
+ ?subject dcat:contactPoint ?object .
53
+ OPTIONAL { ?object vcard:hasEmail ?contactpoint_email . }
54
+ OPTIONAL { ?object vcard:hasURL ?contactpoint_url . }
55
+ }
56
+ UNION {
57
+ VALUES ?predicate { n4e:firstLevelSupportContact }
58
+ ?subject n4e:firstLevelSupportContact ?object .
59
+ OPTIONAL { ?object vcard:fn ?contact_fn . }
60
+ OPTIONAL { ?object vcard:hasEmail ?contact_email . }
61
+ }
62
+ UNION {
63
+ VALUES ?predicate { n4e:securityIncidentContact }
64
+ ?subject n4e:securityIncidentContact ?object .
65
+ OPTIONAL { ?object vcard:fn ?contact_fn . }
66
+ OPTIONAL { ?object vcard:hasEmail ?contact_email . }
67
+ }
68
+ UNION {
69
+ VALUES ?predicate { n4e:serviceOwner }
70
+ ?subject n4e:serviceOwner ?object .
71
+ OPTIONAL { ?object vcard:fn ?contact_fn . }
72
+ OPTIONAL { ?object vcard:hasEmail ?contact_email . }
73
+ }
74
+ UNION {
75
+ VALUES ?predicate { n4e:serviceManager }
76
+ ?subject n4e:serviceManager ?object .
77
+ OPTIONAL { ?object vcard:fn ?contact_fn . }
78
+ OPTIONAL { ?object vcard:hasEmail ?contact_email . }
79
+ }
80
+ UNION {
81
+ VALUES ?predicate { n4e:serviceProvider }
82
+ ?subject n4e:serviceProvider ?object .
83
+ OPTIONAL { ?object foaf:homepage ?serviceProvider_homepage . }
84
+ OPTIONAL { ?object n4e:hasImprint ?serviceProvider_imprint . }
85
+ OPTIONAL { ?object m4i:hasRorId ?serviceProvider_rorId . }
86
+ OPTIONAL { ?object schema:name ?serviceProvider_name . }
87
+ }
88
+ UNION {
89
+ VALUES ?predicate { n4e:serviceLocation }
90
+ ?subject n4e:serviceLocation ?object .
91
+ OPTIONAL { ?object geo:asWKT ?serviceLocationPoint . }
92
+ }
93
+ UNION {
94
+ VALUES ?predicate { n4e:tangibleKPI }
95
+ ?subject n4e:tangibleKPI ?object .
96
+ OPTIONAL { ?object n4e:kpiType ?tangibleKPI_kpiType . }
97
+ OPTIONAL { ?object n4e:kpiValue ?tangibleKPI_kpiValue . }
98
+ OPTIONAL { ?object n4e:kpiNotes ?tangibleKPI_kpiNotes . }
99
+ }
100
+ }
101
+ """
102
+
103
+ def __init__(
104
+ self, n4e_organizations_repo: RepositoryN4EOrganization, **kw
105
+ ):
106
+ super().__init__(**kw)
107
+ self.n4e_organizations_repo = n4e_organizations_repo
108
+
109
+ def harvest(self):
110
+ limit = 5000
111
+ # convert to list of repo documents for indexing
112
+ services = {} # repos dict
113
+
114
+ i = 0
115
+ # split sparql query by paging over distinct subjects
116
+ # (sub-SELECT OFFSET/LIMIT)
117
+ while True:
118
+ query_splitted = self.sparql_query % (limit * i, limit)
119
+ hits = sparql.execute_query(self.sparql_endpoint, query_splitted)
120
+
121
+ subjects_before = len(services)
122
+ self.parse_response(hits, services)
123
+ new_subjects = len(services) - subjects_before
124
+
125
+ i += 1
126
+
127
+ # Stop when the sub-SELECT returned fewer subjects than the page size
128
+ if new_subjects < limit:
129
+ break
130
+
131
+ services_list = []
132
+ for (
133
+ key,
134
+ value,
135
+ ) in services.items(): # transform repos dict to list for indexing
136
+ service = value
137
+ if "mainTitle" not in service and len(service["name"]) > 0:
138
+ service["mainTitle"] = service["name"][0]
139
+ service["mainTitle"] = service["mainTitle"].strip()
140
+ services_list.append(service)
141
+
142
+ return services_list
143
+
144
+ def parse_response(
145
+ self, hits, services
146
+ ):
147
+ for hit in hits:
148
+ subject = hit["subject"]["value"]
149
+ predicate = hit["predicate"]["value"]
150
+ object = hit["object"]["value"]
151
+
152
+ if subject not in services:
153
+ services[subject] = {}
154
+ services[subject]["uri"] = subject
155
+ services[subject]["id"] = self.getID(
156
+ subject
157
+ ) # use ID from triple store also in Solr to ensure stable IDs
158
+
159
+ if predicate == "http://schema.org/name": # name
160
+ if (
161
+ "xml:lang" not in hit["object"]
162
+ or hit["object"]["xml:lang"] == "en"
163
+ ): # use international name for orga name
164
+ self.addValue(
165
+ dict=services[subject],
166
+ attribute="name",
167
+ value=object,
168
+ )
169
+ services[subject]["mainTitle"] = object # mainTitle
170
+ if (
171
+ "name" not in services[subject]
172
+ or object not in services[subject]["name"]
173
+ ): # prevent duplicates
174
+ self.addValue(
175
+ dict=services[subject],
176
+ attribute="name",
177
+ value=object,
178
+ )
179
+ elif predicate == "http://schema.org/description": # description
180
+ self.addValue(
181
+ dict=services[subject],
182
+ attribute="description",
183
+ value=object,
184
+ )
185
+ elif predicate == "http://schema.org/additionalType": # additionalType
186
+ self.addValue(
187
+ dict=services[subject],
188
+ attribute="additionalType",
189
+ value=object,
190
+ )
191
+ elif predicate == "http://schema.org/keywords": # keyword
192
+ self.addValue(
193
+ dict=services[subject],
194
+ attribute="keyword",
195
+ value=object,
196
+ )
197
+ elif predicate == "http://schema.org/url": # url
198
+ self.addValue(
199
+ dict=services[subject],
200
+ attribute="url",
201
+ value=object,
202
+ )
203
+ elif predicate == "http://nfdi4earth.de/ontology/serviceType": # serviceType
204
+ services[subject]["serviceType"] = object
205
+ elif predicate == "http://nfdi4earth.de/ontology/serviceHost": # serviceHost
206
+ services[subject]["serviceHost"] = object
207
+ elif predicate == "http://www.w3.org/ns/dcat#contactPoint": # contactPoint
208
+ if "contactpoint_email" in hit:
209
+ self.addValue(
210
+ services[subject],
211
+ "contactPoint" + self.flatten_separator + "email",
212
+ hit["contactpoint_email"]["value"],
213
+ )
214
+ if "contactpoint_url" in hit:
215
+ self.addValue(
216
+ services[subject],
217
+ "contactPoint" + self.flatten_separator + "url",
218
+ hit["contactpoint_url"]["value"],
219
+ )
220
+ elif predicate == "http://nfdi4earth.de/ontology/firstLevelSupportContact":
221
+ if "contact_fn" in hit:
222
+ self.addValue(
223
+ services[subject],
224
+ "firstLevelSupportContact" + self.flatten_separator + "fullname",
225
+ hit["contact_fn"]["value"],
226
+ )
227
+ if "contact_email" in hit:
228
+ self.addValue(
229
+ services[subject],
230
+ "firstLevelSupportContact" + self.flatten_separator + "hasEmail",
231
+ hit["contact_email"]["value"],
232
+ )
233
+ elif predicate == "http://nfdi4earth.de/ontology/serviceOwner":
234
+ if "contact_fn" in hit:
235
+ self.addValue(
236
+ services[subject],
237
+ "serviceOwner" + self.flatten_separator + "fullname",
238
+ hit["contact_fn"]["value"],
239
+ )
240
+ if "contact_email" in hit:
241
+ self.addValue(
242
+ services[subject],
243
+ "serviceOwner" + self.flatten_separator + "hasEmail",
244
+ hit["contact_email"]["value"],
245
+ )
246
+ elif predicate == "http://nfdi4earth.de/ontology/serviceManager":
247
+ if "contact_fn" in hit:
248
+ self.addValue(
249
+ services[subject],
250
+ "serviceManager" + self.flatten_separator + "fullname",
251
+ hit["contact_fn"]["value"],
252
+ )
253
+ if "contact_email" in hit:
254
+ self.addValue(
255
+ services[subject],
256
+ "serviceManager" + self.flatten_separator + "hasEmail",
257
+ hit["contact_email"]["value"],
258
+ )
259
+ elif predicate == "http://nfdi4earth.de/ontology/serviceProvider":
260
+ if "serviceProvider_homepage" in hit:
261
+ self.addValue(
262
+ services[subject],
263
+ "serviceProvider" + self.flatten_separator + "homepage",
264
+ hit["serviceProvider_homepage"]["value"],
265
+ )
266
+ if "serviceProvider_imprint" in hit:
267
+ self.addValue(
268
+ services[subject],
269
+ "serviceProvider" + self.flatten_separator + "imprint",
270
+ hit["serviceProvider_imprint"]["value"],
271
+ )
272
+ if "serviceProvider_rorId" in hit:
273
+ self.addValue(
274
+ services[subject],
275
+ "serviceProvider" + self.flatten_separator + "rorId",
276
+ hit["serviceProvider_rorId"]["value"],
277
+ )
278
+ if "serviceProvider_name" in hit:
279
+ self.addValue(
280
+ services[subject],
281
+ "serviceProvider" + self.flatten_separator + "name",
282
+ hit["serviceProvider_name"]["value"],
283
+ )
284
+ elif predicate == "http://nfdi4earth.de/ontology/serviceLocation":
285
+ if "serviceLocationPoint" in hit:
286
+ self.addValue(
287
+ services[subject],
288
+ "serviceLocationPoint",
289
+ hit["serviceLocationPoint"]["value"],
290
+ )
291
+ elif predicate == "http://nfdi4earth.de/ontology/securityIncidentContact":
292
+ if "contact_fn" in hit:
293
+ self.addValue(
294
+ services[subject],
295
+ "securityIncidentContact" + self.flatten_separator + "fullname",
296
+ hit["contact_fn"]["value"],
297
+ )
298
+ if "contact_email" in hit:
299
+ self.addValue(
300
+ services[subject],
301
+ "securityIncidentContact" + self.flatten_separator + "hasEmail",
302
+ hit["contact_email"]["value"],
303
+ )
304
+ elif (
305
+ predicate == "http://nfdi4earth.de/ontology/sourceSystemID"
306
+ ): # sourceSystemID
307
+ self.addValue(
308
+ dict=services[subject],
309
+ attribute="sourceSystem" + self.flatten_separator + "id",
310
+ value=object,
311
+ )
312
+ elif (
313
+ predicate == "http://nfdi4earth.de/ontology/sourceSystem"
314
+ ): # sourceSystem
315
+ if "sourceSystem_homepage" in hit:
316
+ self.addValue(
317
+ services[subject],
318
+ "sourceSystem" + self.flatten_separator + "homepage",
319
+ hit["sourceSystem_homepage"]["value"],
320
+ )
321
+ if "sourceSystem_title" in hit:
322
+ self.addValue(
323
+ services[subject],
324
+ "sourceSystem" + self.flatten_separator + "title",
325
+ hit["sourceSystem_title"]["value"],
326
+ )
327
+ elif (
328
+ predicate == "http://nfdi4earth.de/ontology/sourceSystemURL"
329
+ ): # sourceSystemURL
330
+ self.addValue(
331
+ dict=services[subject],
332
+ attribute="sourceSystemURL",
333
+ value=object,
334
+ )
335
+ elif (
336
+ predicate == "http://www.w3.org/2004/02/skos/core#altLabel"
337
+ ): # altLabel
338
+ self.addValue(
339
+ dict=services[subject],
340
+ attribute="altLabel",
341
+ value=object,
342
+ )
343
+ elif (
344
+ predicate == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
345
+ ): # type
346
+ self.addValue(
347
+ dict=services[subject],
348
+ attribute="type",
349
+ value=object
350
+ )
351
+ elif (
352
+ predicate == "http://nfdi4earth.de/ontology/serviceType"
353
+ ): # serviceType
354
+ self.addValue(
355
+ dict=services[subject],
356
+ attribute="serviceType",
357
+ value=object
358
+ )
359
+ elif (
360
+ predicate == "http://nfdi4earth.de/ontology/serviceCategory"
361
+ ): # serviceCategory
362
+ self.addValue(
363
+ dict=services[subject],
364
+ attribute="serviceCategory",
365
+ value=object
366
+ )
367
+ elif (
368
+ predicate == "http://nfdi4earth.de/ontology/linkToDocumentation"
369
+ ): # linkToDocumentation
370
+ self.addValue(
371
+ dict=services[subject],
372
+ attribute="linkToDocumentation",
373
+ value=object
374
+ )
375
+ elif (
376
+ predicate == "http://nfdi4earth.de/ontology/nameAbbreviation"
377
+ ): # nameAbbreviation
378
+ self.addValue(
379
+ dict=services[subject],
380
+ attribute="nameAbbreviation",
381
+ value=object
382
+ )
383
+ elif (
384
+ predicate == "http://nfdi4earth.de/ontology/shortDescription"
385
+ ): # shortDescription
386
+ self.addValue(
387
+ dict=services[subject],
388
+ attribute="shortDescription",
389
+ value=object
390
+ )
391
+ elif (
392
+ predicate == "http://nfdi4earth.de/ontology/chargeFree"
393
+ ): # chargeFree //BOOLEAN
394
+ if object == "1":
395
+ services[subject]["chargeFree"] = True
396
+ elif object == "0":
397
+ services[subject]["chargeFree"] = False
398
+ elif (
399
+ predicate == "http://nfdi4earth.de/ontology/nonProfit"
400
+ ): # nonProfit //BOOLEAN
401
+ if object == "1":
402
+ services[subject]["nonProfit"] = True
403
+ elif object == "0":
404
+ services[subject]["nonProfit"] = False
405
+ elif (
406
+ predicate == "http://nfdi4earth.de/ontology/adFree"
407
+ ): # adFree //BOOLEAN
408
+ if object == "1":
409
+ services[subject]["adFree"] = True
410
+ elif object == "0":
411
+ services[subject]["adFree"] = False
412
+ elif (
413
+ predicate == "http://nfdi4earth.de/ontology/fees"
414
+ ): # fees
415
+ self.addValue(
416
+ dict=services[subject],
417
+ attribute="fees",
418
+ value=object
419
+ )
420
+ elif (
421
+ predicate == "http://nfdi4earth.de/ontology/serviceAccessType"
422
+ ): # serviceAccessType
423
+ self.addValue(
424
+ dict=services[subject],
425
+ attribute="serviceAccessType",
426
+ value=object
427
+ )
428
+ elif (
429
+ predicate == "http://nfdi4earth.de/ontology/logo"
430
+ ): # logo
431
+ self.addValue(
432
+ dict=services[subject],
433
+ attribute="logo",
434
+ value=object
435
+ )
436
+ elif (
437
+ predicate == "http://nfdi4earth.de/ontology/userEnablement"
438
+ ): # userEnablement
439
+ self.addValue(
440
+ dict=services[subject],
441
+ attribute="userEnablement",
442
+ value=object
443
+ )
444
+ elif (
445
+ predicate == "http://nfdi4earth.de/ontology/serviceEnablement"
446
+ ): # serviceEnablement
447
+ self.addValue(
448
+ dict=services[subject],
449
+ attribute="serviceEnablement",
450
+ value=object
451
+ )
452
+ elif (
453
+ predicate == "http://nfdi4earth.de/ontology/personalDataProcessingAndStorage"
454
+ ): # personalDataProcessingAndStorage
455
+ self.addValue(
456
+ dict=services[subject],
457
+ attribute="personalDataProcessingAndStorage",
458
+ value=object
459
+ )
460
+ elif (
461
+ predicate == "http://nfdi4earth.de/ontology/dataProtectionAndBackup"
462
+ ): # dataProtectionAndBackup
463
+ self.addValue(
464
+ dict=services[subject],
465
+ attribute="dataProtectionAndBackup",
466
+ value=object
467
+ )
468
+ elif (
469
+ predicate == "http://nfdi4earth.de/ontology/securityIncidentContact"
470
+ ): # securityIncidentContact //ndoID
471
+ self.addValue(
472
+ dict=services[subject],
473
+ attribute="securityIncidentContact",
474
+ value=object
475
+ )
476
+ elif (
477
+ predicate == "http://nfdi4earth.de/ontology/servicePrivacyPolicy"
478
+ ): # servicePrivacyPolicy
479
+ self.addValue(
480
+ dict=services[subject],
481
+ attribute="servicePrivacyPolicy",
482
+ value=object
483
+ )
484
+ elif (
485
+ predicate == "http://nfdi4earth.de/ontology/businessModel"
486
+ ): # businessModel
487
+ self.addValue(
488
+ dict=services[subject],
489
+ attribute="businessModel",
490
+ value=object
491
+ )
492
+ elif (
493
+ predicate == "http://nfdi4earth.de/ontology/GDPRCompliant"
494
+ ): # GDPRCompliant //BOOLEAN
495
+ if object == "1":
496
+ services[subject]["GDPRCompliant"] = True
497
+ elif object == "0":
498
+ services[subject]["GDPRCompliant"] = False
499
+ elif (
500
+ predicate == "http://nfdi4earth.de/ontology/servicePublicationConsent"
501
+ ): # servicePublicationConsent //BOOLEAN
502
+ if object == "1":
503
+ services[subject]["servicePublicationConsent"] = True
504
+ elif object == "0":
505
+ services[subject]["servicePublicationConsent"] = False
506
+ elif (
507
+ predicate == "http://nfdi4earth.de/ontology/contactWithPortfolioManagement"
508
+ ): # contactWithPortfolioManagement
509
+ self.addValue(
510
+ dict=services[subject],
511
+ attribute="contactWithPortfolioManagement",
512
+ value=object
513
+ )
514
+ elif (
515
+ predicate == "http://nfdi4earth.de/ontology/limitations"
516
+ ): # limitations
517
+ self.addValue(
518
+ dict=services[subject],
519
+ attribute="limitations",
520
+ value=object
521
+ )
522
+ elif predicate == "http://nfdi4earth.de/ontology/tangibleKPI":
523
+ if "tangibleKPI_kpiType" in hit:
524
+ self.addValue(
525
+ services[subject],
526
+ "tangibleKPI" + self.flatten_separator + "kpiType",
527
+ hit["tangibleKPI_kpiType"]["value"],
528
+ )
529
+ if "tangibleKPI_kpiValue" in hit:
530
+ self.addValue(
531
+ services[subject],
532
+ "tangibleKPI" + self.flatten_separator + "kpiValue",
533
+ hit["tangibleKPI_kpiValue"]["value"],
534
+ )
535
+ if "tangibleKPI_kpiNotes" in hit:
536
+ self.addValue(
537
+ services[subject],
538
+ "tangibleKPI" + self.flatten_separator + "kpiNotes",
539
+ hit["tangibleKPI_kpiNotes"]["value"],
540
+ )
541
+ elif (
542
+ predicate == "http://nfdi4earth.de/ontology/idHostingInstitution"
543
+ ): #idHostingInstitution #hostingInstitution_name #isN4EOperated
544
+ host_rorID = object
545
+ services[subject]["idHostingInstitution"] = host_rorID #idHostingInstitution
546
+
547
+ n4e_organization = self.n4e_organizations_repo.get_n4e_organization_by_rorID(host_rorID) #check if rorID belongs to a organization that is n4e member
548
+ if n4e_organization is not None:
549
+ services[subject]["isN4EOperated"] = True #isN4EOperated
550
+ services[subject]["hostingInstitution_name"] = n4e_organization["name"] #hostingInstitution_name currently only for n4e operated services
551
+ #only index if `True`, do not index if `False`
@@ -7,7 +7,7 @@ from utils import sparql
7
7
  log = logging.getLogger(__name__)
8
8
 
9
9
 
10
- class Softwaresourcecode_Harvester(Harvester):
10
+ class Software_Harvester(Harvester):
11
11
  sparql_query = """
12
12
  PREFIX foaf: <http://xmlns.com/foaf/0.1/>
13
13
  prefix n4e: <http://nfdi4earth.de/ontology/>
@@ -17,25 +17,25 @@ class Softwaresourcecode_Harvester(Harvester):
17
17
  SELECT ?subject ?predicate ?object ?sourceSystem_homepage ?sourceSystem_title
18
18
  WHERE {
19
19
  {
20
- ?subject rdf:type <http://schema.org/SoftwareSourceCode>.
20
+ ?subject rdf:type n4e:Software.
21
21
  ?subject ?predicate ?object
22
22
  FILTER (?predicate NOT IN (<http://schema.org/publisher>, <http://schema.org/audience>))
23
23
  }
24
24
  UNION{
25
25
  VALUES ?predicate { <http://schema.org/publisher> }
26
- ?subject rdf:type <http://schema.org/SoftwareSourceCode>;
26
+ ?subject rdf:type n4e:Software;
27
27
  <http://schema.org/publisher> ?publisher.
28
28
  ?publisher <http://schema.org/name> ?object.
29
29
  }
30
30
  UNION{
31
31
  VALUES ?predicate { <http://schema.org/audience> }
32
- ?subject rdf:type <http://schema.org/SoftwareSourceCode>;
32
+ ?subject rdf:type n4e:Software;
33
33
  <http://schema.org/audience> ?audience.
34
34
  ?audience dct:title ?object.
35
35
  }
36
36
  UNION{
37
37
  VALUES ?predicate { n4e:sourceSystem }
38
- ?subject rdf:type <http://schema.org/SoftwareSourceCode>;
38
+ ?subject rdf:type n4e:Software;
39
39
  n4e:sourceSystem ?object.
40
40
  optional {?object dct:title ?sourceSystem_title.}
41
41
  optional {?object foaf:homepage ?sourceSystem_homepage.}
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="OneStop4All-Indexer",
5
- version="2.8.0.dev12",
5
+ version="2.8.0.dev14",
6
6
  description="Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index",
7
7
  author="Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer",
8
8
  author_email="m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de",
@@ -55,7 +55,7 @@ def debug():
55
55
  "Organization",
56
56
  "Repository",
57
57
  "Service",
58
- "Softwaresourcecode",
58
+ "Software",
59
59
  ],
60
60
  case_sensitive=False,
61
61
  ),
@@ -16,7 +16,7 @@ from harvesters import (
16
16
  Repository_Harvester,
17
17
  Organization_Harvester,
18
18
  Article_Harvester,
19
- Softwaresourcecode_Harvester,
19
+ Software_Harvester,
20
20
  Learningresource_Harvester,
21
21
  Metadatastandard_Harvester,
22
22
  Document_Harvester,
@@ -73,7 +73,7 @@ def get_harvester(
73
73
  get_repo("links_repo"),
74
74
  get_repo("themes_repo"),
75
75
  ),
76
- "Softwaresourcecode": lambda: Softwaresourcecode_Harvester(
76
+ "Software": lambda: Software_Harvester(
77
77
  get_repo("persons_repo"),
78
78
  get_repo("links_repo"),
79
79
  ),
@@ -11,6 +11,7 @@ log = logging.getLogger(__name__)
11
11
 
12
12
 
13
13
  class Solr(object):
14
+
14
15
  def __init__(
15
16
  self,
16
17
  solr_url: Optional[str] = None,
@@ -19,42 +20,29 @@ class Solr(object):
19
20
  always_commit: bool = False,
20
21
  timeout: int = 5 * 60,
21
22
  ) -> None:
22
- self.solr_url = solr_url
23
- self.solr_core = solr_core
24
- self.auth = solr_auth
23
+ self.solr_url = solr_url if solr_url else config["solr_url"]
24
+ self.solr_core = solr_core if solr_url else config["solr_url"]
25
+ self.auth = solr_auth if solr_url else config["solr_url"]
25
26
  self.client = SolrClient(
26
27
  self.endpoint,
27
28
  auth=self.authentication,
28
29
  always_commit=always_commit,
29
30
  timeout=timeout,
30
31
  )
32
+ # test connection to solr endpoint
33
+ # -> raises exception if connection fails
34
+ self.client.ping()
31
35
 
32
36
  @property
33
37
  def endpoint(self):
34
- # using config-values (by default) OR
35
- # overwrite with initially given values
36
- # TODO: check if endpoint is reachable, if not raise error
37
- solr_url = self.solr_url if self.solr_url else config["solr_url"]
38
- log.debug(f"configured solr url: {solr_url}")
39
- if solr_url.startswith("http://"):
40
- raise ValueError(
41
- "Insecure solr url configured. "
42
- "Please check your configuration and use https."
43
- )
44
- solr_core = self.solr_core if self.solr_core else config["solr_core"]
45
- log.debug(f"configured solr core: {solr_core}")
46
- _endpoint = urljoin(solr_url, solr_core)
47
- log.info(f"initialized solr client with endpoint: {_endpoint}")
48
- return _endpoint
38
+ endpoint = urljoin(self.solr_url, self.solr_core)
39
+ log.info(f"initialized solr client with endpoint: {endpoint}")
40
+ return endpoint
49
41
 
50
42
  @property
51
43
  def authentication(self):
52
- if self.auth or config["solr_auth"]:
53
- username, password = (
54
- self.auth.split(":")
55
- if self.auth
56
- else config["solr_auth"].split(":")
57
- )
44
+ if self.auth:
45
+ username, password = self.auth.split(":")
58
46
  return HTTPBasicAuth(username, password)
59
47
 
60
48
  def index_documents(
@@ -69,7 +57,8 @@ class Solr(object):
69
57
  if len(documents) <= offset + batch_size:
70
58
  batch = documents[offset:]
71
59
  else:
72
- batch = documents[offset : (offset + batch_size)]
60
+ limit = offset + batch_size
61
+ batch = documents[offset:limit]
73
62
 
74
63
  if len(batch) == 0:
75
64
  break
@@ -1,224 +0,0 @@
1
- import logging
2
- from .harvester_base import Harvester
3
- from utils import sparql
4
- from data_repositories.repository_n4eorganization import RepositoryN4EOrganization
5
-
6
- log = logging.getLogger(__name__)
7
-
8
- #harvester for Services https://nfdi4earth.pages.rwth-aachen.de/knowledgehub/nfdi4earth-kh-schema/Service/
9
- class Service_Harvester(Harvester):
10
- sparql_query = """
11
- PREFIX fo: <http://www.w3.org/1999/XSL/Format#>
12
- PREFIX foaf: <http://xmlns.com/foaf/0.1/>
13
- PREFIX dc: <http://purl.org/dc/elements/1.1/>
14
- PREFIX vcard: <http://www.w3.org/2006/vcard/ns#>
15
- prefix dcat: <http://www.w3.org/ns/dcat#>
16
- prefix dct: <http://purl.org/dc/terms/>
17
- prefix n4e: <http://nfdi4earth.de/ontology/>
18
- prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
19
-
20
- SELECT ?subject ?predicate ?object ?contactpoint_email ?contactpoint_url
21
- {
22
- {
23
- ?subject rdf:type <http://schema.org/Service>.
24
- ?subject ?predicate ?object
25
- FILTER (?predicate NOT IN (dcat:contactPoint, n4e:sourceSystem))
26
- }
27
- UNION {
28
- VALUES ?predicate { dcat:contactPoint }
29
- ?subject rdf:type <http://schema.org/Service>;
30
- dcat:contactPoint ?object.
31
- optional { ?object vcard:hasEmail ?contactpoint_email. }
32
- optional { ?object vcard:hasURL ?contactpoint_url. }
33
- }
34
- }
35
- ORDER BY ?subject ?predicate
36
- OFFSET %d
37
- LIMIT %d
38
- """
39
-
40
- def __init__(self, n4e_organizations_repo: RepositoryN4EOrganization, **kw):
41
- super().__init__(**kw)
42
- self.n4e_organizations_repo = n4e_organizations_repo
43
-
44
- def harvest(self):
45
- limit = 5000
46
- # convert to list of repo documents for indexing
47
- services = {} # repos dict
48
-
49
- i = 0
50
- hits = {}
51
- # split sparql query
52
- while True:
53
- query_splitted = self.sparql_query % (limit * i, limit)
54
- hits = sparql.execute_query(self.sparql_endpoint, query_splitted)
55
- self.parse_response(hits, services)
56
-
57
- i = i + 1
58
-
59
- if len(hits) < limit:
60
- break
61
-
62
- services_list = []
63
- for (
64
- key,
65
- value,
66
- ) in services.items(): # transform repos dict to list for indexing
67
- service = value
68
- if "mainTitle" not in service and len(service["name"]) > 0:
69
- service["mainTitle"] = service["name"][0]
70
- service["mainTitle"] = service["mainTitle"].strip()
71
- services_list.append(service)
72
-
73
- return services_list
74
-
75
- def parse_response(
76
- self, hits, services
77
- ):
78
- for hit in hits:
79
- subject = hit["subject"]["value"]
80
- predicate = hit["predicate"]["value"]
81
- object = hit["object"]["value"]
82
-
83
- if subject not in services:
84
- services[subject] = {}
85
- services[subject]["uri"] = subject
86
- services[subject]["id"] = self.getID(
87
- subject
88
- ) # use ID from triple store also in Solr to ensure stable IDs
89
-
90
- if predicate == "http://schema.org/name": # name
91
- if (
92
- "xml:lang" not in hit["object"]
93
- or hit["object"]["xml:lang"] == "en"
94
- ): # use international name for orga name
95
- self.addValue(
96
- dict=services[subject],
97
- attribute="name",
98
- value=object,
99
- )
100
- services[subject]["mainTitle"] = object # mainTitle
101
- if (
102
- "name" not in services[subject]
103
- or object not in services[subject]["name"]
104
- ): # prevent duplicates
105
- self.addValue(
106
- dict=services[subject],
107
- attribute="name",
108
- value=object,
109
- )
110
- elif predicate == "http://schema.org/description": # description
111
- self.addValue(
112
- dict=services[subject],
113
- attribute="description",
114
- value=object,
115
- )
116
- elif predicate == "http://schema.org/additionalType": # additionalType
117
- self.addValue(
118
- dict=services[subject],
119
- attribute="additionalType",
120
- value=object,
121
- )
122
- elif predicate == "http://schema.org/keywords": # keyword
123
- self.addValue(
124
- dict=services[subject],
125
- attribute="keyword",
126
- value=object,
127
- )
128
- elif predicate == "http://schema.org/url": # url
129
- self.addValue(
130
- dict=services[subject],
131
- attribute="url",
132
- value=object,
133
- )
134
- elif predicate == "http://nfdi4earth.de/ontology/serviceType": # serviceType
135
- services[subject]["serviceType"] = object
136
- elif predicate == "http://nfdi4earth.de/ontology/serviceHost": # serviceHost
137
- services[subject]["serviceHost"] = object
138
- elif predicate == "http://www.w3.org/ns/dcat#contactPoint": # contactPoint
139
- if "contactpoint_email" in hit:
140
- self.addValue(
141
- services[subject],
142
- "contactPoint" + self.flatten_separator + "email",
143
- hit["contactpoint_email"]["value"],
144
- )
145
- if "contactpoint_url" in hit:
146
- self.addValue(
147
- services[subject],
148
- "contactPoint" + self.flatten_separator + "url",
149
- hit["contactpoint_url"]["value"],
150
- )
151
- elif (
152
- predicate == "http://nfdi4earth.de/ontology/sourceSystemID"
153
- ): # sourceSystemID
154
- self.addValue(
155
- dict=services[subject],
156
- attribute="sourceSystem" + self.flatten_separator + "id",
157
- value=object,
158
- )
159
- elif (
160
- predicate == "http://nfdi4earth.de/ontology/sourceSystem"
161
- ): # sourceSystem
162
- if "sourceSystem_homepage" in hit:
163
- self.addValue(
164
- services[subject],
165
- "sourceSystem" + self.flatten_separator + "homepage",
166
- hit["sourceSystem_homepage"]["value"],
167
- )
168
- if "sourceSystem_title" in hit:
169
- self.addValue(
170
- services[subject],
171
- "sourceSystem" + self.flatten_separator + "title",
172
- hit["sourceSystem_title"]["value"],
173
- )
174
- elif (
175
- predicate == "http://nfdi4earth.de/ontology/sourceSystemURL"
176
- ): # sourceSystemURL
177
- self.addValue(
178
- dict=services[subject],
179
- attribute="sourceSystemURL",
180
- value=object,
181
- )
182
- elif (
183
- predicate == "http://www.w3.org/2004/02/skos/core#altLabel"
184
- ): # altLabel
185
- self.addValue(
186
- dict=services[subject],
187
- attribute="altLabel",
188
- value=object,
189
- )
190
- elif (
191
- predicate == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
192
- ): # type
193
- self.addValue(
194
- dict=services[subject],
195
- attribute="type",
196
- value=object
197
- )
198
- elif (
199
- predicate == "http://nfdi4earth.de/ontology/serviceType"
200
- ): # serviceType
201
- self.addValue(
202
- dict=services[subject],
203
- attribute="serviceType",
204
- value=object
205
- )
206
- elif (
207
- predicate == "http://nfdi4earth.de/ontology/serviceCategory"
208
- ): # serviceCategory
209
- self.addValue(
210
- dict=services[subject],
211
- attribute="serviceCategory",
212
- value=object
213
- )
214
- elif (
215
- predicate == "http://nfdi4earth.de/ontology/idHostingInstitution"
216
- ): #idHostingInstitution #hostingInstitution_name #isN4EOperated
217
- host_rorID = object
218
- services[subject]["idHostingInstitution"] = host_rorID #idHostingInstitution
219
-
220
- n4e_organization = self.n4e_organizations_repo.get_n4e_organization_by_rorID(host_rorID) #check if rorID belongs to a organization that is n4e member
221
- if n4e_organization is not None:
222
- services[subject]["isN4EOperated"] = True #isN4EOperated
223
- services[subject]["hostingInstitution_name"] = n4e_organization["name"] #hostingInstitution_name currently only for n4e operated services
224
- #only index if `True`, do not index if `False`