OneStop4All-Indexer 2.8.0.dev9__tar.gz → 2.8.0.dev10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10/OneStop4All_Indexer.egg-info}/PKG-INFO +1 -1
- {onestop4all_indexer-2.8.0.dev9/OneStop4All_Indexer.egg-info → onestop4all_indexer-2.8.0.dev10}/PKG-INFO +1 -1
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/setup.py +1 -1
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/solr.py +34 -17
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/LICENSE +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/OneStop4All_Indexer.egg-info/SOURCES.txt +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/OneStop4All_Indexer.egg-info/dependency_links.txt +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/OneStop4All_Indexer.egg-info/entry_points.txt +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/OneStop4All_Indexer.egg-info/requires.txt +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/OneStop4All_Indexer.egg-info/top_level.txt +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/repository_base.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/repository_n4eorganization.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/repository_person.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/repository_resource_links.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/repository_theme.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_article.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_base.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_dataservice.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_dataset.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_document.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_learningresource.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_metadatastandards.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_organization.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_repository.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_service.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_softwaresourcecode.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/pyproject.toml +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/setup.cfg +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/cli.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/configs.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/harvest.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/sparql.py +0 -0
- {onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/utils/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OneStop4All-Indexer
|
|
3
|
-
Version: 2.8.0.dev9
|
|
3
|
+
Version: 2.8.0.dev10
|
|
4
4
|
Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
|
|
5
5
|
Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
|
|
6
6
|
Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OneStop4All-Indexer
|
|
3
|
-
Version: 2.8.0.dev9
|
|
3
|
+
Version: 2.8.0.dev10
|
|
4
4
|
Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
|
|
5
5
|
Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
|
|
6
6
|
Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
|
|
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="OneStop4All-Indexer",
|
|
5
|
-
version="2.8.0.dev9",
|
|
5
|
+
version="2.8.0.dev10",
|
|
6
6
|
description="Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index",
|
|
7
7
|
author="Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer",
|
|
8
8
|
author_email="m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de",
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
-
from pysolr import Solr as
|
|
3
|
+
from pysolr import Solr as SolrClient
|
|
4
4
|
from requests.auth import HTTPBasicAuth
|
|
5
5
|
from typing import List, Dict, Optional, Literal
|
|
6
|
+
from urllib.parse import urljoin
|
|
6
7
|
|
|
7
8
|
from utils import config
|
|
8
9
|
|
|
@@ -21,7 +22,7 @@ class Solr(object):
|
|
|
21
22
|
self.solr_url = solr_url
|
|
22
23
|
self.solr_core = solr_core
|
|
23
24
|
self.auth = solr_auth
|
|
24
|
-
self.
|
|
25
|
+
self.client = SolrClient(
|
|
25
26
|
self.endpoint,
|
|
26
27
|
auth=self.authentication,
|
|
27
28
|
always_commit=always_commit,
|
|
@@ -34,8 +35,17 @@ class Solr(object):
|
|
|
34
35
|
# overwrite with initially given values
|
|
35
36
|
# TODO: check if endpoint is reachable, if not raise error
|
|
36
37
|
solr_url = self.solr_url if self.solr_url else config["solr_url"]
|
|
38
|
+
log.debug(f"configured solr url: {solr_url}")
|
|
39
|
+
if solr_url.startswith("http://"):
|
|
40
|
+
raise ValueError(
|
|
41
|
+
"Insecure solr url configured. "
|
|
42
|
+
"Please check your configuration and use https."
|
|
43
|
+
)
|
|
37
44
|
solr_core = self.solr_core if self.solr_core else config["solr_core"]
|
|
38
|
-
|
|
45
|
+
log.debug(f"configured solr core: {solr_core}")
|
|
46
|
+
_endpoint = urljoin(solr_url, solr_core)
|
|
47
|
+
log.info(f"initialized solr client with endpoint: {_endpoint}")
|
|
48
|
+
return _endpoint
|
|
39
49
|
|
|
40
50
|
@property
|
|
41
51
|
def authentication(self):
|
|
@@ -49,10 +59,7 @@ class Solr(object):
|
|
|
49
59
|
|
|
50
60
|
def index_documents(self, documents: List[Dict]) -> None:
|
|
51
61
|
# solr_endpoint = coreurl(solr_url, solr_core)
|
|
52
|
-
log.info(
|
|
53
|
-
f"start indexing {len(documents)} documents to {self.endpoint}"
|
|
54
|
-
)
|
|
55
|
-
|
|
62
|
+
log.info(f"start indexing {len(documents)} documents")
|
|
56
63
|
batch_size = 50000
|
|
57
64
|
offset = 0
|
|
58
65
|
iteration = 0
|
|
@@ -65,11 +72,11 @@ class Solr(object):
|
|
|
65
72
|
if len(batch) == 0:
|
|
66
73
|
break
|
|
67
74
|
|
|
68
|
-
self.
|
|
75
|
+
self.client.ping()
|
|
69
76
|
log.info("solr healtcheck successful")
|
|
70
77
|
try:
|
|
71
78
|
log.info(f"start adding batch of {len(batch)} documents")
|
|
72
|
-
self.
|
|
79
|
+
self.client.add(batch)
|
|
73
80
|
log.info(f"finished adding batch of {len(batch)} documents")
|
|
74
81
|
except Exception as e:
|
|
75
82
|
log.error(e)
|
|
@@ -81,16 +88,26 @@ class Solr(object):
|
|
|
81
88
|
break
|
|
82
89
|
|
|
83
90
|
log.info("commit changes to index")
|
|
84
|
-
self.
|
|
91
|
+
self.client.ping()
|
|
85
92
|
log.info("solr healtcheck successful")
|
|
86
|
-
self.
|
|
93
|
+
self.client.commit()
|
|
87
94
|
log.info("sucessfully commited changes to index")
|
|
88
95
|
log.info("finished indexing")
|
|
89
96
|
|
|
90
97
|
def reset_index(self):
|
|
91
|
-
|
|
92
|
-
self.
|
|
93
|
-
|
|
98
|
+
self.client.delete(q="*:*")
|
|
99
|
+
self.client.commit()
|
|
100
|
+
|
|
101
|
+
def exists(self, document_id: str) -> bool:
|
|
102
|
+
result = self.search(f"id:{document_id}")
|
|
103
|
+
return result.hits > 0
|
|
104
|
+
|
|
105
|
+
def search(self, *args, **kwargs):
|
|
106
|
+
return self.client.search(*args, **kwargs)
|
|
107
|
+
|
|
108
|
+
def delete(self, *args, **kwargs):
|
|
109
|
+
self.client.delete(*args, **kwargs)
|
|
110
|
+
self.client.commit()
|
|
94
111
|
|
|
95
112
|
|
|
96
113
|
class SolrValidator(Solr):
|
|
@@ -105,7 +122,7 @@ class SolrValidator(Solr):
|
|
|
105
122
|
}
|
|
106
123
|
|
|
107
124
|
try:
|
|
108
|
-
self.
|
|
125
|
+
self.client.add(dataset)
|
|
109
126
|
return True
|
|
110
127
|
except Exception as e:
|
|
111
128
|
log.error(e)
|
|
@@ -114,5 +131,5 @@ class SolrValidator(Solr):
|
|
|
114
131
|
def close(self):
|
|
115
132
|
# technically not necessary to check existence of test document,
|
|
116
133
|
# but when urls are misconfigured, this will throw an error
|
|
117
|
-
if self.
|
|
118
|
-
self.
|
|
134
|
+
if self.exists("geomValidationTest"):
|
|
135
|
+
self.delete(id="geomValidationTest")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/data_repositories/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_article.py
RENAMED
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_base.py
RENAMED
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_dataset.py
RENAMED
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_document.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev9 → onestop4all_indexer-2.8.0.dev10}/harvesters/harvester_service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|