OneStop4All-Indexer 2.8.0.dev10__tar.gz → 2.8.0.dev11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11/OneStop4All_Indexer.egg-info}/PKG-INFO +2 -1
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/requires.txt +1 -0
- {onestop4all_indexer-2.8.0.dev10/OneStop4All_Indexer.egg-info → onestop4all_indexer-2.8.0.dev11}/PKG-INFO +2 -1
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_dataset.py +0 -1
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/setup.py +5 -2
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/solr.py +14 -6
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/LICENSE +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/SOURCES.txt +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/dependency_links.txt +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/entry_points.txt +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/top_level.txt +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_base.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_n4eorganization.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_person.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_resource_links.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_theme.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_article.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_base.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_dataservice.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_document.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_learningresource.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_metadatastandards.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_organization.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_repository.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_service.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_softwaresourcecode.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/pyproject.toml +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/setup.cfg +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/__init__.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/cli.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/configs.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/harvest.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/sparql.py +0 -0
- {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OneStop4All-Indexer
|
|
3
|
-
Version: 2.8.0.dev10
|
|
3
|
+
Version: 2.8.0.dev11
|
|
4
4
|
Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
|
|
5
5
|
Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
|
|
6
6
|
Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
|
|
@@ -13,6 +13,7 @@ Requires-Dist: geomet~=1.1.0
|
|
|
13
13
|
Requires-Dist: shapely~=2.0.5
|
|
14
14
|
Provides-Extra: airflow
|
|
15
15
|
Requires-Dist: apache-airflow==3.1.7; extra == "airflow"
|
|
16
|
+
Requires-Dist: apache-airflow-providers-amazon[s3fs]; extra == "airflow"
|
|
16
17
|
Dynamic: author
|
|
17
18
|
Dynamic: author-email
|
|
18
19
|
Dynamic: license-file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: OneStop4All-Indexer
|
|
3
|
-
Version: 2.8.0.dev10
|
|
3
|
+
Version: 2.8.0.dev11
|
|
4
4
|
Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
|
|
5
5
|
Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
|
|
6
6
|
Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
|
|
@@ -13,6 +13,7 @@ Requires-Dist: geomet~=1.1.0
|
|
|
13
13
|
Requires-Dist: shapely~=2.0.5
|
|
14
14
|
Provides-Extra: airflow
|
|
15
15
|
Requires-Dist: apache-airflow==3.1.7; extra == "airflow"
|
|
16
|
+
Requires-Dist: apache-airflow-providers-amazon[s3fs]; extra == "airflow"
|
|
16
17
|
Dynamic: author
|
|
17
18
|
Dynamic: author-email
|
|
18
19
|
Dynamic: license-file
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_dataset.py
RENAMED
|
@@ -299,7 +299,6 @@ class Dataset_Harvester(HarvesterCordra):
|
|
|
299
299
|
value=val,
|
|
300
300
|
)
|
|
301
301
|
if "downloadURL" in distribution:
|
|
302
|
-
print(distribution["downloadURL"])
|
|
303
302
|
for download_url in distribution["downloadURL"]:
|
|
304
303
|
val = self.get_string_from_jsonld(
|
|
305
304
|
download_url, subject
|
|
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="OneStop4All-Indexer",
|
|
5
|
-
version="2.8.0.dev10",
|
|
5
|
+
version="2.8.0.dev11",
|
|
6
6
|
description="Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index",
|
|
7
7
|
author="Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer",
|
|
8
8
|
author_email="m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de",
|
|
@@ -16,7 +16,10 @@ setup(
|
|
|
16
16
|
"shapely ~= 2.0.5",
|
|
17
17
|
],
|
|
18
18
|
extras_require={
|
|
19
|
-
"airflow": ["apache-airflow==3.1.7"],
|
|
19
|
+
"airflow": [
|
|
20
|
+
"apache-airflow==3.1.7",
|
|
21
|
+
"apache-airflow-providers-amazon[s3fs]",
|
|
22
|
+
],
|
|
20
23
|
},
|
|
21
24
|
include_package_data=True,
|
|
22
25
|
entry_points={
|
|
@@ -57,7 +57,9 @@ class Solr(object):
|
|
|
57
57
|
)
|
|
58
58
|
return HTTPBasicAuth(username, password)
|
|
59
59
|
|
|
60
|
-
def index_documents(
|
|
60
|
+
def index_documents(
|
|
61
|
+
self, documents: List[Dict], commit=True, ping=True
|
|
62
|
+
) -> None:
|
|
61
63
|
# solr_endpoint = coreurl(solr_url, solr_core)
|
|
62
64
|
log.info(f"start indexing {len(documents)} documents")
|
|
63
65
|
batch_size = 50000
|
|
@@ -87,13 +89,19 @@ class Solr(object):
|
|
|
87
89
|
if len(batch) < batch_size:
|
|
88
90
|
break
|
|
89
91
|
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
92
|
+
if ping is True:
|
|
93
|
+
self.client.ping()
|
|
94
|
+
log.info("solr healtcheck successful")
|
|
95
|
+
|
|
96
|
+
if commit is True:
|
|
97
|
+
log.info("commit changes to index")
|
|
98
|
+
self.client.commit()
|
|
99
|
+
log.info("sucessfully commited changes to index")
|
|
95
100
|
log.info("finished indexing")
|
|
96
101
|
|
|
102
|
+
def commit(self):
|
|
103
|
+
self.client.commit()
|
|
104
|
+
|
|
97
105
|
def reset_index(self):
|
|
98
106
|
self.client.delete(q="*:*")
|
|
99
107
|
self.client.commit()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_article.py
RENAMED
|
File without changes
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_base.py
RENAMED
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_document.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|