OneStop4All-Indexer 2.8.0.dev10__tar.gz → 2.8.0.dev11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11/OneStop4All_Indexer.egg-info}/PKG-INFO +2 -1
  2. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/requires.txt +1 -0
  3. {onestop4all_indexer-2.8.0.dev10/OneStop4All_Indexer.egg-info → onestop4all_indexer-2.8.0.dev11}/PKG-INFO +2 -1
  4. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_dataset.py +0 -1
  5. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/setup.py +5 -2
  6. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/solr.py +14 -6
  7. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/LICENSE +0 -0
  8. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/SOURCES.txt +0 -0
  9. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/dependency_links.txt +0 -0
  10. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/entry_points.txt +0 -0
  11. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/OneStop4All_Indexer.egg-info/top_level.txt +0 -0
  12. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/__init__.py +0 -0
  13. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_base.py +0 -0
  14. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_n4eorganization.py +0 -0
  15. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_person.py +0 -0
  16. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_resource_links.py +0 -0
  17. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/data_repositories/repository_theme.py +0 -0
  18. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/__init__.py +0 -0
  19. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_article.py +0 -0
  20. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_base.py +0 -0
  21. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_dataservice.py +0 -0
  22. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_document.py +0 -0
  23. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_learningresource.py +0 -0
  24. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_metadatastandards.py +0 -0
  25. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_organization.py +0 -0
  26. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_repository.py +0 -0
  27. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_service.py +0 -0
  28. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/harvesters/harvester_softwaresourcecode.py +0 -0
  29. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/pyproject.toml +0 -0
  30. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/setup.cfg +0 -0
  31. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/__init__.py +0 -0
  32. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/cli.py +0 -0
  33. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/configs.py +0 -0
  34. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/harvest.py +0 -0
  35. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/sparql.py +0 -0
  36. {onestop4all_indexer-2.8.0.dev10 → onestop4all_indexer-2.8.0.dev11}/utils/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.8.0.dev10
3
+ Version: 2.8.0.dev11
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -13,6 +13,7 @@ Requires-Dist: geomet~=1.1.0
13
13
  Requires-Dist: shapely~=2.0.5
14
14
  Provides-Extra: airflow
15
15
  Requires-Dist: apache-airflow==3.1.7; extra == "airflow"
16
+ Requires-Dist: apache-airflow-providers-amazon[s3fs]; extra == "airflow"
16
17
  Dynamic: author
17
18
  Dynamic: author-email
18
19
  Dynamic: license-file
@@ -7,3 +7,4 @@ shapely~=2.0.5
7
7
 
8
8
  [airflow]
9
9
  apache-airflow==3.1.7
10
+ apache-airflow-providers-amazon[s3fs]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: OneStop4All-Indexer
3
- Version: 2.8.0.dev10
3
+ Version: 2.8.0.dev11
4
4
  Summary: Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index
5
5
  Author: Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer
6
6
  Author-email: m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de
@@ -13,6 +13,7 @@ Requires-Dist: geomet~=1.1.0
13
13
  Requires-Dist: shapely~=2.0.5
14
14
  Provides-Extra: airflow
15
15
  Requires-Dist: apache-airflow==3.1.7; extra == "airflow"
16
+ Requires-Dist: apache-airflow-providers-amazon[s3fs]; extra == "airflow"
16
17
  Dynamic: author
17
18
  Dynamic: author-email
18
19
  Dynamic: license-file
@@ -299,7 +299,6 @@ class Dataset_Harvester(HarvesterCordra):
299
299
  value=val,
300
300
  )
301
301
  if "downloadURL" in distribution:
302
- print(distribution["downloadURL"])
303
302
  for download_url in distribution["downloadURL"]:
304
303
  val = self.get_string_from_jsonld(
305
304
  download_url, subject
@@ -2,7 +2,7 @@ from setuptools import find_packages, setup
2
2
 
3
3
  setup(
4
4
  name="OneStop4All-Indexer",
5
- version="2.8.0.dev10",
5
+ version="2.8.0.dev11",
6
6
  description="Library to harvest data from NFDI4Earth-KnowledgeHub to OneStop4All-Index",
7
7
  author="Markus Konkol, Arne Vogt, Tom Niers, Ralf Klammer",
8
8
  author_email="m.konkol@52north.org, a.vogt@52north.org, tom.niers@tu-dresden.de, ralf.klammer@tu-dresden.de",
@@ -16,7 +16,10 @@ setup(
16
16
  "shapely ~= 2.0.5",
17
17
  ],
18
18
  extras_require={
19
- "airflow": ["apache-airflow==3.1.7"],
19
+ "airflow": [
20
+ "apache-airflow==3.1.7",
21
+ "apache-airflow-providers-amazon[s3fs]",
22
+ ],
20
23
  },
21
24
  include_package_data=True,
22
25
  entry_points={
@@ -57,7 +57,9 @@ class Solr(object):
57
57
  )
58
58
  return HTTPBasicAuth(username, password)
59
59
 
60
- def index_documents(self, documents: List[Dict]) -> None:
60
+ def index_documents(
61
+ self, documents: List[Dict], commit=True, ping=True
62
+ ) -> None:
61
63
  # solr_endpoint = coreurl(solr_url, solr_core)
62
64
  log.info(f"start indexing {len(documents)} documents")
63
65
  batch_size = 50000
@@ -87,13 +89,19 @@ class Solr(object):
87
89
  if len(batch) < batch_size:
88
90
  break
89
91
 
90
- log.info("commit changes to index")
91
- self.client.ping()
92
- log.info("solr healtcheck successful")
93
- self.client.commit()
94
- log.info("sucessfully commited changes to index")
92
+ if ping is True:
93
+ self.client.ping()
94
+ log.info("solr healtcheck successful")
95
+
96
+ if commit is True:
97
+ log.info("commit changes to index")
98
+ self.client.commit()
99
+ log.info("sucessfully commited changes to index")
95
100
  log.info("finished indexing")
96
101
 
102
+ def commit(self):
103
+ self.client.commit()
104
+
97
105
  def reset_index(self):
98
106
  self.client.delete(q="*:*")
99
107
  self.client.commit()