mex-common 0.62.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. mex_common-0.62.0/LICENSE +21 -0
  2. mex_common-0.62.0/PKG-INFO +150 -0
  3. mex_common-0.62.0/README.md +94 -0
  4. mex_common-0.62.0/mex/__init__.py +3 -0
  5. mex_common-0.62.0/mex/common/__init__.py +8 -0
  6. mex_common-0.62.0/mex/common/backend_api/__init__.py +0 -0
  7. mex_common-0.62.0/mex/common/backend_api/connector.py +399 -0
  8. mex_common-0.62.0/mex/common/cli.py +188 -0
  9. mex_common-0.62.0/mex/common/connector/__init__.py +8 -0
  10. mex_common-0.62.0/mex/common/connector/base.py +50 -0
  11. mex_common-0.62.0/mex/common/connector/http.py +175 -0
  12. mex_common-0.62.0/mex/common/connector/utils.py +14 -0
  13. mex_common-0.62.0/mex/common/context.py +68 -0
  14. mex_common-0.62.0/mex/common/exceptions.py +67 -0
  15. mex_common-0.62.0/mex/common/extract.py +84 -0
  16. mex_common-0.62.0/mex/common/fields.py +171 -0
  17. mex_common-0.62.0/mex/common/identity/__init__.py +10 -0
  18. mex_common-0.62.0/mex/common/identity/backend_api.py +66 -0
  19. mex_common-0.62.0/mex/common/identity/base.py +29 -0
  20. mex_common-0.62.0/mex/common/identity/memory.py +117 -0
  21. mex_common-0.62.0/mex/common/identity/models.py +15 -0
  22. mex_common-0.62.0/mex/common/identity/registry.py +46 -0
  23. mex_common-0.62.0/mex/common/ldap/__init__.py +36 -0
  24. mex_common-0.62.0/mex/common/ldap/connector.py +258 -0
  25. mex_common-0.62.0/mex/common/ldap/extract.py +135 -0
  26. mex_common-0.62.0/mex/common/ldap/models.py +50 -0
  27. mex_common-0.62.0/mex/common/ldap/transform.py +215 -0
  28. mex_common-0.62.0/mex/common/logging.py +72 -0
  29. mex_common-0.62.0/mex/common/merged/__init__.py +0 -0
  30. mex_common-0.62.0/mex/common/merged/main.py +158 -0
  31. mex_common-0.62.0/mex/common/merged/utils.py +36 -0
  32. mex_common-0.62.0/mex/common/models/__init__.py +758 -0
  33. mex_common-0.62.0/mex/common/models/access_platform.py +210 -0
  34. mex_common-0.62.0/mex/common/models/activity.py +243 -0
  35. mex_common-0.62.0/mex/common/models/base/__init__.py +0 -0
  36. mex_common-0.62.0/mex/common/models/base/container.py +18 -0
  37. mex_common-0.62.0/mex/common/models/base/extracted_data.py +100 -0
  38. mex_common-0.62.0/mex/common/models/base/filter.py +30 -0
  39. mex_common-0.62.0/mex/common/models/base/mapping.py +36 -0
  40. mex_common-0.62.0/mex/common/models/base/merged_item.py +5 -0
  41. mex_common-0.62.0/mex/common/models/base/model.py +188 -0
  42. mex_common-0.62.0/mex/common/models/base/preview_item.py +5 -0
  43. mex_common-0.62.0/mex/common/models/base/rules.py +24 -0
  44. mex_common-0.62.0/mex/common/models/base/schema.py +20 -0
  45. mex_common-0.62.0/mex/common/models/bibliographic_resource.py +389 -0
  46. mex_common-0.62.0/mex/common/models/consent.py +176 -0
  47. mex_common-0.62.0/mex/common/models/contact_point.py +150 -0
  48. mex_common-0.62.0/mex/common/models/distribution.py +221 -0
  49. mex_common-0.62.0/mex/common/models/organization.py +253 -0
  50. mex_common-0.62.0/mex/common/models/organizational_unit.py +187 -0
  51. mex_common-0.62.0/mex/common/models/person.py +210 -0
  52. mex_common-0.62.0/mex/common/models/primary_source.py +191 -0
  53. mex_common-0.62.0/mex/common/models/resource.py +435 -0
  54. mex_common-0.62.0/mex/common/models/variable.py +212 -0
  55. mex_common-0.62.0/mex/common/models/variable_group.py +157 -0
  56. mex_common-0.62.0/mex/common/orcid/__init__.py +0 -0
  57. mex_common-0.62.0/mex/common/orcid/connector.py +72 -0
  58. mex_common-0.62.0/mex/common/orcid/extract.py +101 -0
  59. mex_common-0.62.0/mex/common/orcid/models.py +69 -0
  60. mex_common-0.62.0/mex/common/orcid/transform.py +44 -0
  61. mex_common-0.62.0/mex/common/organigram/__init__.py +36 -0
  62. mex_common-0.62.0/mex/common/organigram/extract.py +118 -0
  63. mex_common-0.62.0/mex/common/organigram/helpers.py +76 -0
  64. mex_common-0.62.0/mex/common/organigram/models.py +14 -0
  65. mex_common-0.62.0/mex/common/organigram/transform.py +79 -0
  66. mex_common-0.62.0/mex/common/primary_source/__init__.py +39 -0
  67. mex_common-0.62.0/mex/common/primary_source/extract.py +24 -0
  68. mex_common-0.62.0/mex/common/primary_source/helpers.py +22 -0
  69. mex_common-0.62.0/mex/common/primary_source/models.py +9 -0
  70. mex_common-0.62.0/mex/common/primary_source/transform.py +64 -0
  71. mex_common-0.62.0/mex/common/settings.py +241 -0
  72. mex_common-0.62.0/mex/common/sinks/__init__.py +0 -0
  73. mex_common-0.62.0/mex/common/sinks/backend_api.py +96 -0
  74. mex_common-0.62.0/mex/common/sinks/base.py +25 -0
  75. mex_common-0.62.0/mex/common/sinks/ndjson.py +65 -0
  76. mex_common-0.62.0/mex/common/sinks/registry.py +79 -0
  77. mex_common-0.62.0/mex/common/testing/__init__.py +3 -0
  78. mex_common-0.62.0/mex/common/testing/joker.py +22 -0
  79. mex_common-0.62.0/mex/common/testing/plugin.py +259 -0
  80. mex_common-0.62.0/mex/common/testing/test_data/orcid_multiple_matches.json +75 -0
  81. mex_common-0.62.0/mex/common/testing/test_data/orcid_person_jayne_raw.json +38 -0
  82. mex_common-0.62.0/mex/common/testing/test_data/orcid_person_raw.json +38 -0
  83. mex_common-0.62.0/mex/common/testing/test_data/wikidata_organization_raw.json +228 -0
  84. mex_common-0.62.0/mex/common/transform.py +127 -0
  85. mex_common-0.62.0/mex/common/types/__init__.py +249 -0
  86. mex_common-0.62.0/mex/common/types/email.py +42 -0
  87. mex_common-0.62.0/mex/common/types/identifier.py +181 -0
  88. mex_common-0.62.0/mex/common/types/identity.py +9 -0
  89. mex_common-0.62.0/mex/common/types/link.py +52 -0
  90. mex_common-0.62.0/mex/common/types/path.py +74 -0
  91. mex_common-0.62.0/mex/common/types/sink.py +10 -0
  92. mex_common-0.62.0/mex/common/types/temporal_entity.py +348 -0
  93. mex_common-0.62.0/mex/common/types/text.py +60 -0
  94. mex_common-0.62.0/mex/common/types/vocabulary.py +240 -0
  95. mex_common-0.62.0/mex/common/utils.py +216 -0
  96. mex_common-0.62.0/mex/common/wikidata/__init__.py +0 -0
  97. mex_common-0.62.0/mex/common/wikidata/connector.py +52 -0
  98. mex_common-0.62.0/mex/common/wikidata/extract.py +31 -0
  99. mex_common-0.62.0/mex/common/wikidata/models.py +99 -0
  100. mex_common-0.62.0/mex/common/wikidata/transform.py +166 -0
  101. mex_common-0.62.0/mex/py.typed +0 -0
  102. mex_common-0.62.0/pyproject.toml +225 -0
  103. mex_common-0.62.0/tests/__init__.py +0 -0
  104. mex_common-0.62.0/tests/backend_api/__init__.py +0 -0
  105. mex_common-0.62.0/tests/backend_api/conftest.py +18 -0
  106. mex_common-0.62.0/tests/backend_api/test_connector.py +257 -0
  107. mex_common-0.62.0/tests/conftest.py +96 -0
  108. mex_common-0.62.0/tests/connector/__init__.py +0 -0
  109. mex_common-0.62.0/tests/connector/test_base.py +30 -0
  110. mex_common-0.62.0/tests/connector/test_http.py +235 -0
  111. mex_common-0.62.0/tests/identity/__init__.py +0 -0
  112. mex_common-0.62.0/tests/identity/conftest.py +15 -0
  113. mex_common-0.62.0/tests/identity/test_backend_api.py +138 -0
  114. mex_common-0.62.0/tests/identity/test_memory.py +94 -0
  115. mex_common-0.62.0/tests/identity/test_registry.py +70 -0
  116. mex_common-0.62.0/tests/ldap/__init__.py +0 -0
  117. mex_common-0.62.0/tests/ldap/conftest.py +70 -0
  118. mex_common-0.62.0/tests/ldap/test_connector.py +161 -0
  119. mex_common-0.62.0/tests/ldap/test_extract.py +201 -0
  120. mex_common-0.62.0/tests/ldap/test_transform.py +165 -0
  121. mex_common-0.62.0/tests/merged/__init__.py +0 -0
  122. mex_common-0.62.0/tests/merged/test_main.py +358 -0
  123. mex_common-0.62.0/tests/merged/test_utils.py +62 -0
  124. mex_common-0.62.0/tests/models/__init__.py +0 -0
  125. mex_common-0.62.0/tests/models/test_base.py +162 -0
  126. mex_common-0.62.0/tests/models/test_extracted_data.py +87 -0
  127. mex_common-0.62.0/tests/models/test_filter.py +106 -0
  128. mex_common-0.62.0/tests/models/test_mapping.py +380 -0
  129. mex_common-0.62.0/tests/models/test_model_schemas.py +202 -0
  130. mex_common-0.62.0/tests/models/test_rules.py +83 -0
  131. mex_common-0.62.0/tests/orcid/__init__.py +0 -0
  132. mex_common-0.62.0/tests/orcid/test_connector.py +162 -0
  133. mex_common-0.62.0/tests/orcid/test_extract.py +182 -0
  134. mex_common-0.62.0/tests/orcid/test_model.py +113 -0
  135. mex_common-0.62.0/tests/orcid/test_transform.py +99 -0
  136. mex_common-0.62.0/tests/organigram/__init__.py +0 -0
  137. mex_common-0.62.0/tests/organigram/conftest.py +69 -0
  138. mex_common-0.62.0/tests/organigram/test_extract.py +94 -0
  139. mex_common-0.62.0/tests/organigram/test_transform.py +61 -0
  140. mex_common-0.62.0/tests/primary_source/__init__.py +0 -0
  141. mex_common-0.62.0/tests/primary_source/test_extract.py +12 -0
  142. mex_common-0.62.0/tests/primary_source/test_helpers.py +17 -0
  143. mex_common-0.62.0/tests/primary_source/test_transform.py +24 -0
  144. mex_common-0.62.0/tests/sinks/__init__.py +0 -0
  145. mex_common-0.62.0/tests/sinks/test_backend_api.py +49 -0
  146. mex_common-0.62.0/tests/sinks/test_ndjson.py +60 -0
  147. mex_common-0.62.0/tests/test_cli.py +250 -0
  148. mex_common-0.62.0/tests/test_context.py +31 -0
  149. mex_common-0.62.0/tests/test_exceptions.py +15 -0
  150. mex_common-0.62.0/tests/test_extract.py +49 -0
  151. mex_common-0.62.0/tests/test_fields.py +37 -0
  152. mex_common-0.62.0/tests/test_logging.py +30 -0
  153. mex_common-0.62.0/tests/test_settings.py +95 -0
  154. mex_common-0.62.0/tests/test_transform.py +244 -0
  155. mex_common-0.62.0/tests/test_utils.py +225 -0
  156. mex_common-0.62.0/tests/testing/__init__.py +0 -0
  157. mex_common-0.62.0/tests/testing/test_joker.py +13 -0
  158. mex_common-0.62.0/tests/types/__init__.py +0 -0
  159. mex_common-0.62.0/tests/types/conftest.py +3 -0
  160. mex_common-0.62.0/tests/types/test_data/dummy-vocabulary.json +29 -0
  161. mex_common-0.62.0/tests/types/test_email.py +53 -0
  162. mex_common-0.62.0/tests/types/test_identifier.py +68 -0
  163. mex_common-0.62.0/tests/types/test_link.py +38 -0
  164. mex_common-0.62.0/tests/types/test_path.py +45 -0
  165. mex_common-0.62.0/tests/types/test_temporal_entity.py +301 -0
  166. mex_common-0.62.0/tests/types/test_text.py +77 -0
  167. mex_common-0.62.0/tests/types/test_vocabulary.py +90 -0
  168. mex_common-0.62.0/tests/wikidata/__init__.py +0 -0
  169. mex_common-0.62.0/tests/wikidata/conftest.py +22 -0
  170. mex_common-0.62.0/tests/wikidata/test_connector.py +73 -0
  171. mex_common-0.62.0/tests/wikidata/test_data/items_details.json +22385 -0
  172. mex_common-0.62.0/tests/wikidata/test_extract.py +230 -0
  173. mex_common-0.62.0/tests/wikidata/test_transform.py +244 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Robert Koch-Institut
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,150 @@
1
+ Metadata-Version: 2.1
2
+ Name: mex-common
3
+ Version: 0.62.0
4
+ Summary: Common library for MEx python projects.
5
+ Author-Email: MEx Team <mex@rki.de>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2025 Robert Koch-Institut
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Repository, https://github.com/robert-koch-institut/mex-common
29
+ Requires-Python: <3.12,>=3.11
30
+ Requires-Dist: backoff<3,>=2
31
+ Requires-Dist: click<9,>=8
32
+ Requires-Dist: langdetect<2,>=1
33
+ Requires-Dist: ldap3<3,>=2
34
+ Requires-Dist: mex-model==4.1.0
35
+ Requires-Dist: numpy<3,>=2
36
+ Requires-Dist: pandas<3,>=2
37
+ Requires-Dist: pyarrow<21,>=20
38
+ Requires-Dist: pydantic-settings<2.8,>=2
39
+ Requires-Dist: pydantic<2.10,>=2
40
+ Requires-Dist: pytz<2024.2,>=2024
41
+ Requires-Dist: requests<3,>=2
42
+ Provides-Extra: dev
43
+ Requires-Dist: ipdb>=0.13; extra == "dev"
44
+ Requires-Dist: pandas-stubs>=2; extra == "dev"
45
+ Requires-Dist: mypy>=1; extra == "dev"
46
+ Requires-Dist: pytest-cov>=6; extra == "dev"
47
+ Requires-Dist: pytest-random-order>=1; extra == "dev"
48
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
49
+ Requires-Dist: pytest>=8; extra == "dev"
50
+ Requires-Dist: ruff>=0.11; extra == "dev"
51
+ Requires-Dist: sphinx>=8; extra == "dev"
52
+ Requires-Dist: types-ldap3>=2; extra == "dev"
53
+ Requires-Dist: types-pytz>=2025; extra == "dev"
54
+ Requires-Dist: types-requests>=2; extra == "dev"
55
+ Description-Content-Type: text/markdown
56
+
57
+ # MEx common
58
+
59
+ Common library for MEx python projects.
60
+
61
+ [![cookiecutter](https://github.com/robert-koch-institut/mex-common/actions/workflows/cookiecutter.yml/badge.svg)](https://github.com/robert-koch-institut/mex-template)
62
+ [![cve-scan](https://github.com/robert-koch-institut/mex-common/actions/workflows/cve-scan.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/cve-scan.yml)
63
+ [![documentation](https://github.com/robert-koch-institut/mex-common/actions/workflows/documentation.yml/badge.svg)](https://robert-koch-institut.github.io/mex-common)
64
+ [![linting](https://github.com/robert-koch-institut/mex-common/actions/workflows/linting.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/linting.yml)
65
+ [![open-code](https://github.com/robert-koch-institut/mex-common/actions/workflows/open-code.yml/badge.svg)](https://gitlab.opencode.de/robert-koch-institut/mex/mex-common)
66
+ [![testing](https://github.com/robert-koch-institut/mex-common/actions/workflows/testing.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/testing.yml)
67
+
68
+ ## Project
69
+
70
+ The Metadata Exchange (MEx) project is committed to improving the retrieval of RKI
71
+ research data and projects. How? By focusing on metadata: instead of providing the
72
+ actual research data directly, the MEx metadata catalog captures descriptive information
73
+ about research data and activities. On this basis, we want to make the data FAIR[^1] so
74
+ that it can be shared with others.
75
+
76
+ Via MEx, metadata will be made findable, accessible and shareable, as well as available
77
+ for further research. The goal is to get an overview of what research data is available,
78
+ understand its context, and know what needs to be considered for subsequent use.
79
+
80
+ RKI cooperated with D4L data4life gGmbH for a pilot phase where the vision of a
81
+ FAIR metadata catalog was explored and concepts and prototypes were developed.
82
+ The partnership has ended with the successful conclusion of the pilot phase.
83
+
84
+ After an internal launch, the metadata will also be made publicly available and thus be
85
+ available to external researchers as well as the interested (professional) public to
86
+ find research data from the RKI.
87
+
88
+ For further details, please consult our
89
+ [project page](https://www.rki.de/DE/Aktuelles/Publikationen/Forschungsdaten/MEx/metadata-exchange-plattform-mex-node.html).
90
+
91
+ [^1]: FAIR is referencing the so-called
92
+ [FAIR data principles](https://www.go-fair.org/fair-principles/) – guidelines to make
93
+ data Findable, Accessible, Interoperable and Reusable.
94
+
95
+ **Contact** \
96
+ For more information, please feel free to email us at [mex@rki.de](mailto:mex@rki.de).
97
+
98
+ ### Publisher
99
+
100
+ **Robert Koch-Institut** \
101
+ Nordufer 20 \
102
+ 13353 Berlin \
103
+ Germany
104
+
105
+ ## Package
106
+
107
+ The `mex-common` library is a software development toolkit that is used by multiple
108
+ components within the MEx project. It contains utilities for building pipelines like a
109
+ common command-line interface, logging and configuration setup. It also provides common
110
+ auxiliary connectors that can be used to fetch data from external services and a
111
+ re-usable implementation of the MEx metadata schema as pydantic models.
112
+
113
+ ## License
114
+
115
+ This package is licensed under the [MIT license](/LICENSE). All other software
116
+ components of the MEx project are open-sourced under the same license as well.
117
+
118
+ ## Development
119
+
120
+ ### Installation
121
+
122
+ - on unix, consider using pyenv https://github.com/pyenv/pyenv
123
+ - get pyenv `curl https://pyenv.run | bash`
124
+ - install 3.11 `pyenv install 3.11`
125
+ - switch version `pyenv global 3.11`
126
+ - run `make install`
127
+ - on windows, consider using pyenv-win https://pyenv-win.github.io/pyenv-win/
128
+ - follow https://pyenv-win.github.io/pyenv-win/#quick-start
129
+ - install 3.11 `pyenv install 3.11`
130
+ - switch version `pyenv global 3.11`
131
+ - run `.\mex.bat install`
132
+
133
+ ### Linting and testing
134
+
135
+ - run all linters with `pdm lint`
136
+ - run only unit tests with `pdm unit`
137
+ - run unit and integration tests with `pdm test`
138
+
139
+ ### Updating dependencies
140
+
141
+ - update boilerplate files with `cruft update`
142
+ - update global requirements in `requirements.txt` manually
143
+ - update git hooks with `pre-commit autoupdate`
144
+ - update package dependencies using `pdm update-all`
145
+ - update github actions in `.github/workflows/*.yml` manually
146
+
147
+ ### Creating release
148
+
149
+ - run `pdm release RULE` to release a new version where RULE determines which part of
150
+ the version to update and is one of `major`, `minor`, `patch`.
@@ -0,0 +1,94 @@
1
+ # MEx common
2
+
3
+ Common library for MEx python projects.
4
+
5
+ [![cookiecutter](https://github.com/robert-koch-institut/mex-common/actions/workflows/cookiecutter.yml/badge.svg)](https://github.com/robert-koch-institut/mex-template)
6
+ [![cve-scan](https://github.com/robert-koch-institut/mex-common/actions/workflows/cve-scan.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/cve-scan.yml)
7
+ [![documentation](https://github.com/robert-koch-institut/mex-common/actions/workflows/documentation.yml/badge.svg)](https://robert-koch-institut.github.io/mex-common)
8
+ [![linting](https://github.com/robert-koch-institut/mex-common/actions/workflows/linting.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/linting.yml)
9
+ [![open-code](https://github.com/robert-koch-institut/mex-common/actions/workflows/open-code.yml/badge.svg)](https://gitlab.opencode.de/robert-koch-institut/mex/mex-common)
10
+ [![testing](https://github.com/robert-koch-institut/mex-common/actions/workflows/testing.yml/badge.svg)](https://github.com/robert-koch-institut/mex-common/actions/workflows/testing.yml)
11
+
12
+ ## Project
13
+
14
+ The Metadata Exchange (MEx) project is committed to improving the retrieval of RKI
15
+ research data and projects. How? By focusing on metadata: instead of providing the
16
+ actual research data directly, the MEx metadata catalog captures descriptive information
17
+ about research data and activities. On this basis, we want to make the data FAIR[^1] so
18
+ that it can be shared with others.
19
+
20
+ Via MEx, metadata will be made findable, accessible and shareable, as well as available
21
+ for further research. The goal is to get an overview of what research data is available,
22
+ understand its context, and know what needs to be considered for subsequent use.
23
+
24
+ RKI cooperated with D4L data4life gGmbH for a pilot phase where the vision of a
25
+ FAIR metadata catalog was explored and concepts and prototypes were developed.
26
+ The partnership has ended with the successful conclusion of the pilot phase.
27
+
28
+ After an internal launch, the metadata will also be made publicly available and thus be
29
+ available to external researchers as well as the interested (professional) public to
30
+ find research data from the RKI.
31
+
32
+ For further details, please consult our
33
+ [project page](https://www.rki.de/DE/Aktuelles/Publikationen/Forschungsdaten/MEx/metadata-exchange-plattform-mex-node.html).
34
+
35
+ [^1]: FAIR is referencing the so-called
36
+ [FAIR data principles](https://www.go-fair.org/fair-principles/) – guidelines to make
37
+ data Findable, Accessible, Interoperable and Reusable.
38
+
39
+ **Contact** \
40
+ For more information, please feel free to email us at [mex@rki.de](mailto:mex@rki.de).
41
+
42
+ ### Publisher
43
+
44
+ **Robert Koch-Institut** \
45
+ Nordufer 20 \
46
+ 13353 Berlin \
47
+ Germany
48
+
49
+ ## Package
50
+
51
+ The `mex-common` library is a software development toolkit that is used by multiple
52
+ components within the MEx project. It contains utilities for building pipelines like a
53
+ common command-line interface, logging and configuration setup. It also provides common
54
+ auxiliary connectors that can be used to fetch data from external services and a
55
+ re-usable implementation of the MEx metadata schema as pydantic models.
56
+
57
+ ## License
58
+
59
+ This package is licensed under the [MIT license](/LICENSE). All other software
60
+ components of the MEx project are open-sourced under the same license as well.
61
+
62
+ ## Development
63
+
64
+ ### Installation
65
+
66
+ - on unix, consider using pyenv https://github.com/pyenv/pyenv
67
+ - get pyenv `curl https://pyenv.run | bash`
68
+ - install 3.11 `pyenv install 3.11`
69
+ - switch version `pyenv global 3.11`
70
+ - run `make install`
71
+ - on windows, consider using pyenv-win https://pyenv-win.github.io/pyenv-win/
72
+ - follow https://pyenv-win.github.io/pyenv-win/#quick-start
73
+ - install 3.11 `pyenv install 3.11`
74
+ - switch version `pyenv global 3.11`
75
+ - run `.\mex.bat install`
76
+
77
+ ### Linting and testing
78
+
79
+ - run all linters with `pdm lint`
80
+ - run only unit tests with `pdm unit`
81
+ - run unit and integration tests with `pdm test`
82
+
83
+ ### Updating dependencies
84
+
85
+ - update boilerplate files with `cruft update`
86
+ - update global requirements in `requirements.txt` manually
87
+ - update git hooks with `pre-commit autoupdate`
88
+ - update package dependencies using `pdm update-all`
89
+ - update github actions in `.github/workflows/*.yml` manually
90
+
91
+ ### Creating release
92
+
93
+ - run `pdm release RULE` to release a new version where RULE determines which part of
94
+ the version to update and is one of `major`, `minor`, `patch`.
@@ -0,0 +1,3 @@
1
+ from pkgutil import extend_path
2
+
3
+ __path__ = extend_path(__path__, __name__)
@@ -0,0 +1,8 @@
1
+ from mex.common.identity.backend_api import BackendApiIdentityProvider
2
+ from mex.common.identity.memory import MemoryIdentityProvider
3
+ from mex.common.identity.registry import register_provider
4
+ from mex.common.types import IdentityProvider
5
+
6
+ # register the default providers shipped with mex-common
7
+ register_provider(IdentityProvider.MEMORY, MemoryIdentityProvider)
8
+ register_provider(IdentityProvider.BACKEND, BackendApiIdentityProvider)
File without changes
@@ -0,0 +1,399 @@
1
+ from typing import Any, TypeVar
2
+ from urllib.parse import urljoin
3
+
4
+ from requests.exceptions import HTTPError
5
+
6
+ from mex.common.connector import HTTPConnector
7
+ from mex.common.identity.models import Identity
8
+ from mex.common.models import (
9
+ AnyExtractedModel,
10
+ AnyMergedModel,
11
+ AnyPreviewModel,
12
+ AnyRuleSetRequest,
13
+ AnyRuleSetResponse,
14
+ ExtractedOrganization,
15
+ ExtractedPerson,
16
+ ItemsContainer,
17
+ PaginatedItemsContainer,
18
+ PreviewModelTypeAdapter,
19
+ RuleSetResponseTypeAdapter,
20
+ )
21
+ from mex.common.settings import BaseSettings
22
+ from mex.common.types import Identifier, MergedPrimarySourceIdentifier
23
+
24
+ _IngestibleModelT = TypeVar(
25
+ "_IngestibleModelT", bound=AnyExtractedModel | AnyRuleSetResponse
26
+ )
27
+
28
+
29
+ class BackendApiConnector(HTTPConnector):
30
+ """Connector class to handle interaction with the Backend API."""
31
+
32
+ API_VERSION = "v0"
33
+
34
def _check_availability(self) -> None:
    """Probe the backend's system check endpoint.

    Issues a `GET` request against `_system/check` through `self.request`
    and discards the response.
    """
    http_method, check_endpoint = "GET", "_system/check"
    self.request(http_method, check_endpoint)
37
+
38
def _set_authentication(self) -> None:
    """Attach the configured backend API key to the session headers.

    The key is read from the active `BaseSettings` and stored under the
    `X-API-Key` header of `self.session`.
    """
    api_key = BaseSettings.get().backend_api_key
    self.session.headers["X-API-Key"] = api_key.get_secret_value()
42
+
43
def _set_url(self) -> None:
    """Derive the connector's base url from the settings and `API_VERSION`.

    The configured `backend_api_url` is converted to a string, combined with
    the version segment via `urljoin` and stored on `self.url`.
    """
    base_url = str(BaseSettings.get().backend_api_url)
    # NOTE: `urljoin` resolves the version relative to the base url, so a base
    # path without a trailing slash has its last segment replaced, not extended.
    self.url = urljoin(base_url, self.API_VERSION)
47
+
48
def fetch_extracted_items(
    self,
    query_string: str | None,
    stable_target_id: str | None,
    entity_type: list[str] | None,
    skip: int,
    limit: int,
) -> PaginatedItemsContainer[AnyExtractedModel]:
    """Search the backend for extracted items and return one result page.

    Args:
        query_string: Full-text search query, sent as `q`
        stable_target_id: Value for the `stableTargetId` filter
        entity_type: Values for the `entityType` filter
        skip: Pagination offset, sent as a string
        limit: Page size, sent as a string

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        The response validated as a paginated container of extracted items
    """
    search_params = {
        "q": query_string,
        "stableTargetId": stable_target_id,
        "entityType": entity_type,
        "skip": str(skip),
        "limit": str(limit),
    }
    raw_page = self.request(
        method="GET",
        endpoint="extracted-item",
        params=search_params,
    )
    page_model = PaginatedItemsContainer[AnyExtractedModel]
    return page_model.model_validate(raw_page)
83
+
84
def fetch_merged_items(
    self,
    query_string: str | None,
    entity_type: list[str] | None,
    had_primary_source: list[str] | None,
    skip: int,
    limit: int,
) -> PaginatedItemsContainer[AnyMergedModel]:
    """Search the backend for merged items and return one result page.

    Args:
        query_string: Full-text search query, sent as `q`
        entity_type: Values for the `entityType` filter
        had_primary_source: Values for the `hadPrimarySource` filter
        skip: Pagination offset, sent as a string
        limit: Page size, sent as a string

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        The response validated as a paginated container of merged items
    """
    search_params = {
        "q": query_string,
        "entityType": entity_type,
        "hadPrimarySource": had_primary_source,
        "skip": str(skip),
        "limit": str(limit),
    }
    raw_page = self.request(
        method="GET",
        endpoint="merged-item",
        params=search_params,
    )
    page_model = PaginatedItemsContainer[AnyMergedModel]
    return page_model.model_validate(raw_page)
119
+
120
def get_merged_item(
    self,
    identifier: str,
) -> AnyMergedModel:
    """Look up a single merged item by its `identifier`.

    The lookup goes through the `merged-item` search endpoint with a page
    size of one and returns the first item of the validated page.

    Args:
        identifier: The merged item's identifier

    Raises:
        HTTPError: If the returned page contains no items

    Returns:
        The first merged item of the response page
    """
    # TODO(ND): stop-gap until backend has proper get merged item endpoint (MX-1669)
    lookup_params = {
        "identifier": identifier,
        "limit": "1",
    }
    raw_page = self.request(
        method="GET",
        endpoint="merged-item",
        params=lookup_params,
    )
    page = PaginatedItemsContainer[AnyMergedModel].model_validate(raw_page)
    try:
        first_item = page.items[0]
    except IndexError:
        # an empty page means nothing matched the identifier
        raise HTTPError("merged item was not found") from None
    return first_item
152
+
153
def preview_merged_item(
    self,
    stable_target_id: str,
    rule_set: AnyRuleSetRequest,
) -> AnyPreviewModel:
    """Ask the backend to preview merging a rule-set with stored extracted items.

    Args:
        stable_target_id: The `stableTargetId` appended to the `preview-item` path
        rule_set: Rule-set that is posted as the request payload

    Raises:
        HTTPError: If preview produces errors, crashes or times out

    Returns:
        The response validated by `PreviewModelTypeAdapter`
    """
    preview_endpoint = f"preview-item/{stable_target_id}"
    raw_preview = self.request(
        method="POST",
        endpoint=preview_endpoint,
        payload=rule_set,
    )
    return PreviewModelTypeAdapter.validate_python(raw_preview)
176
+
177
def fetch_preview_items(
    self,
    query_string: str | None,
    entity_type: list[str] | None,
    had_primary_source: list[str] | None,
    skip: int,
    limit: int,
) -> PaginatedItemsContainer[AnyPreviewModel]:
    """Search the backend for merged item previews and return one result page.

    Args:
        query_string: Full-text search query, sent as `q`
        entity_type: Values for the `entityType` filter
        had_primary_source: Values for the `hadPrimarySource` filter
        skip: Pagination offset, sent as a string
        limit: Page size, sent as a string

    Raises:
        HTTPError: If search was not accepted, crashes or times out

    Returns:
        The response validated as a paginated container of preview items
    """
    search_params = {
        "q": query_string,
        "entityType": entity_type,
        "hadPrimarySource": had_primary_source,
        "skip": str(skip),
        "limit": str(limit),
    }
    raw_page = self.request(
        method="GET",
        endpoint="preview-item",
        params=search_params,
    )
    page_model = PaginatedItemsContainer[AnyPreviewModel]
    return page_model.model_validate(raw_page)
212
+
213
def create_rule_set(
    self,
    rule_set: AnyRuleSetRequest,
) -> AnyRuleSetResponse:
    """Post a new rule-set to the backend.

    Args:
        rule_set: New rule-set to create, sent as the request payload

    Raises:
        HTTPError: If the rule-set did not validate

    Returns:
        The response validated by `RuleSetResponseTypeAdapter`
    """
    raw_rule_set = self.request(
        method="POST",
        endpoint="rule-set",
        payload=rule_set,
    )
    return RuleSetResponseTypeAdapter.validate_python(raw_rule_set)
230
+
231
def get_rule_set(self, stable_target_id: str) -> AnyRuleSetResponse:
    """Fetch the rule-set that belongs to the given `stableTargetId`.

    Args:
        stable_target_id: Identifier of the merged item

    Raises:
        HTTPError: If no rule-set was found

    Returns:
        The rule-set response holding the three rules
    """
    rule_set_endpoint = f"rule-set/{stable_target_id}"
    raw_rule_set = self.request(method="GET", endpoint=rule_set_endpoint)
    return RuleSetResponseTypeAdapter.validate_python(raw_rule_set)
251
+
252
def update_rule_set(
    self,
    stable_target_id: str,
    rule_set: AnyRuleSetRequest,
) -> AnyRuleSetResponse:
    """Replace the contents of an already existing rule-set.

    Args:
        stable_target_id: Identifier of the merged item
        rule_set: Rule-set contents to store for that item

    Raises:
        HTTPError: If no rule-set was found

    Returns:
        The rule-set response holding the three rules
    """
    rule_set_endpoint = f"rule-set/{stable_target_id}"
    updated = self.request(
        method="PUT",
        endpoint=rule_set_endpoint,
        payload=rule_set,
    )
    return RuleSetResponseTypeAdapter.validate_python(updated)
271
+
272
def search_organization_in_wikidata(
    self,
    q: str,
    offset: int = 0,
    limit: int = 10,
) -> PaginatedItemsContainer[ExtractedOrganization]:
    """Look up organizations via the backend's wikidata endpoint.

    Args:
        q: Wikidata item ID or full URL
        offset: Index of the first result to return
        limit: Upper bound for the number of returned results

    Returns:
        One page of ExtractedOrganizations
    """
    # Pagination values are sent as strings.
    query = {"q": q, "offset": str(offset), "limit": str(limit)}
    raw_page = self.request(method="GET", endpoint="wikidata", params=query)
    page_model = PaginatedItemsContainer[ExtractedOrganization]
    return page_model.model_validate(raw_page)
294
+
295
def search_person_in_ldap(
    self,
    q: str,
    offset: int = 0,
    limit: int = 10,
) -> PaginatedItemsContainer[ExtractedPerson]:
    """Look up persons via the backend's LDAP endpoint.

    Args:
        q: Name of the person to search for
        offset: Index of the first result to return
        limit: Upper bound for the number of returned results

    Returns:
        One page of ExtractedPersons
    """
    # Pagination values are sent as strings.
    query = {"q": q, "offset": str(offset), "limit": str(limit)}
    raw_page = self.request(method="GET", endpoint="ldap", params=query)
    page_model = PaginatedItemsContainer[ExtractedPerson]
    return page_model.model_validate(raw_page)
317
+
318
def search_person_in_orcid(
    self,
    q: str,
    offset: int = 0,
    limit: int = 10,
) -> PaginatedItemsContainer[ExtractedPerson]:
    """Look up persons via the backend's orcid endpoint.

    Args:
        q: Name of the person to search for
        offset: Index of the first result to return
        limit: Upper bound for the number of returned results

    Returns:
        One page of ExtractedPersons
    """
    # Pagination values are sent as strings.
    query = {"q": q, "offset": str(offset), "limit": str(limit)}
    raw_page = self.request(method="GET", endpoint="orcid", params=query)
    page_model = PaginatedItemsContainer[ExtractedPerson]
    return page_model.model_validate(raw_page)
340
+
341
def assign_identity(
    self,
    had_primary_source: MergedPrimarySourceIdentifier,
    identifier_in_primary_source: str,
) -> Identity:
    """Find an Identity in a database or assign a new one.

    Args:
        had_primary_source: Identifier of the primary source of the item
        identifier_in_primary_source: The item's identifier within that
            primary source

    Returns:
        The Identity validated from the backend's response
    """
    identity_request = {
        "hadPrimarySource": had_primary_source,
        "identifierInPrimarySource": identifier_in_primary_source,
    }
    raw_identity = self.request("POST", "identity", identity_request)
    return Identity.model_validate(raw_identity)
356
+
357
def fetch_identities(
    self,
    had_primary_source: Identifier | None = None,
    identifier_in_primary_source: str | None = None,
    stable_target_id: Identifier | None = None,
) -> ItemsContainer[Identity]:
    """Find Identity instances matching the given filters.

    Either provide `stableTargetId` or `hadPrimarySource`
    and `identifierInPrimarySource` together to get a unique result.

    Args:
        had_primary_source: Identifier of the items' primary source
        identifier_in_primary_source: The items' identifier within
            their primary source
        stable_target_id: The items' `stableTargetId`

    Raises:
        HTTPError: If the request was not accepted, crashes or times out

    Returns:
        A container with all identities that matched the filters
    """
    # Issue the request through this very instance, like every other
    # method does, instead of looking up a connector from the global
    # registry, which could be a different (or freshly created) instance.
    response = self.request(
        "GET",
        "identity",
        params={
            "hadPrimarySource": had_primary_source,
            "identifierInPrimarySource": identifier_in_primary_source,
            "stableTargetId": stable_target_id,
        },
    )
    return ItemsContainer[Identity].model_validate(response)
379
+
380
def ingest(
    self,
    ingestible_models: list[_IngestibleModelT],
    **kwargs: Any,  # noqa: ANN401
) -> None:
    """Send a batch of extracted models or rule-sets to the backend.

    Args:
        ingestible_models: Extracted models or rule-sets to ingest
        kwargs: Further keyword arguments passed to `requests`

    Raises:
        HTTPError: If post was not accepted, crashes or times out
    """
    # Wrap the models in a container so they are posted as a single payload.
    batch = ItemsContainer[_IngestibleModelT](items=ingestible_models)
    self.request(method="POST", endpoint="ingest", payload=batch, **kwargs)