metadata-curation-client 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metadata_curation_client-0.1.0/LICENSE +21 -0
- metadata_curation_client-0.1.0/PKG-INFO +239 -0
- metadata_curation_client-0.1.0/README.md +210 -0
- metadata_curation_client-0.1.0/metadata_curation_client/__init__.py +11 -0
- metadata_curation_client-0.1.0/metadata_curation_client/curation_api_client.py +182 -0
- metadata_curation_client-0.1.0/metadata_curation_client/source_manager.py +455 -0
- metadata_curation_client-0.1.0/metadata_curation_client.egg-info/PKG-INFO +239 -0
- metadata_curation_client-0.1.0/metadata_curation_client.egg-info/SOURCES.txt +11 -0
- metadata_curation_client-0.1.0/metadata_curation_client.egg-info/dependency_links.txt +1 -0
- metadata_curation_client-0.1.0/metadata_curation_client.egg-info/requires.txt +7 -0
- metadata_curation_client-0.1.0/metadata_curation_client.egg-info/top_level.txt +1 -0
- metadata_curation_client-0.1.0/pyproject.toml +53 -0
- metadata_curation_client-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Digital Edition Curation Team
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,239 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: metadata-curation-client
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: API client for metadata curation platforms
|
5
|
+
Author: Digital Edition Curation Team
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/digital-edition-curation
|
8
|
+
Project-URL: Repository, https://github.com/yourusername/digital-edition-curation
|
9
|
+
Project-URL: Issues, https://github.com/yourusername/digital-edition-curation/issues
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Requires-Python: >=3.8
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: requests>=2.28.0
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
25
|
+
Requires-Dist: black; extra == "dev"
|
26
|
+
Requires-Dist: isort; extra == "dev"
|
27
|
+
Requires-Dist: mypy; extra == "dev"
|
28
|
+
Dynamic: license-file
|
29
|
+
|
30
|
+
# Metadata Curation Client
|
31
|
+
|
32
|
+
API client for external partners to integrate with metadata curation platforms.
|
33
|
+
|
34
|
+
## Installation
|
35
|
+
|
36
|
+
```bash
|
37
|
+
pip install metadata-curation-client
|
38
|
+
```
|
39
|
+
|
40
|
+
## Basic Usage
|
41
|
+
|
42
|
+
```python
|
43
|
+
from metadata_curation_client import CurationAPIClient, PropertyType
|
44
|
+
|
45
|
+
# Initialize client
|
46
|
+
client = CurationAPIClient("http://localhost:8000")
|
47
|
+
|
48
|
+
# Create source
|
49
|
+
source = client.create_source({
|
50
|
+
"name": "My Archive",
|
51
|
+
"description": "Digital editions from our collection"
|
52
|
+
})
|
53
|
+
|
54
|
+
# Create controlled vocabulary property
|
55
|
+
language_prop = client.create_property({
|
56
|
+
"technical_name": "language",
|
57
|
+
"name": "Language",
|
58
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
59
|
+
"source_id": source["id"],
|
60
|
+
"property_options": [{"name": "English"}, {"name": "German"}]
|
61
|
+
})
|
62
|
+
|
63
|
+
# Create free text property
|
64
|
+
description_prop = client.create_property({
|
65
|
+
"technical_name": "description",
|
66
|
+
"name": "Description",
|
67
|
+
"type": PropertyType.FREE_TEXT,
|
68
|
+
"source_id": source["id"]
|
69
|
+
})
|
70
|
+
|
71
|
+
# Create edition
|
72
|
+
edition = client.create_edition({
|
73
|
+
"source_id": source["id"],
|
74
|
+
"source_internal_id": "my_001"
|
75
|
+
})
|
76
|
+
|
77
|
+
# Create properties for each type
|
78
|
+
genre_prop = client.create_property({
|
79
|
+
"technical_name": "genre",
|
80
|
+
"name": "Genre",
|
81
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
82
|
+
"source_id": source["id"],
|
83
|
+
"property_options": [
|
84
|
+
{"name": "Poetry"}, {"name": "Prose"}, {"name": "Drama"}
|
85
|
+
]
|
86
|
+
})
|
87
|
+
|
88
|
+
has_annotations_prop = client.create_property({
|
89
|
+
"technical_name": "has_annotations",
|
90
|
+
"name": "Has Annotations",
|
91
|
+
"type": PropertyType.BINARY,
|
92
|
+
"source_id": source["id"]
|
93
|
+
})
|
94
|
+
|
95
|
+
year_prop = client.create_property({
|
96
|
+
"technical_name": "publication_year",
|
97
|
+
"name": "Publication Year",
|
98
|
+
"type": PropertyType.NUMERICAL,
|
99
|
+
"source_id": source["id"]
|
100
|
+
})
|
101
|
+
|
102
|
+
description_prop = client.create_property({
|
103
|
+
"technical_name": "description",
|
104
|
+
"name": "Description",
|
105
|
+
"type": PropertyType.FREE_TEXT,
|
106
|
+
"source_id": source["id"]
|
107
|
+
})
|
108
|
+
|
109
|
+
# Example 1: CONTROLLED_VOCABULARY suggestion
|
110
|
+
# First get the property option ID
|
111
|
+
properties = client.get_properties()
|
112
|
+
genre_prop = next(p for p in properties if p["technical_name"] == "genre")
|
113
|
+
poetry_option = next(opt for opt in genre_prop["property_options"] if opt["name"] == "Poetry")
|
114
|
+
|
115
|
+
client.create_suggestion({
|
116
|
+
"source_id": source["id"],
|
117
|
+
"edition_id": edition["id"],
|
118
|
+
"property_id": genre_prop["id"],
|
119
|
+
"property_option_id": poetry_option["id"]
|
120
|
+
})
|
121
|
+
|
122
|
+
# Example 2: BINARY suggestion (uses property_option_id)
|
123
|
+
# Binary properties automatically get two options named "1" (true) and "0" (false)
|
124
|
+
# Look up the "true" option by its name ("1") rather than assuming a fixed option ID
|
125
|
+
binary_props = client.get_properties()
|
126
|
+
has_annotations_prop = next(p for p in binary_props if p["technical_name"] == "has_annotations")
|
127
|
+
true_option = next(opt for opt in has_annotations_prop["property_options"] if opt["name"] == "1")
|
128
|
+
|
129
|
+
client.create_suggestion({
|
130
|
+
"source_id": source["id"],
|
131
|
+
"edition_id": edition["id"],
|
132
|
+
"property_id": has_annotations_prop["id"],
|
133
|
+
"property_option_id": true_option["id"] # For "yes"/"true" value
|
134
|
+
})
|
135
|
+
|
136
|
+
# Example 3: NUMERICAL suggestion (uses custom_value)
|
137
|
+
client.create_suggestion({
|
138
|
+
"source_id": source["id"],
|
139
|
+
"edition_id": edition["id"],
|
140
|
+
"property_id": year_prop["id"],
|
141
|
+
"custom_value": "2025" # Note: numerical values are sent as strings
|
142
|
+
})
|
143
|
+
|
144
|
+
# Example 4: FREE_TEXT suggestion (uses custom_value)
|
145
|
+
client.create_suggestion({
|
146
|
+
"source_id": source["id"],
|
147
|
+
"edition_id": edition["id"],
|
148
|
+
"property_id": description_prop["id"],
|
149
|
+
"custom_value": "This is a detailed description of the edition."
|
150
|
+
})
|
151
|
+
|
152
|
+
# Mark ingestion complete
|
153
|
+
client.mark_ingestion_complete(source["id"])
|
154
|
+
```
|
155
|
+
|
156
|
+
## Property Types
|
157
|
+
|
158
|
+
- `PropertyType.CONTROLLED_VOCABULARY` - Predefined options
|
159
|
+
- `PropertyType.FREE_TEXT` - Free text
|
160
|
+
- `PropertyType.BINARY` - True/false values
|
161
|
+
- `PropertyType.NUMERICAL` - Numeric values
|
162
|
+
|
163
|
+
## API Reference
|
164
|
+
|
165
|
+
See the docstrings in `curation_api_client.py` for detailed method documentation.
|
166
|
+
|
167
|
+
## Enhanced Integration with SourceManager
|
168
|
+
|
169
|
+
For more sophisticated integrations, we also provide a higher-level abstraction in `source_manager.py` that mirrors some of the conveniences of our internal extractors:
|
170
|
+
|
171
|
+
```python
|
172
|
+
from metadata_curation_client import CurationAPIClient, PropertyType, SourceManager, PropertyBuilder
|
173
|
+
|
174
|
+
# Initialize client, then fetch the source by technical name (create it if missing)
|
175
|
+
client = CurationAPIClient("http://localhost:8000")
|
176
|
+
source = client.get_source_by_technical_name("my_data_source")
|
177
|
+
if not source:
|
178
|
+
source = client.create_source({
|
179
|
+
"name": "My Data Source",
|
180
|
+
"description": "My collection of digital editions",
|
181
|
+
"technical_name": "my_data_source"
|
182
|
+
})
|
183
|
+
|
184
|
+
# Define properties using helper builders
|
185
|
+
property_definitions = [
|
186
|
+
PropertyBuilder.controlled_vocabulary(
|
187
|
+
"example_genre", "Genre", ["Poetry", "Prose", "Drama"]
|
188
|
+
),
|
189
|
+
PropertyBuilder.binary(
|
190
|
+
"example_has_annotations", "Has Annotations"
|
191
|
+
),
|
192
|
+
PropertyBuilder.numerical(
|
193
|
+
"example_year", "Publication Year"
|
194
|
+
)
|
195
|
+
]
|
196
|
+
|
197
|
+
# Initialize the source manager - this will:
|
198
|
+
# - Fetch all existing data
|
199
|
+
# - Build lookup tables
|
200
|
+
# - Create any missing properties
|
201
|
+
manager = SourceManager(client, source['id'], property_definitions)
|
202
|
+
|
203
|
+
# Efficiently get or create edition using lookup tables
|
204
|
+
edition = manager.get_or_create_edition("book_001")
|
205
|
+
|
206
|
+
# Create suggestions in a batch with validation and deduplication
|
207
|
+
manager.create_suggestions_batch(
|
208
|
+
edition["id"],
|
209
|
+
{
|
210
|
+
"example_genre": "Poetry",
|
211
|
+
"example_has_annotations": True,
|
212
|
+
"example_year": 2022
|
213
|
+
}
|
214
|
+
)
|
215
|
+
|
216
|
+
# Mark ingestion complete (updates timestamp)
|
217
|
+
manager.finish_ingestion()
|
218
|
+
```
|
219
|
+
|
220
|
+
### Benefits of the SourceManager
|
221
|
+
|
222
|
+
The `SourceManager` provides several advantages for more complex integrations:
|
223
|
+
|
224
|
+
1. **Reduced API Calls**: Prefetches data to minimize API requests
|
225
|
+
2. **Lookup Tables**: Maintains efficient in-memory lookups for editions, properties, and suggestions
|
226
|
+
3. **Automatic Property Creation**: Creates properties from definitions as needed
|
227
|
+
4. **Validation**: Automatically validates values based on property types
|
228
|
+
5. **Deduplication**: Avoids creating duplicate suggestions
|
229
|
+
6. **Builder Helpers**: Provides convenient builder classes for creating properties and sources
|
230
|
+
7. **Timestamp Management**: Automatically updates the last ingestion timestamp
|
231
|
+
|
232
|
+
For a complete example, see `example_with_source_manager.py`.
|
233
|
+
|
234
|
+
### Choosing the Right Approach
|
235
|
+
|
236
|
+
- **Basic API Client**: For simple integrations or when you need complete control over the process
|
237
|
+
- **SourceManager**: For more complex integrations where efficiency and convenience are priorities
|
238
|
+
|
239
|
+
Both approaches use the same underlying API endpoints and data models, so you can choose the one that best fits your needs or even mix them as required.
|
@@ -0,0 +1,210 @@
|
|
1
|
+
# Metadata Curation Client
|
2
|
+
|
3
|
+
API client for external partners to integrate with metadata curation platforms.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
```bash
|
8
|
+
pip install metadata-curation-client
|
9
|
+
```
|
10
|
+
|
11
|
+
## Basic Usage
|
12
|
+
|
13
|
+
```python
|
14
|
+
from metadata_curation_client import CurationAPIClient, PropertyType
|
15
|
+
|
16
|
+
# Initialize client
|
17
|
+
client = CurationAPIClient("http://localhost:8000")
|
18
|
+
|
19
|
+
# Create source
|
20
|
+
source = client.create_source({
|
21
|
+
"name": "My Archive",
|
22
|
+
"description": "Digital editions from our collection"
|
23
|
+
})
|
24
|
+
|
25
|
+
# Create controlled vocabulary property
|
26
|
+
language_prop = client.create_property({
|
27
|
+
"technical_name": "language",
|
28
|
+
"name": "Language",
|
29
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
30
|
+
"source_id": source["id"],
|
31
|
+
"property_options": [{"name": "English"}, {"name": "German"}]
|
32
|
+
})
|
33
|
+
|
34
|
+
# Create free text property
|
35
|
+
description_prop = client.create_property({
|
36
|
+
"technical_name": "description",
|
37
|
+
"name": "Description",
|
38
|
+
"type": PropertyType.FREE_TEXT,
|
39
|
+
"source_id": source["id"]
|
40
|
+
})
|
41
|
+
|
42
|
+
# Create edition
|
43
|
+
edition = client.create_edition({
|
44
|
+
"source_id": source["id"],
|
45
|
+
"source_internal_id": "my_001"
|
46
|
+
})
|
47
|
+
|
48
|
+
# Create properties for each type
|
49
|
+
genre_prop = client.create_property({
|
50
|
+
"technical_name": "genre",
|
51
|
+
"name": "Genre",
|
52
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
53
|
+
"source_id": source["id"],
|
54
|
+
"property_options": [
|
55
|
+
{"name": "Poetry"}, {"name": "Prose"}, {"name": "Drama"}
|
56
|
+
]
|
57
|
+
})
|
58
|
+
|
59
|
+
has_annotations_prop = client.create_property({
|
60
|
+
"technical_name": "has_annotations",
|
61
|
+
"name": "Has Annotations",
|
62
|
+
"type": PropertyType.BINARY,
|
63
|
+
"source_id": source["id"]
|
64
|
+
})
|
65
|
+
|
66
|
+
year_prop = client.create_property({
|
67
|
+
"technical_name": "publication_year",
|
68
|
+
"name": "Publication Year",
|
69
|
+
"type": PropertyType.NUMERICAL,
|
70
|
+
"source_id": source["id"]
|
71
|
+
})
|
72
|
+
|
73
|
+
description_prop = client.create_property({
|
74
|
+
"technical_name": "description",
|
75
|
+
"name": "Description",
|
76
|
+
"type": PropertyType.FREE_TEXT,
|
77
|
+
"source_id": source["id"]
|
78
|
+
})
|
79
|
+
|
80
|
+
# Example 1: CONTROLLED_VOCABULARY suggestion
|
81
|
+
# First get the property option ID
|
82
|
+
properties = client.get_properties()
|
83
|
+
genre_prop = next(p for p in properties if p["technical_name"] == "genre")
|
84
|
+
poetry_option = next(opt for opt in genre_prop["property_options"] if opt["name"] == "Poetry")
|
85
|
+
|
86
|
+
client.create_suggestion({
|
87
|
+
"source_id": source["id"],
|
88
|
+
"edition_id": edition["id"],
|
89
|
+
"property_id": genre_prop["id"],
|
90
|
+
"property_option_id": poetry_option["id"]
|
91
|
+
})
|
92
|
+
|
93
|
+
# Example 2: BINARY suggestion (uses property_option_id)
|
94
|
+
# Binary properties automatically get two options named "1" (true) and "0" (false)
|
95
|
+
# Look up the "true" option by its name ("1") rather than assuming a fixed option ID
|
96
|
+
binary_props = client.get_properties()
|
97
|
+
has_annotations_prop = next(p for p in binary_props if p["technical_name"] == "has_annotations")
|
98
|
+
true_option = next(opt for opt in has_annotations_prop["property_options"] if opt["name"] == "1")
|
99
|
+
|
100
|
+
client.create_suggestion({
|
101
|
+
"source_id": source["id"],
|
102
|
+
"edition_id": edition["id"],
|
103
|
+
"property_id": has_annotations_prop["id"],
|
104
|
+
"property_option_id": true_option["id"] # For "yes"/"true" value
|
105
|
+
})
|
106
|
+
|
107
|
+
# Example 3: NUMERICAL suggestion (uses custom_value)
|
108
|
+
client.create_suggestion({
|
109
|
+
"source_id": source["id"],
|
110
|
+
"edition_id": edition["id"],
|
111
|
+
"property_id": year_prop["id"],
|
112
|
+
"custom_value": "2025" # Note: numerical values are sent as strings
|
113
|
+
})
|
114
|
+
|
115
|
+
# Example 4: FREE_TEXT suggestion (uses custom_value)
|
116
|
+
client.create_suggestion({
|
117
|
+
"source_id": source["id"],
|
118
|
+
"edition_id": edition["id"],
|
119
|
+
"property_id": description_prop["id"],
|
120
|
+
"custom_value": "This is a detailed description of the edition."
|
121
|
+
})
|
122
|
+
|
123
|
+
# Mark ingestion complete
|
124
|
+
client.mark_ingestion_complete(source["id"])
|
125
|
+
```
|
126
|
+
|
127
|
+
## Property Types
|
128
|
+
|
129
|
+
- `PropertyType.CONTROLLED_VOCABULARY` - Predefined options
|
130
|
+
- `PropertyType.FREE_TEXT` - Free text
|
131
|
+
- `PropertyType.BINARY` - True/false values
|
132
|
+
- `PropertyType.NUMERICAL` - Numeric values
|
133
|
+
|
134
|
+
## API Reference
|
135
|
+
|
136
|
+
See the docstrings in `curation_api_client.py` for detailed method documentation.
|
137
|
+
|
138
|
+
## Enhanced Integration with SourceManager
|
139
|
+
|
140
|
+
For more sophisticated integrations, we also provide a higher-level abstraction in `source_manager.py` that mirrors some of the conveniences of our internal extractors:
|
141
|
+
|
142
|
+
```python
|
143
|
+
from metadata_curation_client import CurationAPIClient, PropertyType, SourceManager, PropertyBuilder
|
144
|
+
|
145
|
+
# Initialize client, then fetch the source by technical name (create it if missing)
|
146
|
+
client = CurationAPIClient("http://localhost:8000")
|
147
|
+
source = client.get_source_by_technical_name("my_data_source")
|
148
|
+
if not source:
|
149
|
+
source = client.create_source({
|
150
|
+
"name": "My Data Source",
|
151
|
+
"description": "My collection of digital editions",
|
152
|
+
"technical_name": "my_data_source"
|
153
|
+
})
|
154
|
+
|
155
|
+
# Define properties using helper builders
|
156
|
+
property_definitions = [
|
157
|
+
PropertyBuilder.controlled_vocabulary(
|
158
|
+
"example_genre", "Genre", ["Poetry", "Prose", "Drama"]
|
159
|
+
),
|
160
|
+
PropertyBuilder.binary(
|
161
|
+
"example_has_annotations", "Has Annotations"
|
162
|
+
),
|
163
|
+
PropertyBuilder.numerical(
|
164
|
+
"example_year", "Publication Year"
|
165
|
+
)
|
166
|
+
]
|
167
|
+
|
168
|
+
# Initialize the source manager - this will:
|
169
|
+
# - Fetch all existing data
|
170
|
+
# - Build lookup tables
|
171
|
+
# - Create any missing properties
|
172
|
+
manager = SourceManager(client, source['id'], property_definitions)
|
173
|
+
|
174
|
+
# Efficiently get or create edition using lookup tables
|
175
|
+
edition = manager.get_or_create_edition("book_001")
|
176
|
+
|
177
|
+
# Create suggestions in a batch with validation and deduplication
|
178
|
+
manager.create_suggestions_batch(
|
179
|
+
edition["id"],
|
180
|
+
{
|
181
|
+
"example_genre": "Poetry",
|
182
|
+
"example_has_annotations": True,
|
183
|
+
"example_year": 2022
|
184
|
+
}
|
185
|
+
)
|
186
|
+
|
187
|
+
# Mark ingestion complete (updates timestamp)
|
188
|
+
manager.finish_ingestion()
|
189
|
+
```
|
190
|
+
|
191
|
+
### Benefits of the SourceManager
|
192
|
+
|
193
|
+
The `SourceManager` provides several advantages for more complex integrations:
|
194
|
+
|
195
|
+
1. **Reduced API Calls**: Prefetches data to minimize API requests
|
196
|
+
2. **Lookup Tables**: Maintains efficient in-memory lookups for editions, properties, and suggestions
|
197
|
+
3. **Automatic Property Creation**: Creates properties from definitions as needed
|
198
|
+
4. **Validation**: Automatically validates values based on property types
|
199
|
+
5. **Deduplication**: Avoids creating duplicate suggestions
|
200
|
+
6. **Builder Helpers**: Provides convenient builder classes for creating properties and sources
|
201
|
+
7. **Timestamp Management**: Automatically updates the last ingestion timestamp
|
202
|
+
|
203
|
+
For a complete example, see `example_with_source_manager.py`.
|
204
|
+
|
205
|
+
### Choosing the Right Approach
|
206
|
+
|
207
|
+
- **Basic API Client**: For simple integrations or when you need complete control over the process
|
208
|
+
- **SourceManager**: For more complex integrations where efficiency and convenience are priorities
|
209
|
+
|
210
|
+
Both approaches use the same underlying API endpoints and data models, so you can choose the one that best fits your needs or even mix them as required.
|
@@ -0,0 +1,11 @@
|
|
1
|
+
"""
|
2
|
+
Metadata Curation Client
|
3
|
+
|
4
|
+
A lightweight API client for external partners to integrate with metadata curation platforms.
|
5
|
+
"""
|
6
|
+
|
7
|
+
from .curation_api_client import CurationAPIClient, PropertyType
|
8
|
+
from .source_manager import SourceManager, PropertyBuilder
|
9
|
+
|
10
|
+
__version__ = "0.1.0"
|
11
|
+
__all__ = ["CurationAPIClient", "PropertyType", "SourceManager", "PropertyBuilder"]
|
@@ -0,0 +1,182 @@
|
|
1
|
+
"""
|
2
|
+
Metadata Curation Client - API Client
|
3
|
+
|
4
|
+
Lightweight API client for external partners to integrate with metadata curation platforms.
|
5
|
+
Based on the actual models and AbstractExtractor patterns.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
from datetime import datetime
from typing import Dict, List, Optional, Any

import requests
|
11
|
+
|
12
|
+
|
13
|
+
class CurationAPIClient:
    """
    API client for external data integration.
    Mirrors the internal ExtractionAPIClient for consistency.

    Every public method issues one HTTP request through a shared
    ``requests.Session`` and returns the decoded JSON body of the response.

    Args:
        base_url: Root URL of the API. A trailing ``/`` is stripped.
        api_key: Optional token; when given it is sent on every request as
            ``Authorization: Bearer <api_key>``.
        timeout: Optional timeout in seconds handed to ``requests`` for every
            call. ``None`` (the default) sends no timeout argument at all,
            which is exactly the previous behavior (wait indefinitely).
    """

    _logger = logging.getLogger(__name__)

    def __init__(self, base_url: str, api_key: Optional[str] = None,
                 timeout: Optional[float] = None):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})

        if api_key:
            self.session.headers.update({'Authorization': f'Bearer {api_key}'})

    # Internal helpers
    def _handle_response(self, response: "requests.Response") -> Any:
        """
        Return the JSON body of ``response``.

        Raises:
            requests.exceptions.HTTPError: for a 4xx/5xx status code.
            requests.exceptions.RequestException: for any other ``requests``
                error raised while checking or decoding the response.

        The error is logged before it is re-raised unchanged.
        """
        try:
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError:
            self._logger.error("API Error %s: %s", response.status_code, response.text)
            raise
        except requests.exceptions.RequestException as exc:
            self._logger.error("Request Error: %s", exc)
            raise

    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """
        Call ``self.session.<method>`` on ``base_url + path`` and decode the reply.

        ``method`` is the lowercase session method name ("get", "post", "put").
        The timeout is only forwarded when one was configured, so a client
        built without ``timeout`` sends exactly the same calls as before.
        """
        if self.timeout is not None:
            kwargs["timeout"] = self.timeout
        send = getattr(self.session, method)
        return self._handle_response(send(f"{self.base_url}{path}", **kwargs))

    @staticmethod
    def _relationship_params(include_relationships: bool) -> Dict[str, bool]:
        """Build the query params: the flag is only sent when it is truthy."""
        return {"include_relationships": include_relationships} if include_relationships else {}

    # Source endpoints
    def create_source(self, source_data: Dict) -> Dict:
        """Create a new source."""
        return self._request("post", "/sources/", json=source_data)

    def get_source(self, source_id: int) -> Dict:
        """Get source by ID."""
        return self._request("get", f"/sources/{source_id}")

    def get_sources(self) -> List[Dict]:
        """Get all sources."""
        return self._request("get", "/sources/")

    def get_source_by_technical_name(self, technical_name: str) -> Optional[Dict]:
        """
        Get source by technical name.

        Fetches all sources and returns the first one whose
        ``technical_name`` matches, or ``None`` when there is no match.
        """
        sources = self.get_sources()
        return next((s for s in sources if s.get('technical_name') == technical_name), None)

    def get_source_editions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all editions for a source."""
        params = self._relationship_params(include_relationships)
        return self._request("get", f"/sources/{source_id}/editions", params=params)

    def get_source_properties(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all properties for a source."""
        params = self._relationship_params(include_relationships)
        return self._request("get", f"/sources/{source_id}/properties", params=params)

    def get_source_suggestions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all suggestions for a source."""
        params = self._relationship_params(include_relationships)
        return self._request("get", f"/sources/{source_id}/suggestions", params=params)

    def update_source(self, source_id: int, source_data: Dict) -> Dict:
        """Update an existing source."""
        return self._request("put", f"/sources/{source_id}", json=source_data)

    def mark_ingestion_complete(self, source_id: int) -> Dict:
        """Mark ingestion complete by updating last_ingestion_at timestamp."""
        # NOTE(review): datetime.now() is a naive local timestamp; confirm
        # whether the server expects UTC before changing this.
        return self.update_source(source_id, {
            "last_ingestion_at": datetime.now().isoformat()
        })

    # Edition endpoints
    def create_edition(self, edition_data: Dict) -> Dict:
        """
        Create a new edition.

        Required fields:
        - source_id: ID of the source this edition belongs to
        - source_internal_id: Internal ID/identifier for this edition

        Optional fields:
        - mapped_from_ids: List of edition IDs this edition is mapped from
        """
        return self._request("post", "/editions/", json=edition_data)

    def get_editions(self) -> List[Dict]:
        """Get all editions."""
        return self._request("get", "/editions/")

    def get_edition(self, edition_id: int) -> Dict:
        """Get edition by ID."""
        return self._request("get", f"/editions/{edition_id}")

    # Property endpoints
    def create_property(self, property_data: Dict) -> Dict:
        """Create a new property."""
        return self._request("post", "/properties/", json=property_data)

    def get_properties(self) -> List[Dict]:
        """Get all properties."""
        return self._request("get", "/properties/")

    def get_property(self, property_id: int) -> Dict:
        """Get property by ID."""
        return self._request("get", f"/properties/{property_id}")

    # Suggestion endpoints
    def create_suggestion(self, suggestion_data: Dict) -> Dict:
        """
        Create a new suggestion.

        Required fields:
        - source_id: ID of the source
        - edition_id: ID of the edition
        - property_id: ID of the property

        For controlled_vocabulary and binary properties:
        - property_option_id: ID of the property option

        For free_text, numerical, or other properties:
        - custom_value: String value for the property

        Note: Either property_option_id OR custom_value must be provided,
        depending on the property type.
        """
        return self._request("post", "/suggestions/", json=suggestion_data)

    def get_suggestions(self) -> List[Dict]:
        """Get all suggestions."""
        return self._request("get", "/suggestions/")
|
156
|
+
|
157
|
+
|
158
|
+
# Property type constants (matching models.py)
|
159
|
+
class PropertyType:
    """
    String constants naming the supported property types.

    Pass one of these as the "type" field when creating a property. The
    type also decides which field a suggestion for that property carries:

    CONTROLLED_VOCABULARY
        Values come from a predefined list. Supply a property_options list
        when creating the property; suggestions carry property_option_id.

    FREE_TEXT
        Open text values. No property_options; suggestions carry
        custom_value (a string).

    BINARY
        Boolean / yes-no values. Options named "1" and "0" are created
        automatically; suggestions carry property_option_id (the option
        named "1" stands for true).

    NUMERICAL
        Numeric values. No property_options; suggestions carry
        custom_value (the number written as a string).
    """

    CONTROLLED_VOCABULARY = "controlled_vocabulary"
    FREE_TEXT = "free_text"
    BINARY = "binary"
    NUMERICAL = "numerical"
|