metadata-curation-client 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metadata_curation_client/__init__.py +11 -0
- metadata_curation_client/curation_api_client.py +182 -0
- metadata_curation_client/source_manager.py +455 -0
- metadata_curation_client-0.1.0.dist-info/METADATA +239 -0
- metadata_curation_client-0.1.0.dist-info/RECORD +8 -0
- metadata_curation_client-0.1.0.dist-info/WHEEL +5 -0
- metadata_curation_client-0.1.0.dist-info/licenses/LICENSE +21 -0
- metadata_curation_client-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
"""
|
2
|
+
Metadata Curation Client
|
3
|
+
|
4
|
+
A lightweight API client for external partners to integrate with metadata curation platforms.
|
5
|
+
"""
|
6
|
+
|
7
|
+
# Re-export the public API so integrators can import everything from the
# package root instead of reaching into the individual modules.
from .curation_api_client import CurationAPIClient, PropertyType
from .source_manager import (
    EditionBuilder,
    PropertyBuilder,
    SourceBuilder,
    SourceManager,
)

__version__ = "0.1.0"

# SourceBuilder and EditionBuilder are public helper classes defined in
# source_manager; they are listed here so that they are importable from the
# package root alongside the other helpers.
__all__ = [
    "CurationAPIClient",
    "PropertyType",
    "SourceManager",
    "PropertyBuilder",
    "SourceBuilder",
    "EditionBuilder",
]
|
@@ -0,0 +1,182 @@
|
|
1
|
+
"""
|
2
|
+
Metadata Curation Client - API Client
|
3
|
+
|
4
|
+
Lightweight API client for external partners to integrate with metadata curation platforms.
|
5
|
+
Based on the actual models and AbstractExtractor patterns.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import requests
|
9
|
+
from typing import Dict, List, Optional, Any
|
10
|
+
from datetime import datetime
|
11
|
+
|
12
|
+
|
13
|
+
class CurationAPIClient:
    """
    API client for external data integration.
    Mirrors the internal ExtractionAPIClient for consistency.

    Each public method sends one HTTP request through a shared
    ``requests.Session`` and returns the decoded JSON body
    (``get_source_by_technical_name`` additionally filters the result
    client-side). HTTP error statuses are printed and re-raised as
    ``requests.exceptions.HTTPError``.

    Args:
        base_url: Root URL of the API; a trailing slash is stripped.
        api_key: Optional token, sent as ``Authorization: Bearer <api_key>``.
        timeout: Optional timeout in seconds passed to every request. The
            default ``None`` keeps the previous behaviour of waiting
            indefinitely for the server.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None,
                 timeout: Optional[float] = None):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})

        if api_key:
            self.session.headers.update({'Authorization': f'Bearer {api_key}'})

    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """Send one request to ``base_url + path`` and return the decoded body."""
        # getattr keeps subclasses that override __init__ without setting
        # ``timeout`` working exactly as before (no timeout).
        kwargs.setdefault('timeout', getattr(self, 'timeout', None))
        response = self.session.request(method, f"{self.base_url}{path}", **kwargs)
        return self._handle_response(response)

    def _handle_response(self, response: "requests.Response") -> Any:
        """
        Return the JSON body of ``response``.

        Raises:
            requests.exceptions.HTTPError: for 4xx/5xx statuses (status code
                and body are printed first).
            requests.exceptions.RequestException: for any other requests
                error raised while processing the response (printed first).
        """
        try:
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError:
            print(f"API Error {response.status_code}: {response.text}")
            raise  # bare raise keeps the original traceback
        except requests.exceptions.RequestException as e:
            print(f"Request Error: {e}")
            raise

    def _get_source_collection(self, source_id: int, collection: str,
                               include_relationships: bool) -> List[Dict]:
        """GET ``/sources/{source_id}/{collection}`` with the optional flag."""
        # The query parameter is only sent when the flag is set.
        params = {"include_relationships": include_relationships} if include_relationships else {}
        return self._request('GET', f"/sources/{source_id}/{collection}", params=params)

    # Source endpoints
    def create_source(self, source_data: Dict) -> Dict:
        """Create a new source."""
        return self._request('POST', "/sources/", json=source_data)

    def get_source(self, source_id: int) -> Dict:
        """Get source by ID."""
        return self._request('GET', f"/sources/{source_id}")

    def get_sources(self) -> List[Dict]:
        """Get all sources."""
        return self._request('GET', "/sources/")

    def get_source_by_technical_name(self, technical_name: str) -> Optional[Dict]:
        """
        Get source by technical name.

        Fetches all sources and returns the first whose ``technical_name``
        matches, or ``None`` when there is no match.
        """
        sources = self.get_sources()
        return next((s for s in sources if s.get('technical_name') == technical_name), None)

    def get_source_editions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all editions for a source."""
        return self._get_source_collection(source_id, "editions", include_relationships)

    def get_source_properties(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all properties for a source."""
        return self._get_source_collection(source_id, "properties", include_relationships)

    def get_source_suggestions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all suggestions for a source."""
        return self._get_source_collection(source_id, "suggestions", include_relationships)

    def update_source(self, source_id: int, source_data: Dict) -> Dict:
        """Update an existing source."""
        return self._request('PUT', f"/sources/{source_id}", json=source_data)

    def mark_ingestion_complete(self, source_id: int) -> Dict:
        """Mark ingestion complete by updating last_ingestion_at timestamp."""
        # NOTE(review): datetime.now() is naive local time; confirm which
        # timezone the server expects before switching to an aware timestamp.
        return self.update_source(source_id, {
            "last_ingestion_at": datetime.now().isoformat()
        })

    # Edition endpoints
    def create_edition(self, edition_data: Dict) -> Dict:
        """
        Create a new edition.

        Required fields:
        - source_id: ID of the source this edition belongs to
        - source_internal_id: Internal ID/identifier for this edition

        Optional fields:
        - mapped_from_ids: List of edition IDs this edition is mapped from
        """
        return self._request('POST', "/editions/", json=edition_data)

    def get_editions(self) -> List[Dict]:
        """Get all editions."""
        return self._request('GET', "/editions/")

    def get_edition(self, edition_id: int) -> Dict:
        """Get edition by ID."""
        return self._request('GET', f"/editions/{edition_id}")

    # Property endpoints
    def create_property(self, property_data: Dict) -> Dict:
        """Create a new property."""
        return self._request('POST', "/properties/", json=property_data)

    def get_properties(self) -> List[Dict]:
        """Get all properties."""
        return self._request('GET', "/properties/")

    def get_property(self, property_id: int) -> Dict:
        """Get property by ID."""
        return self._request('GET', f"/properties/{property_id}")

    # Suggestion endpoints
    def create_suggestion(self, suggestion_data: Dict) -> Dict:
        """
        Create a new suggestion.

        Required fields:
        - source_id: ID of the source
        - edition_id: ID of the edition
        - property_id: ID of the property

        For controlled_vocabulary properties:
        - property_option_id: ID of the property option

        For free_text, numerical, or other properties:
        - custom_value: String value for the property

        Note: Either property_option_id OR custom_value must be provided,
        depending on the property type.
        """
        return self._request('POST', "/suggestions/", json=suggestion_data)

    def get_suggestions(self) -> List[Dict]:
        """Get all suggestions."""
        return self._request('GET', "/suggestions/")
|
156
|
+
|
157
|
+
|
158
|
+
# Property type constants (matching models.py)
|
159
|
+
class PropertyType:
    """
    String constants naming the supported property types.

    Use these as the ``type`` field when creating a property.

    CONTROLLED_VOCABULARY
        Values come from a predefined option list.
        - A property_options list is required when creating the property.
        - Suggestions reference an option through property_option_id.

    FREE_TEXT
        Values are open text.
        - No property_options are needed.
        - Suggestions carry the text in custom_value (string).

    BINARY
        Boolean / yes-no values.
        - Options named "1" and "0" are created automatically.
        - Suggestions reference an option through property_option_id
          (the option named "1" means true).

    NUMERICAL
        Numeric values.
        - No property_options are needed.
        - Suggestions carry the number in custom_value, as a string.
    """

    CONTROLLED_VOCABULARY = "controlled_vocabulary"
    FREE_TEXT = "free_text"
    BINARY = "binary"
    NUMERICAL = "numerical"
|
@@ -0,0 +1,455 @@
|
|
1
|
+
"""
|
2
|
+
Metadata Curation Client - Source Manager
|
3
|
+
|
4
|
+
Enhanced abstractions for the metadata curation API client, inspired by the internal AbstractExtractor.
|
5
|
+
|
6
|
+
This provides higher-level functionality for external integrators who prefer a more
|
7
|
+
streamlined approach with features like:
|
8
|
+
- Pre-fetching data to reduce API calls
|
9
|
+
- Lookup tables for efficient access
|
10
|
+
- Automatic property creation and validation
|
11
|
+
- Streamlined suggestion creation
|
12
|
+
"""
|
13
|
+
|
14
|
+
from typing import Dict, List, Any, Optional, Union
|
15
|
+
from datetime import datetime
|
16
|
+
from .curation_api_client import CurationAPIClient, PropertyType
|
17
|
+
|
18
|
+
|
19
|
+
class SourceManager:
    """
    High-level manager for source data integration.

    Provides similar convenience features to the internal AbstractExtractor:
    - Prefetches data to reduce API calls
    - Maintains lookup tables for editions, properties, and suggestions
    - Automatically creates properties from definitions
    - Handles validation for different property types
    - Deduplicates suggestions

    This is optional - partners can still use the direct CurationAPIClient
    for simpler integrations if preferred.

    Progress and warnings are reported with ``print``.
    """

    # Accepted spellings of binary values (compared case-insensitively).
    _BINARY_TRUE = ('1', 'true', 'yes', 'y')
    _BINARY_FALSE = ('0', 'false', 'no', 'n')

    def __init__(self, client: "CurationAPIClient", source_identifier: Union[int, str],
                 property_definitions: Optional[List[Dict]] = None):
        """
        Initialize the source manager with all needed data.

        Args:
            client: API client for backend communication
            source_identifier: Source ID (int) or technical_name (str)
            property_definitions: Optional list of property definitions to ensure exist

        Raises:
            ValueError: if a technical name is given and no source matches it.
        """
        self.client = client
        self.property_definitions = property_definitions or []

        # Step 1: Get source information
        self.source = self._get_source(source_identifier)
        self.source_id = self.source['id']

        print(f"🎯 Working with source: {self.source['name']} (ID: {self.source_id})")

        # Step 2: Fetch all current data via API
        self._fetch_all_data()

        # Step 3: Build lookup dictionaries
        self._build_lookups()

        # Step 4: Ensure properties exist (if definitions provided)
        if property_definitions:
            self._ensure_properties_exist()

    @classmethod
    def _normalize_binary(cls, str_value: str) -> Optional[str]:
        """Map a binary spelling to the option name '1' or '0'; None if unrecognised."""
        lowered = str_value.lower()
        if lowered in cls._BINARY_TRUE:
            return '1'
        if lowered in cls._BINARY_FALSE:
            return '0'
        return None

    @staticmethod
    def _custom_value_matches(suggestion: Dict, str_value: str) -> bool:
        """Return True when the suggestion's stripped custom_value equals str_value."""
        # A custom_value that is present but None is treated like a missing
        # one (empty string) instead of crashing on None.strip().
        existing = suggestion.get('custom_value')
        existing_text = '' if existing is None else str(existing).strip()
        return existing_text == str_value

    def _get_source(self, source_identifier: Union[int, str]) -> Dict:
        """
        Get source by ID or technical name.

        Integers and all-digit strings are treated as IDs; any other value is
        matched against ``technical_name`` in the full source list.

        Raises:
            ValueError: if no source has the given technical name.
        """
        looks_like_id = isinstance(source_identifier, int) or (
            isinstance(source_identifier, str) and source_identifier.isdigit()
        )
        if looks_like_id:
            return self.client.get_source(int(source_identifier))

        # Get all sources and filter by technical_name
        sources = self.client.get_sources()
        source = next((s for s in sources if s.get('technical_name') == source_identifier), None)
        if not source:
            raise ValueError(f"Source '{source_identifier}' not found")
        return source

    def _fetch_all_data(self):
        """Fetch all information via API to reduce individual calls later."""
        print("📡 Fetching all data from API...")

        # Get all source-related data
        self.editions = self.client.get_source_editions(self.source_id)
        self.properties = self.client.get_source_properties(self.source_id)
        self.suggestions = self.client.get_source_suggestions(self.source_id)

        print(f" 📚 {len(self.editions)} editions")
        print(f" 🏷️ {len(self.properties)} properties")
        print(f" 💡 {len(self.suggestions)} suggestions")

    def _build_lookups(self):
        """Build lookup dictionaries for efficient access."""
        print("🔍 Building lookup dictionaries...")

        # Editions by internal ID
        self.editions_by_internal_id = {
            edition['source_internal_id']: edition
            for edition in self.editions
        }

        # Properties by technical name
        self.properties_by_tech_name = {
            prop['technical_name']: prop
            for prop in self.properties
        }

        # Suggestions grouped by (edition_id, property_id)
        self.suggestions_lookup = {}
        for suggestion in self.suggestions:
            key = (suggestion['edition_id'], suggestion['property_id'])
            self.suggestions_lookup.setdefault(key, []).append(suggestion)

    def _ensure_properties_exist(self):
        """Create properties if they don't exist yet."""
        print("🏷️ Ensuring properties exist...")

        created_count = 0
        for prop_def in self.property_definitions:
            tech_name = prop_def['technical_name']
            if tech_name in self.properties_by_tech_name:
                continue

            # NOTE(review): a 'description' in the definition is not sent to
            # the API here - confirm whether the endpoint accepts one.
            property_data = {
                'technical_name': tech_name,
                'name': prop_def['name'],
                'type': prop_def['type'],
                'source_id': self.source_id,
                'property_options': []
            }

            # Add options for controlled vocabulary and binary
            if prop_def['type'] == PropertyType.CONTROLLED_VOCABULARY and 'options' in prop_def:
                property_data['property_options'] = [
                    {'name': option} for option in prop_def['options']
                ]
            elif prop_def['type'] == PropertyType.BINARY:
                property_data['property_options'] = [
                    {'name': '0'}, {'name': '1'}
                ]

            # Create via API and keep the local caches in sync
            created_property = self.client.create_property(property_data)
            self.properties_by_tech_name[tech_name] = created_property
            self.properties.append(created_property)
            created_count += 1

            print(f" ➕ Created: {prop_def['name']} ({prop_def['type']})")

        if created_count == 0:
            print(f" ✅ All {len(self.property_definitions)} properties already exist")
        else:
            print(f" ✅ Created {created_count} new properties")

    def get_or_create_edition(self, internal_id: str) -> Dict:
        """Get existing edition or create new one."""
        if internal_id in self.editions_by_internal_id:
            return self.editions_by_internal_id[internal_id]

        # Create new edition
        edition_data = {
            'source_id': self.source_id,
            'source_internal_id': internal_id,
            'mapped_from_ids': []
        }

        edition = self.client.create_edition(edition_data)
        self.editions_by_internal_id[internal_id] = edition
        self.editions.append(edition)

        print(f" ➕ Created edition: {internal_id}")
        return edition

    def create_suggestion(self, edition_id: int, property_name: str, value: Any) -> Optional[Dict]:
        """
        Create a single property suggestion with validation and deduplication.

        Args:
            edition_id: ID of the edition
            property_name: Technical name of the property
            value: Value to suggest (will be validated based on property type)

        Returns:
            Created suggestion or None if invalid/skipped
        """
        # Skip empty or None values
        if value is None or value == "":
            return None

        # Find the property object
        property_obj = self.properties_by_tech_name.get(property_name)
        if not property_obj:
            print(f" ⚠️ Property '{property_name}' not found")
            return None

        property_id = property_obj['id']
        property_type = property_obj['type']
        suggestion_key = (edition_id, property_id)

        # Check if suggestion with same value already exists
        if self._suggestion_exists(suggestion_key, value, property_obj):
            print(f" ⏭️ Skipping duplicate suggestion: {property_name} = '{value}'")
            return None

        # Prepare suggestion data based on property type
        suggestion_data = {
            'edition_id': edition_id,
            'property_id': property_id,
            'source_id': self.source_id,
        }

        if property_type in (PropertyType.CONTROLLED_VOCABULARY, PropertyType.BINARY):
            property_options = property_obj.get('property_options', [])
            str_value = str(value).strip()

            if property_type == PropertyType.BINARY:
                # Normalize binary values (1/0, true/false, yes/no)
                option_name = self._normalize_binary(str_value)
                if option_name is None:
                    print(f" ⚠️ Invalid binary value: '{value}'")
                    return None
            else:
                option_name = str_value

            # Find matching option (case-insensitive on the option name)
            matching_option = next(
                (opt for opt in property_options if opt['name'].lower() == option_name.lower()),
                None
            )
            if not matching_option:
                print(f" ⚠️ No matching option for '{str_value}'")
                return None

            suggestion_data['property_option_id'] = matching_option['id']

        elif property_type in (PropertyType.FREE_TEXT, PropertyType.NUMERICAL):
            # For numerical values, validate it's a number
            if property_type == PropertyType.NUMERICAL:
                try:
                    float(str(value))
                except ValueError:
                    print(f" ⚠️ Invalid numerical value: '{value}'")
                    return None

            # Use custom value for free text and numerical properties
            suggestion_data['custom_value'] = str(value)

        else:
            print(f" ⚠️ Unknown property type: {property_type}")
            return None

        # Create the suggestion; a failing API call is reported, not raised.
        try:
            suggestion = self.client.create_suggestion(suggestion_data)
        except Exception as e:
            print(f" ⚠️ Failed to create suggestion: {e}")
            return None

        # Add to our lookup for future reference
        self.suggestions_lookup.setdefault(suggestion_key, []).append(suggestion)
        self.suggestions.append(suggestion)

        print(f" 💡 Created suggestion: {property_name} = '{value}'")
        return suggestion

    def _suggestion_exists(self, suggestion_key: tuple, new_value: Any, property_obj: Dict) -> bool:
        """
        Check if a suggestion with the same value already exists.

        Option-based types compare ``property_option_id`` against the options
        whose name matches the value; text and numerical types compare the
        stripped ``custom_value``.
        """
        existing_suggestions = self.suggestions_lookup.get(suggestion_key, [])
        if not existing_suggestions:
            return False

        property_type = property_obj.get('type')
        str_value = str(new_value).strip()

        if property_type in (PropertyType.CONTROLLED_VOCABULARY, PropertyType.BINARY):
            if property_type == PropertyType.BINARY:
                option_name = self._normalize_binary(str_value)
                if option_name is None:
                    return False  # Invalid binary value cannot match anything
            else:
                option_name = str_value

            # Resolve the candidate option IDs once, not per suggestion.
            wanted = option_name.lower()
            matching_ids = [
                option['id']
                for option in property_obj.get('property_options', [])
                if option['name'].lower() == wanted
            ]
            return any(
                suggestion.get('property_option_id') in matching_ids
                for suggestion in existing_suggestions
            )

        if property_type in (PropertyType.FREE_TEXT, PropertyType.NUMERICAL):
            return any(
                self._custom_value_matches(suggestion, str_value)
                for suggestion in existing_suggestions
            )

        return False

    def create_suggestions_batch(self, edition_id: int, data: Dict[str, Any]) -> Dict:
        """
        Create multiple suggestions in a batch.

        Args:
            edition_id: ID of the edition
            data: Dictionary mapping property technical names to values;
                a list value produces one suggestion per item

        Returns:
            Dictionary with counts of created and skipped suggestions
        """
        created_count = 0
        skipped_count = 0

        for property_name, value in data.items():
            # Handle both single values and lists of values
            values_to_process = value if isinstance(value, list) else [value]

            for individual_value in values_to_process:
                # Empty or None items are ignored and not counted as skipped
                if individual_value is None or individual_value == "":
                    continue

                suggestion = self.create_suggestion(edition_id, property_name, individual_value)
                if suggestion:
                    created_count += 1
                else:
                    skipped_count += 1

        if created_count > 0 or skipped_count > 0:
            print(f" ✅ Suggestions: {created_count} created, {skipped_count} skipped")

        return {
            'created': created_count,
            'skipped': skipped_count
        }

    def finish_ingestion(self):
        """
        Mark ingestion complete by updating the timestamp.

        Returns:
            The updated source, or None if the update failed (the failure is
            printed rather than raised).
        """
        try:
            update_data = {
                'last_ingestion_at': datetime.now().isoformat()
            }
            updated_source = self.client.update_source(self.source_id, update_data)
            print(f"📅 Updated last ingestion timestamp for: {self.source['name']}")
            return updated_source
        except Exception as e:
            print(f"⚠️ Failed to update last ingestion timestamp: {e}")
            return None
|
358
|
+
|
359
|
+
|
360
|
+
class PropertyBuilder:
    """Factory helpers that return property-definition dictionaries."""

    @staticmethod
    def _definition(technical_name: str, display_name: str, description: str, property_type: str) -> Dict:
        """Assemble the keys shared by every property definition."""
        definition = {}
        definition['technical_name'] = technical_name
        definition['name'] = display_name
        definition['description'] = description
        definition['type'] = property_type
        return definition

    @staticmethod
    def free_text(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a free text property."""
        return PropertyBuilder._definition(
            technical_name, display_name, description, PropertyType.FREE_TEXT
        )

    @staticmethod
    def controlled_vocabulary(technical_name: str, display_name: str, options: List[str], description: str = "") -> Dict:
        """Return the definition of a controlled vocabulary property with its options."""
        definition = PropertyBuilder._definition(
            technical_name, display_name, description, PropertyType.CONTROLLED_VOCABULARY
        )
        # The caller's list is stored as-is (not copied).
        definition['options'] = options
        return definition

    @staticmethod
    def binary(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a binary property."""
        return PropertyBuilder._definition(
            technical_name, display_name, description, PropertyType.BINARY
        )

    @staticmethod
    def numerical(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a numerical property."""
        return PropertyBuilder._definition(
            technical_name, display_name, description, PropertyType.NUMERICAL
        )
|
403
|
+
|
404
|
+
|
405
|
+
class SourceBuilder:
    """Helper class to create a new source."""

    @staticmethod
    def create(client: "CurationAPIClient", name: str, description: str,
               technical_name: Optional[str] = None) -> Dict:
        """
        Create a new source with the given parameters.

        Args:
            client: The API client to use
            name: Display name for the source
            description: Description of the source
            technical_name: Optional technical name (slug); omitted from the
                request when None or empty

        Returns:
            The created source, as returned by ``client.create_source``
        """
        source_data = {
            'name': name,
            'description': description
        }

        # Only send a technical name when one was actually supplied.
        if technical_name:
            source_data['technical_name'] = technical_name

        return client.create_source(source_data)
|
431
|
+
|
432
|
+
|
433
|
+
class EditionBuilder:
    """Convenience wrapper for creating editions through the API client."""

    @staticmethod
    def create(client: CurationAPIClient, source_id: int, internal_id: str) -> Dict:
        """
        Create an edition that belongs to the given source.

        Args:
            client: API client used to send the request
            source_id: ID of the owning source
            internal_id: The source's own identifier for the edition

        Returns:
            Whatever ``client.create_edition`` returns for the new edition
        """
        # New editions start without any mapped-from relationships.
        payload = dict(source_id=source_id, source_internal_id=internal_id)
        payload['mapped_from_ids'] = []
        created = client.create_edition(payload)
        return created
|
@@ -0,0 +1,239 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: metadata-curation-client
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: API client for metadata curation platforms
|
5
|
+
Author: Digital Edition Curation Team
|
6
|
+
License: MIT
|
7
|
+
Project-URL: Homepage, https://github.com/yourusername/digital-edition-curation
|
8
|
+
Project-URL: Repository, https://github.com/yourusername/digital-edition-curation
|
9
|
+
Project-URL: Issues, https://github.com/yourusername/digital-edition-curation/issues
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
11
|
+
Classifier: Intended Audience :: Developers
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.8
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
19
|
+
Requires-Python: >=3.8
|
20
|
+
Description-Content-Type: text/markdown
|
21
|
+
License-File: LICENSE
|
22
|
+
Requires-Dist: requests>=2.28.0
|
23
|
+
Provides-Extra: dev
|
24
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
25
|
+
Requires-Dist: black; extra == "dev"
|
26
|
+
Requires-Dist: isort; extra == "dev"
|
27
|
+
Requires-Dist: mypy; extra == "dev"
|
28
|
+
Dynamic: license-file
|
29
|
+
|
30
|
+
# Metadata Curation Client
|
31
|
+
|
32
|
+
API client for external partners to integrate with metadata curation platforms.
|
33
|
+
|
34
|
+
## Installation
|
35
|
+
|
36
|
+
```bash
|
37
|
+
pip install metadata-curation-client
|
38
|
+
```
|
39
|
+
|
40
|
+
## Basic Usage
|
41
|
+
|
42
|
+
```python
|
43
|
+
from metadata_curation_client import CurationAPIClient, PropertyType
|
44
|
+
|
45
|
+
# Initialize client
|
46
|
+
client = CurationAPIClient("http://localhost:8000")
|
47
|
+
|
48
|
+
# Create source
|
49
|
+
source = client.create_source({
|
50
|
+
"name": "My Archive",
|
51
|
+
"description": "Digital editions from our collection"
|
52
|
+
})
|
53
|
+
|
54
|
+
# Create controlled vocabulary property
|
55
|
+
language_prop = client.create_property({
|
56
|
+
"technical_name": "language",
|
57
|
+
"name": "Language",
|
58
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
59
|
+
"source_id": source["id"],
|
60
|
+
"property_options": [{"name": "English"}, {"name": "German"}]
|
61
|
+
})
|
62
|
+
|
63
|
+
# Create free text property
|
64
|
+
description_prop = client.create_property({
|
65
|
+
"technical_name": "description",
|
66
|
+
"name": "Description",
|
67
|
+
"type": PropertyType.FREE_TEXT,
|
68
|
+
"source_id": source["id"]
|
69
|
+
})
|
70
|
+
|
71
|
+
# Create edition
|
72
|
+
edition = client.create_edition({
|
73
|
+
"source_id": source["id"],
|
74
|
+
"source_internal_id": "my_001"
|
75
|
+
})
|
76
|
+
|
77
|
+
# Create properties for each type
|
78
|
+
genre_prop = client.create_property({
|
79
|
+
"technical_name": "genre",
|
80
|
+
"name": "Genre",
|
81
|
+
"type": PropertyType.CONTROLLED_VOCABULARY,
|
82
|
+
"source_id": source["id"],
|
83
|
+
"property_options": [
|
84
|
+
{"name": "Poetry"}, {"name": "Prose"}, {"name": "Drama"}
|
85
|
+
]
|
86
|
+
})
|
87
|
+
|
88
|
+
has_annotations_prop = client.create_property({
|
89
|
+
"technical_name": "has_annotations",
|
90
|
+
"name": "Has Annotations",
|
91
|
+
"type": PropertyType.BINARY,
|
92
|
+
"source_id": source["id"]
|
93
|
+
})
|
94
|
+
|
95
|
+
year_prop = client.create_property({
|
96
|
+
"technical_name": "publication_year",
|
97
|
+
"name": "Publication Year",
|
98
|
+
"type": PropertyType.NUMERICAL,
|
99
|
+
"source_id": source["id"]
|
100
|
+
})
|
101
|
+
|
102
|
+
description_prop = client.create_property({
|
103
|
+
"technical_name": "description",
|
104
|
+
"name": "Description",
|
105
|
+
"type": PropertyType.FREE_TEXT,
|
106
|
+
"source_id": source["id"]
|
107
|
+
})
|
108
|
+
|
109
|
+
# Example 1: CONTROLLED_VOCABULARY suggestion
|
110
|
+
# First get the property option ID
|
111
|
+
properties = client.get_properties()
|
112
|
+
genre_prop = next(p for p in properties if p["technical_name"] == "genre")
|
113
|
+
poetry_option = next(opt for opt in genre_prop["property_options"] if opt["name"] == "Poetry")
|
114
|
+
|
115
|
+
client.create_suggestion({
|
116
|
+
"source_id": source["id"],
|
117
|
+
"edition_id": edition["id"],
|
118
|
+
"property_id": genre_prop["id"],
|
119
|
+
"property_option_id": poetry_option["id"]
|
120
|
+
})
|
121
|
+
|
122
|
+
# Example 2: BINARY suggestion (uses property_option_id)
|
123
|
+
# Binary properties have two options, named "1" (true) and "0" (false); look up the option ID by name
|
124
|
+
# Get the "true" option (the one named "1")
|
125
|
+
binary_props = client.get_properties()
|
126
|
+
has_annotations_prop = next(p for p in binary_props if p["technical_name"] == "has_annotations")
|
127
|
+
true_option = next(opt for opt in has_annotations_prop["property_options"] if opt["name"] == "1")
|
128
|
+
|
129
|
+
client.create_suggestion({
|
130
|
+
"source_id": source["id"],
|
131
|
+
"edition_id": edition["id"],
|
132
|
+
"property_id": has_annotations_prop["id"],
|
133
|
+
"property_option_id": true_option["id"] # For "yes"/"true" value
|
134
|
+
})
|
135
|
+
|
136
|
+
# Example 3: NUMERICAL suggestion (uses custom_value)
|
137
|
+
client.create_suggestion({
|
138
|
+
"source_id": source["id"],
|
139
|
+
"edition_id": edition["id"],
|
140
|
+
"property_id": year_prop["id"],
|
141
|
+
"custom_value": "2025" # Note: numerical values are sent as strings
|
142
|
+
})
|
143
|
+
|
144
|
+
# Example 4: FREE_TEXT suggestion (uses custom_value)
|
145
|
+
client.create_suggestion({
|
146
|
+
"source_id": source["id"],
|
147
|
+
"edition_id": edition["id"],
|
148
|
+
"property_id": description_prop["id"],
|
149
|
+
"custom_value": "This is a detailed description of the edition."
|
150
|
+
})
|
151
|
+
|
152
|
+
# Mark ingestion complete
|
153
|
+
client.mark_ingestion_complete(source["id"])
|
154
|
+
```
|
155
|
+
|
156
|
+
## Property Types
|
157
|
+
|
158
|
+
- `PropertyType.CONTROLLED_VOCABULARY` - Predefined options
|
159
|
+
- `PropertyType.FREE_TEXT` - Free text
|
160
|
+
- `PropertyType.BINARY` - True/false values
|
161
|
+
- `PropertyType.NUMERICAL` - Numeric values
|
162
|
+
|
163
|
+
## API Reference
|
164
|
+
|
165
|
+
See the docstrings in `curation_api_client.py` for detailed method documentation.
|
166
|
+
|
167
|
+
## Enhanced Integration with SourceManager
|
168
|
+
|
169
|
+
For more sophisticated integrations, we also provide a higher-level abstraction in `source_manager.py` that mirrors some of the conveniences of our internal extractors:
|
170
|
+
|
171
|
+
```python
|
172
|
+
from metadata_curation_client import CurationAPIClient, PropertyType, SourceManager, PropertyBuilder
|
173
|
+
|
174
|
+
# Initialize client and get the source, creating it if it does not exist yet
|
175
|
+
client = CurationAPIClient("http://localhost:8000")
|
176
|
+
source = client.get_source_by_technical_name("my_data_source")
|
177
|
+
if not source:
|
178
|
+
source = client.create_source({
|
179
|
+
"name": "My Data Source",
|
180
|
+
"description": "My collection of digital editions",
|
181
|
+
"technical_name": "my_data_source"
|
182
|
+
})
|
183
|
+
|
184
|
+
# Define properties using helper builders
|
185
|
+
property_definitions = [
|
186
|
+
PropertyBuilder.controlled_vocabulary(
|
187
|
+
"example_genre", "Genre", ["Poetry", "Prose", "Drama"]
|
188
|
+
),
|
189
|
+
PropertyBuilder.binary(
|
190
|
+
"example_has_annotations", "Has Annotations"
|
191
|
+
),
|
192
|
+
PropertyBuilder.numerical(
|
193
|
+
"example_year", "Publication Year"
|
194
|
+
)
|
195
|
+
]
|
196
|
+
|
197
|
+
# Initialize the source manager - this will:
|
198
|
+
# - Fetch all existing data
|
199
|
+
# - Build lookup tables
|
200
|
+
# - Create any missing properties
|
201
|
+
manager = SourceManager(client, source['id'], property_definitions)
|
202
|
+
|
203
|
+
# Efficiently get or create edition using lookup tables
|
204
|
+
edition = manager.get_or_create_edition("book_001")
|
205
|
+
|
206
|
+
# Create suggestions in a batch with validation and deduplication
|
207
|
+
manager.create_suggestions_batch(
|
208
|
+
edition["id"],
|
209
|
+
{
|
210
|
+
"example_genre": "Poetry",
|
211
|
+
"example_has_annotations": True,
|
212
|
+
"example_year": 2022
|
213
|
+
}
|
214
|
+
)
|
215
|
+
|
216
|
+
# Mark ingestion complete (updates timestamp)
|
217
|
+
manager.finish_ingestion()
|
218
|
+
```
|
219
|
+
|
220
|
+
### Benefits of the SourceManager
|
221
|
+
|
222
|
+
The `SourceManager` provides several advantages for more complex integrations:
|
223
|
+
|
224
|
+
1. **Reduced API Calls**: Prefetches data to minimize API requests
|
225
|
+
2. **Lookup Tables**: Maintains efficient in-memory lookups for editions, properties, and suggestions
|
226
|
+
3. **Automatic Property Creation**: Creates properties from definitions as needed
|
227
|
+
4. **Validation**: Automatically validates values based on property types
|
228
|
+
5. **Deduplication**: Avoids creating duplicate suggestions
|
229
|
+
6. **Builder Helpers**: Provides convenient builder classes for creating properties and sources
|
230
|
+
7. **Timestamp Management**: Automatically updates the last ingestion timestamp
|
231
|
+
|
232
|
+
For a complete example, see `example_with_source_manager.py`.
|
233
|
+
|
234
|
+
### Choosing the Right Approach
|
235
|
+
|
236
|
+
- **Basic API Client**: For simple integrations or when you need complete control over the process
|
237
|
+
- **SourceManager**: For more complex integrations where efficiency and convenience are priorities
|
238
|
+
|
239
|
+
Both approaches use the same underlying API endpoints and data models, so you can choose the one that best fits your needs or even mix them as required.
|
@@ -0,0 +1,8 @@
|
|
1
|
+
metadata_curation_client/__init__.py,sha256=VWJY3OsCDQEZ5BbePugl4J8E2etciuHyTuDvTccaqog,360
|
2
|
+
metadata_curation_client/curation_api_client.py,sha256=WMfIIKO01b4EZ0wCyO9Opn_FrcZSKJLquHEMsVWQWu0,7459
|
3
|
+
metadata_curation_client/source_manager.py,sha256=hkrEU1JT4Hkwy2fUbheb7pP_1KfTXbcU8sjiLYc0i8Q,17940
|
4
|
+
metadata_curation_client-0.1.0.dist-info/licenses/LICENSE,sha256=dvKFLHmy95RWWhFDqmOn38Yjfv_w-Hxc5EmQgQ9iCC8,1086
|
5
|
+
metadata_curation_client-0.1.0.dist-info/METADATA,sha256=YzREY-ik0GaW44OLOTAnZ0dJvLJXxiAtFzgTflyup10,7890
|
6
|
+
metadata_curation_client-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
7
|
+
metadata_curation_client-0.1.0.dist-info/top_level.txt,sha256=FUkmJY-66mVLu-RvgCXwPn26F6Jkxmve9Stw8kCsr0w,25
|
8
|
+
metadata_curation_client-0.1.0.dist-info/RECORD,,
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Digital Edition Curation Team
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1 @@
|
|
1
|
+
metadata_curation_client
|