metadata-curation-client 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ """
2
+ Metadata Curation Client
3
+
4
+ A lightweight API client for external partners to integrate with metadata curation platforms.
5
+ """
6
+
7
+ from .curation_api_client import CurationAPIClient, PropertyType
8
+ from .source_manager import SourceManager, PropertyBuilder
9
+
10
+ __version__ = "0.1.0"
11
+ __all__ = ["CurationAPIClient", "PropertyType", "SourceManager", "PropertyBuilder"]
@@ -0,0 +1,182 @@
1
+ """
2
+ Metadata Curation Client - API Client
3
+
4
+ Lightweight API client for external partners to integrate with metadata curation platforms.
5
+ Based on the actual models and AbstractExtractor patterns.
6
+ """
7
+
8
+ import requests
9
+ from typing import Dict, List, Optional, Any
10
+ from datetime import datetime
11
+
12
+
13
class CurationAPIClient:
    """
    Thin HTTP client for the metadata curation REST API.

    Every public method issues exactly one request through a shared
    ``requests.Session`` and returns the decoded JSON body. HTTP and
    transport errors are printed and then re-raised unchanged, so callers
    can catch the usual ``requests.exceptions`` types.

    Attributes:
        base_url: API root without a trailing slash.
        timeout: Seconds to wait for each request (``None`` waits forever).
        session: The underlying ``requests.Session`` carrying default headers.
    """

    def __init__(self, base_url: str, api_key: Optional[str] = None, timeout: Optional[float] = 30.0):
        """
        Create a client bound to ``base_url``.

        Args:
            base_url: Root URL of the API; a trailing slash is removed.
            api_key: Optional token sent as ``Authorization: Bearer <api_key>``.
            timeout: Per-request timeout in seconds. Requests never time out
                on their own, so a finite default keeps an unresponsive server
                from blocking the caller indefinitely. Pass ``None`` to
                disable the timeout.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({'Content-Type': 'application/json'})

        if api_key:
            self.session.headers.update({'Authorization': f'Bearer {api_key}'})

    def _request(self, method: str, path: str, **kwargs: Any) -> Any:
        """
        Send one request and return its decoded JSON body.

        Args:
            method: Name of the session method to call ('get', 'post', 'put').
            path: Path appended to ``base_url`` (must start with '/').
            **kwargs: Extra keyword arguments forwarded to the session call.
        """
        send = getattr(self.session, method)
        response = send(f"{self.base_url}{path}", timeout=self.timeout, **kwargs)
        return self._handle_response(response)

    def _handle_response(self, response: "requests.Response") -> Any:
        """
        Return the JSON body of ``response`` or re-raise the failure.

        The error is printed before it is re-raised. A bare ``raise`` is used
        so the original traceback is preserved.
        """
        try:
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError:
            print(f"API Error {response.status_code}: {response.text}")
            raise
        except requests.exceptions.RequestException as e:
            print(f"Request Error: {e}")
            raise

    @staticmethod
    def _relationship_params(include_relationships: bool) -> Dict:
        """Build the query parameters for the ``include_relationships`` flag."""
        # The flag is only sent when it is set; otherwise no query string is added.
        return {"include_relationships": include_relationships} if include_relationships else {}

    # Source endpoints
    def create_source(self, source_data: Dict) -> Dict:
        """Create a new source."""
        return self._request('post', "/sources/", json=source_data)

    def get_source(self, source_id: int) -> Dict:
        """Get source by ID."""
        return self._request('get', f"/sources/{source_id}")

    def get_sources(self) -> List[Dict]:
        """Get all sources."""
        return self._request('get', "/sources/")

    def get_source_by_technical_name(self, technical_name: str) -> Optional[Dict]:
        """Get source by technical name, or None when no source matches."""
        # Filtering happens client-side over the full source list.
        sources = self.get_sources()
        return next((s for s in sources if s.get('technical_name') == technical_name), None)

    def get_source_editions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all editions for a source."""
        params = self._relationship_params(include_relationships)
        return self._request('get', f"/sources/{source_id}/editions", params=params)

    def get_source_properties(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all properties for a source."""
        params = self._relationship_params(include_relationships)
        return self._request('get', f"/sources/{source_id}/properties", params=params)

    def get_source_suggestions(self, source_id: int, include_relationships: bool = False) -> List[Dict]:
        """Get all suggestions for a source."""
        params = self._relationship_params(include_relationships)
        return self._request('get', f"/sources/{source_id}/suggestions", params=params)

    def update_source(self, source_id: int, source_data: Dict) -> Dict:
        """Update an existing source."""
        return self._request('put', f"/sources/{source_id}", json=source_data)

    def mark_ingestion_complete(self, source_id: int) -> Dict:
        """Mark ingestion complete by updating last_ingestion_at timestamp."""
        # NOTE(review): datetime.now() is naive local time, so the timestamp
        # carries no UTC offset — confirm what the server expects.
        return self.update_source(source_id, {
            "last_ingestion_at": datetime.now().isoformat()
        })

    # Edition endpoints
    def create_edition(self, edition_data: Dict) -> Dict:
        """
        Create a new edition.

        Required fields:
        - source_id: ID of the source this edition belongs to
        - source_internal_id: Internal ID/identifier for this edition

        Optional fields:
        - mapped_from_ids: List of edition IDs this edition is mapped from
        """
        return self._request('post', "/editions/", json=edition_data)

    def get_editions(self) -> List[Dict]:
        """Get all editions."""
        return self._request('get', "/editions/")

    def get_edition(self, edition_id: int) -> Dict:
        """Get edition by ID."""
        return self._request('get', f"/editions/{edition_id}")

    # Property endpoints
    def create_property(self, property_data: Dict) -> Dict:
        """Create a new property."""
        return self._request('post', "/properties/", json=property_data)

    def get_properties(self) -> List[Dict]:
        """Get all properties."""
        return self._request('get', "/properties/")

    def get_property(self, property_id: int) -> Dict:
        """Get property by ID."""
        return self._request('get', f"/properties/{property_id}")

    # Suggestion endpoints
    def create_suggestion(self, suggestion_data: Dict) -> Dict:
        """
        Create a new suggestion.

        Required fields:
        - source_id: ID of the source
        - edition_id: ID of the edition
        - property_id: ID of the property

        For controlled_vocabulary properties:
        - property_option_id: ID of the property option

        For free_text, numerical, or other properties:
        - custom_value: String value for the property

        Note: Either property_option_id OR custom_value must be provided,
        depending on the property type.
        """
        return self._request('post', "/suggestions/", json=suggestion_data)

    def get_suggestions(self) -> List[Dict]:
        """Get all suggestions."""
        return self._request('get', "/suggestions/")
156
+
157
+
158
+ # Property type constants (matching models.py)
159
class PropertyType:
    """
    String constants naming the supported property types.

    Pass one of these as the ``type`` field when creating a property. The
    type also determines which field a suggestion must carry:

    - CONTROLLED_VOCABULARY and BINARY suggestions use ``property_option_id``
    - FREE_TEXT and NUMERICAL suggestions use ``custom_value``
    """

    # Predefined options; needs a property_options list when created.
    CONTROLLED_VOCABULARY = "controlled_vocabulary"

    # Open text; no property_options, value sent as a string.
    FREE_TEXT = "free_text"

    # Yes/no; options are named "1" (true) and "0" (false).
    BINARY = "binary"

    # Numbers; no property_options, value sent as a numeric string.
    NUMERICAL = "numerical"
@@ -0,0 +1,455 @@
1
+ """
2
+ Metadata Curation Client - Source Manager
3
+
4
+ Enhanced abstractions for the metadata curation API client, inspired by the internal AbstractExtractor.
5
+
6
+ This provides higher-level functionality for external integrators who prefer a more
7
+ streamlined approach with features like:
8
+ - Pre-fetching data to reduce API calls
9
+ - Lookup tables for efficient access
10
+ - Automatic property creation and validation
11
+ - Streamlined suggestion creation
12
+ """
13
+
14
+ from typing import Dict, List, Any, Optional, Union
15
+ from datetime import datetime
16
+ from .curation_api_client import CurationAPIClient, PropertyType
17
+
18
+
19
class SourceManager:
    """
    High-level helper for ingesting data into a single source.

    On construction it resolves the source, downloads its editions,
    properties and suggestions once, and indexes them in dictionaries so
    later calls can be answered from memory:

    - ``editions_by_internal_id``: source_internal_id -> edition
    - ``properties_by_tech_name``: technical_name -> property
    - ``suggestions_lookup``: (edition_id, property_id) -> list of suggestions

    It can also create missing properties from definitions, validate values
    against the property type and skip suggestions that already exist.

    This is optional - partners can still use the direct CurationAPIClient
    for simpler integrations if preferred.
    """

    # Accepted spellings of binary values (compared case-insensitively).
    _BINARY_TRUE = ('1', 'true', 'yes', 'y')
    _BINARY_FALSE = ('0', 'false', 'no', 'n')

    def __init__(self, client: CurationAPIClient, source_identifier: Union[int, str], property_definitions: Optional[List[Dict]] = None):
        """
        Initialize the source manager with all needed data.

        Args:
            client: API client for backend communication
            source_identifier: Source ID (int) or technical_name (str)
            property_definitions: Optional list of property definitions to ensure exist

        Raises:
            ValueError: If a technical name does not match any source.
        """
        self.client = client
        self.property_definitions = property_definitions or []

        # Step 1: Get source information
        self.source = self._get_source(source_identifier)
        self.source_id = self.source['id']

        print(f"🎯 Working with source: {self.source['name']} (ID: {self.source_id})")

        # Step 2: Fetch all current data via API
        self._fetch_all_data()

        # Step 3: Build lookup dictionaries
        self._build_lookups()

        # Step 4: Ensure properties exist (if definitions provided)
        if property_definitions:
            self._ensure_properties_exist()

    def _get_source(self, source_identifier: Union[int, str]) -> Dict:
        """
        Get source by ID or technical name.

        Integers and strings made only of decimal digits are treated as IDs;
        any other string is matched against ``technical_name``.
        """
        # isdecimal() (not isdigit()) so that every accepted string is also
        # accepted by int(); e.g. "²" passes isdigit() but int("²") raises.
        looks_like_id = isinstance(source_identifier, int) or (
            isinstance(source_identifier, str) and source_identifier.isdecimal()
        )
        if looks_like_id:
            return self.client.get_source(int(source_identifier))

        # Get all sources and filter by technical_name
        all_sources = self.client.get_sources()
        source = next((s for s in all_sources if s.get('technical_name') == source_identifier), None)
        if not source:
            raise ValueError(f"Source '{source_identifier}' not found")
        return source

    def _fetch_all_data(self):
        """Fetch all information via API to reduce individual calls later."""
        print("📡 Fetching all data from API...")

        # Get all source-related data
        self.editions = self.client.get_source_editions(self.source_id)
        self.properties = self.client.get_source_properties(self.source_id)
        self.suggestions = self.client.get_source_suggestions(self.source_id)

        print(f" 📚 {len(self.editions)} editions")
        print(f" 🏷️ {len(self.properties)} properties")
        print(f" 💡 {len(self.suggestions)} suggestions")

    def _build_lookups(self):
        """Build lookup dictionaries for efficient access."""
        print("🔍 Building lookup dictionaries...")

        # Editions by internal ID
        self.editions_by_internal_id = {
            edition['source_internal_id']: edition
            for edition in self.editions
        }

        # Properties by technical name
        self.properties_by_tech_name = {
            prop['technical_name']: prop
            for prop in self.properties
        }

        # Suggestions grouped by (edition, property)
        self.suggestions_lookup = {}
        for suggestion in self.suggestions:
            key = (suggestion['edition_id'], suggestion['property_id'])
            self.suggestions_lookup.setdefault(key, []).append(suggestion)

    def _ensure_properties_exist(self):
        """Create properties if they don't exist yet."""
        print("🏷️ Ensuring properties exist...")

        created_count = 0
        for prop_def in self.property_definitions:
            tech_name = prop_def['technical_name']
            if tech_name in self.properties_by_tech_name:
                continue

            # NOTE(review): prop_def may carry a 'description' (PropertyBuilder
            # sets one) which is not forwarded here — confirm whether the API
            # accepts it before adding it to the payload.
            property_data = {
                'technical_name': tech_name,
                'name': prop_def['name'],
                'type': prop_def['type'],
                'source_id': self.source_id,
                'property_options': []
            }

            # Add options for controlled vocabulary and binary
            if prop_def['type'] == PropertyType.CONTROLLED_VOCABULARY and 'options' in prop_def:
                property_data['property_options'] = [
                    {'name': option} for option in prop_def['options']
                ]
            elif prop_def['type'] == PropertyType.BINARY:
                property_data['property_options'] = [
                    {'name': '0'}, {'name': '1'}
                ]

            # Create via API and keep the local caches in sync
            created_property = self.client.create_property(property_data)
            self.properties_by_tech_name[tech_name] = created_property
            self.properties.append(created_property)
            created_count += 1

            print(f" ➕ Created: {prop_def['name']} ({prop_def['type']})")

        if created_count == 0:
            print(f" ✅ All {len(self.property_definitions)} properties already exist")
        else:
            print(f" ✅ Created {created_count} new properties")

    def get_or_create_edition(self, internal_id: str) -> Dict:
        """Get existing edition or create new one."""
        if internal_id in self.editions_by_internal_id:
            return self.editions_by_internal_id[internal_id]

        # Create new edition
        edition_data = {
            'source_id': self.source_id,
            'source_internal_id': internal_id,
            'mapped_from_ids': []
        }

        edition = self.client.create_edition(edition_data)
        self.editions_by_internal_id[internal_id] = edition
        self.editions.append(edition)

        print(f" ➕ Created edition: {internal_id}")
        return edition

    @classmethod
    def _normalize_binary(cls, raw: str) -> Optional[str]:
        """Map a binary spelling to the option name '1' or '0'; None if unrecognised."""
        lowered = raw.lower()
        if lowered in cls._BINARY_TRUE:
            return '1'
        if lowered in cls._BINARY_FALSE:
            return '0'
        return None

    def create_suggestion(self, edition_id: int, property_name: str, value: Any) -> Optional[Dict]:
        """
        Create a single property suggestion with validation and deduplication.

        Args:
            edition_id: ID of the edition
            property_name: Technical name of the property
            value: Value to suggest (will be validated based on property type)

        Returns:
            Created suggestion or None if invalid/skipped
        """
        # Skip empty or None values
        if value is None or value == "":
            return None

        # Find the property object
        property_obj = self.properties_by_tech_name.get(property_name)
        if not property_obj:
            print(f" ⚠️ Property '{property_name}' not found")
            return None

        property_id = property_obj['id']
        property_type = property_obj['type']
        suggestion_key = (edition_id, property_id)

        # Check if suggestion with same value already exists
        if self._suggestion_exists(suggestion_key, value, property_obj):
            print(f" ⏭️ Skipping duplicate suggestion: {property_name} = '{value}'")
            return None

        # Prepare suggestion data based on property type
        suggestion_data = {
            'edition_id': edition_id,
            'property_id': property_id,
            'source_id': self.source_id,
        }

        if property_type in [PropertyType.CONTROLLED_VOCABULARY, PropertyType.BINARY]:
            property_options = property_obj.get('property_options', [])
            str_value = str(value).strip()

            if property_type == PropertyType.BINARY:
                # Normalize binary values (1/0, true/false, yes/no)
                option_name = self._normalize_binary(str_value)
                if option_name is None:
                    print(f" ⚠️ Invalid binary value: '{value}'")
                    return None
            else:
                option_name = str_value

            # Find matching option (case-insensitive on the option name)
            matching_option = next(
                (opt for opt in property_options if opt['name'].lower() == option_name.lower()),
                None
            )

            if not matching_option:
                print(f" ⚠️ No matching option for '{str_value}'")
                return None

            suggestion_data['property_option_id'] = matching_option['id']

        elif property_type in [PropertyType.FREE_TEXT, PropertyType.NUMERICAL]:
            # For numerical values, validate it's a number
            if property_type == PropertyType.NUMERICAL:
                try:
                    float(str(value))  # Check if it's a valid number
                except ValueError:
                    print(f" ⚠️ Invalid numerical value: '{value}'")
                    return None

            # Use custom value for free text and numerical properties
            suggestion_data['custom_value'] = str(value)

        else:
            print(f" ⚠️ Unknown property type: {property_type}")
            return None

        # Create the suggestion; failures are reported and skipped (best effort)
        try:
            suggestion = self.client.create_suggestion(suggestion_data)

            # Add to our lookup for future reference
            self.suggestions_lookup.setdefault(suggestion_key, []).append(suggestion)
            self.suggestions.append(suggestion)

            print(f" 💡 Created suggestion: {property_name} = '{value}'")
            return suggestion

        except Exception as e:
            print(f" ⚠️ Failed to create suggestion: {e}")
            return None

    def _suggestion_exists(self, suggestion_key: tuple, new_value: Any, property_obj: Dict) -> bool:
        """
        Check if a suggestion with the same value already exists.

        Args:
            suggestion_key: ``(edition_id, property_id)`` tuple.
            new_value: Candidate value, compared after ``str()`` and ``strip()``.
            property_obj: Property dict providing ``type`` and ``property_options``.
        """
        existing_suggestions = self.suggestions_lookup.get(suggestion_key, [])
        if not existing_suggestions:
            return False

        property_type = property_obj.get('type')
        str_value = str(new_value).strip()

        if property_type in [PropertyType.CONTROLLED_VOCABULARY, PropertyType.BINARY]:
            if property_type == PropertyType.BINARY:
                option_name = self._normalize_binary(str_value)
                if option_name is None:
                    return False  # Invalid binary value can never match
            else:
                option_name = str_value

            # Resolve the matching option IDs once instead of per suggestion.
            wanted = option_name.lower()
            matching_ids = [
                option['id']
                for option in property_obj.get('property_options', [])
                if option['name'].lower() == wanted
            ]
            return any(
                suggestion.get('property_option_id') in matching_ids
                for suggestion in existing_suggestions
            )

        if property_type in [PropertyType.FREE_TEXT, PropertyType.NUMERICAL]:
            for suggestion in existing_suggestions:
                stored = suggestion.get('custom_value')
                # The stored value may be missing, null or a non-string;
                # normalise it to text so .strip() cannot raise.
                stored_text = '' if stored is None else str(stored)
                if stored_text.strip() == str_value:
                    return True

        return False

    def create_suggestions_batch(self, edition_id: int, data: Dict[str, Any]) -> Dict:
        """
        Create multiple suggestions in a batch.

        Args:
            edition_id: ID of the edition
            data: Dictionary mapping property technical names to values
                (a list value yields one suggestion per item)

        Returns:
            Dictionary with counts of created and skipped suggestions
        """
        created_count = 0
        skipped_count = 0

        for property_name, value in data.items():
            # Handle both single values and lists of values
            values_to_process = value if isinstance(value, list) else [value]

            for individual_value in values_to_process:
                # Empty or None items are ignored and not counted as skipped
                if individual_value is None or individual_value == "":
                    continue

                suggestion = self.create_suggestion(edition_id, property_name, individual_value)
                if suggestion:
                    created_count += 1
                else:
                    skipped_count += 1

        if created_count > 0 or skipped_count > 0:
            print(f" ✅ Suggestions: {created_count} created, {skipped_count} skipped")

        return {
            'created': created_count,
            'skipped': skipped_count
        }

    def finish_ingestion(self):
        """
        Mark ingestion complete by updating the timestamp.

        Returns:
            The updated source, or None if the update failed (the failure is
            printed, not raised).
        """
        try:
            # NOTE(review): datetime.now() is naive local time — confirm the
            # server does not expect an explicit UTC offset.
            update_data = {
                'last_ingestion_at': datetime.now().isoformat()
            }
            updated_source = self.client.update_source(self.source_id, update_data)
            print(f"📅 Updated last ingestion timestamp for: {self.source['name']}")
            return updated_source
        except Exception as e:
            print(f"⚠️ Failed to update last ingestion timestamp: {e}")
            return None
358
+
359
+
360
class PropertyBuilder:
    """Factory helpers returning property-definition dictionaries.

    Each method returns a dict with the keys ``technical_name``, ``name``,
    ``description`` and ``type``; controlled vocabularies additionally carry
    an ``options`` list.
    """

    @staticmethod
    def free_text(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a free text property."""
        definition = dict(technical_name=technical_name, name=display_name, description=description)
        definition['type'] = PropertyType.FREE_TEXT
        return definition

    @staticmethod
    def controlled_vocabulary(technical_name: str, display_name: str, options: List[str], description: str = "") -> Dict:
        """Return the definition of a controlled vocabulary property with its options."""
        definition = dict(technical_name=technical_name, name=display_name, description=description)
        definition['type'] = PropertyType.CONTROLLED_VOCABULARY
        definition['options'] = options
        return definition

    @staticmethod
    def binary(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a binary property."""
        definition = dict(technical_name=technical_name, name=display_name, description=description)
        definition['type'] = PropertyType.BINARY
        return definition

    @staticmethod
    def numerical(technical_name: str, display_name: str, description: str = "") -> Dict:
        """Return the definition of a numerical property."""
        definition = dict(technical_name=technical_name, name=display_name, description=description)
        definition['type'] = PropertyType.NUMERICAL
        return definition
403
+
404
+
405
class SourceBuilder:
    """Convenience wrapper for creating a source through the API client."""

    @staticmethod
    def create(client: CurationAPIClient, name: str, description: str, technical_name: str = None) -> Dict:
        """
        Create a source via ``client.create_source`` and return the result.

        Args:
            client: The API client to use
            name: Display name for the source
            description: Description of the source
            technical_name: Optional technical name (slug); omitted from the
                request when empty or None

        Returns:
            The created source
        """
        # Only include the slug when one was actually supplied.
        optional_fields = {'technical_name': technical_name} if technical_name else {}
        return client.create_source({
            'name': name,
            'description': description,
            **optional_fields,
        })
431
+
432
+
433
class EditionBuilder:
    """Convenience wrapper for creating an edition through the API client."""

    @staticmethod
    def create(client: CurationAPIClient, source_id: int, internal_id: str) -> Dict:
        """
        Create an edition via ``client.create_edition`` and return the result.

        Args:
            client: The API client to use
            source_id: ID of the source
            internal_id: Internal ID/identifier for this edition

        Returns:
            The created edition
        """
        # A new edition starts without any mapped-from editions.
        payload = dict(
            source_id=source_id,
            source_internal_id=internal_id,
            mapped_from_ids=[],
        )
        return client.create_edition(payload)
@@ -0,0 +1,239 @@
1
+ Metadata-Version: 2.4
2
+ Name: metadata-curation-client
3
+ Version: 0.1.0
4
+ Summary: API client for metadata curation platforms
5
+ Author: Digital Edition Curation Team
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/yourusername/digital-edition-curation
8
+ Project-URL: Repository, https://github.com/yourusername/digital-edition-curation
9
+ Project-URL: Issues, https://github.com/yourusername/digital-edition-curation/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: requests>=2.28.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=7.0; extra == "dev"
25
+ Requires-Dist: black; extra == "dev"
26
+ Requires-Dist: isort; extra == "dev"
27
+ Requires-Dist: mypy; extra == "dev"
28
+ Dynamic: license-file
29
+
30
+ # Metadata Curation Client
31
+
32
+ API client for external partners to integrate with metadata curation platforms.
33
+
34
+ ## Installation
35
+
36
+ ```bash
37
+ pip install metadata-curation-client
38
+ ```
39
+
40
+ ## Basic Usage
41
+
42
+ ```python
43
+ from metadata_curation_client import CurationAPIClient, PropertyType
44
+
45
+ # Initialize client
46
+ client = CurationAPIClient("http://localhost:8000")
47
+
48
+ # Create source
49
+ source = client.create_source({
50
+ "name": "My Archive",
51
+ "description": "Digital editions from our collection"
52
+ })
53
+
54
+ # Create controlled vocabulary property
55
+ language_prop = client.create_property({
56
+ "technical_name": "language",
57
+ "name": "Language",
58
+ "type": PropertyType.CONTROLLED_VOCABULARY,
59
+ "source_id": source["id"],
60
+ "property_options": [{"name": "English"}, {"name": "German"}]
61
+ })
62
+
63
+ # Create free text property
64
+ description_prop = client.create_property({
65
+ "technical_name": "description",
66
+ "name": "Description",
67
+ "type": PropertyType.FREE_TEXT,
68
+ "source_id": source["id"]
69
+ })
70
+
71
+ # Create edition
72
+ edition = client.create_edition({
73
+ "source_id": source["id"],
74
+ "source_internal_id": "my_001"
75
+ })
76
+
77
+ # Create properties for each type
78
+ genre_prop = client.create_property({
79
+ "technical_name": "genre",
80
+ "name": "Genre",
81
+ "type": PropertyType.CONTROLLED_VOCABULARY,
82
+ "source_id": source["id"],
83
+ "property_options": [
84
+ {"name": "Poetry"}, {"name": "Prose"}, {"name": "Drama"}
85
+ ]
86
+ })
87
+
88
+ has_annotations_prop = client.create_property({
89
+ "technical_name": "has_annotations",
90
+ "name": "Has Annotations",
91
+ "type": PropertyType.BINARY,
92
+ "source_id": source["id"]
93
+ })
94
+
95
+ year_prop = client.create_property({
96
+ "technical_name": "publication_year",
97
+ "name": "Publication Year",
98
+ "type": PropertyType.NUMERICAL,
99
+ "source_id": source["id"]
100
+ })
101
+
102
+ description_prop = client.create_property({
103
+ "technical_name": "description",
104
+ "name": "Description",
105
+ "type": PropertyType.FREE_TEXT,
106
+ "source_id": source["id"]
107
+ })
108
+
109
+ # Example 1: CONTROLLED_VOCABULARY suggestion
110
+ # First get the property option ID
111
+ properties = client.get_properties()
112
+ genre_prop = next(p for p in properties if p["technical_name"] == "genre")
113
+ poetry_option = next(opt for opt in genre_prop["property_options"] if opt["name"] == "Poetry")
114
+
115
+ client.create_suggestion({
116
+ "source_id": source["id"],
117
+ "edition_id": edition["id"],
118
+ "property_id": genre_prop["id"],
119
+ "property_option_id": poetry_option["id"]
120
+ })
121
+
122
+ # Example 2: BINARY suggestion (uses property_option_id)
123
+ # Binary properties have two options named "1" (true) and "0" (false)
124
+ # Look up the "true" option by its name rather than assuming a fixed option ID
125
+ binary_props = client.get_properties()
126
+ has_annotations_prop = next(p for p in binary_props if p["technical_name"] == "has_annotations")
127
+ true_option = next(opt for opt in has_annotations_prop["property_options"] if opt["name"] == "1")
128
+
129
+ client.create_suggestion({
130
+ "source_id": source["id"],
131
+ "edition_id": edition["id"],
132
+ "property_id": has_annotations_prop["id"],
133
+ "property_option_id": true_option["id"] # For "yes"/"true" value
134
+ })
135
+
136
+ # Example 3: NUMERICAL suggestion (uses custom_value)
137
+ client.create_suggestion({
138
+ "source_id": source["id"],
139
+ "edition_id": edition["id"],
140
+ "property_id": year_prop["id"],
141
+ "custom_value": "2025" # Note: numerical values are sent as strings
142
+ })
143
+
144
+ # Example 4: FREE_TEXT suggestion (uses custom_value)
145
+ client.create_suggestion({
146
+ "source_id": source["id"],
147
+ "edition_id": edition["id"],
148
+ "property_id": description_prop["id"],
149
+ "custom_value": "This is a detailed description of the edition."
150
+ })
151
+
152
+ # Mark ingestion complete
153
+ client.mark_ingestion_complete(source["id"])
154
+ ```
155
+
156
+ ## Property Types
157
+
158
+ - `PropertyType.CONTROLLED_VOCABULARY` - Predefined options
159
+ - `PropertyType.FREE_TEXT` - Free text
160
+ - `PropertyType.BINARY` - True/false values
161
+ - `PropertyType.NUMERICAL` - Numeric values
162
+
163
+ ## API Reference
164
+
165
+ See the docstrings in `curation_api_client.py` for detailed method documentation.
166
+
167
+ ## Enhanced Integration with SourceManager
168
+
169
+ For more sophisticated integrations, we also provide a higher-level abstraction in `source_manager.py` that mirrors some of the conveniences of our internal extractors:
170
+
171
+ ```python
172
+ from metadata_curation_client import CurationAPIClient, PropertyType, SourceManager, PropertyBuilder
173
+
174
+ # Initialize client and create source
175
+ client = CurationAPIClient("http://localhost:8000")
176
+ source = client.get_source_by_technical_name("my_data_source")
177
+ if not source:
178
+ source = client.create_source({
179
+ "name": "My Data Source",
180
+ "description": "My collection of digital editions",
181
+ "technical_name": "my_data_source"
182
+ })
183
+
184
+ # Define properties using helper builders
185
+ property_definitions = [
186
+ PropertyBuilder.controlled_vocabulary(
187
+ "example_genre", "Genre", ["Poetry", "Prose", "Drama"]
188
+ ),
189
+ PropertyBuilder.binary(
190
+ "example_has_annotations", "Has Annotations"
191
+ ),
192
+ PropertyBuilder.numerical(
193
+ "example_year", "Publication Year"
194
+ )
195
+ ]
196
+
197
+ # Initialize the source manager - this will:
198
+ # - Fetch all existing data
199
+ # - Build lookup tables
200
+ # - Create any missing properties
201
+ manager = SourceManager(client, source['id'], property_definitions)
202
+
203
+ # Efficiently get or create edition using lookup tables
204
+ edition = manager.get_or_create_edition("book_001")
205
+
206
+ # Create suggestions in a batch with validation and deduplication
207
+ manager.create_suggestions_batch(
208
+ edition["id"],
209
+ {
210
+ "example_genre": "Poetry",
211
+ "example_has_annotations": True,
212
+ "example_year": 2022
213
+ }
214
+ )
215
+
216
+ # Mark ingestion complete (updates timestamp)
217
+ manager.finish_ingestion()
218
+ ```
219
+
220
+ ### Benefits of the SourceManager
221
+
222
+ The `SourceManager` provides several advantages for more complex integrations:
223
+
224
+ 1. **Reduced API Calls**: Prefetches data to minimize API requests
225
+ 2. **Lookup Tables**: Maintains efficient in-memory lookups for editions, properties, and suggestions
226
+ 3. **Automatic Property Creation**: Creates properties from definitions as needed
227
+ 4. **Validation**: Automatically validates values based on property types
228
+ 5. **Deduplication**: Avoids creating duplicate suggestions
229
+ 6. **Builder Helpers**: Provides convenient builder classes for creating properties and sources
230
+ 7. **Timestamp Management**: Updates the last ingestion timestamp when you call `finish_ingestion()`
231
+
232
+ For a complete example, see `example_with_source_manager.py`.
233
+
234
+ ### Choosing the Right Approach
235
+
236
+ - **Basic API Client**: For simple integrations or when you need complete control over the process
237
+ - **SourceManager**: For more complex integrations where efficiency and convenience are priorities
238
+
239
+ Both approaches use the same underlying API endpoints and data models, so you can choose the one that best fits your needs or even mix them as required.
@@ -0,0 +1,8 @@
1
+ metadata_curation_client/__init__.py,sha256=VWJY3OsCDQEZ5BbePugl4J8E2etciuHyTuDvTccaqog,360
2
+ metadata_curation_client/curation_api_client.py,sha256=WMfIIKO01b4EZ0wCyO9Opn_FrcZSKJLquHEMsVWQWu0,7459
3
+ metadata_curation_client/source_manager.py,sha256=hkrEU1JT4Hkwy2fUbheb7pP_1KfTXbcU8sjiLYc0i8Q,17940
4
+ metadata_curation_client-0.1.0.dist-info/licenses/LICENSE,sha256=dvKFLHmy95RWWhFDqmOn38Yjfv_w-Hxc5EmQgQ9iCC8,1086
5
+ metadata_curation_client-0.1.0.dist-info/METADATA,sha256=YzREY-ik0GaW44OLOTAnZ0dJvLJXxiAtFzgTflyup10,7890
6
+ metadata_curation_client-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ metadata_curation_client-0.1.0.dist-info/top_level.txt,sha256=FUkmJY-66mVLu-RvgCXwPn26F6Jkxmve9Stw8kCsr0w,25
8
+ metadata_curation_client-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Digital Edition Curation Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ metadata_curation_client