gedcom-x 0.5.6__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gedcomx/Serialization.py CHANGED
@@ -1,401 +1,816 @@
1
- from typing import Dict
1
+ from __future__ import annotations
2
+ from functools import lru_cache
2
3
 
3
- from .Logging import get_logger
4
+ import enum
5
+ import logging
6
+ import types
7
+ from collections.abc import Sized
8
+ from typing import Any, Dict, List, Set, Tuple, Union, Annotated, ForwardRef, get_args, get_origin
9
+ from typing import Any, Callable, Mapping, List, Dict, Tuple, Set
10
+ from typing import List, Optional
4
11
 
5
- log = get_logger(__name__)
6
- log.setLevel("DEBUG")
7
- log.info("Logger initialized.")
12
+ """
13
+ ======================================================================
14
+ Project: Gedcom-X
15
+ File: Serialization.py
16
+ Author: David J. Cartwright
17
+ Purpose: Serialization/Deserialization of gedcomx Objects
8
18
 
9
- from collections.abc import Sized
10
- from typing import Any, get_origin, get_args, List, Set, Tuple, Dict, Union, ForwardRef, Annotated
11
- import types
19
+ Created: 2025-08-25
20
+ Updated:
21
+ - 2025-08-31: cleaned up imports and documentation
22
+
23
+ ======================================================================
24
+ """
12
25
 
13
- import enum
26
+ """
27
+ ======================================================================
28
+ GEDCOM Module Types
29
+ ======================================================================
30
+ """
31
+ from .Address import Address
32
+ from .Agent import Agent
33
+ from .Attribution import Attribution
34
+ from .Conclusion import ConfidenceLevel
35
+ from .Date import Date
36
+ from .Document import Document, DocumentType, TextType
37
+ from .EvidenceReference import EvidenceReference
38
+ from .Event import Event, EventType, EventRole, EventRoleType
39
+ from .Extensions.rs10.rsLink import _rsLinkList
40
+ from .Fact import Fact, FactType, FactQualifier
41
+ from .Gender import Gender, GenderType
42
+ from .Identifier import IdentifierList, Identifier
43
+ from .LoggingHub import hub, ChannelConfig
44
+ from .Name import Name, NameType, NameForm, NamePart, NamePartType, NamePartQualifier
45
+ from .Note import Note
46
+ from .OnlineAccount import OnlineAccount
47
+ from .Person import Person
48
+ from .PlaceDescription import PlaceDescription
49
+ from .PlaceReference import PlaceReference
50
+ from .Qualifier import Qualifier
51
+ from .Relationship import Relationship, RelationshipType
14
52
  from .Resource import Resource
15
- from .Identifier import IdentifierList
53
+ from .SourceDescription import SourceDescription, ResourceType, SourceCitation, Coverage
54
+ from .SourceReference import SourceReference
55
+ from .TextValue import TextValue
16
56
  from .URI import URI
57
+ #======================================================================
58
+
59
+ log = logging.getLogger("gedcomx")
60
+ deserialization = "gedcomx.deserialization"
61
+
62
+ hub.start_channel(
63
+ ChannelConfig(
64
+ name=deserialization,
65
+ path=f"logs/{deserialization}.log",
66
+ level=logging.DEBUG,
67
+ rotation="size:10MB:3", # rotate by size, keep 3 backups
68
+ )
69
+ )
17
70
 
18
71
  _PRIMITIVES = (str, int, float, bool, type(None))
19
72
 
20
73
  def _has_parent_class(obj) -> bool:
21
74
  return hasattr(obj, '__class__') and hasattr(obj.__class__, '__bases__') and len(obj.__class__.__bases__) > 0
22
75
 
23
-
24
76
  class Serialization:
25
-
77
+
26
78
  @staticmethod
27
79
  def serialize_dict(dict_to_serialize: dict) -> dict:
28
80
  """
29
- Iterates through the dict, serilaizing all Gedcom Types into a json compatible value
30
-
31
- Parameters
32
- ----------
33
- dict_to_serialize: dict
34
- dict that has been created from any Gedcom Type Object's _as_dict_ property
35
-
36
- Raises
37
- ------
38
- ValueError
39
- If `id` is not a valid UUID.
81
+ Walk a dict and serialize nested GedcomX objects to JSON-compatible values.
82
+ - Uses `_as_dict_` on your objects when present
83
+ - Recurse into dicts / lists / sets / tuples
84
+ - Drops None and empty containers
40
85
  """
41
86
  def _serialize(value):
42
87
  if isinstance(value, (str, int, float, bool, type(None))):
43
88
  return value
44
- elif isinstance(value, dict):
89
+ if hasattr(value, "_as_dict_"):
90
+ # Expect your objects expose a snapshot via _as_dict_
91
+ return value._as_dict_
92
+ if isinstance(value, dict):
45
93
  return {k: _serialize(v) for k, v in value.items()}
46
- elif isinstance(value, (list, tuple, set)):
94
+ if isinstance(value, (list, tuple, set)):
47
95
  return [_serialize(v) for v in value]
48
- elif hasattr(value, "_as_dict_"):
49
- return value._as_dict_
50
- else:
51
- return str(value) # fallback for unknown objects
52
-
53
- if dict_to_serialize and isinstance(dict_to_serialize,dict):
54
- for key, value in dict_to_serialize.items():
55
- if value is not None:
56
- dict_to_serialize[key] = _serialize(value)
57
-
96
+ # Fallback: string representation
97
+ return str(value)
98
+
99
+ if isinstance(dict_to_serialize, dict):
100
+ cooked = {
101
+ k: _serialize(v)
102
+ for k, v in dict_to_serialize.items()
103
+ if v is not None
104
+ }
105
+ # prune empty containers (after serialization)
58
106
  return {
59
- k: v
60
- for k, v in dict_to_serialize.items()
61
- if v is not None and not (isinstance(v, Sized) and len(v) == 0)
62
- }
107
+ k: v
108
+ for k, v in cooked.items()
109
+ if not (isinstance(v, Sized) and len(v) == 0)
110
+ }
63
111
  return {}
64
-
112
+
113
+ # --- tiny helpers --------------------------------------------------------
65
114
  @staticmethod
66
- def _coerce_value(value: Any, typ: Any) -> Any:
67
- """Coerce `value` to `typ`:
68
- - primitives: pass through
69
- - containers: recurse into elements
70
- - objects: call typ._from_json_(dict) if available and value is dict
71
- - already-instantiated objects of typ: pass through
72
- - otherwise: return value unchanged
115
+ def _is_resource(obj: Any) -> bool:
73
116
  """
74
- def is_enum_type(T) -> bool:
75
- """Return True if T (possibly a typing construct) is or contains an Enum type."""
76
- origin = get_origin(T)
117
+ try:
118
+ from Resource import Resource
119
+ except Exception:
120
+ class Resource: pass
121
+ """
122
+ return isinstance(obj, Resource)
77
123
 
78
- # Unwrap Union/Optional/PEP 604 (A | B)
79
- if origin in (Union, types.UnionType):
80
- return any(is_enum_type(a) for a in get_args(T))
124
+ @staticmethod
125
+ def _has_resource_value(x: Any) -> bool:
126
+ if Serialization._is_resource(x):
127
+ return True
128
+ if isinstance(x, (list, tuple, set)):
129
+ return any(Serialization._has_resource_value(v) for v in x)
130
+ if isinstance(x, dict):
131
+ return any(Serialization._has_resource_value(v) for v in x.values())
132
+ return False
81
133
 
82
- # Unwrap Annotated[T, ...]
83
- if origin is Annotated:
84
- return is_enum_type(get_args(T)[0])
134
+ @staticmethod
135
+ def _resolve_structure(x: Any, resolver: Callable[[Any], Any]) -> Any:
136
+ """Return a deep copy with Resources resolved via resolver(Resource)->Any."""
137
+ if Serialization._is_resource(x):
138
+ return resolver(x)
139
+ if isinstance(x, list):
140
+ return [Serialization._resolve_structure(v, resolver) for v in x]
141
+ if isinstance(x, tuple):
142
+ return tuple(Serialization._resolve_structure(v, resolver) for v in x)
143
+ if isinstance(x, set):
144
+ return {Serialization._resolve_structure(v, resolver) for v in x}
145
+ if isinstance(x, dict):
146
+ return {k: Serialization._resolve_structure(v, resolver) for k, v in x.items()}
147
+ return x
85
148
 
86
- # Resolve forward refs / strings if you use them
87
- if isinstance(T, ForwardRef):
88
- T = globals().get(T.__forward_arg__, T)
89
- if isinstance(T, str):
90
- T = globals().get(T, T)
149
+ @classmethod
150
+ def apply_resource_resolutions(cls, inst: Any, resolver: Callable[[Any], Any]) -> None:
151
+ """Resolve any queued attribute setters stored on the instance."""
152
+ setters: List[Callable[[Any], None]] = getattr(inst, "_resource_setters", [])
153
+ for set_fn in setters:
154
+ set_fn(inst, resolver)
155
+ # Optional: clear after applying
156
+ inst._resource_setters = []
91
157
 
92
- # Finally check enum-ness
93
- try:
94
- return issubclass(T, enum.Enum)
95
- except TypeError:
96
- return False # not a class (e.g., typing.List[int], etc.)
97
- log.debug(f"Coercing value '{value}' of type '{type(value).__name__}' to '{typ}'")
98
-
99
- def _resolve(t):
100
- # resolve ForwardRef('Resource') -> actual object if already in globals()
101
- if isinstance(t, ForwardRef):
102
- return globals().get(t.__forward_arg__, t)
103
- return t
104
-
105
- if is_enum_type(typ):
106
- log.debug(f"Enum type detected: {typ}")
107
- return typ(value) # cast to enum
108
-
109
- origin = get_origin(typ)
110
- if origin in (Union, types.UnionType):
111
- args = tuple(_resolve(a) for a in get_args(typ))
112
- else:
113
- args = (_resolve(typ),)
114
- log.debug(f"Origin: {origin}, args: {args}")
115
-
116
- if Resource in args and isinstance(value, dict):
117
- if Resource in args:
118
- log.info(f"Deserializing Resource from value: {value}")
119
- return Resource(uri=value.get('resource'), id=value.get('resourceId', None))
120
-
121
- if isinstance(value, _PRIMITIVES):
122
- if Resource in args:
123
- log.info(f"Deserializing Resource from value: {value}")
124
- return Resource(uri=value)
125
- if URI in args:
126
- log.info(f"Deserializing URI from value: {value}")
127
- return URI.from_url(value)
128
- return value
158
+ # --- your deserialize with setters --------------------------------------
159
+ @classmethod
160
+ def deserialize(
161
+ cls,
162
+ data: Dict[str, Any],
163
+ class_type: type,
164
+ *,
165
+ resolver: Callable[[Any], Any] | None = None, # pass a function to resolve Resources now
166
+ queue_setters: bool = True # also stash setters on instance for later
167
+ ) -> Any:
168
+ class_fields = cls.get_class_fields(class_type.__name__)
169
+ result: Dict[str, Any] = {}
170
+ # collect setters that know how to assign back to attributes
171
+ pending_setters: List[Callable[[Any, Callable[[Any], Any]], None]] = []
172
+
173
+ for name, typ in class_fields.items():
174
+ if name not in data:
175
+ continue
176
+ coerced = cls._coerce_value(data[name], typ)
177
+ result[name] = coerced
178
+
179
+ # if this attribute (or inside it) has Resource(s), prepare a setter
180
+ if cls._has_resource_value(coerced):
181
+ def make_setter(attr_name: str, raw_value: Any):
182
+ # capture references to the *exact* object we just built for this attribute
183
+ def _setter(instance: Any, _resolver: Callable[[Any], Any]) -> None:
184
+ resolved = cls._resolve_structure(raw_value, _resolver)
185
+ setattr(instance, attr_name, resolved)
186
+ return _setter
187
+ pending_setters.append(make_setter(name, coerced))
188
+
189
+ # build the instance
190
+ inst = class_type(**result)
191
+
192
+ # apply now, if resolver provided
193
+ if resolver is not None and pending_setters:
194
+ for set_fn in pending_setters:
195
+ set_fn(inst, resolver)
196
+
197
+ # optionally store for later (gives you a real attribute assignment later)
198
+ if queue_setters:
199
+ # merge if already present
200
+ existing = getattr(inst, "_resource_setters", [])
201
+ inst._resource_setters = [*existing, *pending_setters]
202
+
203
+ return inst
204
+
205
+ @staticmethod
206
+ def get_class_fields(cls_name) -> Dict:
207
+ # NOTE: keep imports local to avoid circulars
129
208
 
130
- if IdentifierList in args:
131
- log.error(f"Deserializing IdentifierList from value: {value}")
132
- return IdentifierList._from_json_(value)
133
-
134
- if origin in (list, List):
135
- elem_args = get_args(typ) # NOT get_args(args)
136
- elem_t = elem_args[0] if elem_args else Any
137
- log.debug(f"List: {typ}, elem={elem_t}")
138
- return [Serialization._coerce_value(v, elem_t) for v in (value or [])]
139
-
140
- if origin in (set, Set):
141
- (elem_t,) = args or (Any,)
142
- return { Serialization._coerce_value(v, elem_t) for v in (value or []) }
143
-
144
- if origin in (tuple, Tuple):
145
- if not args:
146
- return tuple(value)
147
- if len(args) == 2 and args[1] is Ellipsis: # Tuple[T, ...]
148
- elem_t = args[0]
149
- return tuple(Serialization._coerce_value(v, elem_t) for v in (value or []))
150
- return tuple(Serialization._coerce_value(v, t) for v, t in zip(value, args))
151
-
152
- if origin in (dict, Dict):
153
- k_t, v_t = args or (Any, Any)
154
- return {
155
- Serialization._coerce_value(k, k_t): Serialization._coerce_value(v, v_t)
156
- for k, v in (value or {}).items()
209
+
210
+ fields = {
211
+ "GedcomX": {"persons": List[Person]},
212
+ "Conclusion": {
213
+ "id": str,
214
+ "lang": str,
215
+ "sources": List["SourceReference"],
216
+ "analysis": Document | Resource,
217
+ "notes": List[Note],
218
+ "confidence": ConfidenceLevel,
219
+ "attribution": Attribution,
220
+ "uri": "Resource",
221
+ "max_note_count": int,
222
+ "links": _rsLinkList,
223
+ },
224
+ "Subject": {
225
+ "id": str,
226
+ "lang": str,
227
+ "sources": List["SourceReference"],
228
+ "analysis": Resource,
229
+ "notes": List["Note"],
230
+ "confidence": ConfidenceLevel,
231
+ "attribution": Attribution,
232
+ "extracted": bool,
233
+ "evidence": List[EvidenceReference],
234
+ "media": List[SourceReference],
235
+ "identifiers": IdentifierList,
236
+ "uri": Resource,
237
+ "links": _rsLinkList,
238
+ },
239
+ "Person": {
240
+ "id": str,
241
+ "lang": str,
242
+ "sources": List[SourceReference],
243
+ "analysis": Resource,
244
+ "notes": List[Note],
245
+ "confidence": ConfidenceLevel,
246
+ "attribution": Attribution,
247
+ "extracted": bool,
248
+ "evidence": List[EvidenceReference],
249
+ "media": List[SourceReference],
250
+ "identifiers": IdentifierList,
251
+ "private": bool,
252
+ "gender": Gender,
253
+ "names": List[Name],
254
+ "facts": List[Fact],
255
+ "living": bool,
256
+ "links": _rsLinkList,
257
+ #"uri": URI,
258
+ },
259
+ "SourceReference": {
260
+ "description": Resource,
261
+ "descriptionId": str,
262
+ "attribution": Attribution,
263
+ "qualifiers": List[Qualifier],
264
+ },
265
+ "Attribution": {
266
+ "contributor": Resource,
267
+ "modified": str,
268
+ "changeMessage": str,
269
+ "creator": Resource,
270
+ "created": str,
271
+ },
272
+ "SourceDescription": {
273
+ "id": str,
274
+ "resourceType": ResourceType,
275
+ "citations": List[SourceCitation],
276
+ "mediaType": str,
277
+ "about": URI,
278
+ "mediator": Resource,
279
+ "publisher": Resource, # forward-ref to avoid circular import
280
+ "authors": List[Resource],
281
+ "sources": List[SourceReference], # SourceReference
282
+ "analysis": Resource, # analysis is typically a Document (kept union to avoid cycle)
283
+ "componentOf": SourceReference, # SourceReference
284
+ "titles": List[TextValue],
285
+ "notes": List[Note],
286
+ "attribution": Attribution,
287
+ "rights": List[Resource],
288
+ "coverage": List[Coverage], # Coverage
289
+ "descriptions": List[TextValue],
290
+ "identifiers": IdentifierList,
291
+ "created": Date,
292
+ "modified": Date,
293
+ "published": Date,
294
+ "repository": Agent, # forward-ref
295
+ "max_note_count": int,
296
+ },
297
+ "Gender": {
298
+ "id": str,
299
+ "lang": str,
300
+ "sources": List[SourceReference],
301
+ "analysis": Resource,
302
+ "notes": List[Note],
303
+ "confidence": ConfidenceLevel,
304
+ "attribution": Attribution,
305
+ "type": GenderType,
306
+ },
307
+ "PlaceReference": {
308
+ "original": str,
309
+ "description": URI,
310
+ },
311
+ "Relationship": {
312
+ "id": str,
313
+ "lang": str,
314
+ "sources": List[SourceReference],
315
+ "analysis": Resource,
316
+ "notes": List[Note],
317
+ "confidence": ConfidenceLevel,
318
+ "attribution": Attribution,
319
+ "extracted": bool,
320
+ "evidence": List[EvidenceReference],
321
+ "media": List[SourceReference],
322
+ "identifiers": IdentifierList,
323
+ "type": RelationshipType,
324
+ "person1": Resource,
325
+ "person2": Resource,
326
+ "facts": List[Fact],
327
+ },
328
+ "Document": {
329
+ "id": str,
330
+ "lang": str,
331
+ "sources": List[SourceReference],
332
+ "analysis": Resource,
333
+ "notes": List[Note],
334
+ "confidence": ConfidenceLevel,
335
+ "attribution": Attribution,
336
+ "type": DocumentType,
337
+ "extracted": bool,
338
+ "textType": TextType,
339
+ "text": str,
340
+ },
341
+ "PlaceDescription": {
342
+ "id": str,
343
+ "lang": str,
344
+ "sources": List[SourceReference],
345
+ "analysis": Resource,
346
+ "notes": List[Note],
347
+ "confidence": ConfidenceLevel,
348
+ "attribution": Attribution,
349
+ "extracted": bool,
350
+ "evidence": List[EvidenceReference],
351
+ "media": List[SourceReference],
352
+ "identifiers": List[IdentifierList],
353
+ "names": List[TextValue],
354
+ "type": str,
355
+ "place": URI,
356
+ "jurisdiction": Resource,
357
+ "latitude": float,
358
+ "longitude": float,
359
+ "temporalDescription": Date,
360
+ "spatialDescription": Resource,
361
+ },
362
+ "Agent": {
363
+ "id": str,
364
+ "identifiers": IdentifierList,
365
+ "names": List[TextValue],
366
+ "homepage": URI,
367
+ "openid": URI,
368
+ "accounts": List[OnlineAccount],
369
+ "emails": List[URI],
370
+ "phones": List[URI],
371
+ "addresses": List[Address],
372
+ "person": object | Resource, # intended Person | Resource
373
+ "attribution": object, # GEDCOM5/7 compatibility
374
+ "uri": URI | Resource,
375
+ },
376
+ "Event": {
377
+ "id": str,
378
+ "lang": str,
379
+ "sources": List[SourceReference],
380
+ "analysis": Resource,
381
+ "notes": List[Note],
382
+ "confidence": ConfidenceLevel,
383
+ "attribution": Attribution,
384
+ "extracted": bool,
385
+ "evidence": List[EvidenceReference],
386
+ "media": List[SourceReference],
387
+ "identifiers": List[Identifier],
388
+ "type": EventType,
389
+ "date": Date,
390
+ "place": PlaceReference,
391
+ "roles": List[EventRole],
392
+ },
393
+ "EventRole": {
394
+ "id:": str,
395
+ "lang": str,
396
+ "sources": List[SourceReference],
397
+ "analysis": Resource,
398
+ "notes": List[Note],
399
+ "confidence": ConfidenceLevel,
400
+ "attribution": Attribution,
401
+ "person": Resource,
402
+ "type": EventRoleType,
403
+ "details": str,
404
+ },
405
+ "Resource":{
406
+ "resource":URI,
407
+ "id":str
408
+ },
409
+ "Qualifier":{
410
+ "name":str,
411
+ "value":str
412
+ },
413
+ "KnownSourceReference":{
414
+ "name":str,
415
+ "value":str
416
+ },
417
+ "Name": {
418
+ "id": str,
419
+ "lang": str,
420
+ "sources": List[SourceReference],
421
+ "analysis": Resource,
422
+ "notes": List[Note],
423
+ "confidence": ConfidenceLevel,
424
+ "attribution": Attribution,
425
+ "type": NameType,
426
+ "nameForms": List[NameForm], # use string to avoid circulars if needed
427
+ "date": Date,
428
+ },
429
+ "NameForm": {
430
+ "lang": str,
431
+ "fullText": str,
432
+ "parts": List[NamePart], # use "NamePart" as a forward-ref to avoid circulars
433
+ },
434
+ "NamePart": {
435
+ "type": NamePartType,
436
+ "value": str,
437
+ "qualifiers": List["NamePartQualifier"], # quote if you want to avoid circulars
438
+ },
439
+ "Fact":{
440
+ "id": str,
441
+ "lang": str,
442
+ "sources": List[SourceReference],
443
+ "analysis": Resource | Document,
444
+ "notes": List[Note],
445
+ "confidence": ConfidenceLevel,
446
+ "attribution": Attribution,
447
+ "type": FactType,
448
+ "date": Date,
449
+ "place": PlaceReference,
450
+ "value": str,
451
+ "qualifiers": List[FactQualifier],
452
+ "links": _rsLinkList,
157
453
  }
454
+ }
455
+ return fields.get(cls_name, {})
456
+
457
+
458
+ @classmethod
459
+ def _coerce_value(cls, value: Any, Typ: Any) -> Any:
460
+ """Coerce `value` into `Typ` using the registry (recursively), with verbose logging."""
461
+ log.debug("COERCE enter: value=%r (type=%s) -> Typ=%r", value, type(value).__name__, Typ)
158
462
 
159
- # If `typ` has _from_json_ and value is a dict, use it
160
- if hasattr(typ, "_from_json_") and isinstance(value, dict):
161
- log.info(f"Deserializing {typ} from json method with value: {value}")
162
- return typ._from_json_(value)
463
+ # Enums
464
+ if cls._is_enum_type(Typ):
465
+ U = cls._resolve_forward(cls._unwrap(Typ))
466
+ log.debug("COERCE enum: casting %r to %s", value, getattr(U, "__name__", U))
467
+ try:
468
+ ret = U(value)
469
+ log.debug("COERCE enum: success -> %r", ret)
470
+ return ret
471
+ except Exception:
472
+ log.exception("COERCE enum: failed to cast %r to %s", value, U)
473
+ return value
163
474
 
164
- # If already the right type, keep it
475
+ # Unwrap typing once
476
+ T = cls._resolve_forward(cls._unwrap(Typ))
477
+ origin = get_origin(T) or T
478
+ args = get_args(T)
479
+ log.debug("COERCE typing: unwrapped Typ=%r -> T=%r, origin=%r, args=%r", Typ, T, origin, args)
480
+
481
+ # Late imports to reduce circulars (and to allow logging if they aren't available)
482
+ '''
165
483
  try:
166
- if isinstance(value, typ):
484
+ from gedcomx.Resource import Resource
485
+ from gedcomx.URI import URI
486
+ from gedcomx.Identifier import IdentifierList
487
+ _gx_import_ok = True
488
+ except Exception as _imp_err:
489
+ _gx_import_ok = False
490
+ Resource = URI = IdentifierList = object # fallbacks avoid NameError
491
+ log.debug("COERCE imports: gedcomx types not available (%r); using object fallbacks", _imp_err)
492
+ '''
493
+
494
+ # Strings to Resource/URI
495
+ if isinstance(value, str):
496
+ if T is Resource:
497
+ log.debug("COERCE str->Resource: %r", value)
498
+ try:
499
+ ret = Resource(uri=value)
500
+ log.debug("COERCE str->Resource: built %r", ret)
501
+ return ret
502
+ except Exception:
503
+ log.exception("COERCE str->Resource: failed for %r", value)
504
+ return value
505
+ if T is URI:
506
+ log.debug("COERCE str->URI: %r", value)
507
+ try:
508
+ ret: Any = URI.from_url(value)
509
+ log.debug("COERCE str->URI: built %r", ret)
510
+ return ret
511
+ except Exception:
512
+ log.exception("COERCE str->URI: failed for %r", value)
513
+ return value
514
+ log.debug("COERCE str passthrough: target %r is not Resource/URI", T)
515
+ return value
516
+
517
+ # Dict to Resource
518
+ if T is Resource and isinstance(value, dict):
519
+ log.debug("COERCE dict->Resource: %r", value)
520
+ try:
521
+ ret = Resource(uri=value.get("resource"), id=value.get("resourceId"))
522
+ log.debug("COERCE dict->Resource: built %r", ret)
523
+ return ret
524
+ except Exception:
525
+ log.exception("COERCE dict->Resource: failed for %r", value)
526
+ return value
527
+
528
+ # IdentifierList special
529
+ if T is IdentifierList:
530
+ log.debug("COERCE IdentifierList: %r", value)
531
+ try:
532
+ ret = IdentifierList._from_json_(value)
533
+ log.debug("COERCE IdentifierList: built %r", ret)
534
+ return ret
535
+ except Exception:
536
+ log.exception("COERCE IdentifierList: _from_json_ failed for %r", value)
537
+ return value
538
+
539
+ # Containers
540
+ if cls._is_list_like(T):
541
+ elem_t = args[0] if args else Any
542
+ log.debug("COERCE list-like: len=%s, elem_t=%r", len(value or []), elem_t)
543
+ try:
544
+ ret = [cls._coerce_value(v, elem_t) for v in (value or [])]
545
+ log.debug("COERCE list-like: result sample=%r", ret[:3] if isinstance(ret, list) else ret)
546
+ return ret
547
+ except Exception:
548
+ log.exception("COERCE list-like: failed for value=%r elem_t=%r", value, elem_t)
549
+ return value
550
+
551
+ if cls._is_set_like(T):
552
+ elem_t = args[0] if args else Any
553
+ log.debug("COERCE set-like: len=%s, elem_t=%r", len(value or []), elem_t)
554
+ try:
555
+ ret = {cls._coerce_value(v, elem_t) for v in (value or [])}
556
+ log.debug("COERCE set-like: result size=%d", len(ret))
557
+ return ret
558
+ except Exception:
559
+ log.exception("COERCE set-like: failed for value=%r elem_t=%r", value, elem_t)
560
+ return value
561
+
562
+ if cls._is_tuple_like(T):
563
+ log.debug("COERCE tuple-like: value=%r, args=%r", value, args)
564
+ try:
565
+ if not value:
566
+ log.debug("COERCE tuple-like: empty/None -> ()")
567
+ return tuple(value or ())
568
+ if len(args) == 2 and args[1] is Ellipsis:
569
+ elem_t = args[0]
570
+ ret = tuple(cls._coerce_value(v, elem_t) for v in (value or ()))
571
+ log.debug("COERCE tuple-like variadic: size=%d", len(ret))
572
+ return ret
573
+ ret = tuple(cls._coerce_value(v, t) for v, t in zip(value, args))
574
+ log.debug("COERCE tuple-like fixed: size=%d", len(ret))
575
+ return ret
576
+ except Exception:
577
+ log.exception("COERCE tuple-like: failed for value=%r args=%r", value, args)
578
+ return value
579
+
580
+ if cls._is_dict_like(T):
581
+ k_t = args[0] if len(args) >= 1 else Any
582
+ v_t = args[1] if len(args) >= 2 else Any
583
+ log.debug("COERCE dict-like: keys=%s, k_t=%r, v_t=%r", len((value or {}).keys()), k_t, v_t)
584
+ try:
585
+ ret = {
586
+ cls._coerce_value(k, k_t): cls._coerce_value(v, v_t)
587
+ for k, v in (value or {}).items()
588
+ }
589
+ log.debug("COERCE dict-like: result size=%d", len(ret))
590
+ return ret
591
+ except Exception:
592
+ log.exception("COERCE dict-like: failed for value=%r k_t=%r v_t=%r", value, k_t, v_t)
593
+ return value
594
+
595
+ # Objects via registry
596
+ if isinstance(T, type) and isinstance(value, dict):
597
+ fields = cls.get_class_fields(T.__name__) or {}
598
+ log.debug(
599
+ "COERCE object: class=%s, input_keys=%s, registered_fields=%s",
600
+ T.__name__, list(value.keys()), list(fields.keys())
601
+ )
602
+ if fields:
603
+ kwargs = {}
604
+ present = []
605
+ for fname, ftype in fields.items():
606
+ if fname in value:
607
+ resolved = cls._resolve_forward(cls._unwrap(ftype))
608
+ log.debug("COERCE object.field: %s.%s -> %r, raw=%r", T.__name__, fname, resolved, value[fname])
609
+ try:
610
+ coerced = cls._coerce_value(value[fname], resolved)
611
+ kwargs[fname] = coerced
612
+ present.append(fname)
613
+ log.debug("COERCE object.field: %s.%s coerced -> %r", T.__name__, fname, coerced)
614
+ except Exception:
615
+ log.exception("COERCE object.field: %s.%s failed", T.__name__, fname)
616
+ unknown = [k for k in value.keys() if k not in fields]
617
+ if unknown:
618
+ log.debug("COERCE object: %s unknown keys ignored: %s", T.__name__, unknown)
619
+ try:
620
+ log.debug("COERCE object: instantiate %s(**%s)", T.__name__, present)
621
+ ret = T(**kwargs)
622
+ log.debug("COERCE object: success -> %r", ret)
623
+ return ret
624
+ except TypeError as e:
625
+ log.error("COERCE object: instantiate %s failed with kwargs=%s: %s", T.__name__, list(kwargs.keys()), e)
626
+ log.debug("COERCE object: returning partially coerced dict")
627
+ return kwargs
628
+
629
+ # Already correct type?
630
+ try:
631
+ if isinstance(value, T):
632
+ log.debug("COERCE passthrough: value already instance of %r", T)
167
633
  return value
168
634
  except TypeError:
169
- log.debug(f"Could not coerce value '{value}' to type '{typ}'")
170
- pass # `typ` may be a typing construct not valid for isinstance
635
+ log.debug("COERCE isinstance not applicable: T=%r", T)
171
636
 
172
- # Fallback: leave as-is
173
- log.debug(f"Returning '{type(value)}' type")
637
+ log.debug("COERCE fallback: returning original value=%r (type=%s)", value, type(value).__name__)
174
638
  return value
175
639
 
640
+
641
+ # Dict to Resource
642
+ if T is Resource and isinstance(value, dict):
643
+ log.debug("COERCE dict->Resource: %r", value)
644
+ try:
645
+ ret = Resource(uri=value.get("resource"), id=value.get("resourceId"))
646
+ log.debug("COERCE dict->Resource: built %r", ret)
647
+ return ret
648
+ except Exception:
649
+ log.exception("COERCE dict->Resource: failed for %r", value)
650
+ return value
651
+
652
+ # IdentifierList special
653
+ if T is IdentifierList:
654
+ log.debug("COERCE IdentifierList: %r", value)
655
+ try:
656
+ ret = IdentifierList._from_json_(value)
657
+ log.debug("COERCE IdentifierList: built %r", ret)
658
+ return ret
659
+ except Exception:
660
+ log.exception("COERCE IdentifierList: _from_json_ failed for %r", value)
661
+ return value
662
+
663
+ # Containers
664
+ if self._is_list_like(T):
665
+ elem_t = args[0] if args else Any
666
+ log.debug("COERCE list-like: len=%s, elem_t=%r", len(value or []), elem_t)
667
+ try:
668
+ ret = [self._coerce_value(v, elem_t) for v in (value or [])]
669
+ log.debug("COERCE list-like: result sample=%r", ret[:3] if isinstance(ret, list) else ret)
670
+ return ret
671
+ except Exception:
672
+ log.exception("COERCE list-like: failed for value=%r elem_t=%r", value, elem_t)
673
+ return value
674
+
675
+ if self._is_set_like(T):
676
+ elem_t = args[0] if args else Any
677
+ log.debug("COERCE set-like: len=%s, elem_t=%r", len(value or []), elem_t)
678
+ try:
679
+ ret = {self._coerce_value(v, elem_t) for v in (value or [])}
680
+ log.debug("COERCE set-like: result size=%d", len(ret))
681
+ return ret
682
+ except Exception:
683
+ log.exception("COERCE set-like: failed for value=%r elem_t=%r", value, elem_t)
684
+ return value
685
+
686
+ if self._is_tuple_like(T):
687
+ log.debug("COERCE tuple-like: value=%r, args=%r", value, args)
688
+ try:
689
+ if not value:
690
+ log.debug("COERCE tuple-like: empty/None -> ()")
691
+ return tuple(value or ())
692
+ if len(args) == 2 and args[1] is Ellipsis:
693
+ elem_t = args[0]
694
+ ret = tuple(self._coerce_value(v, elem_t) for v in (value or ()))
695
+ log.debug("COERCE tuple-like variadic: size=%d", len(ret))
696
+ return ret
697
+ ret = tuple(self._coerce_value(v, t) for v, t in zip(value, args))
698
+ log.debug("COERCE tuple-like fixed: size=%d", len(ret))
699
+ return ret
700
+ except Exception:
701
+ log.exception("COERCE tuple-like: failed for value=%r args=%r", value, args)
702
+ return value
703
+
704
+ if self._is_dict_like(T):
705
+ k_t = args[0] if len(args) >= 1 else Any
706
+ v_t = args[1] if len(args) >= 2 else Any
707
+ log.debug("COERCE dict-like: keys=%s, k_t=%r, v_t=%r", len((value or {}).keys()), k_t, v_t)
708
+ try:
709
+ ret = {
710
+ self._coerce_value(k, k_t): self._coerce_value(v, v_t)
711
+ for k, v in (value or {}).items()
712
+ }
713
+ log.debug("COERCE dict-like: result size=%d", len(ret))
714
+ return ret
715
+ except Exception:
716
+ log.exception("COERCE dict-like: failed for value=%r k_t=%r v_t=%r", value, k_t, v_t)
717
+ return value
718
+
719
+ # Objects via registry
720
+ if isinstance(T, type) and isinstance(value, dict):
721
+ fields = self.get_class_fields(T.__name__) or {}
722
+ log.debug(
723
+ "COERCE object: class=%s, input_keys=%s, registered_fields=%s",
724
+ T.__name__, list(value.keys()), list(fields.keys())
725
+ )
726
+ if fields:
727
+ kwargs = {}
728
+ present = []
729
+ for fname, ftype in fields.items():
730
+ if fname in value:
731
+ resolved = self._resolve_forward(self._unwrap(ftype))
732
+ log.debug("COERCE object.field: %s.%s -> %r, raw=%r", T.__name__, fname, resolved, value[fname])
733
+ try:
734
+ coerced = self._coerce_value(value[fname], resolved)
735
+ kwargs[fname] = coerced
736
+ present.append(fname)
737
+ log.debug("COERCE object.field: %s.%s coerced -> %r", T.__name__, fname, coerced)
738
+ except Exception:
739
+ log.exception("COERCE object.field: %s.%s failed", T.__name__, fname)
740
+ unknown = [k for k in value.keys() if k not in fields]
741
+ if unknown:
742
+ log.debug("COERCE object: %s unknown keys ignored: %s", T.__name__, unknown)
743
+ try:
744
+ log.debug("COERCE object: instantiate %s(**%s)", T.__name__, present)
745
+ ret = T(**kwargs)
746
+ log.debug("COERCE object: success -> %r", ret)
747
+ return ret
748
+ except TypeError as e:
749
+ log.warning("COERCE object: instantiate %s failed with kwargs=%s: %s", T.__name__, list(kwargs.keys()), e)
750
+ log.debug("COERCE object: returning partially coerced dict")
751
+ return kwargs
752
+
753
+ # Already correct type?
754
+ try:
755
+ if isinstance(value, T):
756
+ log.debug("COERCE passthrough: value already instance of %r", T)
757
+ return value
758
+ except TypeError:
759
+ log.debug("COERCE isinstance not applicable: T=%r", T)
760
+
761
+ log.debug("COERCE fallback: returning original value=%r (type=%s)", value, type(value).__name__)
762
+ return value
763
+
764
+ # -------------------------- TYPE HELPERS --------------------------
765
+
176
766
  @staticmethod
177
def get_class_fields(cls_name) -> Dict:
    """
    Return the field-name -> expected-type table registered for a class name.

    Args:
        cls_name: Name of the class to look up (the keys of the table below,
            e.g. ``"Person"`` or ``"Agent"``).

    Returns:
        The dict stored under ``cls_name``, or an empty dict when the name
        has no entry. Values are plain classes, ``List[...]`` generics,
        ``|`` unions, or string forward references such as ``"Resource"``.

    Note:
        The imports are re-executed and the whole table literal is rebuilt
        on every call; only one entry of it is returned.
    """
    # Imports are function-local; presumably to avoid circular imports
    # between the gedcomx modules at load time -- TODO confirm.
    # NOTE(review): Optional, Relationship and Event are imported here but
    # never referenced in the table below.
    from typing import List, Optional
    from gedcomx.Attribution import Attribution
    from gedcomx.Document import Document , DocumentType, TextType
    from gedcomx.Note import Note
    from gedcomx.Resource import Resource
    from gedcomx.SourceReference import SourceReference
    from gedcomx.extensions.rs10.rsLink import _rsLinkList
    from gedcomx.Conclusion import ConfidenceLevel
    from gedcomx.EvidenceReference import EvidenceReference
    from gedcomx.Identifier import IdentifierList
    from gedcomx.Gender import Gender, GenderType
    from gedcomx.Fact import Fact
    from gedcomx.Name import Name
    from gedcomx.URI import URI
    from gedcomx.Qualifier import Qualifier
    from gedcomx.PlaceDescription import PlaceDescription
    from gedcomx.PlaceReference import PlaceReference
    from gedcomx.Person import Person
    from gedcomx.Relationship import Relationship, RelationshipType
    from gedcomx.Identifier import Identifier
    from gedcomx.Date import Date
    from gedcomx.TextValue import TextValue
    from gedcomx.Address import Address
    from gedcomx.OnlineAccount import OnlineAccount
    from gedcomx.Event import Event, EventType, EventRole
    from .SourceDescription import SourceDescription

    # Registry: class name -> {constructor field name: expected type}.
    fields = { 'Conclusion' : {
            "id": str,
            "lang": str,
            "sources": List["SourceReference"],
            "analysis": Document | Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "uri": "Resource",
            "max_note_count": int,
            "links": _rsLinkList
        },
        'Subject' : {
            "id": str,
            "lang": str,
            "sources": List["SourceReference"],
            "analysis": Resource,
            "notes": List["Note"],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "extracted": bool,
            "evidence": List[EvidenceReference],
            "media": List[SourceReference],
            "identifiers": IdentifierList,
            "uri": Resource,
            "links": _rsLinkList
        },
        'Person' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "extracted": bool,
            "evidence": List[EvidenceReference],
            "media": List[SourceReference],
            "identifiers": IdentifierList,
            "private": bool,
            "gender": Gender,
            "names": List[Name],
            "facts": List[Fact],
            "living": bool,
            "links": _rsLinkList,
            'uri': Resource
        },
        'SourceReference' : {
            "description": SourceDescription | URI | Resource,
            "descriptionId": str,
            "attribution": Attribution,
            "qualifiers": List[Qualifier],
        },
        'Attribution' : {
            "contributor": Resource | Attribution,
            "modified": str,
            "changeMessage": str,
            "creator": Resource | Attribution,
            "created": str
        },
        'Gender' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "type": GenderType,
        },
        'PlaceReference' : {
            "original": str,
            "description": PlaceDescription | URI,
        },
        'Relationship' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Document | Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "extracted": bool,
            "evidence": List[EvidenceReference],
            "media": List[SourceReference],
            "identifiers": IdentifierList,
            "type": RelationshipType,
            "person1": Person | Resource,
            "person2": Person | Resource,
            "facts": List[Fact],
        },
        'Document' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "type": DocumentType,
            "extracted": bool,
            "textType": TextType,
            "text": str,
        },
        'PlaceDescription' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "extracted": bool,
            "evidence": List[EvidenceReference],
            "media": List[SourceReference],
            # NOTE(review): Subject, Person and Relationship declare
            # "identifiers" as IdentifierList, not List[IdentifierList] --
            # confirm which one is intended here.
            "identifiers": List[IdentifierList],
            "names": List[TextValue],
            "type": str,
            "place": URI,
            "jurisdiction": Resource | PlaceDescription,
            "latitude": float,
            "longitude": float,
            "temporalDescription": Date,
            "spatialDescription": Resource,
        },
        "Agent" : {
            "id": str,
            "identifiers": IdentifierList,
            "names": List[TextValue],
            "homepage": URI,
            "openid": URI,
            "accounts": List[OnlineAccount],
            "emails": List[URI],
            "phones": List[URI],
            "addresses": List[Address],
            "person": object | Resource, # intended to be Person | Resource
            # "xnotes": List[Note], # commented out in your __init__
            "attribution": object, # for GEDCOM5/7 compatibility
            "uri": URI | Resource,
        },
        'Event' : {
            "id": str,
            "lang": str,
            "sources": List[SourceReference],
            "analysis": Resource,
            "notes": List[Note],
            "confidence": ConfidenceLevel,
            "attribution": Attribution,
            "extracted": bool,
            "evidence": List[EvidenceReference],
            "media": List[SourceReference],
            # NOTE(review): declared as List[Identifier] here while the other
            # entries use IdentifierList -- confirm which one is intended.
            "identifiers": List[Identifier],
            "type": EventType,
            "date": Date,
            "place": PlaceReference,
            "roles": List[EventRole],
        }

    }


    # Unregistered class names yield an empty table instead of a KeyError.
    return fields[cls_name] if cls_name in fields else {}
767
+ @lru_cache(maxsize=None)
768
+ def _unwrap(T: Any) -> Any:
769
+ origin = get_origin(T)
770
+ if origin is None:
771
+ return T
772
+ if str(origin).endswith("Annotated"):
773
+ args = get_args(T)
774
+ return Serialization._unwrap(args[0]) if args else Any
775
+ if origin in (Union, types.UnionType):
776
+ args = tuple(a for a in get_args(T) if a is not type(None))
777
+ return Serialization._unwrap(args[0]) if len(args) == 1 else tuple(Serialization._unwrap(a) for a in args)
778
+ return T
367
779
 
368
780
  @staticmethod
369
def deserialize(data: dict[str, Any], class_type) -> Any:
    """
    Build an instance of ``class_type`` from the JSON-style mapping ``data``.

    The field table for the class is looked up by class name through
    ``Serialization.get_class_fields``. Every registered field that is
    present in ``data`` is converted with ``Serialization._coerce_value``
    using its registered type, and the converted values are passed to
    ``class_type`` as keyword arguments. Registered fields that are absent
    from ``data`` are not passed at all.

    Args:
        data: Mapping of field name to raw (decoded JSON) value.
        class_type: Class to instantiate; its ``__name__`` selects the
            field table.

    Returns:
        The new ``class_type`` instance only. Keys of ``data`` that are not
        registered for the class are reported in a DEBUG log line and are
        otherwise ignored; they are not returned to the caller.

    Raises:
        Nothing is caught here: errors from the coercion step or from
        ``class_type(**kwargs)`` propagate to the caller.
    """
    cls_name = class_type.__name__
    # Logger-side %-formatting: arguments are only rendered when the level
    # is enabled, which matters because ``data`` can be large.
    log.debug("Deserializing '%s' into '%s'", data, cls_name)
    class_fields = Serialization.get_class_fields(str(cls_name))
    if not class_fields:
        # Not fatal: the class is then instantiated without arguments.
        log.warning("No class fields found for '%s'", cls_name)
    log.debug("class fields: %s", class_fields)
    log.debug("keys found in JSON: %s", data.keys())

    result: dict[str, Any] = {}
    for name, typ in class_fields.items():
        if name not in data:
            log.debug("Field '%s' of '%s' not found in JSON data", name, cls_name)
            continue
        log.debug("Field '%s' of %s found in data", name, cls_name)
        coerced = Serialization._coerce_value(data[name], typ)
        result[name] = coerced
        log.debug(
            "Field '%s' of '%s' resulted in a '%s' with value '%s'",
            name, cls_name, type(coerced).__name__, coerced,
        )

    # Only used for diagnostics; unknown keys never reach the constructor.
    unknown_keys = [key for key in data if key not in class_fields]
    log.info("Creating instance of %s with fields: %s", cls_name, result.keys())
    new_cls = class_type(**result)
    log.debug("Deserialized %s with unknown keys: %s", cls_name, unknown_keys)
    return new_cls
781
@lru_cache(maxsize=None)
def _resolve_forward(T: Any) -> Any:
    """
    Resolve a forward reference against this module's global namespace.

    A ``ForwardRef`` is looked up by its ``__forward_arg__`` and a plain
    string by its own text. When the name is not a module global, or ``T``
    is neither a ``ForwardRef`` nor a string, ``T`` is returned unchanged.
    Results, including unresolved ones, are memoized by ``lru_cache``.
    """
    if isinstance(T, ForwardRef):
        ref_name = T.__forward_arg__
    elif isinstance(T, str):
        ref_name = T
    else:
        # Already a concrete object: nothing to resolve.
        return T
    return globals().get(ref_name, T)
788
+
789
@staticmethod
@lru_cache(maxsize=None)
def _is_enum_type(T: Any) -> bool:
    """
    Tell whether ``T`` denotes an ``enum.Enum`` subclass.

    ``T`` is first passed through ``Serialization._unwrap`` and then
    ``Serialization._resolve_forward``. Anything that is not a class
    afterwards yields ``False``. The answer is memoized by ``lru_cache``.
    """
    candidate = Serialization._resolve_forward(Serialization._unwrap(T))
    if not isinstance(candidate, type):
        return False
    try:
        return issubclass(candidate, enum.Enum)
    except TypeError:
        # issubclass rejected the candidate as "not a class".
        return False
797
+
798
+ @staticmethod
799
+ def _is_list_like(T: Any) -> bool:
800
+ origin = get_origin(T) or T
801
+ return origin in (list, List)
802
+
803
+ @staticmethod
804
+ def _is_set_like(T: Any) -> bool:
805
+ origin = get_origin(T) or T
806
+ return origin in (set, Set)
807
+
808
+ @staticmethod
809
+ def _is_tuple_like(T: Any) -> bool:
810
+ origin = get_origin(T) or T
811
+ return origin in (tuple, Tuple)
812
+
813
+ @staticmethod
814
+ def _is_dict_like(T: Any) -> bool:
815
+ origin = get_origin(T) or T
816
+ return origin in (dict, Dict)