gedcom-x 0.5.7__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.9.dist-info}/METADATA +1 -1
- gedcom_x-0.5.9.dist-info/RECORD +56 -0
- gedcomx/Extensions/rs10/rsLink.py +110 -60
- gedcomx/TopLevelTypeCollection.py +1 -1
- gedcomx/__init__.py +43 -42
- gedcomx/address.py +217 -0
- gedcomx/{Agent.py → agent.py} +107 -34
- gedcomx/attribution.py +115 -0
- gedcomx/{Conclusion.py → conclusion.py} +120 -51
- gedcomx/{Converter.py → converter.py} +261 -116
- gedcomx/coverage.py +64 -0
- gedcomx/{Date.py → date.py} +43 -9
- gedcomx/{Document.py → document.py} +60 -12
- gedcomx/{Event.py → event.py} +88 -31
- gedcomx/evidence_reference.py +20 -0
- gedcomx/{Fact.py → fact.py} +81 -74
- gedcomx/{Gedcom.py → gedcom.py} +10 -0
- gedcomx/{Gedcom5x.py → gedcom5x.py} +31 -21
- gedcomx/gedcom7/Exceptions.py +9 -0
- gedcomx/gedcom7/GedcomStructure.py +94 -0
- gedcomx/gedcom7/Specification.py +347 -0
- gedcomx/gedcom7/__init__.py +26 -0
- gedcomx/gedcom7/g7interop.py +205 -0
- gedcomx/gedcom7/gedcom7.py +160 -0
- gedcomx/gedcom7/logger.py +19 -0
- gedcomx/{GedcomX.py → gedcomx.py} +109 -106
- gedcomx/gender.py +91 -0
- gedcomx/group.py +72 -0
- gedcomx/{Identifier.py → identifier.py} +48 -21
- gedcomx/{LoggingHub.py → logging_hub.py} +19 -0
- gedcomx/{Mutations.py → mutations.py} +59 -30
- gedcomx/{Name.py → name.py} +88 -47
- gedcomx/note.py +105 -0
- gedcomx/online_account.py +19 -0
- gedcomx/{Person.py → person.py} +61 -41
- gedcomx/{PlaceDescription.py → place_description.py} +71 -23
- gedcomx/{PlaceReference.py → place_reference.py} +32 -10
- gedcomx/{Qualifier.py → qualifier.py} +20 -4
- gedcomx/relationship.py +156 -0
- gedcomx/resource.py +112 -0
- gedcomx/serialization.py +794 -0
- gedcomx/source_citation.py +37 -0
- gedcomx/source_description.py +401 -0
- gedcomx/{SourceReference.py → source_reference.py} +56 -21
- gedcomx/subject.py +122 -0
- gedcomx/textvalue.py +89 -0
- gedcomx/{Translation.py → translation.py} +4 -4
- gedcomx/uri.py +273 -0
- gedcom_x-0.5.7.dist-info/RECORD +0 -49
- gedcomx/Address.py +0 -131
- gedcomx/Attribution.py +0 -91
- gedcomx/Coverage.py +0 -37
- gedcomx/EvidenceReference.py +0 -11
- gedcomx/Gender.py +0 -65
- gedcomx/Group.py +0 -37
- gedcomx/Note.py +0 -73
- gedcomx/OnlineAccount.py +0 -10
- gedcomx/Relationship.py +0 -97
- gedcomx/Resource.py +0 -85
- gedcomx/Serialization.py +0 -816
- gedcomx/SourceCitation.py +0 -25
- gedcomx/SourceDescription.py +0 -314
- gedcomx/Subject.py +0 -59
- gedcomx/TextValue.py +0 -35
- gedcomx/URI.py +0 -105
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.9.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.9.dist-info}/top_level.txt +0 -0
- /gedcomx/{Exceptions.py → exceptions.py} +0 -0
- /gedcomx/{ExtensibleEnum.py → extensible_enum.py} +0 -0
@@ -1,52 +1,86 @@
|
|
1
1
|
DEBUG = False
|
2
|
-
import
|
3
|
-
import json
|
2
|
+
import logging
|
4
3
|
import mimetypes
|
5
4
|
import re
|
6
|
-
import uuid
|
7
5
|
import xml.etree.ElementTree as ET
|
8
6
|
|
9
|
-
from typing import
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
from .
|
34
|
-
from .
|
35
|
-
from .
|
36
|
-
from .
|
37
|
-
from .
|
38
|
-
from .
|
39
|
-
from .
|
40
|
-
|
41
|
-
from .
|
42
|
-
from .
|
43
|
-
from .
|
7
|
+
from typing import Any, Mapping
|
8
|
+
import math
|
9
|
+
import shutil
|
10
|
+
|
11
|
+
"""
|
12
|
+
======================================================================
|
13
|
+
Project: Gedcom-X
|
14
|
+
File: converter.py
|
15
|
+
Author: David J. Cartwright
|
16
|
+
Purpose: convert gedcom versions
|
17
|
+
|
18
|
+
Created: 2025-08-25
|
19
|
+
Updated:
|
20
|
+
- 2025-09-01: added docs and fixed imports for lowercase module names
|
21
|
+
|
22
|
+
======================================================================
|
23
|
+
"""
|
24
|
+
|
25
|
+
"""
|
26
|
+
======================================================================
|
27
|
+
GEDCOM Module Types
|
28
|
+
======================================================================
|
29
|
+
"""
|
30
|
+
|
31
|
+
from .address import Address
|
32
|
+
from .agent import Agent
|
33
|
+
from .attribution import Attribution
|
34
|
+
from .conclusion import Conclusion
|
35
|
+
from .coverage import Coverage
|
36
|
+
from .date import Date, date_to_timestamp
|
37
|
+
from .document import Document
|
38
|
+
from .evidence_reference import EvidenceReference
|
39
|
+
from .exceptions import TagConversionError
|
40
|
+
from .event import Event, EventType, EventRole, EventRoleType
|
41
|
+
from .fact import Fact, FactType, FactQualifier
|
42
|
+
from .gedcom import Gedcom
|
43
|
+
from .gedcom5x import Gedcom5x, Gedcom5xRecord
|
44
|
+
from .gedcomx import GedcomX
|
45
|
+
from .gender import Gender, GenderType
|
46
|
+
from .group import Group
|
47
|
+
from .identifier import Identifier, IdentifierType, make_uid, IdentifierList
|
48
|
+
from .logging_hub import hub, ChannelConfig
|
49
|
+
from .name import Name, NameType, NameForm, NamePart, NamePartType, NamePartQualifier
|
50
|
+
from .note import Note
|
51
|
+
from .online_account import OnlineAccount
|
52
|
+
from .person import Person
|
53
|
+
from .place_description import PlaceDescription
|
54
|
+
from .place_reference import PlaceReference
|
55
|
+
from .qualifier import Qualifier
|
56
|
+
from .relationship import Relationship, RelationshipType
|
57
|
+
from .resource import Resource
|
58
|
+
from .source_citation import SourceCitation
|
59
|
+
from .source_description import SourceDescription, ResourceType
|
60
|
+
from .source_reference import SourceReference, KnownSourceReference
|
61
|
+
# from .subject import Subject
|
62
|
+
from .textvalue import TextValue
|
63
|
+
#from .topleveltypecollection import TopLevelTypeCollection
|
64
|
+
from .uri import URI
|
65
|
+
from .logging_hub import hub, logging
|
66
|
+
"""
|
67
|
+
======================================================================
|
68
|
+
Logging
|
69
|
+
======================================================================
|
70
|
+
"""
|
71
|
+
log = logging.getLogger("gedcomx")
|
72
|
+
serial_log = "gedcomx.serialization"
|
73
|
+
convert_log = "gedcomx.convert.GEDCOM5x"
|
74
|
+
#=====================================================================
|
44
75
|
|
76
|
+
hub.start_channel(
|
77
|
+
ChannelConfig(
|
78
|
+
name=convert_log,
|
79
|
+
path=f"logs/{convert_log}.log",
|
80
|
+
level=logging.DEBUG,
|
81
|
+
rotation="size:10MB:3", # rotate by size, keep 3 backups
|
82
|
+
))
|
45
83
|
|
46
|
-
import logging
|
47
|
-
from .LoggingHub import hub, ChannelConfig
|
48
|
-
log = logging.getLogger("gedcomx")
|
49
|
-
job_id = "gedcomx.convert.GEDCOM5x"
|
50
84
|
|
51
85
|
class GedcomConverter():
|
52
86
|
def __init__(self) -> None:
|
@@ -54,6 +88,9 @@ class GedcomConverter():
|
|
54
88
|
self.object_map: dict[Any, Any] = {-1:self.gedcomx}
|
55
89
|
self.missing_handler_count = {}
|
56
90
|
|
91
|
+
type_name_type = {
|
92
|
+
'aka': NameType.AlsoKnownAs
|
93
|
+
}
|
57
94
|
gedcom_even_to_fact = {
|
58
95
|
# Person Fact Types
|
59
96
|
"ADOP": FactType.Adoption,
|
@@ -142,6 +179,9 @@ class GedcomConverter():
|
|
142
179
|
|
143
180
|
}
|
144
181
|
|
182
|
+
@property
|
183
|
+
def ignored_tags(self):
|
184
|
+
return self.missing_handler_count if self.missing_handler_count != {} else None
|
145
185
|
|
146
186
|
def clean_str(self, text: str | None) -> str:
|
147
187
|
# Regular expression to match HTML/XML tags
|
@@ -151,15 +191,19 @@ class GedcomConverter():
|
|
151
191
|
|
152
192
|
return clean_text
|
153
193
|
|
154
|
-
def
|
155
|
-
if record:
|
156
|
-
with hub.use(
|
194
|
+
def parse_gedcom5x_record(self,record: Gedcom5xRecord):
|
195
|
+
if record is not None:
|
196
|
+
with hub.use(convert_log):
|
157
197
|
handler_name = 'handle_' + record.tag.lower()
|
158
198
|
|
159
199
|
if hasattr(self,handler_name):
|
160
|
-
log.info(f'
|
200
|
+
log.info(f'Using {handler_name} to pars Record: {record.describe()}')
|
161
201
|
handler = getattr(self,handler_name)
|
162
202
|
handler(record)
|
203
|
+
if record.tag != 'FAM':
|
204
|
+
for sub_record in record.subRecords():
|
205
|
+
log.debug(sub_record.describe())
|
206
|
+
self.parse_gedcom5x_record(sub_record)
|
163
207
|
else:
|
164
208
|
if record.tag in self.missing_handler_count:
|
165
209
|
self.missing_handler_count[record.tag] += 1
|
@@ -167,14 +211,16 @@ class GedcomConverter():
|
|
167
211
|
self.missing_handler_count[record.tag] = 1
|
168
212
|
|
169
213
|
log.error(f'Failed Parsing Record: {record.describe()}')
|
170
|
-
|
171
|
-
|
214
|
+
log.debug(f"{record.tag} with id: {record.xref} has {len(record.subRecords())} subRecords")
|
215
|
+
|
216
|
+
else:
|
217
|
+
assert False
|
172
218
|
|
173
219
|
def handle__apid(self, record: Gedcom5xRecord):
|
174
220
|
if isinstance(self.object_map[record.level-1], SourceReference):
|
175
|
-
self.object_map[record.level-1].description.add_identifier(Identifier(value=[URI.from_url('APID://' + record.value if record.value else '')]))
|
221
|
+
self.object_map[record.level-1].description.add_identifier(Identifier(type=IdentifierType.Other, value=[URI.from_url('APID://' + record.value if record.value else '')]))
|
176
222
|
elif isinstance(self.object_map[record.level-1], SourceDescription):
|
177
|
-
self.object_map[record.level-1].add_identifier(Identifier(value=[URI.from_url('APID://' + record.value if record.value else '')]))
|
223
|
+
self.object_map[record.level-1].add_identifier(Identifier(type=IdentifierType.Other,value=[URI.from_url('APID://' + record.value if record.value else '')]))
|
178
224
|
else:
|
179
225
|
raise ValueError(f"Could not handle '_APID' tag in record {record.describe()}, last stack object {type(self.object_map[record.level-1])}")
|
180
226
|
|
@@ -322,9 +368,9 @@ class GedcomConverter():
|
|
322
368
|
|
323
369
|
def handle_caln(self, record: Gedcom5xRecord):
|
324
370
|
if isinstance(self.object_map[record.level-1], SourceReference):
|
325
|
-
self.object_map[record.level-1].description.add_identifier(Identifier(value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
371
|
+
self.object_map[record.level-1].description.add_identifier(Identifier(type=IdentifierType.Other,value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
326
372
|
elif isinstance(self.object_map[record.level-1], SourceDescription):
|
327
|
-
self.object_map[record.level-1].add_identifier(Identifier(value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
373
|
+
self.object_map[record.level-1].add_identifier(Identifier(type=IdentifierType.Other,value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
328
374
|
elif isinstance(self.object_map[record.level-1], Agent):
|
329
375
|
pass
|
330
376
|
# TODO Why is GEDCOM so shitty? A callnumber for a repository?
|
@@ -508,7 +554,11 @@ class GedcomConverter():
|
|
508
554
|
|
509
555
|
self.object_map[record.level] = gxobject
|
510
556
|
else:
|
511
|
-
|
557
|
+
log.warning(f"EVEN type is not known {record.describe()}")
|
558
|
+
gxobject = Event(roles=[EventRole(person=self.object_map[record.level],type=EventRoleType.Principal)])
|
559
|
+
self.gedcomx.add_event(gxobject)
|
560
|
+
self.object_map[record.level] = gxobject
|
561
|
+
|
512
562
|
|
513
563
|
else:
|
514
564
|
possible_fact = FactType.guess(record.subRecord('TYPE')[0].value)
|
@@ -540,7 +590,7 @@ class GedcomConverter():
|
|
540
590
|
|
541
591
|
def handle_exid(self,record: Gedcom5xRecord):
|
542
592
|
if record.value:
|
543
|
-
gxobject = Identifier(type=IdentifierType.External,value=[URI(record.value) if record.value else URI()]) # type: ignore
|
593
|
+
gxobject = Identifier(type=IdentifierType.External,value=[URI._from_json_(record.value) if record.value else URI()]) # type: ignore
|
544
594
|
self.object_map[record.level-1].add_identifier(gxobject)
|
545
595
|
self.object_map[record.level] = gxobject
|
546
596
|
else: raise ValueError('Record had no value')
|
@@ -583,7 +633,11 @@ class GedcomConverter():
|
|
583
633
|
if husband and wife:
|
584
634
|
relationship = Relationship(person1=husband, person2=wife, type=RelationshipType.Couple)
|
585
635
|
self.gedcomx.add_relationship(relationship)
|
586
|
-
|
636
|
+
self.object_map[record.level] = relationship
|
637
|
+
|
638
|
+
if (marr_record := record.subRecord('MARR')) is not None:
|
639
|
+
self.handle_marr(marr_record[0])
|
640
|
+
|
587
641
|
def handle_famc(self, record: Gedcom5xRecord) -> None:
|
588
642
|
return
|
589
643
|
|
@@ -606,7 +660,7 @@ class GedcomConverter():
|
|
606
660
|
if mime_type:
|
607
661
|
self.object_map[record.level-2].mediaType = mime_type
|
608
662
|
else:
|
609
|
-
|
663
|
+
log.error(f"Could not determing mime type from {record.value}")
|
610
664
|
elif isinstance(self.object_map[record.level-1], PlaceDescription):
|
611
665
|
self.object_map[record.level-1].names.append(TextValue(value=record.value))
|
612
666
|
elif record.parent is not None and record.parent.tag == 'TRAN':
|
@@ -626,10 +680,10 @@ class GedcomConverter():
|
|
626
680
|
if person is None:
|
627
681
|
log.warning('Had to create person with id {recrod.xref}')
|
628
682
|
if isinstance(record.xref,str):
|
629
|
-
person = Person(id=record.xref
|
683
|
+
person = Person(id=record.xref)
|
684
|
+
self.gedcomx.add_person(person)
|
630
685
|
else:
|
631
|
-
raise ValueError('INDI Record had no XREF')
|
632
|
-
self.gedcomx.add_person(person)
|
686
|
+
raise ValueError('INDI Record had no XREF')
|
633
687
|
self.object_map[record.level] = person
|
634
688
|
|
635
689
|
def handle_immi(self, record: Gedcom5xRecord):
|
@@ -643,15 +697,33 @@ class GedcomConverter():
|
|
643
697
|
raise TagConversionError(record=record,levelstack=self.object_map)
|
644
698
|
|
645
699
|
def handle_marr(self, record: Gedcom5xRecord):
|
700
|
+
"""
|
646
701
|
if isinstance(self.object_map[record.level-1], Person):
|
647
702
|
gxobject = Fact(type=FactType.Marriage)
|
648
703
|
self.object_map[record.level-1].add_fact(gxobject)
|
649
704
|
|
650
705
|
|
651
706
|
self.object_map[record.level] = gxobject
|
707
|
+
"""
|
708
|
+
if (add_fact := getattr(self.object_map[record.level-1],'add_fact',None)) is not None:
|
709
|
+
gxobject = Fact(type=FactType.Marriage)
|
710
|
+
add_fact(gxobject)
|
711
|
+
self.object_map[record.level] = gxobject
|
652
712
|
else:
|
653
713
|
raise TagConversionError(record=record,levelstack=self.object_map)
|
654
714
|
|
715
|
+
def handle__link(self,record: Gedcom5xRecord):
|
716
|
+
if isinstance(self.object_map[record.level-1], SourceReference):
|
717
|
+
gxobject = Identifier([URI.from_url(record.value)],IdentifierType.External)
|
718
|
+
self.object_map[record.level-1].description.add_identifier(gxobject)
|
719
|
+
self.object_map[record.level] = gxobject
|
720
|
+
|
721
|
+
def handle__milt(self, record: Gedcom5xRecord):
|
722
|
+
if isinstance(self.object_map[record.level-1], Person):
|
723
|
+
gxobject = Fact(type=FactType.MilitaryService)
|
724
|
+
self.object_map[record.level-1].add_fact(gxobject)
|
725
|
+
self.object_map[record.level] = gxobject
|
726
|
+
|
655
727
|
def handle_name(self, record: Gedcom5xRecord):
|
656
728
|
if isinstance(self.object_map[record.level-1], Person):
|
657
729
|
gxobject = Name.simple(record.value if record.value else 'WARNING: NAME had no value')
|
@@ -661,8 +733,10 @@ class GedcomConverter():
|
|
661
733
|
|
662
734
|
self.object_map[record.level] = gxobject
|
663
735
|
elif isinstance(self.object_map[record.level-1], Agent):
|
736
|
+
|
664
737
|
gxobject = TextValue(value=record.value)
|
665
738
|
self.object_map[record.level-1].add_name(gxobject)
|
739
|
+
|
666
740
|
else:
|
667
741
|
raise TagConversionError(record=record,levelstack=self.object_map)
|
668
742
|
|
@@ -674,11 +748,15 @@ class GedcomConverter():
|
|
674
748
|
|
675
749
|
self.object_map[record.level] = gxobject
|
676
750
|
elif isinstance(self.object_map[record.level-1], SourceReference):
|
677
|
-
|
678
|
-
|
751
|
+
if self.object_map[record.level-1].description is not None:
|
752
|
+
gxobject = Note(text=self.clean_str(record.value))
|
753
|
+
self.object_map[record.level-1].description.add_note(gxobject)
|
754
|
+
self.object_map[record.level] = gxobject
|
755
|
+
else:
|
756
|
+
log.error('SourceReference does not have description')
|
679
757
|
|
680
758
|
|
681
|
-
|
759
|
+
|
682
760
|
elif isinstance(self.object_map[record.level-1], Conclusion):
|
683
761
|
gxobject = Note(text=record.value)
|
684
762
|
self.object_map[record.level-1].add_note(gxobject)
|
@@ -729,12 +807,12 @@ class GedcomConverter():
|
|
729
807
|
|
730
808
|
def handle_page(self, record: Gedcom5xRecord):
|
731
809
|
if isinstance(self.object_map[record.level-1], SourceReference):
|
732
|
-
self.object_map[record.level-1].descriptionId = record.value
|
810
|
+
#self.object_map[record.level-1].descriptionId = record.value
|
733
811
|
gx_object = KnownSourceReference(name=KnownSourceReference.Page,value=record.value)
|
734
812
|
self.object_map[record.level-1].add_qualifier(gx_object)
|
735
813
|
self.object_map[record.level] = self.object_map[record.level-1]
|
736
814
|
else:
|
737
|
-
raise ValueError(f"Could not handle 'PAGE' tag in record {record.describe()},
|
815
|
+
raise ValueError(f"Could not handle 'PAGE' tag in record {record.describe()}, object stack {self.object_map}")
|
738
816
|
|
739
817
|
def handle_plac(self, record: Gedcom5xRecord):
|
740
818
|
if isinstance(self.object_map[record.level-1], Agent):
|
@@ -822,9 +900,9 @@ class GedcomConverter():
|
|
822
900
|
def handle_repo(self, record: Gedcom5xRecord):
|
823
901
|
|
824
902
|
if record.level == 0:
|
825
|
-
|
826
|
-
|
827
|
-
|
903
|
+
if (gxobject := self.gedcomx.agents.byId(id=record.xref)) is None:
|
904
|
+
gxobject = Agent(id=record.xref)
|
905
|
+
self.gedcomx.add_agent(gxobject)
|
828
906
|
|
829
907
|
self.object_map[record.level] = gxobject
|
830
908
|
|
@@ -837,7 +915,6 @@ class GedcomConverter():
|
|
837
915
|
self.object_map[record.level] = gxobject
|
838
916
|
|
839
917
|
else:
|
840
|
-
print("handle_repo",record.describe())
|
841
918
|
raise ValueError()
|
842
919
|
gxobject = Agent(names=[TextValue(record.value)])
|
843
920
|
else:
|
@@ -884,30 +961,39 @@ class GedcomConverter():
|
|
884
961
|
assert False
|
885
962
|
|
886
963
|
def handle_sour(self, record: Gedcom5xRecord):
|
887
|
-
if record.level == 0
|
888
|
-
source_description = SourceDescription(id=record.xref.replace('@','') if record.xref else None)
|
889
|
-
self.gedcomx.add_source_description(source_description)
|
964
|
+
if record.level == 0 and (record.tag in ['SOUR','OBJE','_WLNK']):
|
890
965
|
|
891
|
-
self.
|
892
|
-
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
return False
|
897
|
-
if self.gedcomx.source_descriptions.byId(record.xref):
|
898
|
-
gxobject = SourceReference(descriptionId=record.xref, description=self.gedcomx.source_descriptions.byId(record.xref))
|
966
|
+
if (gxobject := self.gedcomx.sourceDescriptions.byId(record.xref)) is None:
|
967
|
+
log.debug(f"SourceDescription with id: {record.xref} was not found. Creating a new SourceDescription")
|
968
|
+
log.debug(f"Creating SourceDescription from {record.tag} {record.describe()}")
|
969
|
+
gxobject = SourceDescription(id=record.xref)
|
970
|
+
self.object_map[record.level-1].add_source_description(gxobject)
|
899
971
|
else:
|
900
|
-
log.
|
901
|
-
|
902
|
-
|
903
|
-
if
|
904
|
-
|
905
|
-
|
906
|
-
|
972
|
+
log.debug(f"Found SourceDescription with id:{record.xref}")
|
973
|
+
|
974
|
+
elif (add_method := getattr(self.object_map[record.level-1],"add_source_reference",None)) is not None:
|
975
|
+
if (source_description := self.gedcomx.sourceDescriptions.byId(record.xref)) is not None:
|
976
|
+
gxobject = SourceReference(descriptionId=record.xref, description=source_description)
|
977
|
+
add_method(gxobject)
|
978
|
+
else:
|
979
|
+
log.error(f"Could not find source with id: {record.xref}, Creating Place Holder Description")
|
980
|
+
gxobject = SourceDescription(id=record.xref)
|
981
|
+
gxobject._place_holder = True
|
982
|
+
gxobject = SourceReference(descriptionId=record.xref, description=gxobject)
|
983
|
+
|
984
|
+
elif record.tag == 'OBJE' and isinstance(self.object_map[record.level-1],SourceReference):
|
985
|
+
if (source_description := self.gedcomx.sourceDescriptions.byId(record.xref)) is not None:
|
986
|
+
gxobject = SourceReference(descriptionId=record.xref, description=source_description)
|
987
|
+
self.object_map[record.level-1].description.add_source_reference(gxobject)
|
907
988
|
else:
|
908
|
-
|
989
|
+
assert False
|
990
|
+
|
991
|
+
else:
|
992
|
+
print(record.describe())
|
993
|
+
print(self.object_map)
|
994
|
+
assert False
|
909
995
|
|
910
|
-
|
996
|
+
self.object_map[record.level] = gxobject
|
911
997
|
|
912
998
|
def handle_stae(self, record: Gedcom5xRecord):
|
913
999
|
if isinstance(self.object_map[record.level-1], Address):
|
@@ -940,8 +1026,6 @@ class GedcomConverter():
|
|
940
1026
|
|
941
1027
|
gxobject = TextValue(value=self.clean_str(record.value))
|
942
1028
|
self.object_map[record.level-1].add_title(gxobject)
|
943
|
-
|
944
|
-
|
945
1029
|
self.object_map[record.level] = gxobject
|
946
1030
|
|
947
1031
|
elif record.parent is not None and record.parent.tag == 'FILE' and isinstance(self.object_map[record.level-2], SourceDescription):
|
@@ -967,8 +1051,7 @@ class GedcomConverter():
|
|
967
1051
|
self.object_map[record.level-1].type = EventType.guess(record.value)
|
968
1052
|
else:
|
969
1053
|
log.warning(f"Could not determine type of event with value '{record.value}'")
|
970
|
-
|
971
|
-
self.object_map[record.level-1].type = None
|
1054
|
+
# add as a note anyway, guess works of text in the string
|
972
1055
|
self.object_map[record.level-1].add_note(Note(text=self.clean_str(record.value)))
|
973
1056
|
elif isinstance(self.object_map[record.level-1], Fact):
|
974
1057
|
if not self.object_map[record.level-1].type:
|
@@ -981,9 +1064,11 @@ class GedcomConverter():
|
|
981
1064
|
elif record.parent is not None and record.parent.tag == 'FORM':
|
982
1065
|
if not self.object_map[0].mediaType:
|
983
1066
|
self.object_map[0].mediaType = record.value
|
1067
|
+
elif isinstance(self.object_map[record.level-1], Name):
|
1068
|
+
self.object_map[record.level-1].type = GedcomConverter.type_name_type.get(record.value,NameType.Other)
|
984
1069
|
|
985
1070
|
else:
|
986
|
-
raise
|
1071
|
+
raise TagConversionError(record,self.object_map)
|
987
1072
|
|
988
1073
|
def handle__url(self, record: Gedcom5xRecord):
|
989
1074
|
if isinstance(self.object_map[record.level-2], SourceDescription):
|
@@ -999,6 +1084,50 @@ class GedcomConverter():
|
|
999
1084
|
else:
|
1000
1085
|
raise ValueError(f"Could not handle 'WWW' tag in record {record.describe()}, last stack object {self.object_map[record.level-1]}")
|
1001
1086
|
|
1087
|
+
def parse_gedcom5x_fam_record(self, record: Gedcom5xRecord):
|
1088
|
+
log.info(f"Parsing family recrods")
|
1089
|
+
with open('./logs/gedcomx.convert.families.json', 'a') as f:
|
1090
|
+
for fam in record._flatten_subrecords(record):
|
1091
|
+
f.write(fam.describe() + "\n")
|
1092
|
+
|
1093
|
+
def print_counts_table(self, counts: Mapping[Any, int]) -> None:
|
1094
|
+
"""
|
1095
|
+
Pretty-print {key: int} as columns, largest count first.
|
1096
|
+
Column count adapts to terminal width and number of items.
|
1097
|
+
"""
|
1098
|
+
items = [(str(k), int(v)) for k, v in counts.items()]
|
1099
|
+
if not items:
|
1100
|
+
print("(empty)")
|
1101
|
+
return
|
1102
|
+
|
1103
|
+
# Sort: by value desc, then key asc for stable ordering
|
1104
|
+
items.sort(key=lambda kv: (-kv[1], kv[0]))
|
1105
|
+
|
1106
|
+
# Cell formatting widths
|
1107
|
+
key_w = max(len(k) for k, _ in items)
|
1108
|
+
num_w = max(len(str(v)) for _, v in items)
|
1109
|
+
cell_fmt = f"{{k:<{key_w}}} {{v:>{num_w}}}" # e.g., 'Surname 123'
|
1110
|
+
cell_width = key_w + 2 + num_w + 2 # +2 padding between columns
|
1111
|
+
|
1112
|
+
# Decide number of columns: fit to terminal, but also scale with item count
|
1113
|
+
term_cols = shutil.get_terminal_size(fallback=(100, 24)).columns
|
1114
|
+
fit_cols = max(1, term_cols // cell_width)
|
1115
|
+
sqrt_cols = max(1, int(math.sqrt(len(items)))) # more cols when many items
|
1116
|
+
cols = max(1, min(len(items), max(fit_cols, sqrt_cols)))
|
1117
|
+
|
1118
|
+
rows = math.ceil(len(items) / cols)
|
1119
|
+
|
1120
|
+
# Print row-wise, reading items column-major so columns stay balanced
|
1121
|
+
for r in range(rows):
|
1122
|
+
line = []
|
1123
|
+
for c in range(cols):
|
1124
|
+
i = c * rows + r
|
1125
|
+
if i < len(items):
|
1126
|
+
k, v = items[i]
|
1127
|
+
cell = cell_fmt.format(k=k, v=v)
|
1128
|
+
line.append(cell.ljust(cell_width))
|
1129
|
+
print("".join(line).rstrip())
|
1130
|
+
|
1002
1131
|
def Gedcom5x_GedcomX(self, gedcom5x: Gedcom5x):
|
1003
1132
|
print(f'Parsing GEDCOM Version {gedcom5x.version}')
|
1004
1133
|
individual_ids = set()
|
@@ -1006,29 +1135,45 @@ class GedcomConverter():
|
|
1006
1135
|
repository_ids = set()
|
1007
1136
|
family_ids = set()
|
1008
1137
|
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1138
|
+
with hub.use(convert_log):
|
1139
|
+
if gedcom5x:
|
1140
|
+
for object in gedcom5x.objects:
|
1141
|
+
log.debug(f"Priming Source id's with id: {object.xref} from objects")
|
1142
|
+
source_ids.add(object.xref)
|
1143
|
+
gx_obj = SourceDescription(id=object.xref)
|
1144
|
+
self.gedcomx.add_source_description(gx_obj)
|
1145
|
+
|
1146
|
+
for source in gedcom5x.sources:
|
1147
|
+
source_ids.add(source.xref)
|
1148
|
+
gx_obj = SourceDescription(id=source.xref)
|
1149
|
+
self.gedcomx.add_source_description(gx_obj)
|
1150
|
+
|
1151
|
+
for repo in gedcom5x.repositories:
|
1152
|
+
repository_ids.add(repo.xref)
|
1153
|
+
gx_obj = Agent(id=repo.xref)
|
1154
|
+
self.gedcomx.add_agent(gx_obj)
|
1155
|
+
|
1156
|
+
for individual in gedcom5x.individuals:
|
1157
|
+
individual_ids.add(individual.xref)
|
1158
|
+
gx_obj = Person(id=individual.xref)
|
1159
|
+
self.gedcomx.add_person(gx_obj)
|
1014
1160
|
|
1161
|
+
for family in gedcom5x.families:
|
1162
|
+
family_ids.add(family.xref)
|
1163
|
+
self.handle_fam(family)
|
1164
|
+
|
1165
|
+
# Now Parse Zero Level Recrods
|
1166
|
+
for source in gedcom5x.sources:
|
1167
|
+
self.parse_gedcom5x_record(source)
|
1168
|
+
for object in gedcom5x.objects:
|
1169
|
+
self.parse_gedcom5x_record(object)
|
1170
|
+
for individual in gedcom5x.individuals:
|
1171
|
+
self.parse_gedcom5x_record(individual)
|
1172
|
+
for repo in gedcom5x.repositories:
|
1173
|
+
self.parse_gedcom5x_record(repo)
|
1174
|
+
for family in gedcom5x.families:
|
1175
|
+
self.parse_gedcom5x_record(family)
|
1176
|
+
|
1177
|
+
self.print_counts_table(self.missing_handler_count)
|
1015
1178
|
|
1016
|
-
for source in gedcom5x.sources:
|
1017
|
-
source_ids.add(source.xref)
|
1018
|
-
gx_obj = SourceDescription(id=source.xref)
|
1019
|
-
self.gedcomx.add_source_description(gx_obj)
|
1020
|
-
|
1021
|
-
for source in gedcom5x.repositories:
|
1022
|
-
repository_ids.add(source.xref)
|
1023
|
-
gx_obj = Agent(id=source.xref)
|
1024
|
-
self.gedcomx.add_agent(gx_obj)
|
1025
|
-
|
1026
|
-
for family in gedcom5x.families:
|
1027
|
-
family_ids.add(family.xref)
|
1028
|
-
self.handle_fam(family)
|
1029
|
-
|
1030
|
-
# Now Parse Zero Level Recrods
|
1031
|
-
for individual in gedcom5x.individuals:
|
1032
|
-
self.parse_gedcom5x_recrod(individual)
|
1033
|
-
|
1034
1179
|
return self.gedcomx
|
gedcomx/coverage.py
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
"""
|
3
|
+
======================================================================
|
4
|
+
Project: Gedcom-X
|
5
|
+
File: coverage.py
|
6
|
+
Author: David J. Cartwright
|
7
|
+
Purpose:
|
8
|
+
|
9
|
+
Created: 2025-08-25
|
10
|
+
Updated:
|
11
|
+
- 2025-09-03: _from_json_ refactor
|
12
|
+
|
13
|
+
======================================================================
|
14
|
+
"""
|
15
|
+
|
16
|
+
"""
|
17
|
+
======================================================================
|
18
|
+
GEDCOM Module Types
|
19
|
+
======================================================================
|
20
|
+
"""
|
21
|
+
from .date import Date
|
22
|
+
from .place_reference import PlaceReference
|
23
|
+
from .logging_hub import hub, logging
|
24
|
+
"""
|
25
|
+
======================================================================
|
26
|
+
Logging
|
27
|
+
======================================================================
|
28
|
+
"""
|
29
|
+
log = logging.getLogger("gedcomx")
|
30
|
+
serial_log = "gedcomx.serialization"
|
31
|
+
#=====================================================================
|
32
|
+
|
33
|
+
|
34
|
+
class Coverage:
|
35
|
+
identifier = 'http://gedcomx.org/v1/Coverage'
|
36
|
+
version = 'http://gedcomx.org/conceptual-model/v1'
|
37
|
+
|
38
|
+
def __init__(self,spatial: Optional[PlaceReference], temporal: Optional[Date]) -> None:
|
39
|
+
self.spatial = spatial
|
40
|
+
self.temporal = temporal
|
41
|
+
|
42
|
+
# ...existing code...
|
43
|
+
|
44
|
+
@property
|
45
|
+
def _as_dict_(self):
|
46
|
+
from .serialization import Serialization
|
47
|
+
type_as_dict = {}
|
48
|
+
if self.spatial:
|
49
|
+
type_as_dict['spatial'] = getattr(self.spatial, '_as_dict_', self.spatial)
|
50
|
+
if self.temporal: # (fixed: no space after the dot)
|
51
|
+
type_as_dict['temporal'] = getattr(self.temporal, '_as_dict_', self.temporal)
|
52
|
+
return Serialization.serialize_dict(type_as_dict)
|
53
|
+
|
54
|
+
@classmethod
|
55
|
+
def _from_json_(cls, data: dict):
|
56
|
+
"""
|
57
|
+
Create a Coverage instance from a JSON-dict (already parsed).
|
58
|
+
"""
|
59
|
+
from .place_reference import PlaceReference
|
60
|
+
from .date import Date
|
61
|
+
|
62
|
+
spatial = PlaceReference._from_json_(data.get('spatial')) if data.get('spatial') else None
|
63
|
+
temporal = Date._from_json_(data.get('temporal')) if data.get('temporal') else None
|
64
|
+
return cls(spatial=spatial, temporal=temporal)
|