gedcom-x 0.5.8__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.9.dist-info}/METADATA +1 -1
- gedcom_x-0.5.9.dist-info/RECORD +56 -0
- gedcomx/Extensions/rs10/rsLink.py +109 -59
- gedcomx/__init__.py +1 -1
- gedcomx/address.py +102 -16
- gedcomx/agent.py +81 -24
- gedcomx/attribution.py +52 -28
- gedcomx/conclusion.py +97 -45
- gedcomx/converter.py +209 -79
- gedcomx/coverage.py +10 -1
- gedcomx/date.py +42 -8
- gedcomx/document.py +37 -7
- gedcomx/event.py +77 -20
- gedcomx/evidence_reference.py +9 -0
- gedcomx/fact.py +53 -54
- gedcomx/gedcom.py +10 -0
- gedcomx/gedcom5x.py +30 -20
- gedcomx/gedcom7/__init__.py +1 -1
- gedcomx/gedcomx.py +95 -93
- gedcomx/gender.py +21 -9
- gedcomx/group.py +9 -0
- gedcomx/identifier.py +47 -20
- gedcomx/logging_hub.py +19 -0
- gedcomx/mutations.py +10 -5
- gedcomx/name.py +74 -33
- gedcomx/note.py +50 -18
- gedcomx/online_account.py +9 -0
- gedcomx/person.py +44 -26
- gedcomx/place_description.py +54 -8
- gedcomx/place_reference.py +30 -8
- gedcomx/qualifier.py +19 -3
- gedcomx/relationship.py +55 -14
- gedcomx/resource.py +45 -18
- gedcomx/serialization.py +400 -421
- gedcomx/source_citation.py +16 -4
- gedcomx/source_description.py +181 -94
- gedcomx/source_reference.py +51 -16
- gedcomx/subject.py +59 -14
- gedcomx/textvalue.py +66 -12
- gedcomx/translation.py +3 -3
- gedcomx/uri.py +155 -3
- gedcom_x-0.5.8.dist-info/RECORD +0 -56
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.9.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.9.dist-info}/top_level.txt +0 -0
- /gedcomx/gedcom7/{Gedcom7.py → gedcom7.py} +0 -0
gedcomx/converter.py
CHANGED
@@ -3,6 +3,11 @@ import logging
|
|
3
3
|
import mimetypes
|
4
4
|
import re
|
5
5
|
import xml.etree.ElementTree as ET
|
6
|
+
|
7
|
+
from typing import Any, Mapping
|
8
|
+
import math
|
9
|
+
import shutil
|
10
|
+
|
6
11
|
"""
|
7
12
|
======================================================================
|
8
13
|
Project: Gedcom-X
|
@@ -22,8 +27,7 @@ import xml.etree.ElementTree as ET
|
|
22
27
|
GEDCOM Module Types
|
23
28
|
======================================================================
|
24
29
|
"""
|
25
|
-
|
26
|
-
from xml.dom import minidom
|
30
|
+
|
27
31
|
from .address import Address
|
28
32
|
from .agent import Agent
|
29
33
|
from .attribution import Attribution
|
@@ -41,7 +45,6 @@ from .gedcomx import GedcomX
|
|
41
45
|
from .gender import Gender, GenderType
|
42
46
|
from .group import Group
|
43
47
|
from .identifier import Identifier, IdentifierType, make_uid, IdentifierList
|
44
|
-
from .Logging import get_logger
|
45
48
|
from .logging_hub import hub, ChannelConfig
|
46
49
|
from .name import Name, NameType, NameForm, NamePart, NamePartType, NamePartQualifier
|
47
50
|
from .note import Note
|
@@ -59,9 +62,25 @@ from .source_reference import SourceReference, KnownSourceReference
|
|
59
62
|
from .textvalue import TextValue
|
60
63
|
#from .topleveltypecollection import TopLevelTypeCollection
|
61
64
|
from .uri import URI
|
62
|
-
|
65
|
+
from .logging_hub import hub, logging
|
66
|
+
"""
|
67
|
+
======================================================================
|
68
|
+
Logging
|
69
|
+
======================================================================
|
70
|
+
"""
|
63
71
|
log = logging.getLogger("gedcomx")
|
64
|
-
|
72
|
+
serial_log = "gedcomx.serialization"
|
73
|
+
convert_log = "gedcomx.convert.GEDCOM5x"
|
74
|
+
#=====================================================================
|
75
|
+
|
76
|
+
hub.start_channel(
|
77
|
+
ChannelConfig(
|
78
|
+
name=convert_log,
|
79
|
+
path=f"logs/{convert_log}.log",
|
80
|
+
level=logging.DEBUG,
|
81
|
+
rotation="size:10MB:3", # rotate by size, keep 3 backups
|
82
|
+
))
|
83
|
+
|
65
84
|
|
66
85
|
class GedcomConverter():
|
67
86
|
def __init__(self) -> None:
|
@@ -69,6 +88,9 @@ class GedcomConverter():
|
|
69
88
|
self.object_map: dict[Any, Any] = {-1:self.gedcomx}
|
70
89
|
self.missing_handler_count = {}
|
71
90
|
|
91
|
+
type_name_type = {
|
92
|
+
'aka': NameType.AlsoKnownAs
|
93
|
+
}
|
72
94
|
gedcom_even_to_fact = {
|
73
95
|
# Person Fact Types
|
74
96
|
"ADOP": FactType.Adoption,
|
@@ -157,6 +179,9 @@ class GedcomConverter():
|
|
157
179
|
|
158
180
|
}
|
159
181
|
|
182
|
+
@property
def ignored_tags(self):
    """Return the per-tag counts of records that had no handler, or ``None``.

    ``missing_handler_count`` maps a GEDCOM tag to the number of records
    counted for it when no ``handle_<tag>`` method was found.  An empty
    mapping is reported as ``None``.
    """
    # Truthiness collapses the empty dict to None without comparing to a literal.
    return self.missing_handler_count or None
|
160
185
|
|
161
186
|
def clean_str(self, text: str | None) -> str:
|
162
187
|
# Regular expression to match HTML/XML tags
|
@@ -166,15 +191,19 @@ class GedcomConverter():
|
|
166
191
|
|
167
192
|
return clean_text
|
168
193
|
|
169
|
-
def
|
170
|
-
if record:
|
171
|
-
with hub.use(
|
194
|
+
def parse_gedcom5x_record(self,record: Gedcom5xRecord):
|
195
|
+
if record is not None:
|
196
|
+
with hub.use(convert_log):
|
172
197
|
handler_name = 'handle_' + record.tag.lower()
|
173
198
|
|
174
199
|
if hasattr(self,handler_name):
|
175
|
-
log.info(f'
|
200
|
+
log.info(f'Using {handler_name} to pars Record: {record.describe()}')
|
176
201
|
handler = getattr(self,handler_name)
|
177
202
|
handler(record)
|
203
|
+
if record.tag != 'FAM':
|
204
|
+
for sub_record in record.subRecords():
|
205
|
+
log.debug(sub_record.describe())
|
206
|
+
self.parse_gedcom5x_record(sub_record)
|
178
207
|
else:
|
179
208
|
if record.tag in self.missing_handler_count:
|
180
209
|
self.missing_handler_count[record.tag] += 1
|
@@ -182,14 +211,16 @@ class GedcomConverter():
|
|
182
211
|
self.missing_handler_count[record.tag] = 1
|
183
212
|
|
184
213
|
log.error(f'Failed Parsing Record: {record.describe()}')
|
185
|
-
|
186
|
-
|
214
|
+
log.debug(f"{record.tag} with id: {record.xref} has {len(record.subRecords())} subRecords")
|
215
|
+
|
216
|
+
else:
|
217
|
+
assert False
|
187
218
|
|
188
219
|
def handle__apid(self, record: Gedcom5xRecord):
    """Attach an ``APID://`` identifier (type Other) for an ``_APID`` record.

    The identifier is added to the parent SourceDescription, or to the
    description of the parent SourceReference.

    Raises:
        ValueError: if the parent on the object stack is neither a
            SourceReference nor a SourceDescription.
    """
    parent = self.object_map[record.level - 1]
    if isinstance(parent, SourceReference):
        target = parent.description
    elif isinstance(parent, SourceDescription):
        target = parent
    else:
        raise ValueError(f"Could not handle '_APID' tag in record {record.describe()}, last stack object {type(parent)}")

    # The conditional binds looser than '+': a record without a value yields
    # an empty URL, not a bare 'APID://' prefix.
    url = 'APID://' + record.value if record.value else ''
    target.add_identifier(Identifier(type=IdentifierType.Other, value=[URI.from_url(url)]))
|
195
226
|
|
@@ -337,9 +368,9 @@ class GedcomConverter():
|
|
337
368
|
|
338
369
|
def handle_caln(self, record: Gedcom5xRecord):
|
339
370
|
if isinstance(self.object_map[record.level-1], SourceReference):
|
340
|
-
self.object_map[record.level-1].description.add_identifier(Identifier(value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
371
|
+
self.object_map[record.level-1].description.add_identifier(Identifier(type=IdentifierType.Other,value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
341
372
|
elif isinstance(self.object_map[record.level-1], SourceDescription):
|
342
|
-
self.object_map[record.level-1].add_identifier(Identifier(value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
373
|
+
self.object_map[record.level-1].add_identifier(Identifier(type=IdentifierType.Other,value=[URI.from_url('Call Number:' + record.value if record.value else '')]))
|
343
374
|
elif isinstance(self.object_map[record.level-1], Agent):
|
344
375
|
pass
|
345
376
|
# TODO Why is GEDCOM so shitty? A callnumber for a repository?
|
@@ -523,7 +554,11 @@ class GedcomConverter():
|
|
523
554
|
|
524
555
|
self.object_map[record.level] = gxobject
|
525
556
|
else:
|
526
|
-
|
557
|
+
log.warning(f"EVEN type is not known {record.describe()}")
|
558
|
+
gxobject = Event(roles=[EventRole(person=self.object_map[record.level],type=EventRoleType.Principal)])
|
559
|
+
self.gedcomx.add_event(gxobject)
|
560
|
+
self.object_map[record.level] = gxobject
|
561
|
+
|
527
562
|
|
528
563
|
else:
|
529
564
|
possible_fact = FactType.guess(record.subRecord('TYPE')[0].value)
|
@@ -555,7 +590,7 @@ class GedcomConverter():
|
|
555
590
|
|
556
591
|
def handle_exid(self, record: Gedcom5xRecord):
    """Attach an External identifier built from an ``EXID`` record's value.

    The identifier is added to the object one level up the stack and becomes
    the current object for ``record.level``.

    Raises:
        ValueError: if the record carries no value.
    """
    if not record.value:
        raise ValueError('Record had no value')

    # record.value is known to be truthy here, so the URI always comes from it.
    uri = URI._from_json_(record.value)
    gxobject = Identifier(type=IdentifierType.External, value=[uri])  # type: ignore
    self.object_map[record.level - 1].add_identifier(gxobject)
    self.object_map[record.level] = gxobject
|
@@ -598,7 +633,11 @@ class GedcomConverter():
|
|
598
633
|
if husband and wife:
|
599
634
|
relationship = Relationship(person1=husband, person2=wife, type=RelationshipType.Couple)
|
600
635
|
self.gedcomx.add_relationship(relationship)
|
601
|
-
|
636
|
+
self.object_map[record.level] = relationship
|
637
|
+
|
638
|
+
if (marr_record := record.subRecord('MARR')) is not None:
|
639
|
+
self.handle_marr(marr_record[0])
|
640
|
+
|
602
641
|
def handle_famc(self, record: Gedcom5xRecord) -> None:
|
603
642
|
return
|
604
643
|
|
@@ -621,7 +660,7 @@ class GedcomConverter():
|
|
621
660
|
if mime_type:
|
622
661
|
self.object_map[record.level-2].mediaType = mime_type
|
623
662
|
else:
|
624
|
-
|
663
|
+
log.error(f"Could not determing mime type from {record.value}")
|
625
664
|
elif isinstance(self.object_map[record.level-1], PlaceDescription):
|
626
665
|
self.object_map[record.level-1].names.append(TextValue(value=record.value))
|
627
666
|
elif record.parent is not None and record.parent.tag == 'TRAN':
|
@@ -641,10 +680,10 @@ class GedcomConverter():
|
|
641
680
|
if person is None:
|
642
681
|
log.warning('Had to create person with id {recrod.xref}')
|
643
682
|
if isinstance(record.xref,str):
|
644
|
-
person = Person(id=record.xref
|
683
|
+
person = Person(id=record.xref)
|
684
|
+
self.gedcomx.add_person(person)
|
645
685
|
else:
|
646
|
-
raise ValueError('INDI Record had no XREF')
|
647
|
-
self.gedcomx.add_person(person)
|
686
|
+
raise ValueError('INDI Record had no XREF')
|
648
687
|
self.object_map[record.level] = person
|
649
688
|
|
650
689
|
def handle_immi(self, record: Gedcom5xRecord):
|
@@ -658,15 +697,33 @@ class GedcomConverter():
|
|
658
697
|
raise TagConversionError(record=record,levelstack=self.object_map)
|
659
698
|
|
660
699
|
def handle_marr(self, record: Gedcom5xRecord):
    """Attach a Marriage fact to the object one level up the stack.

    Any parent that exposes an ``add_fact`` method is accepted.  The new
    fact becomes the current object for ``record.level``.

    Raises:
        TagConversionError: if the parent object has no ``add_fact`` method.
    """
    parent = self.object_map[record.level - 1]
    add_fact = getattr(parent, 'add_fact', None)
    if add_fact is None:
        raise TagConversionError(record=record, levelstack=self.object_map)

    gxobject = Fact(type=FactType.Marriage)
    add_fact(gxobject)
    self.object_map[record.level] = gxobject
|
669
714
|
|
715
|
+
def handle__link(self, record: Gedcom5xRecord):
    """Record a ``_LINK`` value as an External identifier on the cited source.

    Acts only when the parent on the object stack is a SourceReference; any
    other parent is left untouched.  The identifier is added to that
    reference's description and becomes the current object for
    ``record.level``.
    """
    source_ref = self.object_map[record.level - 1]
    if not isinstance(source_ref, SourceReference):
        return

    gxobject = Identifier([URI.from_url(record.value)], IdentifierType.External)
    source_ref.description.add_identifier(gxobject)
    self.object_map[record.level] = gxobject
|
720
|
+
|
721
|
+
def handle__milt(self, record: Gedcom5xRecord):
    """Add a MilitaryService fact to the parent Person for a ``_MILT`` record.

    Nothing happens when the parent on the object stack is not a Person.
    The new fact becomes the current object for ``record.level``.
    """
    person = self.object_map[record.level - 1]
    if not isinstance(person, Person):
        return

    gxobject = Fact(type=FactType.MilitaryService)
    person.add_fact(gxobject)
    self.object_map[record.level] = gxobject
|
726
|
+
|
670
727
|
def handle_name(self, record: Gedcom5xRecord):
|
671
728
|
if isinstance(self.object_map[record.level-1], Person):
|
672
729
|
gxobject = Name.simple(record.value if record.value else 'WARNING: NAME had no value')
|
@@ -676,8 +733,10 @@ class GedcomConverter():
|
|
676
733
|
|
677
734
|
self.object_map[record.level] = gxobject
|
678
735
|
elif isinstance(self.object_map[record.level-1], Agent):
|
736
|
+
|
679
737
|
gxobject = TextValue(value=record.value)
|
680
738
|
self.object_map[record.level-1].add_name(gxobject)
|
739
|
+
|
681
740
|
else:
|
682
741
|
raise TagConversionError(record=record,levelstack=self.object_map)
|
683
742
|
|
@@ -689,11 +748,15 @@ class GedcomConverter():
|
|
689
748
|
|
690
749
|
self.object_map[record.level] = gxobject
|
691
750
|
elif isinstance(self.object_map[record.level-1], SourceReference):
|
692
|
-
|
693
|
-
|
751
|
+
if self.object_map[record.level-1].description is not None:
|
752
|
+
gxobject = Note(text=self.clean_str(record.value))
|
753
|
+
self.object_map[record.level-1].description.add_note(gxobject)
|
754
|
+
self.object_map[record.level] = gxobject
|
755
|
+
else:
|
756
|
+
log.error('SourceReference does not have description')
|
694
757
|
|
695
758
|
|
696
|
-
|
759
|
+
|
697
760
|
elif isinstance(self.object_map[record.level-1], Conclusion):
|
698
761
|
gxobject = Note(text=record.value)
|
699
762
|
self.object_map[record.level-1].add_note(gxobject)
|
@@ -744,12 +807,12 @@ class GedcomConverter():
|
|
744
807
|
|
745
808
|
def handle_page(self, record: Gedcom5xRecord):
    """Store a ``PAGE`` value as a Page qualifier on the parent SourceReference.

    The SourceReference itself stays the current object for ``record.level``.

    Raises:
        ValueError: if the parent on the object stack is not a SourceReference.
    """
    source_ref = self.object_map[record.level - 1]
    if not isinstance(source_ref, SourceReference):
        raise ValueError(f"Could not handle 'PAGE' tag in record {record.describe()}, object stack {self.object_map}")

    gx_object = KnownSourceReference(name=KnownSourceReference.Page, value=record.value)
    source_ref.add_qualifier(gx_object)
    self.object_map[record.level] = source_ref
|
753
816
|
|
754
817
|
def handle_plac(self, record: Gedcom5xRecord):
|
755
818
|
if isinstance(self.object_map[record.level-1], Agent):
|
@@ -837,9 +900,9 @@ class GedcomConverter():
|
|
837
900
|
def handle_repo(self, record: Gedcom5xRecord):
|
838
901
|
|
839
902
|
if record.level == 0:
|
840
|
-
|
841
|
-
|
842
|
-
|
903
|
+
if (gxobject := self.gedcomx.agents.byId(id=record.xref)) is None:
|
904
|
+
gxobject = Agent(id=record.xref)
|
905
|
+
self.gedcomx.add_agent(gxobject)
|
843
906
|
|
844
907
|
self.object_map[record.level] = gxobject
|
845
908
|
|
@@ -852,7 +915,6 @@ class GedcomConverter():
|
|
852
915
|
self.object_map[record.level] = gxobject
|
853
916
|
|
854
917
|
else:
|
855
|
-
print("handle_repo",record.describe())
|
856
918
|
raise ValueError()
|
857
919
|
gxobject = Agent(names=[TextValue(record.value)])
|
858
920
|
else:
|
@@ -899,30 +961,39 @@ class GedcomConverter():
|
|
899
961
|
assert False
|
900
962
|
|
901
963
|
def handle_sour(self, record: Gedcom5xRecord):
    """Resolve a source-style record into a SourceDescription or SourceReference.

    Cases, tried in order:

    * Level-0 ``SOUR`` / ``OBJE`` / ``_WLNK`` record: look the
      SourceDescription up by xref; create and register one if it is missing.
    * Parent exposes ``add_source_reference``: attach a SourceReference to
      the known description, or build a reference to a placeholder
      description when the xref is unknown.
    * ``OBJE`` under a SourceReference: attach a reference to that
      reference's description.

    The resulting object becomes the current object for ``record.level``.

    Raises:
        AssertionError: if the record matches none of the cases, or an
            ``OBJE`` under a SourceReference names an unknown source.
    """
    if record.level == 0 and record.tag in ('SOUR', 'OBJE', '_WLNK'):
        gxobject = self.gedcomx.sourceDescriptions.byId(record.xref)
        if gxobject is None:
            log.debug(f"SourceDescription with id: {record.xref} was not found. Creating a new SourceDescription")
            log.debug(f"Creating SourceDescription from {record.tag} {record.describe()}")
            gxobject = SourceDescription(id=record.xref)
            self.object_map[record.level - 1].add_source_description(gxobject)
        else:
            log.debug(f"Found SourceDescription with id:{record.xref}")

    elif (add_method := getattr(self.object_map[record.level - 1], "add_source_reference", None)) is not None:
        source_description = self.gedcomx.sourceDescriptions.byId(record.xref)
        if source_description is not None:
            gxobject = SourceReference(descriptionId=record.xref, description=source_description)
            add_method(gxobject)
        else:
            log.error(f"Could not find source with id: {record.xref}, Creating Place Holder Description")
            placeholder = SourceDescription(id=record.xref)
            placeholder._place_holder = True
            # NOTE(review): the placeholder reference is not passed to
            # add_method, unlike the branch above -- confirm this is intended.
            gxobject = SourceReference(descriptionId=record.xref, description=placeholder)

    elif record.tag == 'OBJE' and isinstance(self.object_map[record.level - 1], SourceReference):
        source_description = self.gedcomx.sourceDescriptions.byId(record.xref)
        if source_description is None:
            # Raised explicitly: a bare `assert` is stripped under -O and
            # would fall through to an unbound `gxobject`.
            raise AssertionError(f"No SourceDescription with id: {record.xref} for {record.describe()}")
        gxobject = SourceReference(descriptionId=record.xref, description=source_description)
        self.object_map[record.level - 1].description.add_source_reference(gxobject)

    else:
        # The diagnostic context travels with the exception instead of stdout.
        raise AssertionError(f"Could not handle record {record.describe()}, object stack {self.object_map}")

    self.object_map[record.level] = gxobject
|
926
997
|
|
927
998
|
def handle_stae(self, record: Gedcom5xRecord):
|
928
999
|
if isinstance(self.object_map[record.level-1], Address):
|
@@ -955,8 +1026,6 @@ class GedcomConverter():
|
|
955
1026
|
|
956
1027
|
gxobject = TextValue(value=self.clean_str(record.value))
|
957
1028
|
self.object_map[record.level-1].add_title(gxobject)
|
958
|
-
|
959
|
-
|
960
1029
|
self.object_map[record.level] = gxobject
|
961
1030
|
|
962
1031
|
elif record.parent is not None and record.parent.tag == 'FILE' and isinstance(self.object_map[record.level-2], SourceDescription):
|
@@ -982,8 +1051,7 @@ class GedcomConverter():
|
|
982
1051
|
self.object_map[record.level-1].type = EventType.guess(record.value)
|
983
1052
|
else:
|
984
1053
|
log.warning(f"Could not determine type of event with value '{record.value}'")
|
985
|
-
|
986
|
-
self.object_map[record.level-1].type = None
|
1054
|
+
# add as a note anyway, guess works of text in the string
|
987
1055
|
self.object_map[record.level-1].add_note(Note(text=self.clean_str(record.value)))
|
988
1056
|
elif isinstance(self.object_map[record.level-1], Fact):
|
989
1057
|
if not self.object_map[record.level-1].type:
|
@@ -996,9 +1064,11 @@ class GedcomConverter():
|
|
996
1064
|
elif record.parent is not None and record.parent.tag == 'FORM':
|
997
1065
|
if not self.object_map[0].mediaType:
|
998
1066
|
self.object_map[0].mediaType = record.value
|
1067
|
+
elif isinstance(self.object_map[record.level-1], Name):
|
1068
|
+
self.object_map[record.level-1].type = GedcomConverter.type_name_type.get(record.value,NameType.Other)
|
999
1069
|
|
1000
1070
|
else:
|
1001
|
-
raise
|
1071
|
+
raise TagConversionError(record,self.object_map)
|
1002
1072
|
|
1003
1073
|
def handle__url(self, record: Gedcom5xRecord):
|
1004
1074
|
if isinstance(self.object_map[record.level-2], SourceDescription):
|
@@ -1014,6 +1084,50 @@ class GedcomConverter():
|
|
1014
1084
|
else:
|
1015
1085
|
raise ValueError(f"Could not handle 'WWW' tag in record {record.describe()}, last stack object {self.object_map[record.level-1]}")
|
1016
1086
|
|
1087
|
+
def parse_gedcom5x_fam_record(self, record: Gedcom5xRecord):
    """Append a description of every flattened sub-record of *record* to a dump file.

    One ``describe()`` string per line is appended to
    ``./logs/gedcomx.convert.families.json``.
    """
    import os  # local import so this method does not depend on a file-level change

    log.info(f"Parsing family recrods")
    dump_path = './logs/gedcomx.convert.families.json'
    # Append mode does not create missing directories, so make sure it exists.
    os.makedirs(os.path.dirname(dump_path), exist_ok=True)
    # Explicit encoding: the platform default may not be able to encode the text.
    with open(dump_path, 'a', encoding='utf-8') as f:
        f.writelines(fam.describe() + "\n" for fam in record._flatten_subrecords(record))
|
1092
|
+
|
1093
|
+
def print_counts_table(self, counts: Mapping[Any, int], file: Any = None) -> None:
    """
    Pretty-print {key: int} as columns, largest count first.

    Column count adapts to terminal width and number of items.

    Args:
        counts: mapping of label -> count; keys are rendered with ``str``
            and values coerced with ``int``.
        file: optional text stream to write to.  ``None`` (the default)
            prints to standard output.
    """
    # Sort: by value desc, then key asc for stable ordering
    items = sorted(
        ((str(k), int(v)) for k, v in counts.items()),
        key=lambda kv: (-kv[1], kv[0]),
    )
    if not items:
        print("(empty)", file=file)
        return

    # Cell formatting widths
    key_w = max(len(k) for k, _ in items)
    num_w = max(len(str(v)) for _, v in items)
    cell_width = key_w + 2 + num_w + 2  # +2 padding between columns

    # Decide number of columns: fit to terminal, but also scale with item count
    term_cols = shutil.get_terminal_size(fallback=(100, 24)).columns
    fit_cols = max(1, term_cols // cell_width)
    sqrt_cols = max(1, int(math.sqrt(len(items))))  # more cols when many items
    cols = min(len(items), max(fit_cols, sqrt_cols))
    rows = math.ceil(len(items) / cols)

    # Print row-wise, reading items column-major so columns stay balanced:
    # row r holds items r, r + rows, r + 2 * rows, ...
    for r in range(rows):
        cells = [
            f"{k:<{key_w}} {v:>{num_w}}".ljust(cell_width)
            for k, v in items[r::rows]
        ]
        print("".join(cells).rstrip(), file=file)
|
1130
|
+
|
1017
1131
|
def Gedcom5x_GedcomX(self, gedcom5x: Gedcom5x):
|
1018
1132
|
print(f'Parsing GEDCOM Version {gedcom5x.version}')
|
1019
1133
|
individual_ids = set()
|
@@ -1021,29 +1135,45 @@ class GedcomConverter():
|
|
1021
1135
|
repository_ids = set()
|
1022
1136
|
family_ids = set()
|
1023
1137
|
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1138
|
+
with hub.use(convert_log):
|
1139
|
+
if gedcom5x:
|
1140
|
+
for object in gedcom5x.objects:
|
1141
|
+
log.debug(f"Priming Source id's with id: {object.xref} from objects")
|
1142
|
+
source_ids.add(object.xref)
|
1143
|
+
gx_obj = SourceDescription(id=object.xref)
|
1144
|
+
self.gedcomx.add_source_description(gx_obj)
|
1145
|
+
|
1146
|
+
for source in gedcom5x.sources:
|
1147
|
+
source_ids.add(source.xref)
|
1148
|
+
gx_obj = SourceDescription(id=source.xref)
|
1149
|
+
self.gedcomx.add_source_description(gx_obj)
|
1150
|
+
|
1151
|
+
for repo in gedcom5x.repositories:
|
1152
|
+
repository_ids.add(repo.xref)
|
1153
|
+
gx_obj = Agent(id=repo.xref)
|
1154
|
+
self.gedcomx.add_agent(gx_obj)
|
1155
|
+
|
1156
|
+
for individual in gedcom5x.individuals:
|
1157
|
+
individual_ids.add(individual.xref)
|
1158
|
+
gx_obj = Person(id=individual.xref)
|
1159
|
+
self.gedcomx.add_person(gx_obj)
|
1029
1160
|
|
1161
|
+
for family in gedcom5x.families:
|
1162
|
+
family_ids.add(family.xref)
|
1163
|
+
self.handle_fam(family)
|
1164
|
+
|
1165
|
+
# Now Parse Zero Level Recrods
|
1166
|
+
for source in gedcom5x.sources:
|
1167
|
+
self.parse_gedcom5x_record(source)
|
1168
|
+
for object in gedcom5x.objects:
|
1169
|
+
self.parse_gedcom5x_record(object)
|
1170
|
+
for individual in gedcom5x.individuals:
|
1171
|
+
self.parse_gedcom5x_record(individual)
|
1172
|
+
for repo in gedcom5x.repositories:
|
1173
|
+
self.parse_gedcom5x_record(repo)
|
1174
|
+
for family in gedcom5x.families:
|
1175
|
+
self.parse_gedcom5x_record(family)
|
1176
|
+
|
1177
|
+
self.print_counts_table(self.missing_handler_count)
|
1030
1178
|
|
1031
|
-
for source in gedcom5x.sources:
|
1032
|
-
source_ids.add(source.xref)
|
1033
|
-
gx_obj = SourceDescription(id=source.xref)
|
1034
|
-
self.gedcomx.add_source_description(gx_obj)
|
1035
|
-
|
1036
|
-
for source in gedcom5x.repositories:
|
1037
|
-
repository_ids.add(source.xref)
|
1038
|
-
gx_obj = Agent(id=source.xref)
|
1039
|
-
self.gedcomx.add_agent(gx_obj)
|
1040
|
-
|
1041
|
-
for family in gedcom5x.families:
|
1042
|
-
family_ids.add(family.xref)
|
1043
|
-
self.handle_fam(family)
|
1044
|
-
|
1045
|
-
# Now Parse Zero Level Recrods
|
1046
|
-
for individual in gedcom5x.individuals:
|
1047
|
-
self.parse_gedcom5x_recrod(individual)
|
1048
|
-
|
1049
1179
|
return self.gedcomx
|
gedcomx/coverage.py
CHANGED
@@ -8,7 +8,7 @@ from typing import Optional
|
|
8
8
|
|
9
9
|
Created: 2025-08-25
|
10
10
|
Updated:
|
11
|
-
- 2025-
|
11
|
+
- 2025-09-03: _from_json_ refactor
|
12
12
|
|
13
13
|
======================================================================
|
14
14
|
"""
|
@@ -20,6 +20,15 @@ GEDCOM Module Types
|
|
20
20
|
"""
|
21
21
|
from .date import Date
|
22
22
|
from .place_reference import PlaceReference
|
23
|
+
from .logging_hub import hub, logging
|
24
|
+
"""
|
25
|
+
======================================================================
|
26
|
+
Logging
|
27
|
+
======================================================================
|
28
|
+
"""
|
29
|
+
log = logging.getLogger("gedcomx")
|
30
|
+
serial_log = "gedcomx.serialization"
|
31
|
+
#=====================================================================
|
23
32
|
|
24
33
|
|
25
34
|
class Coverage:
|
gedcomx/date.py
CHANGED
@@ -1,9 +1,34 @@
|
|
1
|
-
from typing import Optional
|
1
|
+
from typing import Any, Optional, Dict
|
2
2
|
from datetime import datetime, timezone
|
3
3
|
from dateutil import parser
|
4
|
-
|
5
|
-
|
6
|
-
|
4
|
+
"""
|
5
|
+
======================================================================
|
6
|
+
Project: Gedcom-X
|
7
|
+
File: date.py
|
8
|
+
Author: David J. Cartwright
|
9
|
+
Purpose:
|
10
|
+
|
11
|
+
Created: 2025-08-25
|
12
|
+
Updated:
|
13
|
+
- 2025-09-03: _from_json refactored
|
14
|
+
|
15
|
+
======================================================================
|
16
|
+
"""
|
17
|
+
|
18
|
+
"""
|
19
|
+
======================================================================
|
20
|
+
GEDCOM Module Types
|
21
|
+
======================================================================
|
22
|
+
"""
|
23
|
+
from .logging_hub import hub, logging
|
24
|
+
"""
|
25
|
+
======================================================================
|
26
|
+
Logging
|
27
|
+
======================================================================
|
28
|
+
"""
|
29
|
+
log = logging.getLogger("gedcomx")
|
30
|
+
serial_log = "gedcomx.serialization"
|
31
|
+
#=====================================================================
|
7
32
|
|
8
33
|
|
9
34
|
class DateFormat:
|
@@ -31,14 +56,23 @@ class Date:
|
|
31
56
|
type_as_dict['original'] = self.original
|
32
57
|
if self.formal:
|
33
58
|
type_as_dict['formal'] = self.formal
|
59
|
+
return type_as_dict if type_as_dict != {} else None
|
34
60
|
return Serialization.serialize_dict(type_as_dict)
|
35
61
|
|
36
62
|
@classmethod
|
37
|
-
def _from_json_(cls,data):
|
38
|
-
|
39
|
-
|
63
|
+
def _from_json_(cls,data: Any, context=None):
|
64
|
+
if not isinstance(data, dict):
|
65
|
+
raise TypeError(f"{cls.__name__}._from_json_ expected dict or str, got {type(data)} data:{data}")
|
66
|
+
|
67
|
+
date_data: Dict[str, Any] = {}
|
68
|
+
|
69
|
+
# Scalars
|
70
|
+
if (orig := data.get("original")) is not None:
|
71
|
+
date_data["original"] = orig
|
72
|
+
if (formal := data.get("formal")) is not None:
|
73
|
+
date_data["formal"] = formal
|
40
74
|
|
41
|
-
return
|
75
|
+
return cls(**date_data)
|
42
76
|
|
43
77
|
|
44
78
|
|
gedcomx/document.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
from enum import Enum
|
2
|
-
from typing import
|
2
|
+
from typing import Any, Dict, List, Optional
|
3
3
|
"""
|
4
4
|
======================================================================
|
5
5
|
Project: Gedcom-X
|
@@ -9,7 +9,7 @@ from typing import Optional, List
|
|
9
9
|
|
10
10
|
Created: 2025-08-25
|
11
11
|
Updated:
|
12
|
-
- 2025-
|
12
|
+
- 2025-09-03: _from_json_ refactored
|
13
13
|
|
14
14
|
======================================================================
|
15
15
|
"""
|
@@ -24,6 +24,15 @@ from .conclusion import Conclusion, ConfidenceLevel
|
|
24
24
|
from .note import Note
|
25
25
|
from .resource import Resource
|
26
26
|
from .source_reference import SourceReference
|
27
|
+
from .logging_hub import hub, logging
|
28
|
+
"""
|
29
|
+
======================================================================
|
30
|
+
Logging
|
31
|
+
======================================================================
|
32
|
+
"""
|
33
|
+
log = logging.getLogger("gedcomx")
|
34
|
+
serial_log = "gedcomx.serialization"
|
35
|
+
#=====================================================================
|
27
36
|
|
28
37
|
|
29
38
|
class DocumentType(Enum):
|
@@ -83,10 +92,31 @@ class Document(Conclusion):
|
|
83
92
|
return Serialization.serialize_dict(type_as_dict)
|
84
93
|
|
85
94
|
@classmethod
def _from_json_(cls, data: Any, context: Any = None) -> "Document":
    """
    Build a Document from JSON.
    Shorthand: a bare string becomes {'text': <string>}.

    Shared fields are collected by ``Conclusion._dict_from_json_``; this
    method adds ``type``, ``extracted``, ``textType`` and ``text``.

    Raises:
        TypeError: if *data* is neither a dict nor a str.
    """
    # Implement the documented shorthand instead of rejecting strings.
    if isinstance(data, str):
        data = {"text": data}
    if not isinstance(data, dict):
        raise TypeError(f"{cls.__name__}._from_json_ expected dict or str, got {type(data)}")

    obj: Dict[str, Any] = Conclusion._dict_from_json_(data, context)

    # type (enum)
    if (typ := data.get("type")) is not None:
        obj["type"] = DocumentType(typ)

    # extracted (bool; accept common string forms)
    if (ex := data.get("extracted")) is not None:
        if isinstance(ex, str):
            # bool("false") is True, so map the usual spellings explicitly;
            # any other string keeps the plain truthiness rule.
            token = ex.strip().lower()
            if token in ("true", "1", "yes", "y"):
                ex = True
            elif token in ("false", "0", "no", "n", ""):
                ex = False
        obj["extracted"] = bool(ex)

    # textType (enum)
    if (tt := data.get("textType")) is not None:
        obj["textType"] = TextType(tt)

    # text (string)
    if (tx := data.get("text")) is not None:
        obj["text"] = str(tx)

    return cls(**obj)
|