gedcom-x 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/METADATA +1 -1
- gedcom_x-0.5.8.dist-info/RECORD +56 -0
- gedcomx/Extensions/rs10/rsLink.py +3 -3
- gedcomx/TopLevelTypeCollection.py +1 -1
- gedcomx/__init__.py +43 -42
- gedcomx/{Address.py → address.py} +1 -1
- gedcomx/{Agent.py → agent.py} +32 -16
- gedcomx/{Attribution.py → attribution.py} +3 -3
- gedcomx/{Conclusion.py → conclusion.py} +26 -9
- gedcomx/{Converter.py → converter.py} +54 -39
- gedcomx/{Coverage.py → coverage.py} +23 -5
- gedcomx/{Date.py → date.py} +1 -1
- gedcomx/{Document.py → document.py} +26 -8
- gedcomx/{Event.py → event.py} +13 -13
- gedcomx/{EvidenceReference.py → evidence_reference.py} +2 -2
- gedcomx/{Fact.py → fact.py} +31 -23
- gedcomx/{Gedcom5x.py → gedcom5x.py} +1 -1
- gedcomx/gedcom7/Exceptions.py +9 -0
- gedcomx/gedcom7/Gedcom7.py +160 -0
- gedcomx/gedcom7/GedcomStructure.py +94 -0
- gedcomx/gedcom7/Specification.py +347 -0
- gedcomx/gedcom7/__init__.py +26 -0
- gedcomx/gedcom7/g7interop.py +205 -0
- gedcomx/gedcom7/logger.py +19 -0
- gedcomx/{GedcomX.py → gedcomx.py} +14 -13
- gedcomx/{Gender.py → gender.py} +25 -11
- gedcomx/group.py +63 -0
- gedcomx/{Identifier.py → identifier.py} +4 -4
- gedcomx/{Mutations.py → mutations.py} +49 -25
- gedcomx/{Name.py → name.py} +15 -15
- gedcomx/{Note.py → note.py} +2 -2
- gedcomx/{OnlineAccount.py → online_account.py} +1 -1
- gedcomx/{Person.py → person.py} +18 -16
- gedcomx/{PlaceDescription.py → place_description.py} +18 -16
- gedcomx/{PlaceReference.py → place_reference.py} +4 -4
- gedcomx/{Qualifier.py → qualifier.py} +1 -1
- gedcomx/{Relationship.py → relationship.py} +30 -12
- gedcomx/{Resource.py → resource.py} +2 -2
- gedcomx/{Serialization.py → serialization.py} +31 -32
- gedcomx/{SourceDescription.py → source_description.py} +16 -16
- gedcomx/{SourceReference.py → source_reference.py} +7 -7
- gedcomx/{Subject.py → subject.py} +26 -8
- gedcomx/{Translation.py → translation.py} +1 -1
- gedcomx/{URI.py → uri.py} +42 -26
- gedcom_x-0.5.7.dist-info/RECORD +0 -49
- gedcomx/Group.py +0 -37
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/top_level.txt +0 -0
- /gedcomx/{Exceptions.py → exceptions.py} +0 -0
- /gedcomx/{ExtensibleEnum.py → extensible_enum.py} +0 -0
- /gedcomx/{Gedcom.py → gedcom.py} +0 -0
- /gedcomx/{LoggingHub.py → logging_hub.py} +0 -0
- /gedcomx/{SourceCitation.py → source_citation.py} +0 -0
- /gedcomx/{TextValue.py → textvalue.py} +0 -0
gedcomx/{Fact.py → fact.py}
RENAMED
@@ -1,28 +1,38 @@
|
|
1
1
|
import difflib
|
2
2
|
import re
|
3
3
|
|
4
|
-
from datetime import datetime
|
5
4
|
from enum import Enum
|
6
5
|
from typing import List, Optional, Dict, Any
|
6
|
+
"""
|
7
|
+
======================================================================
|
8
|
+
Project: Gedcom-X
|
9
|
+
File: fact.py
|
10
|
+
Author: David J. Cartwright
|
11
|
+
Purpose:
|
7
12
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
from .SourceReference import SourceReference
|
13
|
+
Created: 2025-08-25
|
14
|
+
Updated:
|
15
|
+
- 2025-08-31:
|
16
|
+
|
17
|
+
======================================================================
|
18
|
+
"""
|
15
19
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
from
|
22
|
-
|
23
|
-
from
|
24
|
-
|
25
|
-
from .Extensions.rs10.rsLink import
|
20
|
+
"""
|
21
|
+
======================================================================
|
22
|
+
GEDCOM Module Types
|
23
|
+
======================================================================
|
24
|
+
"""
|
25
|
+
from .attribution import Attribution
|
26
|
+
from .conclusion import Conclusion, ConfidenceLevel
|
27
|
+
from .date import Date
|
28
|
+
from .document import Document
|
29
|
+
from .Extensions.rs10.rsLink import _rsLinkList
|
30
|
+
from .note import Note
|
31
|
+
from .place_reference import PlaceReference
|
32
|
+
from .qualifier import Qualifier
|
33
|
+
from .resource import Resource
|
34
|
+
from .source_reference import SourceReference
|
35
|
+
#=====================================================================
|
26
36
|
|
27
37
|
|
28
38
|
class FactType(Enum):
|
@@ -370,7 +380,6 @@ class FactType(Enum):
|
|
370
380
|
return keywords_to_fact_type[matches[0]]
|
371
381
|
return None
|
372
382
|
|
373
|
-
|
374
383
|
class FactQualifier(Enum):
|
375
384
|
Age = "http://gedcomx.org/Age"
|
376
385
|
Cause = "http://gedcomx.org/Cause"
|
@@ -389,7 +398,6 @@ class FactQualifier(Enum):
|
|
389
398
|
}
|
390
399
|
return descriptions.get(self, "No description available.")
|
391
400
|
|
392
|
-
|
393
401
|
class Fact(Conclusion):
|
394
402
|
identifier = 'http://gedcomx.org/v1/Fact'
|
395
403
|
version = 'http://gedcomx.org/conceptual-model/v1'
|
@@ -397,9 +405,9 @@ class Fact(Conclusion):
|
|
397
405
|
def __init__(self,
|
398
406
|
id: Optional[str] = None,
|
399
407
|
lang: Optional[str] = None,
|
400
|
-
sources: Optional[List[SourceReference]] =
|
408
|
+
sources: Optional[List[SourceReference]] = None,
|
401
409
|
analysis: Optional[Resource | Document] = None,
|
402
|
-
notes: Optional[List[Note]] =
|
410
|
+
notes: Optional[List[Note]] = None,
|
403
411
|
confidence: Optional[ConfidenceLevel] = None,
|
404
412
|
attribution: Optional[Attribution] = None,
|
405
413
|
type: Optional[FactType] = None,
|
@@ -428,7 +436,7 @@ class Fact(Conclusion):
|
|
428
436
|
|
429
437
|
@property
|
430
438
|
def _as_dict_(self):
|
431
|
-
from .
|
439
|
+
from .serialization import Serialization
|
432
440
|
type_as_dcit = super()._as_dict_
|
433
441
|
# Only add Fact-specific fields
|
434
442
|
if self.type:
|
@@ -9,7 +9,7 @@ from collections import defaultdict
|
|
9
9
|
from typing import Iterable, Iterator, List, Optional, Tuple, Union
|
10
10
|
|
11
11
|
import logging
|
12
|
-
from .
|
12
|
+
from .logging_hub import hub, ChannelConfig
|
13
13
|
|
14
14
|
job_id = "gedcomx.parsing.GEDCOM5x"
|
15
15
|
|
@@ -0,0 +1,160 @@
|
|
1
|
+
|
2
|
+
from __future__ import annotations
|
3
|
+
from typing import Any, Dict, List, Optional, Union, Iterable
|
4
|
+
from collections import defaultdict
|
5
|
+
|
6
|
+
|
7
|
+
from .GedcomStructure import GedcomStructure
|
8
|
+
from . import Specification as g7specs
|
9
|
+
from .logger import get_logger
|
10
|
+
|
11
|
+
|
12
|
+
from typing import Dict, List, Optional
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
class Gedcom7:
    """Container for the level-0 records parsed from a GEDCOM 7 file.

    Records are kept in file order in ``records``. ``_tag_index`` maps each
    upper-cased tag to the positions of the records carrying that tag, so a
    lookup by tag does not have to scan the whole list.

    Supported subscripts::

        g[0]            # record by position
        g[1:3]          # slice of records
        g['INDI']       # every record tagged INDI (case-insensitive)
        g['INDI', 0]    # first INDI record
        g['INDI', 0:5]  # first five INDI records
    """

    def __init__(self, filepath: Optional[str] = None):
        """Create an empty container.

        Args:
            filepath: Accepted but not used by the constructor; call
                ``loadfile`` to read a file.
                NOTE(review): confirm whether the constructor was meant to
                load ``filepath`` automatically.
        """
        # These three lists are not populated by any method of this class.
        self.persons: List[Any] = []
        self.families: List[Any] = []
        self.sources: List[Any] = []
        self.records: List['GedcomStructure'] = []
        # tag -> list of indices into self.records
        self._tag_index: Dict[str, List[int]] = defaultdict(list)

    # ---- indexing helpers -------------------------------------------------
    @staticmethod
    def _norm_tag(tag: str) -> str:
        """Return the form of ``tag`` used as a key in the tag index."""
        return tag.upper()

    def _rebuild_index(self) -> None:
        """Recompute the tag index from scratch from ``self.records``."""
        self._tag_index.clear()
        for position, rec in enumerate(self.records):
            # Records without a (truthy) tag are simply not indexed.
            if getattr(rec, "tag", None):
                self._tag_index[self._norm_tag(rec.tag)].append(position)

    def _append_record(self, rec: 'GedcomStructure') -> None:
        """Append ``rec`` and keep the tag index in sync."""
        self.records.append(rec)
        if getattr(rec, "tag", None):
            self._tag_index[self._norm_tag(rec.tag)].append(len(self.records) - 1)

    # ---- Python container protocol ----------------------------------------
    def __len__(self) -> int:
        """Return the number of level-0 records."""
        return len(self.records)

    def __iter__(self) -> Iterable['GedcomStructure']:
        """Iterate over the level-0 records in file order."""
        return iter(self.records)

    def __contains__(self, key: Union[str, 'GedcomStructure']) -> bool:
        """Test membership by tag (``str``) or by record object."""
        if isinstance(key, str):
            return self._norm_tag(key) in self._tag_index
        return key in self.records

    def __getitem__(self, key: Union[int, slice, str, tuple]) -> Union['GedcomStructure', List['GedcomStructure']]:
        """Look records up by position, by tag, or by ``(tag, position)``.

        Raises:
            TypeError: If ``key`` (or the second element of a tuple key) is
                of an unsupported type.
            IndexError: If an integer position is out of range.
        """
        # by position
        if isinstance(key, (int, slice)):
            return self.records[key]

        # by tag
        if isinstance(key, str):
            # .get() avoids creating an empty entry in the defaultdict.
            positions = self._tag_index.get(self._norm_tag(key), [])
            return [self.records[i] for i in positions]

        # combo: g['INDI', 0] or g['INDI', 0:5]
        if isinstance(key, tuple) and len(key) == 2 and isinstance(key[0], str):
            tag, sub = key
            if isinstance(sub, (int, slice)):
                return self[tag][sub]
            raise TypeError(f"Unsupported sub-key type: {type(sub)!r}")

        raise TypeError(f"Unsupported key type: {type(key)!r}")

    # ---- parsing ------------------------------------------------------------
    @staticmethod
    def parse_gedcom_line(line: str) -> Optional[Dict[str, Any]]:
        """Split one raw GEDCOM line into level, xref, tag and value.

        A leading BOM and the trailing line terminator are removed first.
        Whitespace inside the value is preserved exactly as written.

        Side effect: a well-formed ``TAG <name> <uri>`` line registers the
        extension tag in ``g7specs.structure_specs``.

        Args:
            line: One line of the file, with or without its line ending.

        Returns:
            ``{"level", "xref", "tag", "value"}`` or ``None`` when the line
            is blank, has no tag, or has a non-integer or negative level.
            ``value`` is ``""`` when the line carries no payload. ``xref``
            is the line's own cross-reference id if present; otherwise, if
            the value is a pointer (``@...@``), the pointer itself;
            otherwise ``None``.
        """
        line = line.lstrip('\ufeff').rstrip('\r\n')
        if not line:
            return None

        # Peel off only the two leading fields; the remainder stays verbatim
        # so runs of whitespace inside the value are not collapsed.
        fields = line.split(maxsplit=2)
        if len(fields) < 2:
            return None  # not even "0 HEAD"

        # 1) Level
        try:
            level = int(fields[0])
        except ValueError:
            return None
        if level < 0:
            return None

        remainder = fields[2] if len(fields) > 2 else ""

        # 2) Optional XREF, then 3) tag and value
        xref = None
        if fields[1].startswith('@') and fields[1].endswith('@'):
            xref = fields[1]
            tag_and_value = remainder.split(maxsplit=1)
            if not tag_and_value:
                return None  # an xref must be followed by a tag
            tag = tag_and_value[0]
            value = tag_and_value[1] if len(tag_and_value) > 1 else ""
        else:
            tag = fields[1]
            value = remainder

        # 4) A pointer payload doubles as the xref when the line has none of
        #    its own (GedcomStructure applies the same rule to its text).
        if xref is None and value.startswith('@') and value.endswith('@'):
            xref = value

        # 5) Extension-tag declaration: "TAG <name> <uri>". Anything that is
        #    not exactly two tokens is left unregistered rather than raising.
        if tag == 'TAG':
            pieces = value.split()
            if len(pieces) == 2:
                xtag, uri = pieces
                g7specs.structure_specs[xtag] = uri
                g7specs.structure_specs[uri] = {'label': 'Extension_' + xtag}

        return {
            "level": level,
            "xref": xref,
            "tag": tag,
            "value": value
        }

    def loadfile(self, filepath: str) -> None:
        """Parse ``filepath`` and replace ``self.records`` with its contents.

        Each parsed line becomes a ``GedcomStructure`` whose parent is the
        most recent structure one level up. ``CONT`` lines are folded into
        the ``value`` of that parent, separated by a newline. Lines that
        cannot be parsed, or that have no structure one level up to attach
        to, are logged through the ``importlog`` logger and skipped.

        Args:
            filepath: Path of the file to read; it is opened as UTF-8 text.
        """
        log = get_logger('importlog')
        # level -> most recent structure seen at that level
        context: Dict[int, 'GedcomStructure'] = {}
        records: List['GedcomStructure'] = []

        with open(filepath, 'r', encoding='utf8') as file:
            for lineno, raw in enumerate(file, start=1):
                record = Gedcom7.parse_gedcom_line(raw)
                if record is None:
                    if raw.strip():
                        log.error(f'malformed line at {lineno}: {raw}')
                    else:
                        log.error(f'empty line at {lineno}: {raw}')
                    continue

                level = record["level"]
                parent = context.get(level - 1) if level > 0 else None

                if record["tag"] == g7specs.CONT:
                    if parent is None:
                        log.error(f'continuation without a parent at {lineno}: {raw}')
                        continue
                    parent.value += "\n" + record["value"]
                    continue

                if level > 0 and parent is None:
                    log.error(f'no parent at level {level - 1} for line {lineno}: {raw}')
                    continue

                structure = GedcomStructure(
                    level=level,
                    tag=record["tag"],
                    xref=record["xref"],
                    text=record["value"],
                    parent=parent,
                    line_num=lineno
                )

                if level == 0:
                    records.append(structure)

                # Forget deeper levels so a later line that skips a level
                # cannot attach itself to a structure from an earlier branch.
                for deeper in [lvl for lvl in context if lvl > level]:
                    del context[deeper]
                context[level] = structure

        self.records = records
        self._rebuild_index()  # build the tag index once, after loading
|
@@ -0,0 +1,94 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Dict, Any
|
4
|
+
import warnings
|
5
|
+
from . import Specification as g7
|
6
|
+
|
7
|
+
from typing import Dict, List,Optional,Any
|
8
|
+
|
9
|
+
|
10
|
+
# GEDCOM 7 term URIs, all under https://gedcom.io/terms/v7/.
# NOTE(review): presumably the terms allowed at level 0 -- confirm why CONT
# is included.
gedcom_top_level_terms = [
    f'https://gedcom.io/terms/v7/{term}'
    for term in (
        'CONT',
        'record-FAM',
        'record-INDI',
        'record-SNOTE',
        'record-SUBM',
        'TRLR',
        'HEAD',
        'record-OBJE',
        'record-REPO',
        'record-SOUR',
    )
]
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
class GedcomStructure:
    """One GEDCOM line together with the structures nested beneath it.

    Child structures are stored in ``subtructures`` (the attribute name is
    misspelled but kept, since callers may rely on it). ``node['TAG']``
    returns the direct children whose tag equals ``'TAG'``.
    """

    version = 'v7'

    def __init__(
        self,
        *,
        level: Optional[int] = None,
        xref: Optional[str] = None,
        tag: Optional[str] = None,
        pointer: Optional[bool] = None,
        text: Optional[str] = None,
        parent: Optional["GedcomStructure"] = None,
        line_num: Optional[int] = None
    ) -> None:
        """Create a GEDCOM structure node.

        When ``parent`` is a ``GedcomStructure`` the new node appends itself
        to ``parent.subtructures``.

        Args:
            level: GEDCOM line level (0..n).
            xref: Optional cross-reference id (e.g., '@I1@').
            tag: GEDCOM tag (e.g., 'INDI', 'NAME').
            pointer: True if this line is a pointer. ``None`` and ``False``
                are both stored as ``False``. Forced to ``True`` (and
                ``xref`` set to ``text``) when ``level`` is above 0 and
                ``text`` starts and ends with '@'.
            text: Literal text payload for this line; stored as both
                ``text`` and ``value``.
            parent: Parent node in the structure tree, if any.
            line_num: Line number of this structure in the source file.
        """
        self.level = level
        self.xref = xref
        self.tag = tag
        self.text = text
        self.value = text
        self.line_num = line_num
        self.pointer = pointer or False

        # Below level 0, a payload of the form @...@ is a pointer to another
        # record; the pointer text then also serves as this node's xref.
        if self.level and self.level > 0 and text and text.startswith('@') and text.endswith('@'):
            self.pointer = True
            self.xref = text

        self.parent = parent or None
        self.subtructures: List["GedcomStructure"] = []
        if self.parent and isinstance(self.parent, GedcomStructure):
            self.parent.subtructures.append(self)

        # Tags that begin with an underscore are flagged as extensions.
        self.extension = bool(tag) and tag.startswith('_')
        self.uri = g7.match_uri(tag, self.parent)
        self.label = g7.get_label(self.uri)

    def _as_dict_(self):
        """Return this node and its children as nested dictionaries.

        The result has a single key, the label looked up for ``self.uri``,
        mapping to the node's fields. ``xref``, ``value`` and
        ``substructures`` are included only when they are non-empty.
        """
        as_dict = {}
        as_dict['level'] = self.level
        if self.xref:
            as_dict['xref'] = self.xref
        as_dict['tag'] = self.tag
        if self.value:
            as_dict['value'] = self.value
        if self.subtructures:
            as_dict['substructures'] = [child._as_dict_() for child in self.subtructures]
        return {g7.get_label(self.uri): as_dict}

    def __repr__(self):
        """Return a one-line debugging summary of this node."""
        ext_marker = '(Ext)' if self.extension else ''
        # str() keeps the width specifier valid when tag is None.
        return (
            "GedcomStructure("
            f"level: {self.level} tag={str(self.tag):<6} ({self.label}), {ext_marker} xref:{self.xref} pointer={self.pointer}, text='{self.value}', "
            f"uri={self.uri} subStructures: {len(self.subtructures)}"
            ")"
        )

    def __getitem__(self, index) -> List['GedcomStructure']:
        """Return the direct children whose tag equals ``index``.

        The comparison is exact (case-sensitive); an empty list is returned
        when no child matches.
        """
        return [s for s in self.subtructures if s.tag == index]
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|