gedcom-x 0.5.6__py3-none-any.whl → 0.5.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.6.dist-info → gedcom_x-0.5.8.dist-info}/METADATA +1 -1
- gedcom_x-0.5.8.dist-info/RECORD +56 -0
- gedcomx/Extensions/__init__.py +1 -0
- gedcomx/Extensions/rs10/__init__.py +1 -0
- gedcomx/Extensions/rs10/rsLink.py +116 -0
- gedcomx/TopLevelTypeCollection.py +1 -1
- gedcomx/__init__.py +43 -41
- gedcomx/{Address.py → address.py} +13 -13
- gedcomx/{Agent.py → agent.py} +52 -24
- gedcomx/{Attribution.py → attribution.py} +36 -9
- gedcomx/{Conclusion.py → conclusion.py} +49 -21
- gedcomx/converter.py +1049 -0
- gedcomx/coverage.py +55 -0
- gedcomx/{Date.py → date.py} +11 -4
- gedcomx/{Document.py → document.py} +27 -8
- gedcomx/{Event.py → event.py} +102 -27
- gedcomx/{EvidenceReference.py → evidence_reference.py} +2 -2
- gedcomx/{Fact.py → fact.py} +45 -34
- gedcomx/{Gedcom5x.py → gedcom5x.py} +78 -61
- gedcomx/gedcom7/Exceptions.py +9 -0
- gedcomx/gedcom7/Gedcom7.py +160 -0
- gedcomx/gedcom7/GedcomStructure.py +94 -0
- gedcomx/gedcom7/Specification.py +347 -0
- gedcomx/gedcom7/__init__.py +26 -0
- gedcomx/gedcom7/g7interop.py +205 -0
- gedcomx/gedcom7/logger.py +19 -0
- gedcomx/gedcomx.py +501 -0
- gedcomx/{Gender.py → gender.py} +29 -17
- gedcomx/group.py +63 -0
- gedcomx/{Identifier.py → identifier.py} +13 -16
- gedcomx/{LoggingHub.py → logging_hub.py} +21 -0
- gedcomx/{Mutations.py → mutations.py} +50 -26
- gedcomx/name.py +396 -0
- gedcomx/{Note.py → note.py} +17 -10
- gedcomx/{OnlineAccount.py → online_account.py} +1 -1
- gedcomx/{Person.py → person.py} +52 -29
- gedcomx/place_description.py +123 -0
- gedcomx/place_reference.py +62 -0
- gedcomx/qualifier.py +54 -0
- gedcomx/{Relationship.py → relationship.py} +33 -13
- gedcomx/resource.py +85 -0
- gedcomx/serialization.py +815 -0
- gedcomx/{SourceDescription.py → source_description.py} +144 -85
- gedcomx/{SourceReference.py → source_reference.py} +15 -14
- gedcomx/{Subject.py → subject.py} +30 -28
- gedcomx/{GedcomX.py → translation.py} +283 -446
- gedcomx/{URI.py → uri.py} +42 -26
- gedcom_x-0.5.6.dist-info/RECORD +0 -45
- gedcomx/Coverage.py +0 -36
- gedcomx/Group.py +0 -37
- gedcomx/Name.py +0 -276
- gedcomx/PlaceDescription.py +0 -70
- gedcomx/PlaceReference.py +0 -30
- gedcomx/Qualifier.py +0 -27
- gedcomx/Resource.py +0 -75
- gedcomx/Serialization.py +0 -401
- gedcomx/Translation.py +0 -219
- {gedcom_x-0.5.6.dist-info → gedcom_x-0.5.8.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.6.dist-info → gedcom_x-0.5.8.dist-info}/top_level.txt +0 -0
- /gedcomx/{Exceptions.py → exceptions.py} +0 -0
- /gedcomx/{ExtensibleEnum.py → extensible_enum.py} +0 -0
- /gedcomx/{Gedcom.py → gedcom.py} +0 -0
- /gedcomx/{SourceCitation.py → source_citation.py} +0 -0
- /gedcomx/{TextValue.py → textvalue.py} +0 -0
@@ -3,14 +3,26 @@
|
|
3
3
|
|
4
4
|
import html
|
5
5
|
import os
|
6
|
-
from typing import List, Optional, Tuple
|
6
|
+
from typing import List, Optional, Tuple, Any
|
7
7
|
import re
|
8
8
|
from collections import defaultdict
|
9
9
|
from typing import Iterable, Iterator, List, Optional, Tuple, Union
|
10
10
|
|
11
|
-
|
12
|
-
hub
|
13
|
-
|
11
|
+
import logging
|
12
|
+
from .logging_hub import hub, ChannelConfig
|
13
|
+
|
14
|
+
job_id = "gedcomx.parsing.GEDCOM5x"
|
15
|
+
|
16
|
+
hub.start_channel(
|
17
|
+
ChannelConfig(
|
18
|
+
name=job_id,
|
19
|
+
path=f"logs/{job_id}.log",
|
20
|
+
level=logging.DEBUG,
|
21
|
+
rotation="size:10MB:3", # rotate by size, keep 3 backups
|
22
|
+
)
|
23
|
+
)
|
24
|
+
|
25
|
+
log = logging.getLogger("gedcomx")
|
14
26
|
|
15
27
|
BOM = '\ufeff'
|
16
28
|
|
@@ -51,25 +63,25 @@ line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
|
|
51
63
|
from typing import List, Optional, Iterator, Union
|
52
64
|
|
53
65
|
|
54
|
-
class
|
66
|
+
class Gedcom5xRecord():
|
55
67
|
def __init__(
|
56
68
|
self,
|
57
69
|
line_num: Optional[int] = None,
|
58
70
|
level: int = -1,
|
59
|
-
tag: str = "NONR",
|
71
|
+
tag: str | None = "NONR",
|
60
72
|
xref: Optional[str] = None,
|
61
73
|
value: Optional[str] = None,
|
62
74
|
) -> None:
|
63
75
|
self.line = line_num
|
64
|
-
self._subRecords: List[
|
76
|
+
self._subRecords: List[Gedcom5xRecord] = []
|
65
77
|
self.level = int(level)
|
66
78
|
self.xref = xref
|
67
79
|
self.pointer: bool = False
|
68
80
|
self.tag = str(tag).strip()
|
69
81
|
self.value = value
|
70
82
|
|
71
|
-
self.parent: Optional[
|
72
|
-
self.root: Optional[
|
83
|
+
self.parent: Optional[Gedcom5xRecord] = None
|
84
|
+
self.root: Optional[Gedcom5xRecord] = None
|
73
85
|
|
74
86
|
# ───────────────────────────────
|
75
87
|
# Dict/JSON friendly view
|
@@ -88,7 +100,7 @@ class GedcomRecord():
|
|
88
100
|
# ───────────────────────────────
|
89
101
|
# Subrecord management
|
90
102
|
# ───────────────────────────────
|
91
|
-
def addSubRecord(self, record: "
|
103
|
+
def addSubRecord(self, record: "Gedcom5xRecord"):
|
92
104
|
|
93
105
|
if record is not None and (record.level == (self.level + 1)):
|
94
106
|
record.parent = self
|
@@ -99,7 +111,7 @@ class GedcomRecord():
|
|
99
111
|
)
|
100
112
|
|
101
113
|
def recordOnly(self):
|
102
|
-
return
|
114
|
+
return Gedcom5xRecord(
|
103
115
|
line_num=self.line, level=self.level, tag=self.tag, value=self.value
|
104
116
|
)
|
105
117
|
|
@@ -123,7 +135,7 @@ class GedcomRecord():
|
|
123
135
|
f"subRecords: {len(self._subRecords)}"
|
124
136
|
)
|
125
137
|
if subRecords:
|
126
|
-
for subRecord in self.subRecords():
|
138
|
+
for subRecord in self.subRecords() or []:
|
127
139
|
description += "\n" + subRecord.describe(subRecords=True)
|
128
140
|
return description
|
129
141
|
|
@@ -132,9 +144,9 @@ class GedcomRecord():
|
|
132
144
|
# ───────────────────────────────
|
133
145
|
def subRecord(self, tag: str):
|
134
146
|
result = [r for r in self._subRecords if r.tag == tag]
|
135
|
-
return
|
147
|
+
return [] if not result else result
|
136
148
|
|
137
|
-
def subRecords(self, tag: str = None):
|
149
|
+
def subRecords(self, tag: str | None = None) -> List['Gedcom5xRecord']:
|
138
150
|
if not tag:
|
139
151
|
return self._subRecords
|
140
152
|
tags = tag.split("/", 1)
|
@@ -142,7 +154,7 @@ class GedcomRecord():
|
|
142
154
|
# Collect matching first-level subrecords
|
143
155
|
matches = [r for r in self._subRecords if r.tag == tags[0]]
|
144
156
|
if not matches:
|
145
|
-
return
|
157
|
+
return []
|
146
158
|
|
147
159
|
if len(tags) == 1:
|
148
160
|
return matches
|
@@ -156,7 +168,7 @@ class GedcomRecord():
|
|
156
168
|
results.extend(sub_result)
|
157
169
|
else:
|
158
170
|
results.append(sub_result)
|
159
|
-
return results if results else
|
171
|
+
return results if results else []
|
160
172
|
|
161
173
|
# ───────────────────────────────
|
162
174
|
# Iteration / Subscriptability
|
@@ -164,11 +176,11 @@ class GedcomRecord():
|
|
164
176
|
def __call__(self) -> str:
|
165
177
|
return self.describe()
|
166
178
|
|
167
|
-
def __iter__(self) -> Iterator["
|
179
|
+
def __iter__(self) -> Iterator["Gedcom5xRecord"]:
|
168
180
|
"""Iterates recursively over self and all subrecords."""
|
169
181
|
yield from self._flatten_subrecords(self)
|
170
182
|
|
171
|
-
def _flatten_subrecords(self, record: "
|
183
|
+
def _flatten_subrecords(self, record: "Gedcom5xRecord") -> Iterator["Gedcom5xRecord"]:
|
172
184
|
yield record
|
173
185
|
for sub in record._subRecords:
|
174
186
|
yield from self._flatten_subrecords(sub)
|
@@ -176,7 +188,7 @@ class GedcomRecord():
|
|
176
188
|
def __len__(self) -> int:
|
177
189
|
return len(self._subRecords)
|
178
190
|
|
179
|
-
def __getitem__(self, key: Union[int, slice, str]) -> Union["
|
191
|
+
def __getitem__(self, key: Union[int, slice, str]) -> Union["Gedcom5xRecord", List["Gedcom5xRecord"]]:
|
180
192
|
"""
|
181
193
|
- rec[0] -> first subrecord
|
182
194
|
- rec[1:3] -> slice of subrecords
|
@@ -222,26 +234,26 @@ class Gedcom5x():
|
|
222
234
|
"""
|
223
235
|
_top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']
|
224
236
|
|
225
|
-
def __init__(self, records: Optional[List[
|
237
|
+
def __init__(self, records: Optional[List[Gedcom5xRecord]] = None,filepath: str | None = None) -> None:
|
226
238
|
|
227
|
-
self.records: List[
|
239
|
+
self.records: List[Gedcom5xRecord] = records or []
|
228
240
|
if filepath:
|
229
241
|
self.records = self._records_from_file(filepath)
|
230
242
|
elif records:
|
231
|
-
self.records: List[
|
243
|
+
self.records: List[Gedcom5xRecord] = records if records else []
|
232
244
|
|
233
245
|
|
234
246
|
# Fast tag index: {'HEAD': [rec], 'INDI': [rec1, rec2, ...], ...}
|
235
|
-
self._tag_index: dict[str, List[
|
247
|
+
self._tag_index: dict[str, List[Gedcom5xRecord]] = defaultdict(list)
|
236
248
|
self._reindex()
|
237
249
|
|
238
|
-
self.header:
|
239
|
-
self._sources: List[
|
240
|
-
self._repositories: List[
|
241
|
-
self._individuals: List[
|
242
|
-
self._families: List[
|
243
|
-
self._objects: List[
|
244
|
-
self._snotes: List[
|
250
|
+
self.header: Gedcom5xRecord | None = None
|
251
|
+
self._sources: List[Gedcom5xRecord] = []
|
252
|
+
self._repositories: List[Gedcom5xRecord] = []
|
253
|
+
self._individuals: List[Gedcom5xRecord] = []
|
254
|
+
self._families: List[Gedcom5xRecord] = []
|
255
|
+
self._objects: List[Gedcom5xRecord] = []
|
256
|
+
self._snotes: List[Gedcom5xRecord] = []
|
245
257
|
self.version = None
|
246
258
|
|
247
259
|
if self.records:
|
@@ -277,7 +289,7 @@ class Gedcom5x():
|
|
277
289
|
def __len__(self) -> int:
|
278
290
|
return len(self.records)
|
279
291
|
|
280
|
-
def __iter__(self) -> Iterator['
|
292
|
+
def __iter__(self) -> Iterator['Gedcom5xRecord']:
|
281
293
|
# Enables: for x in gedcom:
|
282
294
|
return iter(self.records)
|
283
295
|
|
@@ -289,7 +301,7 @@ class Gedcom5x():
|
|
289
301
|
return 0 <= key < len(self.records)
|
290
302
|
return False
|
291
303
|
|
292
|
-
def __getitem__(self, key: Key) -> Union['
|
304
|
+
def __getitem__(self, key: Key) -> Union['Gedcom5xRecord', List['Gedcom5xRecord']]:
|
293
305
|
"""
|
294
306
|
- gedcom[0] -> GedcomRecord at index 0
|
295
307
|
- gedcom[1:5] -> list of GedcomRecord (slice)
|
@@ -309,30 +321,30 @@ class Gedcom5x():
|
|
309
321
|
raise TypeError(f"Unsupported key type: {type(key).__name__}")
|
310
322
|
|
311
323
|
# Optional: convenience helpers
|
312
|
-
def by_tag(self, tag: str) -> List['
|
324
|
+
def by_tag(self, tag: str) -> List['Gedcom5xRecord']:
|
313
325
|
"""Always return a list of records for a tag (empty list if none)."""
|
314
326
|
return list(self._tag_index.get(tag, []))
|
315
327
|
|
316
|
-
def first(self, tag: str) -> Optional['
|
328
|
+
def first(self, tag: str) -> Optional['Gedcom5xRecord']:
|
317
329
|
"""Return the first record with a given tag, or None."""
|
318
330
|
lst = self._tag_index.get(tag, [])
|
319
331
|
return lst[0] if lst else None
|
320
332
|
|
321
333
|
# If you add/replace records after init, keep the index fresh:
|
322
|
-
def append(self, rec: '
|
334
|
+
def append(self, rec: 'Gedcom5xRecord') -> None:
|
323
335
|
self.records.append(rec)
|
324
336
|
self._tag_index.setdefault(rec.tag, []).append(rec)
|
325
337
|
|
326
|
-
def extend(self, recs: Iterable['
|
338
|
+
def extend(self, recs: Iterable['Gedcom5xRecord']) -> None:
|
327
339
|
self.records.extend(recs)
|
328
340
|
for r in recs:
|
329
341
|
self._tag_index.setdefault(r.tag, []).append(r)
|
330
342
|
|
331
|
-
def insert(self, idx: int, rec: '
|
343
|
+
def insert(self, idx: int, rec: 'Gedcom5xRecord') -> None:
|
332
344
|
self.records.insert(idx, rec)
|
333
345
|
self._tag_index.setdefault(rec.tag, []).append(rec)
|
334
346
|
|
335
|
-
def remove(self, rec: '
|
347
|
+
def remove(self, rec: 'Gedcom5xRecord') -> None:
|
336
348
|
self.records.remove(rec)
|
337
349
|
try:
|
338
350
|
bucket = self._tag_index.get(rec.tag)
|
@@ -385,55 +397,55 @@ class Gedcom5x():
|
|
385
397
|
print_table(imports_stats)
|
386
398
|
|
387
399
|
@property
|
388
|
-
def sources(self) -> List[
|
400
|
+
def sources(self) -> List[Gedcom5xRecord]:
|
389
401
|
return self._sources
|
390
402
|
|
391
403
|
@sources.setter
|
392
|
-
def sources(self, value: List[
|
393
|
-
if not isinstance(value, list) or not all(isinstance(item,
|
404
|
+
def sources(self, value: List[Gedcom5xRecord]):
|
405
|
+
if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
|
394
406
|
raise ValueError("sources must be a list of GedcomRecord objects.")
|
395
407
|
self._sources = value
|
396
408
|
|
397
409
|
@property
|
398
|
-
def repositories(self) -> List[
|
410
|
+
def repositories(self) -> List[Gedcom5xRecord]:
|
399
411
|
"""
|
400
412
|
List of **REPO** records found in the Genealogy
|
401
413
|
"""
|
402
414
|
return self._repositories
|
403
415
|
|
404
416
|
@repositories.setter
|
405
|
-
def repositories(self, value: List[
|
406
|
-
if not isinstance(value, list) or not all(isinstance(item,
|
417
|
+
def repositories(self, value: List[Gedcom5xRecord]):
|
418
|
+
if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
|
407
419
|
raise ValueError("repositories must be a list of GedcomRecord objects.")
|
408
420
|
self._repositories = value
|
409
421
|
|
410
422
|
@property
|
411
|
-
def individuals(self) -> List[
|
423
|
+
def individuals(self) -> List[Gedcom5xRecord]:
|
412
424
|
return self._individuals
|
413
425
|
|
414
426
|
@individuals.setter
|
415
|
-
def individuals(self, value: List[
|
416
|
-
if not isinstance(value, list) or not all(isinstance(item,
|
427
|
+
def individuals(self, value: List[Gedcom5xRecord]):
|
428
|
+
if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
|
417
429
|
raise ValueError("individuals must be a list of GedcomRecord objects.")
|
418
430
|
self._individuals = value
|
419
431
|
|
420
432
|
@property
|
421
|
-
def families(self) -> List[
|
433
|
+
def families(self) -> List[Gedcom5xRecord]:
|
422
434
|
return self._families
|
423
435
|
|
424
436
|
@families.setter
|
425
|
-
def families(self, value: List[
|
426
|
-
if not isinstance(value, list) or not all(isinstance(item,
|
437
|
+
def families(self, value: List[Gedcom5xRecord]):
|
438
|
+
if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
|
427
439
|
raise ValueError("families must be a list of GedcomRecord objects.")
|
428
440
|
self._families = value
|
429
441
|
|
430
442
|
@property
|
431
|
-
def objects(self) -> List[
|
443
|
+
def objects(self) -> List[Gedcom5xRecord]:
|
432
444
|
return self._objects
|
433
445
|
|
434
446
|
@objects.setter
|
435
|
-
def objects(self, value: List[
|
436
|
-
if not isinstance(value, list) or not all(isinstance(item,
|
447
|
+
def objects(self, value: List[Gedcom5xRecord]):
|
448
|
+
if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
|
437
449
|
raise ValueError("objects must be a list of GedcomRecord objects.")
|
438
450
|
self._objects = value
|
439
451
|
|
@@ -449,7 +461,7 @@ class Gedcom5x():
|
|
449
461
|
raise NotImplementedError("Writing of GEDCOM files is not implemented.")
|
450
462
|
|
451
463
|
@staticmethod
|
452
|
-
def _records_from_file(file_path: str) -> List[
|
464
|
+
def _records_from_file(file_path: str) -> List[Gedcom5xRecord]:
|
453
465
|
def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
|
454
466
|
"""
|
455
467
|
Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)
|
@@ -483,7 +495,7 @@ class Gedcom5x():
|
|
483
495
|
lines = [line.strip() for line in file]
|
484
496
|
|
485
497
|
records = []
|
486
|
-
record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
|
498
|
+
record_map: dict[int,Any] = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
|
487
499
|
|
488
500
|
for l, line in enumerate(lines):
|
489
501
|
if line.startswith(BOM):
|
@@ -510,15 +522,18 @@ class Gedcom5x():
|
|
510
522
|
else:
|
511
523
|
level, tag = parts
|
512
524
|
|
513
|
-
level, xref, tag, value, xref_value = parse_gedcom7_line(line)
|
525
|
+
level, xref, tag, value, xref_value = parse_gedcom7_line(line) or tuple([None, None, None, None])
|
526
|
+
|
514
527
|
|
515
528
|
if xref is None and xref_value is not None:
|
516
529
|
xref = xref_value
|
517
530
|
# print(l, level, xref, tag, value, xref_value)
|
518
|
-
|
519
|
-
level = int(level)
|
520
531
|
|
521
|
-
|
532
|
+
if isinstance(level,int):
|
533
|
+
level = int(level)
|
534
|
+
else: raise ValueError(f"Record had a level of {level}")
|
535
|
+
|
536
|
+
new_record = Gedcom5xRecord(line_num=l + 1, level=level, tag=tag if tag else None, xref=xref,value=value)
|
522
537
|
|
523
538
|
|
524
539
|
if level == 0:
|
@@ -528,12 +543,14 @@ class Gedcom5x():
|
|
528
543
|
new_record.parent = record_map[int(level) - 1]
|
529
544
|
record_map[int(level) - 1].addSubRecord(new_record)
|
530
545
|
record_map[int(level)] = new_record
|
546
|
+
with hub.use(job_id):
|
547
|
+
log.info(new_record.describe())
|
531
548
|
|
532
549
|
|
533
|
-
return records if records else
|
550
|
+
return records if records else []
|
534
551
|
|
535
552
|
@staticmethod
|
536
|
-
def fromFile(file_path: str) -> '
|
553
|
+
def fromFile(file_path: str) -> 'Gedcom5x':
|
537
554
|
"""
|
538
555
|
Static method to create a Gedcom object from a GEDCOM file.
|
539
556
|
|
@@ -0,0 +1,160 @@
|
|
1
|
+
|
2
|
+
from __future__ import annotations
|
3
|
+
from typing import Any, Dict, List, Optional, Union, Iterable
|
4
|
+
from collections import defaultdict
|
5
|
+
|
6
|
+
|
7
|
+
from .GedcomStructure import GedcomStructure
|
8
|
+
from . import Specification as g7specs
|
9
|
+
from .logger import get_logger
|
10
|
+
|
11
|
+
|
12
|
+
from typing import Dict, List, Optional
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
class Gedcom7:
    """Container for the level-0 structures read from a GEDCOM 7 file.

    ``records`` holds the level-0 structures in file order.  ``_tag_index``
    maps an upper-cased tag to the positions in ``records`` that carry it,
    which backs ``"INDI" in g`` and ``g["INDI"]``.
    """

    def __init__(self, filepath: Optional[str] = None):
        """Create an empty container, optionally loading a file.

        Args:
            filepath: Path of a GEDCOM 7 file.  When given, it is loaded
                immediately through :meth:`loadfile`.
        """
        self.persons: List[Any] = []
        self.families: List[Any] = []
        self.sources: List[Any] = []
        self.records: List['GedcomStructure'] = []
        self._tag_index: Dict[str, List[int]] = defaultdict(list)  # tag -> list of record indices

        # The argument used to be accepted and then silently ignored.
        if filepath:
            self.loadfile(filepath)

    # ---- indexing helpers -------------------------------------------------
    @staticmethod
    def _norm_tag(tag: str) -> str:
        """Return the index key for *tag* (tags are matched case-insensitively)."""
        return tag.upper()

    def _rebuild_index(self) -> None:
        """Recompute ``_tag_index`` from scratch out of ``records``."""
        self._tag_index.clear()
        for i, rec in enumerate(self.records):
            if getattr(rec, "tag", None):
                self._tag_index[self._norm_tag(rec.tag)].append(i)

    # Optional: keep index in sync if you append records elsewhere
    def _append_record(self, rec: 'GedcomStructure') -> None:
        """Append *rec* to ``records`` and register its position under its tag."""
        self.records.append(rec)
        if getattr(rec, "tag", None):
            self._tag_index[self._norm_tag(rec.tag)].append(len(self.records) - 1)

    # ---- Python container protocol ----------------------------------------
    def __len__(self) -> int:
        """Return the number of level-0 records."""
        return len(self.records)

    def __iter__(self) -> Iterable['GedcomStructure']:
        """Iterate over the level-0 records in file order."""
        return iter(self.records)

    def __contains__(self, key: Union[str, 'GedcomStructure']) -> bool:
        """Test for a tag (when *key* is a string) or for a record object."""
        if isinstance(key, str):
            return self._norm_tag(key) in self._tag_index
        return key in self.records

    def __getitem__(self, key: Union[int, slice, str, tuple]) -> Union['GedcomStructure', List['GedcomStructure']]:
        """Look records up by position, by tag, or by ``(tag, position)``.

        - ``g[0]`` / ``g[1:3]``   -> record(s) by position
        - ``g['INDI']``           -> list of records with that tag
        - ``g['INDI', 0]``        -> index or slice into that list

        Raises:
            TypeError: If the key (or the second tuple element) has an
                unsupported type.
        """
        # by position
        if isinstance(key, (int, slice)):
            return self.records[key]

        # by tag
        if isinstance(key, str):
            idxs = self._tag_index.get(self._norm_tag(key), [])
            return [self.records[i] for i in idxs]

        # combo: ('INDI', 0) or ('INDI', slice(0, 5))
        if isinstance(key, tuple) and len(key) == 2 and isinstance(key[0], str):
            tag, sub = key
            items = self[tag]  # list for that tag
            if isinstance(sub, (int, slice)):
                return items[sub]
            raise TypeError(f"Unsupported sub-key type: {type(sub)!r}")

        raise TypeError(f"Unsupported key type: {type(key)!r}")

    # ---- parsing ----------------------------------------------------------
    @staticmethod
    def parse_gedcom_line(line: str) -> Optional[Dict[str, Any]]:
        """Split one physical line into level, xref, tag and value.

        Args:
            line: A raw line; a leading BOM and the trailing line ending
                are removed before parsing.

        Returns:
            A dict with keys ``level`` (int), ``xref`` (str or None),
            ``tag`` (str) and ``value`` (str, ``""`` when absent), or
            ``None`` when the line is blank or malformed.

        Side effects:
            A ``TAG <xtag> <uri>`` line registers the extension in
            ``g7specs.structure_specs``.
        """
        line = line.lstrip('\ufeff').rstrip('\r\n')
        if not line:
            return None

        # Peel off only the level and the next token so the remainder is
        # kept verbatim; splitting deeper and re-joining with a single
        # space would rewrite whitespace inside the payload.
        parts = line.split(maxsplit=2)
        if len(parts) < 2:
            return None  # not even "0 HEAD"

        # 1) Level
        try:
            level = int(parts[0])
        except ValueError:
            return None
        if level < 0:
            return None

        second = parts[1]
        rest = parts[2] if len(parts) > 2 else ""

        # 2) Is the second token an XREF?  Then the tag follows it.
        xref = None
        if second.startswith('@') and second.endswith('@'):
            xref = second
            tag_and_value = rest.split(maxsplit=1)
            if not tag_and_value:
                return None  # must have at least ["0", "@X@", "TAG"]
            tag = tag_and_value[0]
            value = tag_and_value[1] if len(tag_and_value) > 1 else ""
        else:
            tag = second
            value = rest

        # 3) A payload of the form @...@ is a pointer: report its target as
        #    the xref.  (Previously this stored parts[1], i.e. the tag,
        #    whenever the line had no leading xref.)
        if xref is None and value.startswith('@') and value.endswith('@'):
            xref = value

        # 4) Extension-tag declaration: "<xtag> <uri>".  A malformed
        #    declaration is left unregistered instead of raising.
        if tag == 'TAG':
            pieces = value.split()
            if len(pieces) == 2:
                xtag, uri = pieces
                g7specs.structure_specs[xtag] = uri
                g7specs.structure_specs[uri] = {'label': 'Extension_' + xtag}

        return {
            "level": level,
            "xref": xref,
            "tag": tag,
            "value": value
        }

    def loadfile(self, filepath: str) -> None:
        """Read *filepath* and replace ``records`` with its level-0 structures.

        Lines are parsed with :meth:`parse_gedcom_line`.  ``context`` maps a
        level to the most recent structure seen at that level, which is how
        each line finds its parent.  Lines that cannot be parsed, or whose
        parent level is missing, are logged and skipped.

        Args:
            filepath: Path of the file, opened as UTF-8 text.
        """
        log = get_logger('importlog')
        context: Dict[int, GedcomStructure] = {}
        records: List[GedcomStructure] = []

        with open(filepath, 'r', encoding='utf8') as file:
            for lineno, raw in enumerate(file, start=1):
                record = Gedcom7.parse_gedcom_line(raw)
                if record is None:
                    log.error(f'empty line at {lineno}: {raw}')
                    continue

                level = record["level"]
                parent = context.get(level - 1)

                # CONT continues the value of the structure one level up.
                if record["tag"] == g7specs.CONT:
                    if parent is None:
                        log.error(f'CONT without a parent at {lineno}: {raw}')
                        continue
                    parent.value = (parent.value or "") + "\n" + record["value"]
                    continue

                if level > 0 and parent is None:
                    log.error(f'no level {level - 1} parent at {lineno}: {raw}')
                    continue

                structure = GedcomStructure(
                    level=level,
                    tag=record["tag"],
                    xref=record["xref"],
                    text=record["value"],
                    parent=parent if level > 0 else None,
                    line_num=lineno
                )

                if level == 0:
                    records.append(structure)

                context[level] = structure
                # Forget deeper levels so a later line cannot attach to a
                # structure that belongs to an earlier branch.
                for stale in [lvl for lvl in context if lvl > level]:
                    del context[stale]

        self.records = records
        self._rebuild_index()  # <-- build fast tag index once
|
@@ -0,0 +1,94 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import Dict, Any
|
4
|
+
import warnings
|
5
|
+
from . import Specification as g7
|
6
|
+
|
7
|
+
from typing import Dict, List,Optional,Any
|
8
|
+
|
9
|
+
|
10
|
+
gedcom_top_level_terms = ['https://gedcom.io/terms/v7/CONT',
|
11
|
+
'https://gedcom.io/terms/v7/record-FAM',
|
12
|
+
'https://gedcom.io/terms/v7/record-INDI',
|
13
|
+
'https://gedcom.io/terms/v7/record-SNOTE',
|
14
|
+
'https://gedcom.io/terms/v7/record-SUBM',
|
15
|
+
'https://gedcom.io/terms/v7/TRLR',
|
16
|
+
'https://gedcom.io/terms/v7/HEAD',
|
17
|
+
'https://gedcom.io/terms/v7/record-OBJE',
|
18
|
+
'https://gedcom.io/terms/v7/record-REPO',
|
19
|
+
'https://gedcom.io/terms/v7/record-SOUR']
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
class GedcomStructure:
    """One GEDCOM 7 line, linked to its parent and child structures.

    Children are collected in ``subtructures`` (the attribute name is kept
    as spelled, since code outside this class may read it).
    """
    version = 'v7'

    def __init__(
        self,
        *,
        level: int | None = None,
        xref: str | None = None,
        tag: str | None = None,
        pointer: bool | None = None,
        text: str | None = None,
        parent: GedcomStructure | None = None,
        line_num: int | None = None
    ) -> None:
        """Create a GEDCOM structure node.

        Args:
            level: GEDCOM line level (0..n).
            xref: Optional cross-reference id (e.g., '@I1@').
            tag: GEDCOM tag (e.g., 'INDI', 'NAME').
            pointer: True if this line is a pointer; a falsy value is
                stored as False.
            text: Literal text payload for this line; also stored as
                ``value``.
            parent: Parent node in the structure tree, if any.  The new
                node appends itself to ``parent.subtructures``.
            line_num: Line number in the source file, if known.
        """
        self.level = level
        self.xref = xref
        self.tag = tag
        self.text = text
        self.value = text
        self.line_num = line_num
        self.pointer = pointer if pointer else False
        # Created up front so the node is usable as a container even if a
        # later step in this constructor fails.
        self.subtructures = []

        # Below level 0, a payload of the form @...@ marks the line as a
        # pointer and overrides whatever xref was passed in.
        if self.level and self.level > 0 and text and text.startswith('@') and text.endswith('@'):
            self.pointer = True
            self.xref = text

        self.parent: GedcomStructure | None = parent if parent else None
        if self.parent and isinstance(self.parent, GedcomStructure):
            self.parent.subtructures.append(self)

        # Tags starting with an underscore are extension tags.
        self.extension = bool(tag) and tag.startswith('_')
        self.uri = g7.match_uri(tag, self.parent)
        self.label = g7.get_label(self.uri)

    def _as_dict_(self):
        """Return a nested dict view keyed by the label of this node's URI.

        ``xref``, ``value`` and ``substructures`` are included only when
        they are non-empty.
        """
        as_dict = {}
        as_dict['level'] = self.level
        if self.xref:
            as_dict['xref'] = self.xref
        as_dict['tag'] = self.tag
        if self.value:
            as_dict['value'] = self.value
        if self.subtructures:
            as_dict['substructures'] = [substructure._as_dict_() for substructure in self.subtructures]
        return {g7.get_label(self.uri): as_dict}

    def __repr__(self):
        """Return a one-line summary of the node for debugging."""
        # str() first: a width spec applied directly to None raises
        # TypeError, and tag defaults to None.
        tag = str(self.tag)
        ext = '(Ext)' if self.extension else ''
        return (
            "GedcomStructure("
            f"level: {self.level} tag={tag:<6} ({self.label}), {ext} xref:{self.xref} pointer={self.pointer}, text='{self.value}', "
            f"uri={self.uri} subStructures: {len(self.subtructures)})"
        )

    def __getitem__(self, index) -> List['GedcomStructure']:
        """Return the direct children whose tag equals *index* (may be empty)."""
        return [s for s in self.subtructures if s.tag == index]
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|