gedcom-x 0.5.2__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.2.dist-info → gedcom_x-0.5.5.dist-info}/METADATA +1 -1
- gedcom_x-0.5.5.dist-info/RECORD +43 -0
- gedcomx/Address.py +2 -0
- gedcomx/Agent.py +9 -2
- gedcomx/Attribution.py +10 -46
- gedcomx/Conclusion.py +85 -21
- gedcomx/Coverage.py +10 -0
- gedcomx/Date.py +2 -7
- gedcomx/Document.py +27 -6
- gedcomx/Event.py +20 -1
- gedcomx/Exceptions.py +6 -0
- gedcomx/Fact.py +7 -8
- gedcomx/Gedcom.py +38 -404
- gedcomx/Gedcom5x.py +558 -0
- gedcomx/GedcomX.py +37 -22
- gedcomx/Gender.py +6 -40
- gedcomx/Identifier.py +151 -97
- gedcomx/Mutations.py +228 -0
- gedcomx/Name.py +6 -0
- gedcomx/Person.py +49 -90
- gedcomx/PlaceDescription.py +23 -14
- gedcomx/PlaceReference.py +12 -15
- gedcomx/Relationship.py +23 -54
- gedcomx/Resource.py +17 -3
- gedcomx/Serialization.py +352 -31
- gedcomx/SourceDescription.py +6 -9
- gedcomx/SourceReference.py +20 -86
- gedcomx/Subject.py +4 -4
- gedcomx/Translation.py +219 -0
- gedcomx/URI.py +1 -0
- gedcomx/__init__.py +7 -1
- gedcom_x-0.5.2.dist-info/RECORD +0 -42
- gedcomx/_Links.py +0 -37
- gedcomx/g7interop.py +0 -205
- {gedcom_x-0.5.2.dist-info → gedcom_x-0.5.5.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.2.dist-info → gedcom_x-0.5.5.dist-info}/top_level.txt +0 -0
gedcomx/Gedcom5x.py
ADDED
@@ -0,0 +1,558 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
import html
|
5
|
+
import os
|
6
|
+
from typing import List, Optional, Tuple
|
7
|
+
import re
|
8
|
+
from collections import defaultdict
|
9
|
+
from typing import Iterable, Iterator, List, Optional, Tuple, Union
|
10
|
+
|
11
|
+
# Unicode byte-order mark (U+FEFF); compared against the start of lines read
# from a file so it can be removed before parsing.
BOM = '\ufeff'

# One physical GEDCOM line, already stripped of its line terminator:
#     <level> [@xref@] <TAG> [value]
# Compiled in VERBOSE mode, so the whitespace and trailing ``#`` notes inside
# the pattern are ignored by the regex engine.
GEDCOM7_LINE_RE = re.compile(
    r"""^
    (?P<level>\d+)                # Level
    (?:\s+@(?P<xref>[^@]+)@)?     # Optional record identifier
    \s+(?P<tag>[A-Z0-9_-]+)       # Tag
    (?:\s+(?P<value>.+))?         # Optional value (may be XREF)
    $""",
    re.VERBOSE
)

# A value that consists of nothing but a single ``@...@`` cross-reference.
XREF_RE = re.compile(r'^@[^@]+@$')
|
24
|
+
|
25
|
+
# Add hash table for XREF of Zero Records?
|
26
|
+
|
27
|
+
# Regular-expression source fragments, assembled bottom-up into ``line``.
# NOTE(review): the fragment names look like they mirror the line grammar of
# the GEDCOM 7 specification - confirm against the spec before relying on it.
# These are plain strings; nothing in this section compiles them.
nonzero = '[1-9]'
level = '(?P<level>0|' + nonzero + '[0-9]*)'
atsign = '@'
underscore = '_'
ucletter = '[A-Z]'
tagchar = '(' + '|'.join((ucletter, '[0-9]', underscore)) + ')'
xref = atsign + '(' + tagchar + ')+' + atsign
# Delimiter: an escaped single space.
d = '\\ '
stdtag = ucletter + '(' + tagchar + ')*'
exttag = underscore + '(' + tagchar + ')+'
tag = '(' + stdtag + '|' + exttag + ')'
voidptr = '@VOID@'
pointer = '(?P<pointer>' + voidptr + '|' + xref + ')'
# Any character except '@', CR and LF (tab, then space..'?', then 'A'..U+10FFFF).
nonat = '[\t -?A-\\U0010ffff]'
# Any character except CR and LF.
noneol = '[\t -\\U0010ffff]'
linestr = '(?P<linestr>(' + nonat + '|' + atsign + atsign + ')(' + noneol + ')*)'
lineval = '(' + pointer + '|' + linestr + ')'
# CR, CRLF or LF. Each control character is a literal preceded by a backslash.
eol = '(\\\r(\\\n)?|\\\n)'
line = ''.join((
    level, d,
    '((?P<xref>', xref, ')', d, ')?',
    '(?P<tag>', tag, ')',
    '(', d, lineval, ')?',
    eol,
))
|
46
|
+
|
47
|
+
from typing import List, Optional, Iterator, Union
|
48
|
+
|
49
|
+
|
50
|
+
class GedcomRecord():
    """A single GEDCOM line together with the records nested beneath it.

    Parameters
    ----------
    line_num : Optional[int]
        Line number the record came from, if known.
    level : int
        GEDCOM level number; coerced with ``int()``.
    tag : str
        GEDCOM tag; converted to ``str`` and stripped of surrounding whitespace.
    xref : Optional[str]
        Cross-reference identifier associated with the line, if any.
    value : Optional[str]
        Line value, if any.

    Note
    ----
    ``__len__`` returns the number of direct subrecords, so a record without
    subrecords is *falsy*. Test for presence with ``is not None``, never with
    plain truthiness.
    """

    def __init__(
        self,
        line_num: Optional[int] = None,
        level: int = -1,
        tag: str = "NONR",
        xref: Optional[str] = None,
        value: Optional[str] = None,
    ) -> None:
        self.line = line_num
        self._subRecords: List["GedcomRecord"] = []
        self.level = int(level)
        self.xref = xref
        self.pointer: bool = False
        self.tag = str(tag).strip()
        self.value = value

        # Set by addSubRecord() / by whoever builds the tree.
        self.parent: Optional["GedcomRecord"] = None
        self.root: Optional["GedcomRecord"] = None

    # ───────────────────────────────
    # Dict/JSON friendly view
    # ───────────────────────────────
    @property
    def _as_dict_(self):
        """Return this record and, recursively, its subrecords as plain dicts."""
        return {
            "level": self.level,
            "xref": self.xref,
            "tag": self.tag,
            "pointer": self.pointer,
            "value": self.value,
            "subrecords": [sub._as_dict_ for sub in self._subRecords],
        }

    # ───────────────────────────────
    # Subrecord management
    # ───────────────────────────────
    def addSubRecord(self, record: "GedcomRecord"):
        """Attach ``record`` as a direct child and set its ``parent``.

        Raises
        ------
        ValueError
            If ``record`` is ``None`` or its level is not exactly one deeper
            than this record's level.
        """
        # Checked separately so the level message below never touches None.
        if record is None:
            raise ValueError(
                f"SubRecord must be a GedcomRecord, got None (level:{self.level})"
            )
        if record.level != self.level + 1:
            raise ValueError(
                f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})"
            )
        record.parent = self
        self._subRecords.append(record)

    def recordOnly(self):
        """Return a copy of this record without any of its subrecords.

        The copy carries line number, level, tag, xref and value; ``parent``
        and ``root`` are not copied.
        """
        return GedcomRecord(
            line_num=self.line,
            level=self.level,
            tag=self.tag,
            xref=self.xref,
            value=self.value,
        )

    # ───────────────────────────────
    # Pretty printers
    # ───────────────────────────────
    def dump(self) -> str:
        """Return a multi-line text dump of this record and its descendants."""
        record_dump = (
            f"Level: {self.level}, tag: {self.tag}, value: {self.value}, "
            f"subRecords: {len(self._subRecords)}\n"
        )
        for record in self._subRecords:
            record_dump += "\t" + record.dump()
        return record_dump

    def describe(self, subRecords: bool = False) -> str:
        """Return a one-line description, tab-indented by level.

        When ``subRecords`` is true, descriptions of all descendants follow,
        one per line.
        """
        level_str = "\t" * self.level
        description = (
            f"Line {self.line}: {level_str} Level: {self.level}, "
            f"tag: '{self.tag}', xref={self.xref} value: '{self.value}', "
            f"subRecords: {len(self._subRecords)}"
        )
        if subRecords:
            for subRecord in self.subRecords():
                description += "\n" + subRecord.describe(subRecords=True)
        return description

    # ───────────────────────────────
    # Subrecord access
    # ───────────────────────────────
    def subRecord(self, tag: str):
        """Return the list of direct subrecords tagged ``tag``, or ``None`` if there are none."""
        result = [r for r in self._subRecords if r.tag == tag]
        return result if result else None

    def subRecords(self, tag: Optional[str] = None):
        """Return subrecords, optionally selected by a ``/``-separated tag path.

        - no ``tag``: the internal list of direct subrecords (possibly empty);
        - ``'NAME'``: direct subrecords tagged ``NAME``;
        - ``'NAME/GIVN'``: ``GIVN`` subrecords of every ``NAME`` subrecord.

        Returns ``None`` when a tag (path) is given and nothing matches.
        """
        if not tag:
            return self._subRecords

        head, _, rest = tag.partition("/")
        has_rest = "/" in tag

        # Collect matching first-level subrecords
        matches = [r for r in self._subRecords if r.tag == head]
        if not matches:
            return None
        if not has_rest:
            return matches

        # Recurse deeper; each recursive call yields a list or None.
        results = []
        for match in matches:
            sub_result = match.subRecords(rest)
            if sub_result:
                results.extend(sub_result)
        return results if results else None

    # ───────────────────────────────
    # Iteration / Subscriptability
    # ───────────────────────────────
    def __call__(self) -> str:
        """Calling a record is shorthand for ``describe()``."""
        return self.describe()

    def __iter__(self) -> Iterator["GedcomRecord"]:
        """Iterates recursively over self and all subrecords."""
        yield from self._flatten_subrecords(self)

    def _flatten_subrecords(self, record: "GedcomRecord") -> Iterator["GedcomRecord"]:
        """Yield ``record`` followed by all of its descendants, depth-first."""
        yield record
        for sub in record._subRecords:
            yield from self._flatten_subrecords(sub)

    def __len__(self) -> int:
        """Number of direct subrecords."""
        return len(self._subRecords)

    def __getitem__(self, key: Union[int, slice, str]) -> Union["GedcomRecord", List["GedcomRecord"]]:
        """
        - rec[0]       -> first subrecord
        - rec[1:3]     -> slice of subrecords
        - rec['NAME']  -> the subrecord tagged 'NAME' if exactly one exists,
                          otherwise the list of all of them

        Raises ``KeyError`` when no subrecord has the requested tag and
        ``TypeError`` for any other key type.
        """
        if isinstance(key, (int, slice)):
            return self._subRecords[key]
        if isinstance(key, str):
            matches = [r for r in self._subRecords if r.tag == key]
            if not matches:
                raise KeyError(f"No subrecords with tag '{key}'.")
            return matches[0] if len(matches) == 1 else matches
        raise TypeError(f"Unsupported key type: {type(key).__name__}")

    def __contains__(self, key: object) -> bool:
        """``'TAG' in rec`` tests for a direct subrecord tag; ``n in rec`` tests for a valid index."""
        if isinstance(key, str):
            return any(r.tag == key for r in self._subRecords)
        if isinstance(key, int):
            return 0 <= key < len(self._subRecords)
        return False
|
196
|
+
|
197
|
+
|
198
|
+
# Key kinds used to annotate Gedcom5x.__getitem__:
IndexKey = int   # positional lookup
TagKey = str     # lookup by GEDCOM tag
Key = Union[IndexKey, slice, TagKey]
|
201
|
+
|
202
|
+
class Gedcom5x():
    """
    Object representing a Genealogy in legacy GEDCOM 5.x / 7 format.

    Parameters
    ----------
    records : List[GedcomRecord]
        List of GedcomRecords to initialize the genealogy with
    filepath : str
        path to a GEDCOM (``*``.ged), if provided object will read, parse and initialize with records in the file.

    Note
    ----
    **filepath** takes precedence over **records**.
    If no arguments are provided, Gedcom Object will initialize with no records.

    The per-type lists (``individuals``, ``families``, ...) are filled once in
    ``__init__``; ``append``/``extend``/``insert``/``remove`` only maintain
    ``records`` and the tag index.
    """
    _top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']

    def __init__(self, records: Optional[List["GedcomRecord"]] = None, filepath: Optional[str] = None) -> None:
        if filepath:
            loaded = self._records_from_file(filepath)
        else:
            loaded = records
        # _records_from_file() returns None for a file without records and the
        # caller may pass nothing at all; always end up with a real list.
        self.records: List["GedcomRecord"] = loaded if loaded is not None else []

        # Fast tag index: {'HEAD': [rec], 'INDI': [rec1, rec2, ...], ...}
        self._tag_index: dict = defaultdict(list)
        self._reindex()

        self.header: Optional["GedcomRecord"] = None
        self._sources: List["GedcomRecord"] = []
        self._repositories: List["GedcomRecord"] = []
        self._individuals: List["GedcomRecord"] = []
        self._families: List["GedcomRecord"] = []
        self._objects: List["GedcomRecord"] = []
        self._snotes: List["GedcomRecord"] = []
        self.version = None

        # Tags that are only collected when they appear at level 0.
        level_zero_buckets = {
            'SOUR': self._sources,
            'REPO': self._repositories,
            'FAM': self._families,
            'OBJE': self._objects,
            'SNOTE': self._snotes,
        }

        for record in self.records:
            tag = record.tag
            if tag == 'HEAD':
                self.header = record
                self.version = self._header_version(record)
            elif tag == 'INDI':
                self._individuals.append(record)
            elif record.level == 0 and tag in level_zero_buckets:
                # Records whose identifier was stored in ``value`` get it
                # copied to ``xref``; an xref that is already set (as the
                # file parser does) must not be overwritten by the note text.
                if tag == 'SNOTE' and record.xref is None:
                    record.xref = record.value
                level_zero_buckets[tag].append(record)

    @staticmethod
    def _header_version(header):
        """Return the value of HEAD/GEDC/VERS, or None when it is missing or ambiguous."""
        try:
            return header['GEDC']['VERS'].value
        except (KeyError, TypeError, AttributeError):
            # KeyError: tag absent. TypeError/AttributeError: a tag occurs
            # more than once, so subscripting returned a list.
            return None

    # ─────────────────────────────────────────────────────────────
    # Subscriptable & iterable behavior
    # ─────────────────────────────────────────────────────────────
    def _reindex(self) -> None:
        """Rebuild the tag index from self.records."""
        self._tag_index.clear()
        for rec in self.records:
            # Normalize tag just in case
            tag = rec.tag if isinstance(rec.tag, str) else str(rec.tag)
            self._tag_index[tag].append(rec)

    def __len__(self) -> int:
        """Number of top-level records."""
        return len(self.records)

    def __iter__(self) -> Iterator['GedcomRecord']:
        # Enables: for x in gedcom:
        return iter(self.records)

    def __contains__(self, key: object) -> bool:
        # Enables: 'HEAD' in gedcom (tag membership)
        if isinstance(key, str):
            # .get() so a membership test never creates an empty bucket.
            return bool(self._tag_index.get(key))
        if isinstance(key, int):
            return 0 <= key < len(self.records)
        return False

    def __getitem__(self, key: "Key") -> Union['GedcomRecord', List['GedcomRecord']]:
        """
        - gedcom[0]        -> GedcomRecord at index 0
        - gedcom[1:5]      -> list of GedcomRecord (slice)
        - gedcom['HEAD']   -> single record if exactly one; otherwise list of matching records
        - gedcom['INDI']   -> list of all INDI records (usually many)

        Raises ``KeyError`` when no record has the requested tag and
        ``TypeError`` for any other key type.
        """
        if isinstance(key, (int, slice)):
            return self.records[key]
        if isinstance(key, str):
            matches = self._tag_index.get(key, [])
            if not matches:
                raise KeyError(f"No records with tag '{key}'.")
            # If exactly one match (e.g., HEAD), return the record; otherwise return list
            return matches[0] if len(matches) == 1 else matches
        raise TypeError(f"Unsupported key type: {type(key).__name__}")

    # Optional: convenience helpers
    def by_tag(self, tag: str) -> List['GedcomRecord']:
        """Always return a list of records for a tag (empty list if none)."""
        return list(self._tag_index.get(tag, []))

    def first(self, tag: str) -> Optional['GedcomRecord']:
        """Return the first record with a given tag, or None."""
        lst = self._tag_index.get(tag, [])
        return lst[0] if lst else None

    # If you add/replace records after init, keep the index fresh:
    def append(self, rec: 'GedcomRecord') -> None:
        """Append ``rec`` to ``records`` and to the tag index."""
        self.records.append(rec)
        self._tag_index[rec.tag].append(rec)

    def extend(self, recs: Iterable['GedcomRecord']) -> None:
        """Append every record in ``recs`` to ``records`` and to the tag index."""
        # Materialise first: ``recs`` may be a one-shot iterator, and it has
        # to be walked twice (once for the list, once for the index).
        new_records = list(recs)
        self.records.extend(new_records)
        for rec in new_records:
            self._tag_index[rec.tag].append(rec)

    def insert(self, idx: int, rec: 'GedcomRecord') -> None:
        """Insert ``rec`` at ``idx``; within the tag index it is appended to its bucket."""
        self.records.insert(idx, rec)
        self._tag_index[rec.tag].append(rec)

    def remove(self, rec: 'GedcomRecord') -> None:
        """Remove ``rec`` from ``records`` (``ValueError`` if absent) and from the tag index."""
        self.records.remove(rec)
        try:
            bucket = self._tag_index.get(rec.tag)
            if bucket:
                bucket.remove(rec)
                if not bucket:
                    del self._tag_index[rec.tag]
        except ValueError:
            pass  # already out of index

    def clear(self) -> None:
        """Drop all records and empty the tag index."""
        self.records.clear()
        self._tag_index.clear()

    # =========================================================
    # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
    # =========================================================

    @staticmethod
    def _require_record_list(value, name: str):
        """Return ``value`` if it is a list of GedcomRecord objects, else raise ValueError."""
        if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
            raise ValueError(f"{name} must be a list of GedcomRecord objects.")
        return value

    @property
    def json(self):
        """JSON text (indent 4) containing the individuals as nested dicts."""
        import json
        return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]},indent=4)

    def stats(self):
        """Print a small table with record counts per type to stdout."""
        def print_table(pairs):

            # Calculate the width of the columns
            name_width = max(len(name) for name, _ in pairs)
            value_width = max(len(str(value)) for _, value in pairs)

            # Print the header
            print('GEDCOM Import Results')
            header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
            print('-' * len(header))
            print(header)
            print('-' * len(header))

            # Print each pair in the table
            for name, value in pairs:
                print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")

        imports_stats = [
            ('Top Level Records', len(self.records)),
            ('Individuals', len(self.individuals)),
            ('Family Group Records', len(self.families)),
            ('Repositories', len(self.repositories)),
            ('Sources', len(self.sources)),
            ('Objects', len(self.objects))
        ]

        print_table(imports_stats)

    @property
    def sources(self) -> List["GedcomRecord"]:
        """List of level-0 **SOUR** records collected at construction."""
        return self._sources

    @sources.setter
    def sources(self, value: List["GedcomRecord"]):
        self._sources = self._require_record_list(value, "sources")

    @property
    def repositories(self) -> List["GedcomRecord"]:
        """
        List of **REPO** records found in the Genealogy
        """
        return self._repositories

    @repositories.setter
    def repositories(self, value: List["GedcomRecord"]):
        self._repositories = self._require_record_list(value, "repositories")

    @property
    def individuals(self) -> List["GedcomRecord"]:
        """List of **INDI** records collected at construction."""
        return self._individuals

    @individuals.setter
    def individuals(self, value: List["GedcomRecord"]):
        self._individuals = self._require_record_list(value, "individuals")

    @property
    def families(self) -> List["GedcomRecord"]:
        """List of level-0 **FAM** records collected at construction."""
        return self._families

    @families.setter
    def families(self, value: List["GedcomRecord"]):
        self._families = self._require_record_list(value, "families")

    @property
    def objects(self) -> List["GedcomRecord"]:
        """List of level-0 **OBJE** records collected at construction."""
        return self._objects

    @objects.setter
    def objects(self, value: List["GedcomRecord"]):
        self._objects = self._require_record_list(value, "objects")

    def write(self) -> bool:
        """
        Method placeholder for writing GEDCOM files.

        Raises
        ------
        NotImplementedError
            writing to legacy GEDCOM file is not currently implemented.
        """
        raise NotImplementedError("Writing of GEDCOM files is not implemented.")

    @staticmethod
    def _records_from_file(filepath: str) -> Optional[List["GedcomRecord"]]:
        """Read a ``.ged`` file and return its level-0 records with subrecords attached.

        Returns ``None`` when the file contains no level-0 record.

        Raises
        ------
        FileNotFoundError
            ``filepath`` does not exist.
        Exception
            ``filepath`` does not end in ``.ged`` (case-insensitive).
        ValueError
            A non-blank line cannot be parsed, or a line with level > 0 has no
            record one level above it to attach to.
        """
        def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
            """
            Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)

            Returns:
                (level, xref_id, tag, value, xref_value), or None if the line does not match
            """
            match = GEDCOM7_LINE_RE.match(line.strip())
            if not match:
                return None

            level = int(match.group("level"))
            xref_id = match.group("xref")
            tag = match.group("tag")
            value = match.group("value")
            if value == 'None':
                value = None
            xref_value = value.strip("@") if value and XREF_RE.match(value.strip()) else None

            return level, xref_id, tag, value, xref_value

        extension = '.ged'

        if not os.path.exists(filepath):
            print(f"File does not exist: {filepath}")
            raise FileNotFoundError(filepath)
        elif not filepath.lower().endswith(extension):
            print(f"File does not have the correct extension: {filepath}")
            raise Exception("File does not appear to be a GEDCOM")

        print("Reading from GEDCOM file")
        with open(filepath, 'r', encoding='utf-8') as file:
            lines = [line.strip() for line in file]

        records = []
        # level -> most recently read record at that level
        record_map = {}

        for index, line in enumerate(lines):
            line_num = index + 1
            if line.startswith(BOM):
                line = line.lstrip(BOM)
            # NOTE(review): besides decoding HTML entities this removes every
            # double quote from the line, including those inside values.
            line = html.unescape(line).replace('"', '')

            if line.strip() == '':
                continue

            parsed = parse_gedcom7_line(line)
            if parsed is None:
                raise ValueError(f"Line {line_num} is not a valid GEDCOM line: {line!r}")
            level, xref, tag, value, xref_value = parsed

            # A line without its own identifier whose value is a bare pointer
            # carries that pointer as its xref.
            if xref is None and xref_value is not None:
                xref = xref_value

            new_record = GedcomRecord(line_num=line_num, level=level, tag=tag, xref=xref, value=value)

            if level == 0:
                records.append(new_record)
            else:
                # ``is None`` on purpose: a record without subrecords is falsy.
                parent = record_map.get(level - 1)
                if parent is None:
                    raise ValueError(
                        f"Line {line_num} has level {level} but no preceding level {level - 1} record"
                    )
                new_record.root = record_map.get(0)
                parent.addSubRecord(new_record)  # also sets new_record.parent
            record_map[level] = new_record

        return records if records else None

    @staticmethod
    def fromFile(filepath: str) -> 'Gedcom5x':
        """
        Static method to create a Gedcom5x object from a GEDCOM file.

        Args:
            filepath (str): The path to the GEDCOM file.

        Returns:
            Gedcom5x: An instance of the Gedcom5x class.
        """
        records = Gedcom5x._records_from_file(filepath)
        return Gedcom5x(records=records)

    def merge_with_file(self, file_path: str) -> bool:
        """
        Adds records from a valid (``*``.ged) file to the current Genealogy

        NOTE(review): placeholder - nothing is read or merged yet; the method
        only returns True.

        Args:
            file_path (str): The path to the GEDCOM file.

        Returns:
            bool: Indicates if merge was successful.
        """
        return True
|
558
|
+
|