gedcom-x 0.5.1__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.1.dist-info → gedcom_x-0.5.5.dist-info}/METADATA +1 -1
- gedcom_x-0.5.5.dist-info/RECORD +43 -0
- gedcomx/Address.py +42 -11
- gedcomx/Agent.py +136 -23
- gedcomx/Attribution.py +39 -91
- gedcomx/Conclusion.py +132 -53
- gedcomx/Coverage.py +10 -0
- gedcomx/Date.py +47 -11
- gedcomx/Document.py +43 -12
- gedcomx/Event.py +24 -5
- gedcomx/EvidenceReference.py +2 -2
- gedcomx/Exceptions.py +16 -0
- gedcomx/Fact.py +73 -50
- gedcomx/Gedcom.py +40 -333
- gedcomx/Gedcom5x.py +558 -0
- gedcomx/GedcomX.py +439 -194
- gedcomx/Gender.py +27 -8
- gedcomx/Group.py +3 -3
- gedcomx/Identifier.py +192 -55
- gedcomx/Logging.py +19 -0
- gedcomx/Mutations.py +228 -0
- gedcomx/Name.py +73 -38
- gedcomx/Note.py +5 -4
- gedcomx/OnlineAccount.py +2 -2
- gedcomx/Person.py +106 -92
- gedcomx/PlaceDescription.py +39 -16
- gedcomx/PlaceReference.py +14 -15
- gedcomx/Relationship.py +35 -56
- gedcomx/Resource.py +75 -0
- gedcomx/Serialization.py +394 -30
- gedcomx/SourceCitation.py +6 -1
- gedcomx/SourceDescription.py +89 -75
- gedcomx/SourceReference.py +33 -88
- gedcomx/Subject.py +12 -10
- gedcomx/TextValue.py +2 -1
- gedcomx/Translation.py +219 -0
- gedcomx/URI.py +96 -61
- gedcomx/Zip.py +1 -0
- gedcomx/__init__.py +11 -3
- gedcom_x-0.5.1.dist-info/RECORD +0 -37
- gedcomx/_Resource.py +0 -11
- {gedcom_x-0.5.1.dist-info → gedcom_x-0.5.5.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.1.dist-info → gedcom_x-0.5.5.dist-info}/top_level.txt +0 -0
gedcomx/Gedcom.py
CHANGED
@@ -1,346 +1,53 @@
|
|
1
|
-
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
|
4
|
-
import html
|
5
|
-
import os
|
6
|
-
from typing import List, Optional
|
7
|
-
|
8
|
-
BOM = '\ufeff'
|
9
|
-
|
10
|
-
# Add hash table for XREF of Zero Recrods?
|
11
|
-
|
12
|
-
nonzero = '[1-9]'
|
13
|
-
level = f'(?P<level>0|{nonzero}[0-9]*)'
|
14
|
-
atsign = '@'
|
15
|
-
underscore = '_'
|
16
|
-
ucletter = '[A-Z]'
|
17
|
-
tagchar = f'({ucletter}|[0-9]|{underscore})'
|
18
|
-
xref = f'{atsign}({tagchar})+{atsign}'
|
19
|
-
d = '\\ '
|
20
|
-
stdtag = f'{ucletter}({tagchar})*'
|
21
|
-
exttag = f'{underscore}({tagchar})+'
|
22
|
-
tag = f'({stdtag}|{exttag})'
|
23
|
-
voidptr = '@VOID@'
|
24
|
-
pointer = f'(?P<pointer>{voidptr}|{xref})'
|
25
|
-
nonat = '[\t -?A-\\U0010ffff]'
|
26
|
-
noneol = '[\t -\\U0010ffff]'
|
27
|
-
linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
|
28
|
-
lineval = f'({pointer}|{linestr})'
|
29
|
-
eol = '(\\\r(\\\n)?|\\\n)'
|
30
|
-
line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
|
31
|
-
|
32
|
-
class GedcomRecord():
|
33
|
-
def __init__(self,line_num=None,level=-1, tag='NONR', xref='', value=None) -> None:
|
34
|
-
self.line_num = line_num
|
35
|
-
self._subRecords = []
|
36
|
-
self.level = int(level)
|
37
|
-
self.xref = xref
|
38
|
-
self.pointer: bool = False
|
39
|
-
self.tag = str(tag).strip()
|
40
|
-
self.value = value
|
41
|
-
|
42
|
-
self.parent = None
|
43
|
-
self.root = None
|
44
|
-
|
45
|
-
if self.value.endswith('@') and self.value.startswith('@'):
|
46
|
-
self.xref = self.value.replace('@','')
|
47
|
-
if level > 0:
|
48
|
-
self.pointer = True
|
49
|
-
|
50
|
-
@property
|
51
|
-
def _as_dict_(self):
|
52
|
-
record_dict = {
|
53
|
-
'level':self.level,
|
54
|
-
'xref':self.xref,
|
55
|
-
'tag': self.tag,
|
56
|
-
'pointer': self.pointer,
|
57
|
-
'value': self.value,
|
58
|
-
'subrecords': [subrecord._as_dict_ for subrecord in self._subRecords]
|
59
|
-
}
|
60
|
-
return record_dict
|
61
|
-
|
62
|
-
def addSubRecord(self, record):
|
63
|
-
if record and record.level == self.level+1:
|
64
|
-
record.parent = self
|
65
|
-
self._subRecords.append(record)
|
66
|
-
else:
|
67
|
-
raise ValueError(f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
|
68
|
-
|
69
|
-
def recordOnly(self):
|
70
|
-
return GedcomRecord(line_num=self.line_num,level=self.level,tag=self.tag,value=self.value)
|
71
|
-
|
72
|
-
def dump(self):
|
73
|
-
record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
|
74
|
-
for record in self._subRecords:
|
75
|
-
record_dump += "\t" + record.dump() # Recursively call dump on sub_records and concatenate
|
76
|
-
return record_dump
|
77
|
-
|
78
|
-
def describe(self,subRecords: bool = False):
|
79
|
-
level_str = '\t'* self.level
|
80
|
-
description = f"Line {self.line_num}: {level_str} Level: {self.level}, tag: '{self.tag}', value: '{self.value}', subRecords: {len(self._subRecords)}"
|
81
|
-
if subRecords:
|
82
|
-
for subRecord in self.subRecords():
|
83
|
-
description = description + '\n' + subRecord.describe(subRecords=True)
|
84
|
-
return description
|
85
|
-
|
86
|
-
|
87
|
-
def subRecord(self, tag):
|
88
|
-
result = [record for record in self._subRecords if record.tag == tag]
|
89
|
-
if len(result) == 0: return None
|
90
|
-
return result
|
91
|
-
|
92
|
-
def subRecords(self, tag: str = None):
|
93
|
-
if not tag:
|
94
|
-
return self._subRecords
|
95
|
-
else:
|
96
|
-
tags = tag.split("/", 1) # Split into first tag and the rest
|
97
|
-
|
98
|
-
# Collect all records matching the first tag
|
99
|
-
matching_records = [record for record in self._subRecords if record.tag == tags[0]]
|
100
|
-
|
101
|
-
if not matching_records:
|
102
|
-
return None # No matching records found for the first tag
|
103
|
-
|
104
|
-
if len(tags) == 1:
|
105
|
-
return matching_records # Return all matching records for the final tag
|
106
|
-
|
107
|
-
# Recurse into each matching record's subRecords and collect results
|
108
|
-
results = []
|
109
|
-
for record in matching_records:
|
110
|
-
sub_result = record.subRecords(tags[1])
|
111
|
-
if sub_result:
|
112
|
-
if isinstance(sub_result, list):
|
113
|
-
results.extend(sub_result)
|
114
|
-
else:
|
115
|
-
results.append(sub_result)
|
116
|
-
|
117
|
-
return results if results else None
|
118
|
-
|
119
|
-
def __call__(self) -> None:
|
120
|
-
return self.describe()
|
121
|
-
|
122
|
-
def __iter__(self):
|
123
|
-
return self._flatten_subrecords(self)
|
124
|
-
|
125
|
-
def _flatten_subrecords(self, record):
|
126
|
-
yield record
|
127
|
-
for subrecord in record._subRecords:
|
128
|
-
yield from self._flatten_subrecords(subrecord)
|
1
|
+
import re
|
129
2
|
|
130
3
|
class Gedcom():
|
131
|
-
|
132
|
-
|
133
|
-
# =========================================================
|
134
|
-
# 1. INITIALIZATION
|
135
|
-
# =========================================================
|
136
|
-
|
137
|
-
|
138
|
-
def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
|
139
|
-
if filepath:
|
140
|
-
self.records = self._records_from_file(filepath)
|
141
|
-
elif records:
|
142
|
-
self.records: List[GedcomRecord] = records if records else []
|
143
|
-
|
144
|
-
|
145
|
-
self._sources = []
|
146
|
-
self._repositories = []
|
147
|
-
self._individuals = []
|
148
|
-
self._families = []
|
149
|
-
self._objects = []
|
150
|
-
|
151
|
-
if self.records:
|
152
|
-
for record in self.records:
|
153
|
-
if record.tag == 'INDI':
|
154
|
-
record.xref = record.value
|
155
|
-
self._individuals.append(record)
|
156
|
-
if record.tag == 'SOUR' and record.level == 0:
|
157
|
-
record.xref = record.value
|
158
|
-
self._sources.append(record)
|
159
|
-
if record.tag == 'REPO' and record.level == 0:
|
160
|
-
record.xref = record.value
|
161
|
-
self._repositories.append(record)
|
162
|
-
if record.tag == 'FAM' and record.level == 0:
|
163
|
-
record.xref = record.value
|
164
|
-
self._families.append(record)
|
165
|
-
if record.tag == 'OBJE' and record.level == 0:
|
166
|
-
record.xref = record.value
|
167
|
-
self._objects.append(record)
|
168
|
-
|
169
|
-
# =========================================================
|
170
|
-
# 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
|
171
|
-
# =========================================================
|
172
|
-
|
173
|
-
@property
|
174
|
-
def json(self):
|
175
|
-
import json
|
176
|
-
return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]},indent=4)
|
177
|
-
|
178
|
-
def stats(self):
|
179
|
-
def print_table(pairs):
|
180
|
-
|
181
|
-
# Calculate the width of the columns
|
182
|
-
name_width = max(len(name) for name, _ in pairs)
|
183
|
-
value_width = max(len(str(value)) for _, value in pairs)
|
184
|
-
|
185
|
-
# Print the header
|
186
|
-
print('GEDCOM Import Results')
|
187
|
-
header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
|
188
|
-
print('-' * len(header))
|
189
|
-
print(header)
|
190
|
-
print('-' * len(header))
|
191
|
-
|
192
|
-
# Print each pair in the table
|
193
|
-
for name, value in pairs:
|
194
|
-
print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")
|
195
|
-
|
196
|
-
imports_stats = [
|
197
|
-
('Top Level Records', len(self.records)),
|
198
|
-
('Individuals', len(self.individuals)),
|
199
|
-
('Family Group Records', len(self.families)),
|
200
|
-
('Repositories', len(self.repositories)),
|
201
|
-
('Sources', len(self.sources)),
|
202
|
-
('Objects', len(self.objects))
|
203
|
-
]
|
204
|
-
|
205
|
-
print_table(imports_stats)
|
206
|
-
|
207
|
-
@property
|
208
|
-
def sources(self) -> List[GedcomRecord]:
|
209
|
-
return self._sources
|
210
|
-
|
211
|
-
@sources.setter
|
212
|
-
def sources(self, value: List[GedcomRecord]):
|
213
|
-
if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
|
214
|
-
raise ValueError("sources must be a list of GedcomRecord objects.")
|
215
|
-
self._sources = value
|
216
|
-
|
217
|
-
@property
|
218
|
-
def repositories(self) -> List[GedcomRecord]:
|
219
|
-
return self._repositories
|
220
|
-
|
221
|
-
@repositories.setter
|
222
|
-
def repositories(self, value: List[GedcomRecord]):
|
223
|
-
if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
|
224
|
-
raise ValueError("repositories must be a list of GedcomRecord objects.")
|
225
|
-
self._repositories = value
|
226
|
-
|
227
|
-
@property
|
228
|
-
def individuals(self) -> List[GedcomRecord]:
|
229
|
-
return self._individuals
|
230
|
-
|
231
|
-
@individuals.setter
|
232
|
-
def individuals(self, value: List[GedcomRecord]):
|
233
|
-
if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
|
234
|
-
raise ValueError("individuals must be a list of GedcomRecord objects.")
|
235
|
-
self._individuals = value
|
236
|
-
|
237
|
-
@property
|
238
|
-
def families(self) -> List[GedcomRecord]:
|
239
|
-
return self._families
|
240
|
-
|
241
|
-
@families.setter
|
242
|
-
def families(self, value: List[GedcomRecord]):
|
243
|
-
if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
|
244
|
-
raise ValueError("families must be a list of GedcomRecord objects.")
|
245
|
-
self._families = value
|
246
|
-
|
247
|
-
@property
|
248
|
-
def objects(self) -> List[GedcomRecord]:
|
249
|
-
return self._objects
|
250
|
-
|
251
|
-
@objects.setter
|
252
|
-
def objects(self, value: List[GedcomRecord]):
|
253
|
-
if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
|
254
|
-
raise ValueError("objects must be a list of GedcomRecord objects.")
|
255
|
-
self._objects = value
|
256
|
-
|
257
|
-
# =========================================================
|
258
|
-
# 3. METHODS
|
259
|
-
# =========================================================
|
260
|
-
|
261
|
-
def write(self):
|
262
|
-
"""
|
263
|
-
Method placeholder for writing GEDCOM files.
|
264
|
-
"""
|
265
|
-
raise NotImplementedError("Writing of GEDCOM files is not implemented.")
|
4
|
+
def __init__(self) -> None:
|
5
|
+
pass
|
266
6
|
|
267
7
|
@staticmethod
|
268
|
-
def
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
print(f"File does not exist: {filepath}")
|
273
|
-
raise FileNotFoundError
|
274
|
-
elif not filepath.lower().endswith(extension.lower()):
|
275
|
-
print(f"File does not have the correct extension: {filepath}")
|
276
|
-
raise Exception("File does not appear to be a GEDCOM")
|
8
|
+
def read_gedcom_version(filepath: str) -> str | None:
|
9
|
+
"""
|
10
|
+
Reads only the HEAD section of a GEDCOM file and returns the GEDCOM standard version.
|
11
|
+
Looks specifically for HEAD → GEDC → VERS.
|
277
12
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
if line.strip() == '':
|
13
|
+
Returns:
|
14
|
+
str: GEDCOM version (e.g., "5.5.1" or "7.0.0"), or None if not found.
|
15
|
+
"""
|
16
|
+
version = None
|
17
|
+
inside_head = False
|
18
|
+
inside_gedc = False
|
19
|
+
|
20
|
+
with open(filepath, "r", encoding="utf-8") as f:
|
21
|
+
for line in f:
|
22
|
+
parts = line.strip().split(maxsplit=2)
|
23
|
+
if not parts:
|
290
24
|
continue
|
291
25
|
|
292
|
-
level
|
293
|
-
|
294
|
-
|
295
|
-
parts = line.split(maxsplit=2)
|
296
|
-
if len(parts) == 3:
|
297
|
-
level, col2, col3 = parts
|
298
|
-
|
299
|
-
if col3 in Gedcom.top_level_tags:
|
300
|
-
tag = col3
|
301
|
-
value = col2
|
302
|
-
else:
|
303
|
-
tag = col2
|
304
|
-
value = col3
|
305
|
-
else:
|
306
|
-
level, tag = parts
|
26
|
+
level = int(parts[0])
|
27
|
+
tag = parts[1] if len(parts) > 1 else ""
|
28
|
+
value = parts[2] if len(parts) > 2 else None
|
307
29
|
|
308
|
-
|
30
|
+
# Enter HEAD
|
31
|
+
if level == 0 and tag == "HEAD":
|
32
|
+
inside_head = True
|
33
|
+
continue
|
309
34
|
|
310
|
-
|
311
|
-
if level == 0:
|
312
|
-
|
313
|
-
else:
|
314
|
-
new_record.root = record_map[0]
|
315
|
-
new_record.parent = record_map[int(level) - 1]
|
316
|
-
record_map[int(level) - 1].addSubRecord(new_record)
|
317
|
-
record_map[int(level)] = new_record
|
318
|
-
|
319
|
-
return records if records else None
|
35
|
+
# Leave HEAD block
|
36
|
+
if inside_head and level == 0:
|
37
|
+
break
|
320
38
|
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
Args:
|
327
|
-
filepath (str): The path to the GEDCOM file.
|
39
|
+
# Inside HEAD, look for GEDC
|
40
|
+
if inside_head and level == 1 and tag == "GEDC":
|
41
|
+
inside_gedc = True
|
42
|
+
continue
|
328
43
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
records = Gedcom._records_from_file(filepath)
|
333
|
-
gedcom = Gedcom(records=records)
|
44
|
+
# If we drop back to level 1 (but not GEDC), stop looking inside GEDC
|
45
|
+
if inside_gedc and level == 1:
|
46
|
+
inside_gedc = False
|
334
47
|
|
335
|
-
|
48
|
+
# Inside GEDC, look for VERS
|
49
|
+
if inside_gedc and tag == "VERS":
|
50
|
+
version = value
|
51
|
+
break
|
336
52
|
|
337
|
-
|
338
|
-
#import re
|
339
|
-
#filepath = r"C:\Users\User\Documents\PythonProjects\gedcomx\.ged_files\_DJC_ Nunda Cartwright Family.ged"
|
340
|
-
#with open(filepath, 'r', encoding='utf-8') as file:
|
341
|
-
# string = file.read()
|
342
|
-
#
|
343
|
-
#for match in re.finditer(line, string):
|
344
|
-
# data = match.groupdict()
|
345
|
-
# print(data)
|
346
|
-
#'''
|
53
|
+
return version
|