gedcom-x 0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gedcom_x-0.5.dist-info/METADATA +17 -0
- gedcom_x-0.5.dist-info/RECORD +37 -0
- gedcom_x-0.5.dist-info/WHEEL +5 -0
- gedcom_x-0.5.dist-info/top_level.txt +1 -0
- gedcomx/Address.py +100 -0
- gedcomx/Agent.py +83 -0
- gedcomx/Attribution.py +116 -0
- gedcomx/Conclusion.py +137 -0
- gedcomx/Coverage.py +26 -0
- gedcomx/Date.py +29 -0
- gedcomx/Document.py +42 -0
- gedcomx/Event.py +195 -0
- gedcomx/EvidenceReference.py +11 -0
- gedcomx/Fact.py +462 -0
- gedcomx/Gedcom.py +345 -0
- gedcomx/GedcomX.py +1105 -0
- gedcomx/Gender.py +48 -0
- gedcomx/Group.py +37 -0
- gedcomx/Identifier.py +89 -0
- gedcomx/Name.py +241 -0
- gedcomx/Note.py +65 -0
- gedcomx/OnlineAccount.py +10 -0
- gedcomx/Person.py +178 -0
- gedcomx/PlaceDescription.py +47 -0
- gedcomx/PlaceReference.py +31 -0
- gedcomx/Qualifier.py +27 -0
- gedcomx/Relationship.py +116 -0
- gedcomx/Serialization.py +37 -0
- gedcomx/SourceCitation.py +20 -0
- gedcomx/SourceDescription.py +241 -0
- gedcomx/SourceReference.py +168 -0
- gedcomx/Subject.py +73 -0
- gedcomx/TextValue.py +34 -0
- gedcomx/TopLevelTypeCollection.py +47 -0
- gedcomx/URI.py +70 -0
- gedcomx/_Resource.py +11 -0
- gedcomx/__init__.py +39 -0
gedcomx/Gedcom.py
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
import html
|
5
|
+
import os
|
6
|
+
from typing import List, Optional
|
7
|
+
|
8
|
+
# Unicode byte-order-mark character.
BOM = '\ufeff'

# TODO: add a hash table for the XREFs of level-zero records?

# ---------------------------------------------------------------------------
# Regular-expression fragments.  They are plain strings that are composed,
# smallest first, into the full single-line pattern `line` at the bottom.
# ---------------------------------------------------------------------------

# Literals and single-character classes.
atsign = '@'
underscore = '_'
nonzero = '[1-9]'
ucletter = '[A-Z]'
d = r'\ '  # an escaped literal space, used between the fields of a line

# Level number: either "0" or a number without a leading zero.
level = ''.join(('(?P<level>0|', nonzero, '[0-9]*)'))

# Tags and cross-reference identifiers share the same character set.
tagchar = '(' + '|'.join((ucletter, '[0-9]', underscore)) + ')'
xref = ''.join((atsign, '(', tagchar, ')+', atsign))
stdtag = ''.join((ucletter, '(', tagchar, ')*'))
exttag = ''.join((underscore, '(', tagchar, ')+'))
tag = ''.join(('(', stdtag, '|', exttag, ')'))

# A pointer value is either the void pointer or a cross-reference.
voidptr = '@VOID@'
pointer = ''.join(('(?P<pointer>', voidptr, '|', xref, ')'))

# Free-text values: the first character may not be a lone '@'
# (a doubled '@@' is accepted); the remainder runs to the end of the line.
nonat = '[\t -?A-\\U0010ffff]'
noneol = '[\t -\\U0010ffff]'
linestr = ''.join(('(?P<linestr>(', nonat, '|', atsign * 2, ')(', noneol, ')*)'))
lineval = ''.join(('(', pointer, '|', linestr, ')'))

# Line terminator: CR, CR LF or LF.
eol = '(\\\r(\\\n)?|\\\n)'

# Complete line: level, optional xref, tag, optional value, terminator.
line = ''.join((
    level, d,
    '((?P<xref>', xref, ')', d, ')?',
    '(?P<tag>', tag, ')',
    '(', d, lineval, ')?',
    eol,
))
|
31
|
+
|
32
|
+
class GedcomRecord():
    """One GEDCOM line plus the records nested directly beneath it.

    Public attributes: ``line_num``, ``level``, ``tag``, ``value``, ``xref``,
    ``pointer``, ``parent`` and ``root``.  Child records are kept in
    ``_subRecords`` and are attached through :meth:`addSubRecord`.
    """

    def __init__(self, line_num=None, level=-1, tag='NONR', xref='', value=None) -> None:
        """Create a record.

        Args:
            line_num: Line number the record came from (stored as given).
            level: Nesting level; converted with ``int()``.
            tag: Record tag; converted to ``str`` and stripped.
            xref: Cross-reference id.  Overwritten when ``value`` is wrapped
                in ``@`` signs (see below).
            value: Payload of the line, or ``None``.
        """
        self.line_num = line_num
        self._subRecords = []
        self.level = int(level)
        self.xref = xref
        self.pointer: bool = False
        self.tag = str(tag).strip()
        self.value = value

        self.parent = None
        self.root = None

        # A value of the form "@...@" is treated as a cross-reference: the
        # '@' signs are removed to form `xref`, and below level 0 the record
        # is flagged as a pointer.  Non-string values (including the default
        # None) are skipped instead of raising AttributeError.
        if isinstance(value, str) and value.startswith('@') and value.endswith('@'):
            self.xref = value.replace('@', '')
            # Compare the normalised int, so a level passed as "1" works too.
            if self.level > 0:
                self.pointer = True

    @property
    def _as_dict_(self):
        """Return this record and, recursively, its sub-records as a dict."""
        return {
            'level': self.level,
            'xref': self.xref,
            'tag': self.tag,
            'pointer': self.pointer,
            'value': self.value,
            'subrecords': [subrecord._as_dict_ for subrecord in self._subRecords],
        }

    def addSubRecord(self, record):
        """Attach ``record`` as a direct child and set its ``parent``.

        Raises:
            ValueError: If ``record`` is falsy or its level is not exactly
                one deeper than this record's level.
        """
        if not record:
            # The message cannot mention record.level here: there is no record.
            raise ValueError(
                f"SubRecord must be next level from this record (level:{self.level}, no subRecord was given)")
        if record.level != self.level + 1:
            raise ValueError(
                f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
        record.parent = self
        self._subRecords.append(record)

    def recordOnly(self):
        """Return a new record with the same line number, level, tag and value but no children."""
        return GedcomRecord(line_num=self.line_num, level=self.level, tag=self.tag, value=self.value)

    def dump(self):
        """Return a newline-terminated text dump of this record and its descendants."""
        record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
        # Each child's dump is prefixed with a single tab.
        return record_dump + "".join("\t" + record.dump() for record in self._subRecords)

    def describe(self, subRecords: bool = False):
        """Return a one-line description, optionally followed by all descendants.

        Args:
            subRecords: When true, append the description of every
                descendant, one per line.
        """
        # Built outside the f-string: a backslash inside an f-string
        # replacement field is a SyntaxError before Python 3.12.
        indent = '\t' * self.level
        description = (
            f"Line {self.line_num}: {indent} Level: {self.level}, tag: '{self.tag}', "
            f"value: '{self.value}', subRecords: {len(self._subRecords)}"
        )
        if subRecords:
            for subRecord in self.subRecords():
                description = description + '\n' + subRecord.describe(subRecords=True)
        return description

    def subRecord(self, tag):
        """Return the direct children whose tag equals ``tag``, or ``None`` if there are none."""
        result = [record for record in self._subRecords if record.tag == tag]
        return result if result else None

    def subRecords(self, tag: str = None):
        """Return child records, optionally selected by a ``/``-separated tag path.

        With no ``tag`` the internal list of direct children is returned.
        With ``"A"`` the direct children tagged ``A`` are returned; with
        ``"A/B"`` the ``B`` children of every ``A`` child, and so on.

        Returns:
            A list of records, or ``None`` when a path was given and nothing
            matched.
        """
        if not tag:
            return self._subRecords

        # Split into the first path component and the remainder (if any).
        tags = tag.split("/", 1)
        matching_records = [record for record in self._subRecords if record.tag == tags[0]]
        if not matching_records:
            return None
        if len(tags) == 1:
            return matching_records

        # Descend into every match and gather what the rest of the path finds.
        results = []
        for record in matching_records:
            sub_result = record.subRecords(tags[1])
            if sub_result:
                if isinstance(sub_result, list):
                    results.extend(sub_result)
                else:
                    results.append(sub_result)
        return results if results else None

    def __call__(self) -> str:
        """Calling a record returns :meth:`describe` without sub-records."""
        return self.describe()

    def __iter__(self):
        """Iterate over this record and all descendants, depth first, parents before children."""
        return self._flatten_subrecords(self)

    def _flatten_subrecords(self, record):
        """Yield ``record`` followed, recursively, by each of its sub-records."""
        yield record
        for subrecord in record._subRecords:
            yield from self._flatten_subrecords(subrecord)
|
128
|
+
|
129
|
+
class Gedcom():
    """In-memory view of a GEDCOM file as a list of level-0 records.

    Besides the full ``records`` list, the constructor sorts records into
    the ``individuals``, ``sources``, ``repositories``, ``families`` and
    ``objects`` lists by tag.
    """

    # Tags recognised in third position on a line of the form
    # "<level> <xref> <tag>"; see `_records_from_file`.
    top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD']

    # =========================================================
    # 1. INITIALIZATION
    # =========================================================

    def __init__(self, records: Optional[List['GedcomRecord']] = None, filepath: str = None) -> None:
        """Build the collection from a file or from already-parsed records.

        Args:
            records: Records to wrap.  Ignored when ``filepath`` is given.
            filepath: Path of a ``.ged`` file to parse.

        With neither argument (or with nothing parsed) the instance is
        created empty; ``records`` is always a list.
        """
        if filepath:
            records = self._records_from_file(filepath)
        # Always define the attribute, keeping the caller's list object when
        # one was supplied.
        self.records: List['GedcomRecord'] = records if records is not None else []

        self._sources = []
        self._repositories = []
        self._individuals = []
        self._families = []
        self._objects = []

        # These tags are only indexed when they sit on a level-0 record.
        level_zero_buckets = {
            'SOUR': self._sources,
            'REPO': self._repositories,
            'FAM': self._families,
            'OBJE': self._objects,
        }
        for record in self.records:
            if record.tag == 'INDI':
                # INDI is indexed regardless of level.
                bucket = self._individuals
            elif record.level == 0:
                bucket = level_zero_buckets.get(record.tag)
            else:
                bucket = None
            if bucket is not None:
                # For indexed records the value is the record's identifier,
                # so it is copied verbatim (including '@' signs) into xref.
                record.xref = record.value
                bucket.append(record)

    # =========================================================
    # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
    # =========================================================

    @staticmethod
    def _checked_record_list(value, label: str):
        """Return ``value`` if it is a list of GedcomRecord objects, else raise ValueError."""
        if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
            raise ValueError(f"{label} must be a list of GedcomRecord objects.")
        return value

    @property
    def json(self):
        """JSON text (indent 4) with the individuals under the key ``Individuals``."""
        import json
        return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]}, indent=4)

    def stats(self):
        """Print a table with the number of records of each indexed kind."""
        def print_table(pairs):
            # Column widths follow the longest name and the longest value.
            name_width = max(len(name) for name, _ in pairs)
            value_width = max(len(str(value)) for _, value in pairs)

            print('GEDCOM Import Results')
            header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
            rule = '-' * len(header)
            print(rule)
            print(header)
            print(rule)

            for name, value in pairs:
                print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")

        imports_stats = [
            ('Top Level Records', len(self.records)),
            ('Individuals', len(self.individuals)),
            ('Family Group Records', len(self.families)),
            ('Repositories', len(self.repositories)),
            ('Sources', len(self.sources)),
            ('Objects', len(self.objects)),
        ]

        print_table(imports_stats)

    @property
    def sources(self) -> List['GedcomRecord']:
        """Level-0 records tagged ``SOUR``."""
        return self._sources

    @sources.setter
    def sources(self, value: List['GedcomRecord']):
        self._sources = self._checked_record_list(value, "sources")

    @property
    def repositories(self) -> List['GedcomRecord']:
        """Level-0 records tagged ``REPO``."""
        return self._repositories

    @repositories.setter
    def repositories(self, value: List['GedcomRecord']):
        self._repositories = self._checked_record_list(value, "repositories")

    @property
    def individuals(self) -> List['GedcomRecord']:
        """Records tagged ``INDI``."""
        return self._individuals

    @individuals.setter
    def individuals(self, value: List['GedcomRecord']):
        self._individuals = self._checked_record_list(value, "individuals")

    @property
    def families(self) -> List['GedcomRecord']:
        """Level-0 records tagged ``FAM``."""
        return self._families

    @families.setter
    def families(self, value: List['GedcomRecord']):
        self._families = self._checked_record_list(value, "families")

    @property
    def objects(self) -> List['GedcomRecord']:
        """Level-0 records tagged ``OBJE``."""
        return self._objects

    @objects.setter
    def objects(self, value: List['GedcomRecord']):
        self._objects = self._checked_record_list(value, "objects")

    # =========================================================
    # 3. METHODS
    # =========================================================

    def write(self):
        """
        Method placeholder for writing GEDCOM files.
        """
        raise NotImplementedError("Writing of GEDCOM files is not implemented.")

    @staticmethod
    def _records_from_file(filepath: str) -> List['GedcomRecord']:
        """Parse a ``.ged`` file into a list of level-0 records.

        Each non-blank line is split on whitespace into at most three parts.
        Records below level 0 are attached to the most recent record one
        level up and get ``root`` set to the current level-0 record.

        Args:
            filepath: Path of the file; must exist and end in ``.ged``
                (case-insensitive).

        Returns:
            The level-0 records in file order (an empty list if none).

        Raises:
            FileNotFoundError: If ``filepath`` does not exist.
            ValueError: If the extension is wrong, a line has fewer than two
                fields or a non-integer level, or a record has no open
                parent one level above it.
        """
        extension = '.ged'

        if not os.path.exists(filepath):
            print(f"File does not exist: {filepath}")
            raise FileNotFoundError(f"File does not exist: {filepath}")
        if not filepath.lower().endswith(extension.lower()):
            print(f"File does not have the correct extension: {filepath}")
            raise ValueError("File does not appear to be a GEDCOM")

        print("Reading from GEDCOM file")
        with open(filepath, 'r', encoding='utf-8') as file:
            lines = [line.strip() for line in file]

        records = []
        # Most recent record seen at each level of the current hierarchy.
        open_records = {}
        for index, line in enumerate(lines):
            line_num = index + 1
            if line.startswith(BOM):
                line = line.lstrip(BOM)
            # NOTE(review): double quotes are removed from every line, which
            # also strips them from values - confirm this is intended.
            line = html.unescape(line).replace('"', '')

            if line.strip() == '':
                continue

            # Split the line into the first two columns and the rest.
            parts = line.split(maxsplit=2)
            if len(parts) < 2:
                raise ValueError(f"Line {line_num}: expected a level and a tag, got {line!r}")

            value = ''
            if len(parts) == 3:
                level_text, col2, col3 = parts
                if col3 in Gedcom.top_level_tags:
                    # "<level> <xref> <tag>": the identifier becomes the value.
                    tag, value = col3, col2
                else:
                    tag, value = col2, col3
            else:
                level_text, tag = parts

            try:
                level = int(level_text)
            except ValueError:
                raise ValueError(f"Line {line_num}: invalid level {level_text!r}") from None

            new_record = GedcomRecord(line_num=line_num, level=level, tag=tag, value=value)
            if level == 0:
                records.append(new_record)
            else:
                parent = open_records.get(level - 1)
                if parent is None:
                    raise ValueError(
                        f"Line {line_num}: level {level} record has no open record at level {level - 1}")
                new_record.root = open_records[0]
                new_record.parent = parent
                parent.addSubRecord(new_record)

            # Anything deeper than this record belongs to an earlier branch;
            # forget it so a later level-skipping line cannot attach to it.
            for stale_level in [key for key in open_records if key > level]:
                del open_records[stale_level]
            open_records[level] = new_record

        return records

    @staticmethod
    def fromFile(filepath: str) -> 'Gedcom':
        """
        Static method to create a Gedcom object from a GEDCOM file.

        Args:
            filepath (str): The path to the GEDCOM file.

        Returns:
            Gedcom: An instance of the Gedcom class.
        """
        return Gedcom(records=Gedcom._records_from_file(filepath))
|
335
|
+
|
336
|
+
#
|
337
|
+
#import re
|
338
|
+
#filepath = r"C:\Users\User\Documents\PythonProjects\gedcomx\.ged_files\_DJC_ Nunda Cartwright Family.ged"
|
339
|
+
#with open(filepath, 'r', encoding='utf-8') as file:
|
340
|
+
# string = file.read()
|
341
|
+
#
|
342
|
+
#for match in re.finditer(line, string):
|
343
|
+
# data = match.groupdict()
|
344
|
+
# print(data)
|
345
|
+
#'''
|