gedcom-x 0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gedcomx/Gedcom.py ADDED
@@ -0,0 +1,345 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import html
5
+ import os
6
+ from typing import List, Optional
7
+
8
+ BOM = '\ufeff'
9
+
10
+ # Add hash table for XREF of Zero Recrods?
11
+
12
+ nonzero = '[1-9]'
13
+ level = f'(?P<level>0|{nonzero}[0-9]*)'
14
+ atsign = '@'
15
+ underscore = '_'
16
+ ucletter = '[A-Z]'
17
+ tagchar = f'({ucletter}|[0-9]|{underscore})'
18
+ xref = f'{atsign}({tagchar})+{atsign}'
19
+ d = '\\ '
20
+ stdtag = f'{ucletter}({tagchar})*'
21
+ exttag = f'{underscore}({tagchar})+'
22
+ tag = f'({stdtag}|{exttag})'
23
+ voidptr = '@VOID@'
24
+ pointer = f'(?P<pointer>{voidptr}|{xref})'
25
+ nonat = '[\t -?A-\\U0010ffff]'
26
+ noneol = '[\t -\\U0010ffff]'
27
+ linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
28
+ lineval = f'({pointer}|{linestr})'
29
+ eol = '(\\\r(\\\n)?|\\\n)'
30
+ line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
31
+
32
class GedcomRecord():
    """One GEDCOM line plus the records nested directly beneath it.

    Attributes:
        line_num: Line number the record was read from, as given by the caller.
        level: Nesting level as an ``int``.
        tag: The record's tag, stripped of surrounding whitespace.
        value: The raw payload of the line (may be ``None`` or empty).
        xref: For ``@...@`` values, the value with every ``'@'`` removed;
            otherwise the ``xref`` argument.
        pointer: ``True`` when the value is ``@...@`` shaped and level > 0.
        parent: The record this one was added to via ``addSubRecord``.
        root: Not set by this class; callers may assign it.
    """

    def __init__(self, line_num=None, level=-1, tag='NONR', xref='', value=None) -> None:
        """Build a record; ``level`` may be an int or a numeric string."""
        self.line_num = line_num
        self._subRecords = []
        self.level = int(level)
        self.xref = xref
        self.pointer: bool = False
        self.tag = str(tag).strip()
        self.value = value

        self.parent = None
        self.root = None

        # ``value`` defaults to None, so only inspect it when it is a string.
        if isinstance(value, str) and value.startswith('@') and value.endswith('@'):
            self.xref = value.replace('@', '')
            # Compare the normalized int level; the raw argument may be a str.
            if self.level > 0:
                self.pointer = True

    @property
    def _as_dict_(self):
        """Return this record and its whole subtree as plain dicts and lists."""
        return {
            'level': self.level,
            'xref': self.xref,
            'tag': self.tag,
            'pointer': self.pointer,
            'value': self.value,
            'subrecords': [subrecord._as_dict_ for subrecord in self._subRecords],
        }

    def addSubRecord(self, record):
        """Attach ``record`` as a direct child and set its ``parent``.

        Raises:
            ValueError: if ``record`` is ``None`` or its level is not exactly
                one deeper than this record's level.
        """
        if record is None:
            raise ValueError(f"SubRecord must be a GedcomRecord, got None (parent level:{self.level})")
        if record.level != self.level + 1:
            raise ValueError(f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
        record.parent = self
        self._subRecords.append(record)

    def recordOnly(self):
        """Return a copy of this record without any of its sub-records."""
        return GedcomRecord(line_num=self.line_num, level=self.level, tag=self.tag, value=self.value)

    def dump(self):
        """Return a multi-line text dump of this record and its subtree."""
        record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
        for record in self._subRecords:
            # Only the first line of each nested dump receives the tab prefix.
            record_dump += "\t" + record.dump()
        return record_dump

    def describe(self, subRecords: bool = False):
        """Return a one-line description, optionally followed by the subtree.

        Args:
            subRecords: when true, append the description of every
                descendant, one per line.
        """
        # Built outside the f-string: a backslash inside a replacement field
        # is a SyntaxError on Python versions before 3.12.
        indent = '\t' * self.level
        description = (
            f"Line {self.line_num}: {indent} Level: {self.level}, tag: '{self.tag}', "
            f"value: '{self.value}', subRecords: {len(self._subRecords)}"
        )
        if subRecords:
            for subRecord in self._subRecords:
                description = description + '\n' + subRecord.describe(subRecords=True)
        return description

    def subRecord(self, tag):
        """Return the direct children whose tag equals ``tag``, or ``None``."""
        result = [record for record in self._subRecords if record.tag == tag]
        return result if result else None

    def subRecords(self, tag: Optional[str] = None):
        """Return child records, optionally selected by a ``/``-separated path.

        Args:
            tag: ``None`` or empty returns the list of direct children. A
                single tag returns the direct children with that tag. A path
                such as ``'BIRT/DATE'`` descends one level per segment.

        Returns:
            A list of matching records, or ``None`` when a tag or path was
            given and nothing matched.
        """
        if not tag:
            return self._subRecords

        # Split into the first tag and the remainder of the path.
        tags = tag.split("/", 1)
        matching_records = [record for record in self._subRecords if record.tag == tags[0]]
        if not matching_records:
            return None
        if len(tags) == 1:
            return matching_records

        # Recurse into every match and merge whatever each one finds.
        results = []
        for record in matching_records:
            sub_result = record.subRecords(tags[1])
            if sub_result:
                results.extend(sub_result)
        return results if results else None

    def __call__(self) -> str:
        """Calling the record returns ``describe()``."""
        return self.describe()

    def __iter__(self):
        """Iterate depth-first over this record and all of its descendants."""
        return self._flatten_subrecords(self)

    def _flatten_subrecords(self, record):
        """Yield ``record`` and then, recursively, each of its sub-records."""
        yield record
        for subrecord in record._subRecords:
            yield from self._flatten_subrecords(subrecord)
128
+
129
class Gedcom():
    """A GEDCOM file held as a list of level-0 records.

    The constructor also sorts the records into per-type lists, exposed as
    the ``individuals``, ``families``, ``sources``, ``repositories`` and
    ``objects`` properties.
    """

    # Tags that the file reader accepts in the third column of an
    # ``<level> @XREF@ <TAG>`` line.
    top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD']

    # =========================================================
    # 1. INITIALIZATION
    # =========================================================

    def __init__(self, records: Optional[List[GedcomRecord]] = None, filepath: Optional[str] = None) -> None:
        """Create a Gedcom from a file or from already-parsed records.

        Args:
            records: Top-level records to wrap. Ignored when ``filepath``
                is given.
            filepath: Path of a ``.ged`` file to parse.

        With neither argument (or an empty file) ``records`` is an empty
        list, so the accessors and ``stats()`` always work.
        """
        if filepath:
            records = self._records_from_file(filepath)
        # _records_from_file returns None for an empty file; normalize it.
        self.records: List[GedcomRecord] = [] if records is None else records

        self._sources = []
        self._repositories = []
        self._individuals = []
        self._families = []
        self._objects = []

        # These tags are only collected when the record is at level 0.
        level_zero_buckets = {
            'SOUR': self._sources,
            'REPO': self._repositories,
            'FAM': self._families,
            'OBJE': self._objects,
        }
        for record in self.records:
            if record.tag == 'INDI':
                # NOTE: unlike the other tags, INDI is collected at any level.
                bucket = self._individuals
            elif record.level == 0:
                bucket = level_zero_buckets.get(record.tag)
            else:
                bucket = None
            if bucket is not None:
                # Stores the raw value (including any '@') as the xref.
                record.xref = record.value
                bucket.append(record)

    # =========================================================
    # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
    # =========================================================

    @property
    def json(self):
        """Return the individuals, with their subtrees, as a JSON string."""
        import json
        return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]}, indent=4)

    def stats(self):
        """Print a table of record counts per type to standard output."""
        def print_table(pairs):
            # Size each column to its widest entry.
            name_width = max(len(name) for name, _ in pairs)
            value_width = max(len(str(value)) for _, value in pairs)

            print('GEDCOM Import Results')
            header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
            print('-' * len(header))
            print(header)
            print('-' * len(header))

            for name, value in pairs:
                print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")

        imports_stats = [
            ('Top Level Records', len(self.records)),
            ('Individuals', len(self.individuals)),
            ('Family Group Records', len(self.families)),
            ('Repositories', len(self.repositories)),
            ('Sources', len(self.sources)),
            ('Objects', len(self.objects))
        ]

        print_table(imports_stats)

    @staticmethod
    def _require_record_list(name: str, value) -> List[GedcomRecord]:
        """Return ``value`` if it is a list of GedcomRecord, else raise ValueError."""
        if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
            raise ValueError(f"{name} must be a list of GedcomRecord objects.")
        return value

    @property
    def sources(self) -> List[GedcomRecord]:
        """Level-0 ``SOUR`` records."""
        return self._sources

    @sources.setter
    def sources(self, value: List[GedcomRecord]):
        self._sources = self._require_record_list("sources", value)

    @property
    def repositories(self) -> List[GedcomRecord]:
        """Level-0 ``REPO`` records."""
        return self._repositories

    @repositories.setter
    def repositories(self, value: List[GedcomRecord]):
        self._repositories = self._require_record_list("repositories", value)

    @property
    def individuals(self) -> List[GedcomRecord]:
        """``INDI`` records."""
        return self._individuals

    @individuals.setter
    def individuals(self, value: List[GedcomRecord]):
        self._individuals = self._require_record_list("individuals", value)

    @property
    def families(self) -> List[GedcomRecord]:
        """Level-0 ``FAM`` records."""
        return self._families

    @families.setter
    def families(self, value: List[GedcomRecord]):
        self._families = self._require_record_list("families", value)

    @property
    def objects(self) -> List[GedcomRecord]:
        """Level-0 ``OBJE`` records."""
        return self._objects

    @objects.setter
    def objects(self, value: List[GedcomRecord]):
        self._objects = self._require_record_list("objects", value)

    # =========================================================
    # 3. METHODS
    # =========================================================

    def write(self):
        """
        Method placeholder for writing GEDCOM files.
        """
        raise NotImplementedError("Writing of GEDCOM files is not implemented.")

    @staticmethod
    def _records_from_file(filepath: str) -> Optional[List[GedcomRecord]]:
        """Parse a ``.ged`` file into a tree of GedcomRecord objects.

        Args:
            filepath: Path to a UTF-8 encoded file ending in ``.ged``
                (case-insensitive).

        Returns:
            The list of level-0 records, or ``None`` when the file holds no
            records.

        Raises:
            FileNotFoundError: if ``filepath`` does not exist.
            Exception: if the file name does not end in ``.ged``.
            ValueError: if a line has no tag, a non-integer level, or a
                level for which no parent record has been read.
        """
        extension = '.ged'

        if not os.path.exists(filepath):
            print(f"File does not exist: {filepath}")
            raise FileNotFoundError(f"File does not exist: {filepath}")
        if not filepath.lower().endswith(extension):
            print(f"File does not have the correct extension: {filepath}")
            raise Exception("File does not appear to be a GEDCOM")

        print("Reading from GEDCOM file")
        with open(filepath, 'r', encoding='utf-8') as file:
            lines = [raw.strip() for raw in file]

        records = []
        # Most recent record seen at each level; used to find parents.
        # NOTE(review): deeper entries are not cleared when a shallower
        # record starts, so a line that skips a level attaches to the last
        # record seen at the level above, even one from an earlier tree.
        record_map = {}
        for line_num, line in enumerate(lines, start=1):
            if line.startswith(BOM):
                line = line.lstrip(BOM)
            line = html.unescape(line).replace('&quot;', '')

            if line.strip() == '':
                continue

            # Split the line into the first two columns and the rest.
            parts = line.split(maxsplit=2)
            if len(parts) < 2:
                raise ValueError(f"Line {line_num}: expected a level and a tag, got {line!r}")

            try:
                level = int(parts[0])
            except ValueError:
                raise ValueError(f"Line {line_num}: level is not an integer: {parts[0]!r}") from None

            col2 = parts[1]
            if len(parts) == 3:
                col3 = parts[2]
                # "<level> @XREF@ <TAG>" puts the identifier before the tag.
                # Swap only when column two really is @...@ shaped, so a
                # value that merely equals a top-level tag stays a value.
                is_xref = len(col2) > 2 and col2.startswith('@') and col2.endswith('@')
                if is_xref and col3 in Gedcom.top_level_tags:
                    tag, value = col3, col2
                else:
                    tag, value = col2, col3
            else:
                tag, value = col2, ''

            new_record = GedcomRecord(line_num=line_num, level=level, tag=tag, value=value)
            if level == 0:
                records.append(new_record)
            else:
                parent = record_map.get(level - 1)
                if parent is None:
                    raise ValueError(
                        f"Line {line_num}: level {level} record has no preceding level {level - 1} record"
                    )
                new_record.root = record_map.get(0)
                parent.addSubRecord(new_record)  # also sets new_record.parent
            record_map[level] = new_record

        return records if records else None

    @staticmethod
    def fromFile(filepath: str) -> 'Gedcom':
        """
        Static method to create a Gedcom object from a GEDCOM file.

        Args:
            filepath (str): The path to the GEDCOM file.

        Returns:
            Gedcom: An instance of the Gedcom class.
        """
        return Gedcom(records=Gedcom._records_from_file(filepath))
335
+
336
+ #
337
+ #import re
338
+ #filepath = r"C:\Users\User\Documents\PythonProjects\gedcomx\.ged_files\_DJC_ Nunda Cartwright Family.ged"
339
+ #with open(filepath, 'r', encoding='utf-8') as file:
340
+ # string = file.read()
341
+ #
342
+ #for match in re.finditer(line, string):
343
+ # data = match.groupdict()
344
+ # print(data)
345
+ #'''