gedcom-x 0.5.1__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
gedcomx/Gedcom.py CHANGED
@@ -1,346 +1,53 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import html
5
- import os
6
- from typing import List, Optional
7
-
8
- BOM = '\ufeff'
9
-
10
- # Add hash table for XREF of Zero Recrods?
11
-
12
- nonzero = '[1-9]'
13
- level = f'(?P<level>0|{nonzero}[0-9]*)'
14
- atsign = '@'
15
- underscore = '_'
16
- ucletter = '[A-Z]'
17
- tagchar = f'({ucletter}|[0-9]|{underscore})'
18
- xref = f'{atsign}({tagchar})+{atsign}'
19
- d = '\\ '
20
- stdtag = f'{ucletter}({tagchar})*'
21
- exttag = f'{underscore}({tagchar})+'
22
- tag = f'({stdtag}|{exttag})'
23
- voidptr = '@VOID@'
24
- pointer = f'(?P<pointer>{voidptr}|{xref})'
25
- nonat = '[\t -?A-\\U0010ffff]'
26
- noneol = '[\t -\\U0010ffff]'
27
- linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
28
- lineval = f'({pointer}|{linestr})'
29
- eol = '(\\\r(\\\n)?|\\\n)'
30
- line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
31
-
32
- class GedcomRecord():
33
- def __init__(self,line_num=None,level=-1, tag='NONR', xref='', value=None) -> None:
34
- self.line_num = line_num
35
- self._subRecords = []
36
- self.level = int(level)
37
- self.xref = xref
38
- self.pointer: bool = False
39
- self.tag = str(tag).strip()
40
- self.value = value
41
-
42
- self.parent = None
43
- self.root = None
44
-
45
- if self.value.endswith('@') and self.value.startswith('@'):
46
- self.xref = self.value.replace('@','')
47
- if level > 0:
48
- self.pointer = True
49
-
50
- @property
51
- def _as_dict_(self):
52
- record_dict = {
53
- 'level':self.level,
54
- 'xref':self.xref,
55
- 'tag': self.tag,
56
- 'pointer': self.pointer,
57
- 'value': self.value,
58
- 'subrecords': [subrecord._as_dict_ for subrecord in self._subRecords]
59
- }
60
- return record_dict
61
-
62
- def addSubRecord(self, record):
63
- if record and record.level == self.level+1:
64
- record.parent = self
65
- self._subRecords.append(record)
66
- else:
67
- raise ValueError(f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
68
-
69
- def recordOnly(self):
70
- return GedcomRecord(line_num=self.line_num,level=self.level,tag=self.tag,value=self.value)
71
-
72
- def dump(self):
73
- record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
74
- for record in self._subRecords:
75
- record_dump += "\t" + record.dump() # Recursively call dump on sub_records and concatenate
76
- return record_dump
77
-
78
- def describe(self,subRecords: bool = False):
79
- level_str = '\t'* self.level
80
- description = f"Line {self.line_num}: {level_str} Level: {self.level}, tag: '{self.tag}', value: '{self.value}', subRecords: {len(self._subRecords)}"
81
- if subRecords:
82
- for subRecord in self.subRecords():
83
- description = description + '\n' + subRecord.describe(subRecords=True)
84
- return description
85
-
86
-
87
- def subRecord(self, tag):
88
- result = [record for record in self._subRecords if record.tag == tag]
89
- if len(result) == 0: return None
90
- return result
91
-
92
- def subRecords(self, tag: str = None):
93
- if not tag:
94
- return self._subRecords
95
- else:
96
- tags = tag.split("/", 1) # Split into first tag and the rest
97
-
98
- # Collect all records matching the first tag
99
- matching_records = [record for record in self._subRecords if record.tag == tags[0]]
100
-
101
- if not matching_records:
102
- return None # No matching records found for the first tag
103
-
104
- if len(tags) == 1:
105
- return matching_records # Return all matching records for the final tag
106
-
107
- # Recurse into each matching record's subRecords and collect results
108
- results = []
109
- for record in matching_records:
110
- sub_result = record.subRecords(tags[1])
111
- if sub_result:
112
- if isinstance(sub_result, list):
113
- results.extend(sub_result)
114
- else:
115
- results.append(sub_result)
116
-
117
- return results if results else None
118
-
119
- def __call__(self) -> None:
120
- return self.describe()
121
-
122
- def __iter__(self):
123
- return self._flatten_subrecords(self)
124
-
125
- def _flatten_subrecords(self, record):
126
- yield record
127
- for subrecord in record._subRecords:
128
- yield from self._flatten_subrecords(subrecord)
1
+ import re
129
2
 
130
3
  class Gedcom():
131
- top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD']
132
-
133
- # =========================================================
134
- # 1. INITIALIZATION
135
- # =========================================================
136
-
137
-
138
- def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
139
- if filepath:
140
- self.records = self._records_from_file(filepath)
141
- elif records:
142
- self.records: List[GedcomRecord] = records if records else []
143
-
144
-
145
- self._sources = []
146
- self._repositories = []
147
- self._individuals = []
148
- self._families = []
149
- self._objects = []
150
-
151
- if self.records:
152
- for record in self.records:
153
- if record.tag == 'INDI':
154
- record.xref = record.value
155
- self._individuals.append(record)
156
- if record.tag == 'SOUR' and record.level == 0:
157
- record.xref = record.value
158
- self._sources.append(record)
159
- if record.tag == 'REPO' and record.level == 0:
160
- record.xref = record.value
161
- self._repositories.append(record)
162
- if record.tag == 'FAM' and record.level == 0:
163
- record.xref = record.value
164
- self._families.append(record)
165
- if record.tag == 'OBJE' and record.level == 0:
166
- record.xref = record.value
167
- self._objects.append(record)
168
-
169
- # =========================================================
170
- # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
171
- # =========================================================
172
-
173
- @property
174
- def json(self):
175
- import json
176
- return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]},indent=4)
177
-
178
- def stats(self):
179
- def print_table(pairs):
180
-
181
- # Calculate the width of the columns
182
- name_width = max(len(name) for name, _ in pairs)
183
- value_width = max(len(str(value)) for _, value in pairs)
184
-
185
- # Print the header
186
- print('GEDCOM Import Results')
187
- header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
188
- print('-' * len(header))
189
- print(header)
190
- print('-' * len(header))
191
-
192
- # Print each pair in the table
193
- for name, value in pairs:
194
- print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")
195
-
196
- imports_stats = [
197
- ('Top Level Records', len(self.records)),
198
- ('Individuals', len(self.individuals)),
199
- ('Family Group Records', len(self.families)),
200
- ('Repositories', len(self.repositories)),
201
- ('Sources', len(self.sources)),
202
- ('Objects', len(self.objects))
203
- ]
204
-
205
- print_table(imports_stats)
206
-
207
- @property
208
- def sources(self) -> List[GedcomRecord]:
209
- return self._sources
210
-
211
- @sources.setter
212
- def sources(self, value: List[GedcomRecord]):
213
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
214
- raise ValueError("sources must be a list of GedcomRecord objects.")
215
- self._sources = value
216
-
217
- @property
218
- def repositories(self) -> List[GedcomRecord]:
219
- return self._repositories
220
-
221
- @repositories.setter
222
- def repositories(self, value: List[GedcomRecord]):
223
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
224
- raise ValueError("repositories must be a list of GedcomRecord objects.")
225
- self._repositories = value
226
-
227
- @property
228
- def individuals(self) -> List[GedcomRecord]:
229
- return self._individuals
230
-
231
- @individuals.setter
232
- def individuals(self, value: List[GedcomRecord]):
233
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
234
- raise ValueError("individuals must be a list of GedcomRecord objects.")
235
- self._individuals = value
236
-
237
- @property
238
- def families(self) -> List[GedcomRecord]:
239
- return self._families
240
-
241
- @families.setter
242
- def families(self, value: List[GedcomRecord]):
243
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
244
- raise ValueError("families must be a list of GedcomRecord objects.")
245
- self._families = value
246
-
247
- @property
248
- def objects(self) -> List[GedcomRecord]:
249
- return self._objects
250
-
251
- @objects.setter
252
- def objects(self, value: List[GedcomRecord]):
253
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
254
- raise ValueError("objects must be a list of GedcomRecord objects.")
255
- self._objects = value
256
-
257
- # =========================================================
258
- # 3. METHODS
259
- # =========================================================
260
-
261
- def write(self):
262
- """
263
- Method placeholder for writing GEDCOM files.
264
- """
265
- raise NotImplementedError("Writing of GEDCOM files is not implemented.")
4
+ def __init__(self) -> None:
5
+ pass
266
6
 
267
7
  @staticmethod
268
- def _records_from_file(filepath: str) -> List[GedcomRecord]:
269
- extension = '.ged'
270
-
271
- if not os.path.exists(filepath):
272
- print(f"File does not exist: {filepath}")
273
- raise FileNotFoundError
274
- elif not filepath.lower().endswith(extension.lower()):
275
- print(f"File does not have the correct extension: {filepath}")
276
- raise Exception("File does not appear to be a GEDCOM")
8
+ def read_gedcom_version(filepath: str) -> str | None:
9
+ """
10
+ Reads only the HEAD section of a GEDCOM file and returns the GEDCOM standard version.
11
+ Looks specifically for HEAD → GEDC → VERS.
277
12
 
278
- print("Reading from GEDCOM file")
279
- with open(filepath, 'r', encoding='utf-8') as file:
280
- lines = [line.strip() for line in file]
281
-
282
- records = []
283
- record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
284
- for l, line in enumerate(lines):
285
- if line.startswith(BOM):
286
- line = line.lstrip(BOM)
287
- line = html.unescape(line).replace('&quot;', '')
288
-
289
- if line.strip() == '':
13
+ Returns:
14
+ str: GEDCOM version (e.g., "5.5.1" or "7.0.0"), or None if not found.
15
+ """
16
+ version = None
17
+ inside_head = False
18
+ inside_gedc = False
19
+
20
+ with open(filepath, "r", encoding="utf-8") as f:
21
+ for line in f:
22
+ parts = line.strip().split(maxsplit=2)
23
+ if not parts:
290
24
  continue
291
25
 
292
- level, tag, value = '', '', ''
293
-
294
- # Split the line into the first two columns and the rest
295
- parts = line.split(maxsplit=2)
296
- if len(parts) == 3:
297
- level, col2, col3 = parts
298
-
299
- if col3 in Gedcom.top_level_tags:
300
- tag = col3
301
- value = col2
302
- else:
303
- tag = col2
304
- value = col3
305
- else:
306
- level, tag = parts
26
+ level = int(parts[0])
27
+ tag = parts[1] if len(parts) > 1 else ""
28
+ value = parts[2] if len(parts) > 2 else None
307
29
 
308
- level = int(level)
30
+ # Enter HEAD
31
+ if level == 0 and tag == "HEAD":
32
+ inside_head = True
33
+ continue
309
34
 
310
- new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, value=value)
311
- if level == 0:
312
- records.append(new_record)
313
- else:
314
- new_record.root = record_map[0]
315
- new_record.parent = record_map[int(level) - 1]
316
- record_map[int(level) - 1].addSubRecord(new_record)
317
- record_map[int(level)] = new_record
318
-
319
- return records if records else None
35
+ # Leave HEAD block
36
+ if inside_head and level == 0:
37
+ break
320
38
 
321
- @staticmethod
322
- def fromFile(filepath: str) -> 'Gedcom':
323
- """
324
- Static method to create a Gedcom object from a GEDCOM file.
325
-
326
- Args:
327
- filepath (str): The path to the GEDCOM file.
39
+ # Inside HEAD, look for GEDC
40
+ if inside_head and level == 1 and tag == "GEDC":
41
+ inside_gedc = True
42
+ continue
328
43
 
329
- Returns:
330
- Gedcom: An instance of the Gedcom class.
331
- """
332
- records = Gedcom._records_from_file(filepath)
333
- gedcom = Gedcom(records=records)
44
+ # If we drop back to level 1 (but not GEDC), stop looking inside GEDC
45
+ if inside_gedc and level == 1:
46
+ inside_gedc = False
334
47
 
335
- return gedcom
48
+ # Inside GEDC, look for VERS
49
+ if inside_gedc and tag == "VERS":
50
+ version = value
51
+ break
336
52
 
337
- #
338
- #import re
339
- #filepath = r"C:\Users\User\Documents\PythonProjects\gedcomx\.ged_files\_DJC_ Nunda Cartwright Family.ged"
340
- #with open(filepath, 'r', encoding='utf-8') as file:
341
- # string = file.read()
342
- #
343
- #for match in re.finditer(line, string):
344
- # data = match.groupdict()
345
- # print(data)
346
- #'''
53
+ return version