gedcom-x 0.5.2__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gedcomx/Gedcom.py CHANGED
@@ -1,419 +1,53 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
-
4
- import html
5
- import os
6
- from typing import List, Optional, Tuple
7
1
  import re
8
2
 
9
- BOM = '\ufeff'
10
-
11
- GEDCOM7_LINE_RE = re.compile(
12
- r"""^
13
- (?P<level>\d+) # Level
14
- (?:\s+@(?P<xref>[^@]+)@)? # Optional record identifier
15
- \s+(?P<tag>[A-Z0-9_-]+) # Tag
16
- (?:\s+(?P<value>.+))? # Optional value (may be XREF)
17
- $""",
18
- re.VERBOSE
19
- )
20
-
21
- XREF_RE = re.compile(r'^@[^@]+@$')
22
-
23
- # Add hash table for XREF of Zero Records?
24
-
25
- nonzero = '[1-9]'
26
- level = f'(?P<level>0|{nonzero}[0-9]*)'
27
- atsign = '@'
28
- underscore = '_'
29
- ucletter = '[A-Z]'
30
- tagchar = f'({ucletter}|[0-9]|{underscore})'
31
- xref = f'{atsign}({tagchar})+{atsign}'
32
- d = '\\ '
33
- stdtag = f'{ucletter}({tagchar})*'
34
- exttag = f'{underscore}({tagchar})+'
35
- tag = f'({stdtag}|{exttag})'
36
- voidptr = '@VOID@'
37
- pointer = f'(?P<pointer>{voidptr}|{xref})'
38
- nonat = '[\t -?A-\\U0010ffff]'
39
- noneol = '[\t -\\U0010ffff]'
40
- linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
41
- lineval = f'({pointer}|{linestr})'
42
- eol = '(\\\r(\\\n)?|\\\n)'
43
- line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
44
-
45
- class GedcomRecord():
46
-
47
- def __init__(self,line_num: Optional[int] =None,level: int =-1, tag='NONR', xref: Optional[str] = None, value: Optional[str] = None) -> None:
48
- self.line = line_num
49
- self._subRecords = []
50
- self.level = int(level)
51
- self.xref = xref
52
- self.pointer: bool = False
53
- self.tag = str(tag).strip()
54
- self.value = value
55
-
56
- self.parent = None
57
- self.root = None
58
-
59
- #if self.value and (self.value.endswith('@') and self.value.startswith('@')):
60
- # self.xref = self.value.replace('@','')
61
- # if level > 0:
62
- # self.pointer = True
63
-
64
- @property
65
- def _as_dict_(self):
66
- record_dict = {
67
- 'level':self.level,
68
- 'xref':self.xref,
69
- 'tag': self.tag,
70
- 'pointer': self.pointer,
71
- 'value': self.value,
72
- 'subrecords': [subrecord._as_dict_ for subrecord in self._subRecords]
73
- }
74
- return record_dict
75
-
76
- def addSubRecord(self, record):
77
- if record and record.level == self.level+1:
78
- record.parent = self
79
- self._subRecords.append(record)
80
- else:
81
- raise ValueError(f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
82
-
83
- def recordOnly(self):
84
- return GedcomRecord(line_num=self.line,level=self.level,tag=self.tag,value=self.value)
85
-
86
- def dump(self):
87
- record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
88
- for record in self._subRecords:
89
- record_dump += "\t" + record.dump() # Recursively call dump on sub_records and concatenate
90
- return record_dump
91
-
92
- def describe(self,subRecords: bool = False):
93
- level_str = '\t'* self.level
94
- description = f"Line {self.line}: {level_str} Level: {self.level}, tag: '{self.tag}', xref={self.xref} value: '{self.value}', subRecords: {len(self._subRecords)}"
95
- if subRecords:
96
- for subRecord in self.subRecords():
97
- description = description + '\n' + subRecord.describe(subRecords=True)
98
- return description
99
-
100
-
101
- def subRecord(self, tag):
102
- result = [record for record in self._subRecords if record.tag == tag]
103
- if len(result) == 0: return None
104
- return result
105
-
106
- def subRecords(self, tag: str = None):
107
- if not tag:
108
- return self._subRecords
109
- else:
110
- tags = tag.split("/", 1) # Split into first tag and the rest
111
-
112
- # Collect all records matching the first tag
113
- matching_records = [record for record in self._subRecords if record.tag == tags[0]]
114
-
115
- if not matching_records:
116
- return None # No matching records found for the first tag
117
-
118
- if len(tags) == 1:
119
- return matching_records # Return all matching records for the final tag
120
-
121
- # Recurse into each matching record's subRecords and collect results
122
- results = []
123
- for record in matching_records:
124
- sub_result = record.subRecords(tags[1])
125
- if sub_result:
126
- if isinstance(sub_result, list):
127
- results.extend(sub_result)
128
- else:
129
- results.append(sub_result)
130
-
131
- return results if results else None
132
-
133
- def __call__(self) -> None:
134
- return self.describe()
135
-
136
- def __iter__(self):
137
- return self._flatten_subrecords(self)
138
-
139
- def _flatten_subrecords(self, record):
140
- yield record
141
- for subrecord in record._subRecords:
142
- yield from self._flatten_subrecords(subrecord)
143
-
144
3
  class Gedcom():
145
- """
146
- Object representing a Genealogy in legacy GEDCOM 5.x / 7 format.
4
+ def __init__(self) -> None:
5
+ pass
147
6
 
148
- Parameters
149
- ----------
150
- records : List[GedcomRecord]
151
- List of GedcomRecords to initialize the genealogy with
152
- filepath : str
153
- Path to a GEDCOM (``*``.ged) file; if provided, the object will read, parse and initialize with the records in the file.
154
-
155
- Note
156
- ----
157
- **filepath** takes precedence over **records**.
158
- If no arguments are provided, Gedcom Object will initialize with no records.
159
-
160
-
161
- """
162
- _top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']
163
-
164
- def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
165
- if filepath:
166
- self.records = self._records_from_file(filepath)
167
- elif records:
168
- self.records: List[GedcomRecord] = records if records else []
169
-
170
-
171
-
172
- self._sources = []
173
- self._repositories = []
174
- self._individuals = []
175
- self._families = []
176
- self._objects = []
177
- self._snotes = []
178
-
179
- if self.records:
180
- for record in self.records:
181
- if record.tag == 'INDI':
182
-
183
- self._individuals.append(record)
184
- if record.tag == 'SOUR' and record.level == 0:
185
-
186
- self._sources.append(record)
187
- if record.tag == 'REPO' and record.level == 0:
188
- print(record.describe())
189
-
190
- self._repositories.append(record)
191
- if record.tag == 'FAM' and record.level == 0:
192
-
193
- self._families.append(record)
194
- if record.tag == 'OBJE' and record.level == 0:
195
-
196
- self._objects.append(record)
197
- if record.tag == 'SNOTE' and record.level == 0:
198
-
199
- record.xref = record.value
200
- self._snotes.append(record)
201
-
202
-
203
- # =========================================================
204
- # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
205
- # =========================================================
206
-
207
- @property
208
- def json(self):
209
- import json
210
- return json.dumps({'Individuals': [indi._as_dict_ for indi in self._individuals]},indent=4)
211
-
212
- def stats(self):
213
- def print_table(pairs):
214
-
215
- # Calculate the width of the columns
216
- name_width = max(len(name) for name, _ in pairs)
217
- value_width = max(len(str(value)) for _, value in pairs)
218
-
219
- # Print the header
220
- print('GEDCOM Import Results')
221
- header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
222
- print('-' * len(header))
223
- print(header)
224
- print('-' * len(header))
225
-
226
- # Print each pair in the table
227
- for name, value in pairs:
228
- print(f"{name.ljust(name_width)} | {str(value).ljust(value_width)}")
229
-
230
- imports_stats = [
231
- ('Top Level Records', len(self.records)),
232
- ('Individuals', len(self.individuals)),
233
- ('Family Group Records', len(self.families)),
234
- ('Repositories', len(self.repositories)),
235
- ('Sources', len(self.sources)),
236
- ('Objects', len(self.objects))
237
- ]
238
-
239
- print_table(imports_stats)
240
-
241
- @property
242
- def sources(self) -> List[GedcomRecord]:
243
- return self._sources
244
-
245
- @sources.setter
246
- def sources(self, value: List[GedcomRecord]):
247
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
248
- raise ValueError("sources must be a list of GedcomRecord objects.")
249
- self._sources = value
250
-
251
- @property
252
- def repositories(self) -> List[GedcomRecord]:
253
- """
254
- List of **REPO** records found in the Genealogy
255
- """
256
- return self._repositories
257
-
258
- @repositories.setter
259
- def repositories(self, value: List[GedcomRecord]):
260
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
261
- raise ValueError("repositories must be a list of GedcomRecord objects.")
262
- self._repositories = value
263
-
264
- @property
265
- def individuals(self) -> List[GedcomRecord]:
266
- return self._individuals
267
-
268
- @individuals.setter
269
- def individuals(self, value: List[GedcomRecord]):
270
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
271
- raise ValueError("individuals must be a list of GedcomRecord objects.")
272
- self._individuals = value
273
-
274
- @property
275
- def families(self) -> List[GedcomRecord]:
276
- return self._families
277
-
278
- @families.setter
279
- def families(self, value: List[GedcomRecord]):
280
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
281
- raise ValueError("families must be a list of GedcomRecord objects.")
282
- self._families = value
283
-
284
- @property
285
- def objects(self) -> List[GedcomRecord]:
286
- return self._objects
287
-
288
- @objects.setter
289
- def objects(self, value: List[GedcomRecord]):
290
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
291
- raise ValueError("objects must be a list of GedcomRecord objects.")
292
- self._objects = value
293
-
294
-
295
-
296
- def write(self) -> bool:
7
+ @staticmethod
8
+ def read_gedcom_version(filepath: str) -> str | None:
297
9
  """
298
- Method placeholder for writing GEDCOM files.
10
+ Reads only the HEAD section of a GEDCOM file and returns the GEDCOM standard version.
11
+ Looks specifically for HEAD → GEDC → VERS.
299
12
 
300
- Raises
301
- ------
302
- NotImplementedError
303
- Writing to legacy GEDCOM files is not currently implemented.
13
+ Returns:
14
+ str: GEDCOM version (e.g., "5.5.1" or "7.0.0"), or None if not found.
304
15
  """
305
- raise NotImplementedError("Writing of GEDCOM files is not implemented.")
306
-
307
- @staticmethod
308
- def _records_from_file(filepath: str) -> List[GedcomRecord]:
309
- def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
310
- """
311
- Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)
312
-
313
- Returns:
314
- (level, xref_id, tag, value, xref_value)
315
- """
316
- match = GEDCOM7_LINE_RE.match(line.strip())
317
- if not match:
318
- return None
319
-
320
- level = int(match.group("level"))
321
- xref_id = match.group("xref")
322
- tag = match.group("tag")
323
- value = match.group("value")
324
- if value == 'None': value = None
325
- xref_value = value.strip("@") if value and XREF_RE.match(value.strip()) else None
326
-
327
- return level, xref_id, tag, value, xref_value
328
- extension = '.ged'
329
-
330
- if not os.path.exists(filepath):
331
- print(f"File does not exist: {filepath}")
332
- raise FileNotFoundError
333
- elif not filepath.lower().endswith(extension.lower()):
334
- print(f"File does not have the correct extension: {filepath}")
335
- raise Exception("File does not appear to be a GEDCOM")
336
-
337
- print("Reading from GEDCOM file")
338
- with open(filepath, 'r', encoding='utf-8') as file:
339
- lines = [line.strip() for line in file]
340
-
341
- records = []
342
- record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
343
-
344
- for l, line in enumerate(lines):
345
- if line.startswith(BOM):
346
- line = line.lstrip(BOM)
347
- line = html.unescape(line).replace('&quot;', '')
348
-
349
- if line.strip() == '':
16
+ version = None
17
+ inside_head = False
18
+ inside_gedc = False
19
+
20
+ with open(filepath, "r", encoding="utf-8") as f:
21
+ for line in f:
22
+ parts = line.strip().split(maxsplit=2)
23
+ if not parts:
350
24
  continue
351
25
 
352
- level, tag, value = '', '', ''
353
-
354
- # Split the line into the first two columns and the rest
355
- parts = line.split(maxsplit=2)
356
- if len(parts) == 3:
357
- level, col2, col3 = parts
358
-
359
- if col3 in Gedcom._top_level_tags:
360
- tag = col3
361
- value = col2
362
- else:
363
- tag = col2
364
- value = col3
365
-
366
- else:
367
- level, tag = parts
368
-
369
- level, xref, tag, value, xref_value = parse_gedcom7_line(line)
370
-
371
- if xref is None and xref_value is not None:
372
- xref = xref_value
373
- # print(l, level, xref, tag, value, xref_value)
374
-
375
- level = int(level)
376
-
377
- new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, xref=xref,value=value)
378
-
379
-
380
- if level == 0:
381
- records.append(new_record)
382
- else:
383
- new_record.root = record_map[0]
384
- new_record.parent = record_map[int(level) - 1]
385
- record_map[int(level) - 1].addSubRecord(new_record)
386
- record_map[int(level)] = new_record
387
-
388
-
389
- return records if records else None
390
-
391
- @staticmethod
392
- def fromFile(filepath: str) -> 'Gedcom':
393
- """
394
- Static method to create a Gedcom object from a GEDCOM file.
26
+ level = int(parts[0])
27
+ tag = parts[1] if len(parts) > 1 else ""
28
+ value = parts[2] if len(parts) > 2 else None
395
29
 
396
- Args:
397
- filepath (str): The path to the GEDCOM file.
30
+ # Enter HEAD
31
+ if level == 0 and tag == "HEAD":
32
+ inside_head = True
33
+ continue
398
34
 
399
- Returns:
400
- Gedcom: An instance of the Gedcom class.
401
- """
402
- records = Gedcom._records_from_file(filepath)
403
-
404
- gedcom = Gedcom(records=records)
35
+ # Leave HEAD block
36
+ if inside_head and level == 0:
37
+ break
405
38
 
406
- return gedcom
407
-
408
- def merge_with_file(self, file_path: str) -> bool:
409
- """
410
- Adds records from a valid (``*``.ged) file to the current Genealogy
39
+ # Inside HEAD, look for GEDC
40
+ if inside_head and level == 1 and tag == "GEDC":
41
+ inside_gedc = True
42
+ continue
411
43
 
412
- Args:
413
- filepath (str): The path to the GEDCOM file.
44
+ # If we drop back to level 1 (but not GEDC), stop looking inside GEDC
45
+ if inside_gedc and level == 1:
46
+ inside_gedc = False
414
47
 
415
- Returns:
416
- bool: Indicates if merge was successful.
417
- """
418
- return True
48
+ # Inside GEDC, look for VERS
49
+ if inside_gedc and tag == "VERS":
50
+ version = value
51
+ break
419
52
 
53
+ return version