gedcom-x 0.5.5__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gedcomx/Gedcom5x.py CHANGED
@@ -3,11 +3,27 @@
3
3
 
4
4
  import html
5
5
  import os
6
- from typing import List, Optional, Tuple
6
+ from typing import List, Optional, Tuple, Any
7
7
  import re
8
8
  from collections import defaultdict
9
9
  from typing import Iterable, Iterator, List, Optional, Tuple, Union
10
10
 
11
+ import logging
12
+ from .LoggingHub import hub, ChannelConfig
13
+
14
+ job_id = "gedcomx.parsing.GEDCOM5x"
15
+
16
+ hub.start_channel(
17
+ ChannelConfig(
18
+ name=job_id,
19
+ path=f"logs/{job_id}.log",
20
+ level=logging.DEBUG,
21
+ rotation="size:10MB:3", # rotate by size, keep 3 backups
22
+ )
23
+ )
24
+
25
+ log = logging.getLogger("gedcomx")
26
+
11
27
  BOM = '\ufeff'
12
28
 
13
29
  GEDCOM7_LINE_RE = re.compile(
@@ -47,25 +63,25 @@ line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
47
63
  from typing import List, Optional, Iterator, Union
48
64
 
49
65
 
50
- class GedcomRecord():
66
+ class Gedcom5xRecord():
51
67
  def __init__(
52
68
  self,
53
69
  line_num: Optional[int] = None,
54
70
  level: int = -1,
55
- tag: str = "NONR",
71
+ tag: str | None = "NONR",
56
72
  xref: Optional[str] = None,
57
73
  value: Optional[str] = None,
58
74
  ) -> None:
59
75
  self.line = line_num
60
- self._subRecords: List[GedcomRecord] = []
76
+ self._subRecords: List[Gedcom5xRecord] = []
61
77
  self.level = int(level)
62
78
  self.xref = xref
63
79
  self.pointer: bool = False
64
80
  self.tag = str(tag).strip()
65
81
  self.value = value
66
82
 
67
- self.parent: Optional[GedcomRecord] = None
68
- self.root: Optional[GedcomRecord] = None
83
+ self.parent: Optional[Gedcom5xRecord] = None
84
+ self.root: Optional[Gedcom5xRecord] = None
69
85
 
70
86
  # ───────────────────────────────
71
87
  # Dict/JSON friendly view
@@ -84,7 +100,7 @@ class GedcomRecord():
84
100
  # ───────────────────────────────
85
101
  # Subrecord management
86
102
  # ───────────────────────────────
87
- def addSubRecord(self, record: "GedcomRecord"):
103
+ def addSubRecord(self, record: "Gedcom5xRecord"):
88
104
 
89
105
  if record is not None and (record.level == (self.level + 1)):
90
106
  record.parent = self
@@ -95,7 +111,7 @@ class GedcomRecord():
95
111
  )
96
112
 
97
113
  def recordOnly(self):
98
- return GedcomRecord(
114
+ return Gedcom5xRecord(
99
115
  line_num=self.line, level=self.level, tag=self.tag, value=self.value
100
116
  )
101
117
 
@@ -119,7 +135,7 @@ class GedcomRecord():
119
135
  f"subRecords: {len(self._subRecords)}"
120
136
  )
121
137
  if subRecords:
122
- for subRecord in self.subRecords():
138
+ for subRecord in self.subRecords() or []:
123
139
  description += "\n" + subRecord.describe(subRecords=True)
124
140
  return description
125
141
 
@@ -128,9 +144,9 @@ class GedcomRecord():
128
144
  # ───────────────────────────────
129
145
  def subRecord(self, tag: str):
130
146
  result = [r for r in self._subRecords if r.tag == tag]
131
- return None if not result else result
147
+ return [] if not result else result
132
148
 
133
- def subRecords(self, tag: str = None):
149
+ def subRecords(self, tag: str | None = None) -> List['Gedcom5xRecord']:
134
150
  if not tag:
135
151
  return self._subRecords
136
152
  tags = tag.split("/", 1)
@@ -138,7 +154,7 @@ class GedcomRecord():
138
154
  # Collect matching first-level subrecords
139
155
  matches = [r for r in self._subRecords if r.tag == tags[0]]
140
156
  if not matches:
141
- return None
157
+ return []
142
158
 
143
159
  if len(tags) == 1:
144
160
  return matches
@@ -152,7 +168,7 @@ class GedcomRecord():
152
168
  results.extend(sub_result)
153
169
  else:
154
170
  results.append(sub_result)
155
- return results if results else None
171
+ return results if results else []
156
172
 
157
173
  # ───────────────────────────────
158
174
  # Iteration / Subscriptability
@@ -160,11 +176,11 @@ class GedcomRecord():
160
176
  def __call__(self) -> str:
161
177
  return self.describe()
162
178
 
163
- def __iter__(self) -> Iterator["GedcomRecord"]:
179
+ def __iter__(self) -> Iterator["Gedcom5xRecord"]:
164
180
  """Iterates recursively over self and all subrecords."""
165
181
  yield from self._flatten_subrecords(self)
166
182
 
167
- def _flatten_subrecords(self, record: "GedcomRecord") -> Iterator["GedcomRecord"]:
183
+ def _flatten_subrecords(self, record: "Gedcom5xRecord") -> Iterator["Gedcom5xRecord"]:
168
184
  yield record
169
185
  for sub in record._subRecords:
170
186
  yield from self._flatten_subrecords(sub)
@@ -172,7 +188,7 @@ class GedcomRecord():
172
188
  def __len__(self) -> int:
173
189
  return len(self._subRecords)
174
190
 
175
- def __getitem__(self, key: Union[int, slice, str]) -> Union["GedcomRecord", List["GedcomRecord"]]:
191
+ def __getitem__(self, key: Union[int, slice, str]) -> Union["Gedcom5xRecord", List["Gedcom5xRecord"]]:
176
192
  """
177
193
  - rec[0] -> first subrecord
178
194
  - rec[1:3] -> slice of subrecords
@@ -218,23 +234,26 @@ class Gedcom5x():
218
234
  """
219
235
  _top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']
220
236
 
221
- def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
237
+ def __init__(self, records: Optional[List[Gedcom5xRecord]] = None,filepath: str | None = None) -> None:
238
+
239
+ self.records: List[Gedcom5xRecord] = records or []
222
240
  if filepath:
223
241
  self.records = self._records_from_file(filepath)
224
242
  elif records:
225
- self.records: List[GedcomRecord] = records if records else []
243
+ self.records: List[Gedcom5xRecord] = records if records else []
244
+
226
245
 
227
246
  # Fast tag index: {'HEAD': [rec], 'INDI': [rec1, rec2, ...], ...}
228
- self._tag_index: dict[str, List[GedcomRecord]] = defaultdict(list)
247
+ self._tag_index: dict[str, List[Gedcom5xRecord]] = defaultdict(list)
229
248
  self._reindex()
230
249
 
231
- self.header: GedcomRecord | None = None
232
- self._sources: List[GedcomRecord] = []
233
- self._repositories: List[GedcomRecord] = []
234
- self._individuals: List[GedcomRecord] = []
235
- self._families: List[GedcomRecord] = []
236
- self._objects: List[GedcomRecord] = []
237
- self._snotes: List[GedcomRecord] = []
250
+ self.header: Gedcom5xRecord | None = None
251
+ self._sources: List[Gedcom5xRecord] = []
252
+ self._repositories: List[Gedcom5xRecord] = []
253
+ self._individuals: List[Gedcom5xRecord] = []
254
+ self._families: List[Gedcom5xRecord] = []
255
+ self._objects: List[Gedcom5xRecord] = []
256
+ self._snotes: List[Gedcom5xRecord] = []
238
257
  self.version = None
239
258
 
240
259
  if self.records:
@@ -270,7 +289,7 @@ class Gedcom5x():
270
289
  def __len__(self) -> int:
271
290
  return len(self.records)
272
291
 
273
- def __iter__(self) -> Iterator['GedcomRecord']:
292
+ def __iter__(self) -> Iterator['Gedcom5xRecord']:
274
293
  # Enables: for x in gedcom:
275
294
  return iter(self.records)
276
295
 
@@ -282,7 +301,7 @@ class Gedcom5x():
282
301
  return 0 <= key < len(self.records)
283
302
  return False
284
303
 
285
- def __getitem__(self, key: Key) -> Union['GedcomRecord', List['GedcomRecord']]:
304
+ def __getitem__(self, key: Key) -> Union['Gedcom5xRecord', List['Gedcom5xRecord']]:
286
305
  """
287
306
  - gedcom[0] -> GedcomRecord at index 0
288
307
  - gedcom[1:5] -> list of GedcomRecord (slice)
@@ -302,30 +321,30 @@ class Gedcom5x():
302
321
  raise TypeError(f"Unsupported key type: {type(key).__name__}")
303
322
 
304
323
  # Optional: convenience helpers
305
- def by_tag(self, tag: str) -> List['GedcomRecord']:
324
+ def by_tag(self, tag: str) -> List['Gedcom5xRecord']:
306
325
  """Always return a list of records for a tag (empty list if none)."""
307
326
  return list(self._tag_index.get(tag, []))
308
327
 
309
- def first(self, tag: str) -> Optional['GedcomRecord']:
328
+ def first(self, tag: str) -> Optional['Gedcom5xRecord']:
310
329
  """Return the first record with a given tag, or None."""
311
330
  lst = self._tag_index.get(tag, [])
312
331
  return lst[0] if lst else None
313
332
 
314
333
  # If you add/replace records after init, keep the index fresh:
315
- def append(self, rec: 'GedcomRecord') -> None:
334
+ def append(self, rec: 'Gedcom5xRecord') -> None:
316
335
  self.records.append(rec)
317
336
  self._tag_index.setdefault(rec.tag, []).append(rec)
318
337
 
319
- def extend(self, recs: Iterable['GedcomRecord']) -> None:
338
+ def extend(self, recs: Iterable['Gedcom5xRecord']) -> None:
320
339
  self.records.extend(recs)
321
340
  for r in recs:
322
341
  self._tag_index.setdefault(r.tag, []).append(r)
323
342
 
324
- def insert(self, idx: int, rec: 'GedcomRecord') -> None:
343
+ def insert(self, idx: int, rec: 'Gedcom5xRecord') -> None:
325
344
  self.records.insert(idx, rec)
326
345
  self._tag_index.setdefault(rec.tag, []).append(rec)
327
346
 
328
- def remove(self, rec: 'GedcomRecord') -> None:
347
+ def remove(self, rec: 'Gedcom5xRecord') -> None:
329
348
  self.records.remove(rec)
330
349
  try:
331
350
  bucket = self._tag_index.get(rec.tag)
@@ -356,7 +375,7 @@ class Gedcom5x():
356
375
  value_width = max(len(str(value)) for _, value in pairs)
357
376
 
358
377
  # Print the header
359
- print('GEDCOM Import Results')
378
+ print(f'GEDCOM {self.version} Import Results')
360
379
  header = f"{'Type'.ljust(name_width)} | {'Count'.ljust(value_width)}"
361
380
  print('-' * len(header))
362
381
  print(header)
@@ -378,60 +397,58 @@ class Gedcom5x():
378
397
  print_table(imports_stats)
379
398
 
380
399
  @property
381
- def sources(self) -> List[GedcomRecord]:
400
+ def sources(self) -> List[Gedcom5xRecord]:
382
401
  return self._sources
383
402
 
384
403
  @sources.setter
385
- def sources(self, value: List[GedcomRecord]):
386
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
404
+ def sources(self, value: List[Gedcom5xRecord]):
405
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
387
406
  raise ValueError("sources must be a list of GedcomRecord objects.")
388
407
  self._sources = value
389
408
 
390
409
  @property
391
- def repositories(self) -> List[GedcomRecord]:
410
+ def repositories(self) -> List[Gedcom5xRecord]:
392
411
  """
393
412
  List of **REPO** records found in the Genealogy
394
413
  """
395
414
  return self._repositories
396
415
 
397
416
  @repositories.setter
398
- def repositories(self, value: List[GedcomRecord]):
399
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
417
+ def repositories(self, value: List[Gedcom5xRecord]):
418
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
400
419
  raise ValueError("repositories must be a list of GedcomRecord objects.")
401
420
  self._repositories = value
402
421
 
403
422
  @property
404
- def individuals(self) -> List[GedcomRecord]:
423
+ def individuals(self) -> List[Gedcom5xRecord]:
405
424
  return self._individuals
406
425
 
407
426
  @individuals.setter
408
- def individuals(self, value: List[GedcomRecord]):
409
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
427
+ def individuals(self, value: List[Gedcom5xRecord]):
428
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
410
429
  raise ValueError("individuals must be a list of GedcomRecord objects.")
411
430
  self._individuals = value
412
431
 
413
432
  @property
414
- def families(self) -> List[GedcomRecord]:
433
+ def families(self) -> List[Gedcom5xRecord]:
415
434
  return self._families
416
435
 
417
436
  @families.setter
418
- def families(self, value: List[GedcomRecord]):
419
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
437
+ def families(self, value: List[Gedcom5xRecord]):
438
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
420
439
  raise ValueError("families must be a list of GedcomRecord objects.")
421
440
  self._families = value
422
441
 
423
442
  @property
424
- def objects(self) -> List[GedcomRecord]:
443
+ def objects(self) -> List[Gedcom5xRecord]:
425
444
  return self._objects
426
445
 
427
446
  @objects.setter
428
- def objects(self, value: List[GedcomRecord]):
429
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
447
+ def objects(self, value: List[Gedcom5xRecord]):
448
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
430
449
  raise ValueError("objects must be a list of GedcomRecord objects.")
431
450
  self._objects = value
432
451
 
433
-
434
-
435
452
  def write(self) -> bool:
436
453
  """
437
454
  Method placeholder for writing GEDCOM files.
@@ -444,7 +461,7 @@ class Gedcom5x():
444
461
  raise NotImplementedError("Writing of GEDCOM files is not implemented.")
445
462
 
446
463
  @staticmethod
447
- def _records_from_file(filepath: str) -> List[GedcomRecord]:
464
+ def _records_from_file(file_path: str) -> List[Gedcom5xRecord]:
448
465
  def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
449
466
  """
450
467
  Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)
@@ -466,19 +483,19 @@ class Gedcom5x():
466
483
  return level, xref_id, tag, value, xref_value
467
484
  extension = '.ged'
468
485
 
469
- if not os.path.exists(filepath):
470
- print(f"File does not exist: {filepath}")
486
+ if not os.path.exists(file_path):
487
+ print(f"File does not exist: {file_path}")
471
488
  raise FileNotFoundError
472
- elif not filepath.lower().endswith(extension.lower()):
473
- print(f"File does not have the correct extension: {filepath}")
489
+ elif not file_path.lower().endswith(extension.lower()):
490
+ print(f"File does not have the correct extension: {file_path}")
474
491
  raise Exception("File does not appear to be a GEDCOM")
475
492
 
476
493
  print("Reading from GEDCOM file")
477
- with open(filepath, 'r', encoding='utf-8') as file:
494
+ with open(file_path, 'r', encoding='utf-8') as file:
478
495
  lines = [line.strip() for line in file]
479
496
 
480
497
  records = []
481
- record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
498
+ record_map: dict[int,Any] = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
482
499
 
483
500
  for l, line in enumerate(lines):
484
501
  if line.startswith(BOM):
@@ -505,15 +522,18 @@ class Gedcom5x():
505
522
  else:
506
523
  level, tag = parts
507
524
 
508
- level, xref, tag, value, xref_value = parse_gedcom7_line(line)
525
+ level, xref, tag, value, xref_value = parse_gedcom7_line(line) or tuple([None, None, None, None])
526
+
509
527
 
510
528
  if xref is None and xref_value is not None:
511
529
  xref = xref_value
512
530
  # print(l, level, xref, tag, value, xref_value)
513
-
514
- level = int(level)
515
531
 
516
- new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, xref=xref,value=value)
532
+ if isinstance(level,int):
533
+ level = int(level)
534
+ else: raise ValueError(f"Record had a level of {level}")
535
+
536
+ new_record = Gedcom5xRecord(line_num=l + 1, level=level, tag=tag if tag else None, xref=xref,value=value)
517
537
 
518
538
 
519
539
  if level == 0:
@@ -523,12 +543,14 @@ class Gedcom5x():
523
543
  new_record.parent = record_map[int(level) - 1]
524
544
  record_map[int(level) - 1].addSubRecord(new_record)
525
545
  record_map[int(level)] = new_record
546
+ with hub.use(job_id):
547
+ log.info(new_record.describe())
526
548
 
527
549
 
528
- return records if records else None
550
+ return records if records else []
529
551
 
530
552
  @staticmethod
531
- def fromFile(filepath: str) -> 'Gedcom':
553
+ def fromFile(file_path: str) -> 'Gedcom5x':
532
554
  """
533
555
  Static method to create a Gedcom object from a GEDCOM file.
534
556
 
@@ -538,21 +560,37 @@ class Gedcom5x():
538
560
  Returns:
539
561
  Gedcom: An instance of the Gedcom class.
540
562
  """
541
- records = Gedcom._records_from_file(filepath)
563
+ records = Gedcom5x._records_from_file(file_path)
542
564
 
543
- gedcom = Gedcom(records=records)
565
+ gedcom = Gedcom5x(records=records)
544
566
 
545
567
  return gedcom
546
568
 
547
- def merge_with_file(self, file_path: str) -> bool:
548
- """
549
- Adds records from a valid (``*``.ged) file to the current Genealogy
550
-
551
- Args:
552
- filepath (str): The path to the GEDCOM file.
569
+ def load_file(self,file_path: str) -> None:
570
+ records = Gedcom5x._records_from_file(file_path)
571
+ if records:
572
+ self.records.extend(records)
573
+ for record in self.records:
574
+ if record.tag == 'HEAD':
575
+ pass
576
+ #self.header = record
577
+ #version = record['GEDC']['VERS'].value
578
+ #if not str(version)[0:2] == str(self.version)[0:2]: #TODO Deal with no VERS
579
+ # raise ValueError(f'Wrong Version Current: {str(version)[0:2]}, new file: {str(self.version)[0:2]}')
580
+ if record.tag == 'INDI':
581
+ self._individuals.append(record)
582
+ if record.tag == 'SOUR' and record.level == 0:
583
+ self._sources.append(record)
584
+ if record.tag == 'REPO' and record.level == 0:
585
+ self._repositories.append(record)
586
+ if record.tag == 'FAM' and record.level == 0:
587
+ self._families.append(record)
588
+ if record.tag == 'OBJE' and record.level == 0:
589
+ self._objects.append(record)
590
+ if record.tag == 'SNOTE' and record.level == 0:
591
+ record.xref = record.value
592
+ self._snotes.append(record)
593
+ else:
594
+ raise ValueError()
553
595
 
554
- Returns:
555
- bool: Indicates if merge was successful.
556
- """
557
- return True
558
596