gedcom-x 0.5.6__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
gedcomx/Gedcom5x.py CHANGED
@@ -3,14 +3,26 @@
3
3
 
4
4
  import html
5
5
  import os
6
- from typing import List, Optional, Tuple
6
+ from typing import List, Optional, Tuple, Any
7
7
  import re
8
8
  from collections import defaultdict
9
9
  from typing import Iterable, Iterator, List, Optional, Tuple, Union
10
10
 
11
- from .LoggingHub import LoggingHub, ChannelConfig
12
- hub = LoggingHub("GEDCOM5x")
13
- hub.init_root()
11
+ import logging
12
+ from .LoggingHub import hub, ChannelConfig
13
+
14
+ job_id = "gedcomx.parsing.GEDCOM5x"
15
+
16
+ hub.start_channel(
17
+ ChannelConfig(
18
+ name=job_id,
19
+ path=f"logs/{job_id}.log",
20
+ level=logging.DEBUG,
21
+ rotation="size:10MB:3", # rotate by size, keep 3 backups
22
+ )
23
+ )
24
+
25
+ log = logging.getLogger("gedcomx")
14
26
 
15
27
  BOM = '\ufeff'
16
28
 
@@ -51,25 +63,25 @@ line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
51
63
  from typing import List, Optional, Iterator, Union
52
64
 
53
65
 
54
- class GedcomRecord():
66
+ class Gedcom5xRecord():
55
67
  def __init__(
56
68
  self,
57
69
  line_num: Optional[int] = None,
58
70
  level: int = -1,
59
- tag: str = "NONR",
71
+ tag: str | None = "NONR",
60
72
  xref: Optional[str] = None,
61
73
  value: Optional[str] = None,
62
74
  ) -> None:
63
75
  self.line = line_num
64
- self._subRecords: List[GedcomRecord] = []
76
+ self._subRecords: List[Gedcom5xRecord] = []
65
77
  self.level = int(level)
66
78
  self.xref = xref
67
79
  self.pointer: bool = False
68
80
  self.tag = str(tag).strip()
69
81
  self.value = value
70
82
 
71
- self.parent: Optional[GedcomRecord] = None
72
- self.root: Optional[GedcomRecord] = None
83
+ self.parent: Optional[Gedcom5xRecord] = None
84
+ self.root: Optional[Gedcom5xRecord] = None
73
85
 
74
86
  # ───────────────────────────────
75
87
  # Dict/JSON friendly view
@@ -88,7 +100,7 @@ class GedcomRecord():
88
100
  # ───────────────────────────────
89
101
  # Subrecord management
90
102
  # ───────────────────────────────
91
- def addSubRecord(self, record: "GedcomRecord"):
103
+ def addSubRecord(self, record: "Gedcom5xRecord"):
92
104
 
93
105
  if record is not None and (record.level == (self.level + 1)):
94
106
  record.parent = self
@@ -99,7 +111,7 @@ class GedcomRecord():
99
111
  )
100
112
 
101
113
  def recordOnly(self):
102
- return GedcomRecord(
114
+ return Gedcom5xRecord(
103
115
  line_num=self.line, level=self.level, tag=self.tag, value=self.value
104
116
  )
105
117
 
@@ -123,7 +135,7 @@ class GedcomRecord():
123
135
  f"subRecords: {len(self._subRecords)}"
124
136
  )
125
137
  if subRecords:
126
- for subRecord in self.subRecords():
138
+ for subRecord in self.subRecords() or []:
127
139
  description += "\n" + subRecord.describe(subRecords=True)
128
140
  return description
129
141
 
@@ -132,9 +144,9 @@ class GedcomRecord():
132
144
  # ───────────────────────────────
133
145
  def subRecord(self, tag: str):
134
146
  result = [r for r in self._subRecords if r.tag == tag]
135
- return None if not result else result
147
+ return [] if not result else result
136
148
 
137
- def subRecords(self, tag: str = None):
149
+ def subRecords(self, tag: str | None = None) -> List['Gedcom5xRecord']:
138
150
  if not tag:
139
151
  return self._subRecords
140
152
  tags = tag.split("/", 1)
@@ -142,7 +154,7 @@ class GedcomRecord():
142
154
  # Collect matching first-level subrecords
143
155
  matches = [r for r in self._subRecords if r.tag == tags[0]]
144
156
  if not matches:
145
- return None
157
+ return []
146
158
 
147
159
  if len(tags) == 1:
148
160
  return matches
@@ -156,7 +168,7 @@ class GedcomRecord():
156
168
  results.extend(sub_result)
157
169
  else:
158
170
  results.append(sub_result)
159
- return results if results else None
171
+ return results if results else []
160
172
 
161
173
  # ───────────────────────────────
162
174
  # Iteration / Subscriptability
@@ -164,11 +176,11 @@ class GedcomRecord():
164
176
  def __call__(self) -> str:
165
177
  return self.describe()
166
178
 
167
- def __iter__(self) -> Iterator["GedcomRecord"]:
179
+ def __iter__(self) -> Iterator["Gedcom5xRecord"]:
168
180
  """Iterates recursively over self and all subrecords."""
169
181
  yield from self._flatten_subrecords(self)
170
182
 
171
- def _flatten_subrecords(self, record: "GedcomRecord") -> Iterator["GedcomRecord"]:
183
+ def _flatten_subrecords(self, record: "Gedcom5xRecord") -> Iterator["Gedcom5xRecord"]:
172
184
  yield record
173
185
  for sub in record._subRecords:
174
186
  yield from self._flatten_subrecords(sub)
@@ -176,7 +188,7 @@ class GedcomRecord():
176
188
  def __len__(self) -> int:
177
189
  return len(self._subRecords)
178
190
 
179
- def __getitem__(self, key: Union[int, slice, str]) -> Union["GedcomRecord", List["GedcomRecord"]]:
191
+ def __getitem__(self, key: Union[int, slice, str]) -> Union["Gedcom5xRecord", List["Gedcom5xRecord"]]:
180
192
  """
181
193
  - rec[0] -> first subrecord
182
194
  - rec[1:3] -> slice of subrecords
@@ -222,26 +234,26 @@ class Gedcom5x():
222
234
  """
223
235
  _top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']
224
236
 
225
- def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
237
+ def __init__(self, records: Optional[List[Gedcom5xRecord]] = None,filepath: str | None = None) -> None:
226
238
 
227
- self.records: List[GedcomRecord] = records or []
239
+ self.records: List[Gedcom5xRecord] = records or []
228
240
  if filepath:
229
241
  self.records = self._records_from_file(filepath)
230
242
  elif records:
231
- self.records: List[GedcomRecord] = records if records else []
243
+ self.records: List[Gedcom5xRecord] = records if records else []
232
244
 
233
245
 
234
246
  # Fast tag index: {'HEAD': [rec], 'INDI': [rec1, rec2, ...], ...}
235
- self._tag_index: dict[str, List[GedcomRecord]] = defaultdict(list)
247
+ self._tag_index: dict[str, List[Gedcom5xRecord]] = defaultdict(list)
236
248
  self._reindex()
237
249
 
238
- self.header: GedcomRecord | None = None
239
- self._sources: List[GedcomRecord] = []
240
- self._repositories: List[GedcomRecord] = []
241
- self._individuals: List[GedcomRecord] = []
242
- self._families: List[GedcomRecord] = []
243
- self._objects: List[GedcomRecord] = []
244
- self._snotes: List[GedcomRecord] = []
250
+ self.header: Gedcom5xRecord | None = None
251
+ self._sources: List[Gedcom5xRecord] = []
252
+ self._repositories: List[Gedcom5xRecord] = []
253
+ self._individuals: List[Gedcom5xRecord] = []
254
+ self._families: List[Gedcom5xRecord] = []
255
+ self._objects: List[Gedcom5xRecord] = []
256
+ self._snotes: List[Gedcom5xRecord] = []
245
257
  self.version = None
246
258
 
247
259
  if self.records:
@@ -277,7 +289,7 @@ class Gedcom5x():
277
289
  def __len__(self) -> int:
278
290
  return len(self.records)
279
291
 
280
- def __iter__(self) -> Iterator['GedcomRecord']:
292
+ def __iter__(self) -> Iterator['Gedcom5xRecord']:
281
293
  # Enables: for x in gedcom:
282
294
  return iter(self.records)
283
295
 
@@ -289,7 +301,7 @@ class Gedcom5x():
289
301
  return 0 <= key < len(self.records)
290
302
  return False
291
303
 
292
- def __getitem__(self, key: Key) -> Union['GedcomRecord', List['GedcomRecord']]:
304
+ def __getitem__(self, key: Key) -> Union['Gedcom5xRecord', List['Gedcom5xRecord']]:
293
305
  """
294
306
  - gedcom[0] -> GedcomRecord at index 0
295
307
  - gedcom[1:5] -> list of GedcomRecord (slice)
@@ -309,30 +321,30 @@ class Gedcom5x():
309
321
  raise TypeError(f"Unsupported key type: {type(key).__name__}")
310
322
 
311
323
  # Optional: convenience helpers
312
- def by_tag(self, tag: str) -> List['GedcomRecord']:
324
+ def by_tag(self, tag: str) -> List['Gedcom5xRecord']:
313
325
  """Always return a list of records for a tag (empty list if none)."""
314
326
  return list(self._tag_index.get(tag, []))
315
327
 
316
- def first(self, tag: str) -> Optional['GedcomRecord']:
328
+ def first(self, tag: str) -> Optional['Gedcom5xRecord']:
317
329
  """Return the first record with a given tag, or None."""
318
330
  lst = self._tag_index.get(tag, [])
319
331
  return lst[0] if lst else None
320
332
 
321
333
  # If you add/replace records after init, keep the index fresh:
322
- def append(self, rec: 'GedcomRecord') -> None:
334
+ def append(self, rec: 'Gedcom5xRecord') -> None:
323
335
  self.records.append(rec)
324
336
  self._tag_index.setdefault(rec.tag, []).append(rec)
325
337
 
326
- def extend(self, recs: Iterable['GedcomRecord']) -> None:
338
+ def extend(self, recs: Iterable['Gedcom5xRecord']) -> None:
327
339
  self.records.extend(recs)
328
340
  for r in recs:
329
341
  self._tag_index.setdefault(r.tag, []).append(r)
330
342
 
331
- def insert(self, idx: int, rec: 'GedcomRecord') -> None:
343
+ def insert(self, idx: int, rec: 'Gedcom5xRecord') -> None:
332
344
  self.records.insert(idx, rec)
333
345
  self._tag_index.setdefault(rec.tag, []).append(rec)
334
346
 
335
- def remove(self, rec: 'GedcomRecord') -> None:
347
+ def remove(self, rec: 'Gedcom5xRecord') -> None:
336
348
  self.records.remove(rec)
337
349
  try:
338
350
  bucket = self._tag_index.get(rec.tag)
@@ -385,55 +397,55 @@ class Gedcom5x():
385
397
  print_table(imports_stats)
386
398
 
387
399
  @property
388
- def sources(self) -> List[GedcomRecord]:
400
+ def sources(self) -> List[Gedcom5xRecord]:
389
401
  return self._sources
390
402
 
391
403
  @sources.setter
392
- def sources(self, value: List[GedcomRecord]):
393
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
404
+ def sources(self, value: List[Gedcom5xRecord]):
405
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
394
406
  raise ValueError("sources must be a list of GedcomRecord objects.")
395
407
  self._sources = value
396
408
 
397
409
  @property
398
- def repositories(self) -> List[GedcomRecord]:
410
+ def repositories(self) -> List[Gedcom5xRecord]:
399
411
  """
400
412
  List of **REPO** records found in the Genealogy
401
413
  """
402
414
  return self._repositories
403
415
 
404
416
  @repositories.setter
405
- def repositories(self, value: List[GedcomRecord]):
406
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
417
+ def repositories(self, value: List[Gedcom5xRecord]):
418
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
407
419
  raise ValueError("repositories must be a list of GedcomRecord objects.")
408
420
  self._repositories = value
409
421
 
410
422
  @property
411
- def individuals(self) -> List[GedcomRecord]:
423
+ def individuals(self) -> List[Gedcom5xRecord]:
412
424
  return self._individuals
413
425
 
414
426
  @individuals.setter
415
- def individuals(self, value: List[GedcomRecord]):
416
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
427
+ def individuals(self, value: List[Gedcom5xRecord]):
428
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
417
429
  raise ValueError("individuals must be a list of GedcomRecord objects.")
418
430
  self._individuals = value
419
431
 
420
432
  @property
421
- def families(self) -> List[GedcomRecord]:
433
+ def families(self) -> List[Gedcom5xRecord]:
422
434
  return self._families
423
435
 
424
436
  @families.setter
425
- def families(self, value: List[GedcomRecord]):
426
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
437
+ def families(self, value: List[Gedcom5xRecord]):
438
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
427
439
  raise ValueError("families must be a list of GedcomRecord objects.")
428
440
  self._families = value
429
441
 
430
442
  @property
431
- def objects(self) -> List[GedcomRecord]:
443
+ def objects(self) -> List[Gedcom5xRecord]:
432
444
  return self._objects
433
445
 
434
446
  @objects.setter
435
- def objects(self, value: List[GedcomRecord]):
436
- if not isinstance(value, list) or not all(isinstance(item, GedcomRecord) for item in value):
447
+ def objects(self, value: List[Gedcom5xRecord]):
448
+ if not isinstance(value, list) or not all(isinstance(item, Gedcom5xRecord) for item in value):
437
449
  raise ValueError("objects must be a list of GedcomRecord objects.")
438
450
  self._objects = value
439
451
 
@@ -449,7 +461,7 @@ class Gedcom5x():
449
461
  raise NotImplementedError("Writing of GEDCOM files is not implemented.")
450
462
 
451
463
  @staticmethod
452
- def _records_from_file(file_path: str) -> List[GedcomRecord]:
464
+ def _records_from_file(file_path: str) -> List[Gedcom5xRecord]:
453
465
  def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
454
466
  """
455
467
  Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)
@@ -483,7 +495,7 @@ class Gedcom5x():
483
495
  lines = [line.strip() for line in file]
484
496
 
485
497
  records = []
486
- record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
498
+ record_map: dict[int,Any] = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
487
499
 
488
500
  for l, line in enumerate(lines):
489
501
  if line.startswith(BOM):
@@ -510,15 +522,18 @@ class Gedcom5x():
510
522
  else:
511
523
  level, tag = parts
512
524
 
513
- level, xref, tag, value, xref_value = parse_gedcom7_line(line)
525
+ level, xref, tag, value, xref_value = parse_gedcom7_line(line) or tuple([None, None, None, None])
526
+
514
527
 
515
528
  if xref is None and xref_value is not None:
516
529
  xref = xref_value
517
530
  # print(l, level, xref, tag, value, xref_value)
518
-
519
- level = int(level)
520
531
 
521
- new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, xref=xref,value=value)
532
+ if isinstance(level,int):
533
+ level = int(level)
534
+ else: raise ValueError(f"Record had a level of {level}")
535
+
536
+ new_record = Gedcom5xRecord(line_num=l + 1, level=level, tag=tag if tag else None, xref=xref,value=value)
522
537
 
523
538
 
524
539
  if level == 0:
@@ -528,12 +543,14 @@ class Gedcom5x():
528
543
  new_record.parent = record_map[int(level) - 1]
529
544
  record_map[int(level) - 1].addSubRecord(new_record)
530
545
  record_map[int(level)] = new_record
546
+ with hub.use(job_id):
547
+ log.info(new_record.describe())
531
548
 
532
549
 
533
- return records if records else None
550
+ return records if records else []
534
551
 
535
552
  @staticmethod
536
- def fromFile(file_path: str) -> 'Gedcom':
553
+ def fromFile(file_path: str) -> 'Gedcom5x':
537
554
  """
538
555
  Static method to create a Gedcom object from a GEDCOM file.
539
556