gedcom-x 0.5.7__py3-none-any.whl → 0.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/METADATA +1 -1
  2. gedcom_x-0.5.8.dist-info/RECORD +56 -0
  3. gedcomx/Extensions/rs10/rsLink.py +3 -3
  4. gedcomx/TopLevelTypeCollection.py +1 -1
  5. gedcomx/__init__.py +43 -42
  6. gedcomx/{Address.py → address.py} +1 -1
  7. gedcomx/{Agent.py → agent.py} +32 -16
  8. gedcomx/{Attribution.py → attribution.py} +3 -3
  9. gedcomx/{Conclusion.py → conclusion.py} +26 -9
  10. gedcomx/{Converter.py → converter.py} +54 -39
  11. gedcomx/{Coverage.py → coverage.py} +23 -5
  12. gedcomx/{Date.py → date.py} +1 -1
  13. gedcomx/{Document.py → document.py} +26 -8
  14. gedcomx/{Event.py → event.py} +13 -13
  15. gedcomx/{EvidenceReference.py → evidence_reference.py} +2 -2
  16. gedcomx/{Fact.py → fact.py} +31 -23
  17. gedcomx/{Gedcom5x.py → gedcom5x.py} +1 -1
  18. gedcomx/gedcom7/Exceptions.py +9 -0
  19. gedcomx/gedcom7/Gedcom7.py +160 -0
  20. gedcomx/gedcom7/GedcomStructure.py +94 -0
  21. gedcomx/gedcom7/Specification.py +347 -0
  22. gedcomx/gedcom7/__init__.py +26 -0
  23. gedcomx/gedcom7/g7interop.py +205 -0
  24. gedcomx/gedcom7/logger.py +19 -0
  25. gedcomx/{GedcomX.py → gedcomx.py} +14 -13
  26. gedcomx/{Gender.py → gender.py} +25 -11
  27. gedcomx/group.py +63 -0
  28. gedcomx/{Identifier.py → identifier.py} +4 -4
  29. gedcomx/{Mutations.py → mutations.py} +49 -25
  30. gedcomx/{Name.py → name.py} +15 -15
  31. gedcomx/{Note.py → note.py} +2 -2
  32. gedcomx/{OnlineAccount.py → online_account.py} +1 -1
  33. gedcomx/{Person.py → person.py} +18 -16
  34. gedcomx/{PlaceDescription.py → place_description.py} +18 -16
  35. gedcomx/{PlaceReference.py → place_reference.py} +4 -4
  36. gedcomx/{Qualifier.py → qualifier.py} +1 -1
  37. gedcomx/{Relationship.py → relationship.py} +30 -12
  38. gedcomx/{Resource.py → resource.py} +2 -2
  39. gedcomx/{Serialization.py → serialization.py} +31 -32
  40. gedcomx/{SourceDescription.py → source_description.py} +16 -16
  41. gedcomx/{SourceReference.py → source_reference.py} +7 -7
  42. gedcomx/{Subject.py → subject.py} +26 -8
  43. gedcomx/{Translation.py → translation.py} +1 -1
  44. gedcomx/{URI.py → uri.py} +42 -26
  45. gedcom_x-0.5.7.dist-info/RECORD +0 -49
  46. gedcomx/Group.py +0 -37
  47. {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/WHEEL +0 -0
  48. {gedcom_x-0.5.7.dist-info → gedcom_x-0.5.8.dist-info}/top_level.txt +0 -0
  49. /gedcomx/{Exceptions.py → exceptions.py} +0 -0
  50. /gedcomx/{ExtensibleEnum.py → extensible_enum.py} +0 -0
  51. /gedcomx/{Gedcom.py → gedcom.py} +0 -0
  52. /gedcomx/{LoggingHub.py → logging_hub.py} +0 -0
  53. /gedcomx/{SourceCitation.py → source_citation.py} +0 -0
  54. /gedcomx/{TextValue.py → textvalue.py} +0 -0
@@ -1,28 +1,38 @@
1
1
  import difflib
2
2
  import re
3
3
 
4
- from datetime import datetime
5
4
  from enum import Enum
6
5
  from typing import List, Optional, Dict, Any
6
+ """
7
+ ======================================================================
8
+ Project: Gedcom-X
9
+ File: fact.py
10
+ Author: David J. Cartwright
11
+ Purpose:
7
12
 
8
- from .Attribution import Attribution
9
- from .Conclusion import ConfidenceLevel
10
- from .Document import Document
11
- from .Date import Date
12
- from .Note import Note
13
- from .PlaceReference import PlaceReference
14
- from .SourceReference import SourceReference
13
+ Created: 2025-08-25
14
+ Updated:
15
+ - 2025-08-31:
16
+
17
+ ======================================================================
18
+ """
15
19
 
16
- from .Resource import Resource
17
-
18
- from .Conclusion import Conclusion
19
- from .Qualifier import Qualifier
20
-
21
- from enum import Enum
22
-
23
- from collections.abc import Sized
24
-
25
- from .Extensions.rs10.rsLink import rsLink, _rsLinkList
20
+ """
21
+ ======================================================================
22
+ GEDCOM Module Types
23
+ ======================================================================
24
+ """
25
+ from .attribution import Attribution
26
+ from .conclusion import Conclusion, ConfidenceLevel
27
+ from .date import Date
28
+ from .document import Document
29
+ from .Extensions.rs10.rsLink import _rsLinkList
30
+ from .note import Note
31
+ from .place_reference import PlaceReference
32
+ from .qualifier import Qualifier
33
+ from .resource import Resource
34
+ from .source_reference import SourceReference
35
+ #=====================================================================
26
36
 
27
37
 
28
38
  class FactType(Enum):
@@ -370,7 +380,6 @@ class FactType(Enum):
370
380
  return keywords_to_fact_type[matches[0]]
371
381
  return None
372
382
 
373
-
374
383
  class FactQualifier(Enum):
375
384
  Age = "http://gedcomx.org/Age"
376
385
  Cause = "http://gedcomx.org/Cause"
@@ -389,7 +398,6 @@ class FactQualifier(Enum):
389
398
  }
390
399
  return descriptions.get(self, "No description available.")
391
400
 
392
-
393
401
  class Fact(Conclusion):
394
402
  identifier = 'http://gedcomx.org/v1/Fact'
395
403
  version = 'http://gedcomx.org/conceptual-model/v1'
@@ -397,9 +405,9 @@ class Fact(Conclusion):
397
405
  def __init__(self,
398
406
  id: Optional[str] = None,
399
407
  lang: Optional[str] = None,
400
- sources: Optional[List[SourceReference]] = [],
408
+ sources: Optional[List[SourceReference]] = None,
401
409
  analysis: Optional[Resource | Document] = None,
402
- notes: Optional[List[Note]] = [],
410
+ notes: Optional[List[Note]] = None,
403
411
  confidence: Optional[ConfidenceLevel] = None,
404
412
  attribution: Optional[Attribution] = None,
405
413
  type: Optional[FactType] = None,
@@ -428,7 +436,7 @@ class Fact(Conclusion):
428
436
 
429
437
  @property
430
438
  def _as_dict_(self):
431
- from .Serialization import Serialization
439
+ from .serialization import Serialization
432
440
  type_as_dcit = super()._as_dict_
433
441
  # Only add Fact-specific fields
434
442
  if self.type:
@@ -9,7 +9,7 @@ from collections import defaultdict
9
9
  from typing import Iterable, Iterator, List, Optional, Tuple, Union
10
10
 
11
11
  import logging
12
- from .LoggingHub import hub, ChannelConfig
12
+ from .logging_hub import hub, ChannelConfig
13
13
 
14
14
  job_id = "gedcomx.parsing.GEDCOM5x"
15
15
 
@@ -0,0 +1,9 @@
1
+
2
+
3
+
4
class GedcomError(Exception):
    """Generic error in GEDCOM processing.

    Base class of the GEDCOM 7 exception hierarchy, so callers can catch
    every package-specific failure with a single ``except GedcomError``.
    """


class GedcomInvalidSubStructure(GedcomError):
    """Error type for an invalid GEDCOM substructure.

    Derives from :class:`GedcomError` (and therefore still from
    ``Exception``), so existing ``except Exception`` handlers keep working.
    NOTE(review): no raise site is visible in this module; confirm the exact
    condition this is meant to signal.
    """
@@ -0,0 +1,160 @@
1
+
2
+ from __future__ import annotations
3
+ from typing import Any, Dict, List, Optional, Union, Iterable
4
+ from collections import defaultdict
5
+
6
+
7
+ from .GedcomStructure import GedcomStructure
8
+ from . import Specification as g7specs
9
+ from .logger import get_logger
10
+
11
+
12
+ from typing import Dict, List, Optional
13
+
14
+
15
+
16
class Gedcom7:
    """In-memory container for the level-0 records of a GEDCOM 7 file.

    ``records`` holds the level-0 structures in file order and ``_tag_index``
    maps an upper-cased tag to the positions of the records carrying it, so a
    lookup by tag does not scan the whole list.
    """

    def __init__(self, filepath: Optional[str] = None):
        """Create an empty container.

        Args:
            filepath: Accepted but not used by the constructor; call
                :meth:`loadfile` to read a file.
                NOTE(review): confirm whether construction is meant to load
                ``filepath`` automatically.
        """
        self.persons: List[Any] = []
        self.families: List[Any] = []
        self.sources: List[Any] = []
        self.records: List['GedcomStructure'] = []
        # tag (upper-cased) -> list of indices into self.records
        self._tag_index: Dict[str, List[int]] = defaultdict(list)

    # ---- indexing helpers -------------------------------------------------
    @staticmethod
    def _norm_tag(tag: str) -> str:
        """Return ``tag`` in the upper-case form used as an index key."""
        return tag.upper()

    def _rebuild_index(self) -> None:
        """Recompute ``_tag_index`` from scratch from ``records``."""
        self._tag_index.clear()
        for position, rec in enumerate(self.records):
            if getattr(rec, "tag", None):
                self._tag_index[self._norm_tag(rec.tag)].append(position)

    def _append_record(self, rec: 'GedcomStructure') -> None:
        """Append ``rec`` to ``records`` and keep the tag index in sync."""
        self.records.append(rec)
        if getattr(rec, "tag", None):
            self._tag_index[self._norm_tag(rec.tag)].append(len(self.records) - 1)

    # ---- Python container protocol ----------------------------------------
    def __len__(self) -> int:
        """Return the number of level-0 records."""
        return len(self.records)

    def __iter__(self) -> Iterable['GedcomStructure']:
        """Iterate over the level-0 records in file order."""
        return iter(self.records)

    def __contains__(self, key: Union[str, 'GedcomStructure']) -> bool:
        """Test membership by tag (``str``, case-insensitive) or by record."""
        if isinstance(key, str):
            return self._norm_tag(key) in self._tag_index
        return key in self.records

    def __getitem__(self, key: Union[int, slice, str, tuple]) -> Union['GedcomStructure', List['GedcomStructure']]:
        """Look up records by position, by tag, or by ``(tag, position)``.

        Args:
            key: ``int``/``slice`` indexes ``records`` directly; ``str``
                returns every record with that tag (possibly an empty list);
                ``(tag, int_or_slice)`` indexes into the per-tag list.

        Raises:
            TypeError: If ``key`` (or the second tuple element) has an
                unsupported type.
            IndexError: If a position is out of range.
        """
        # by position
        if isinstance(key, (int, slice)):
            return self.records[key]

        # by tag
        if isinstance(key, str):
            positions = self._tag_index.get(self._norm_tag(key), [])
            return [self.records[i] for i in positions]

        # combo: ('INDI', 0) or ('INDI', 0:5)
        if isinstance(key, tuple) and len(key) == 2 and isinstance(key[0], str):
            tag, sub = key
            if isinstance(sub, (int, slice)):
                return self[tag][sub]
            raise TypeError(f"Unsupported sub-key type: {type(sub)!r}")

        raise TypeError(f"Unsupported key type: {type(key)!r}")

    # ---- parsing ----------------------------------------------------------
    @staticmethod
    def parse_gedcom_line(line: str) -> Optional[Dict[str, Any]]:
        """Split one GEDCOM line into level, xref, tag and value.

        A leading BOM and the trailing line break are removed first. The
        payload is returned with its internal whitespace intact; a payload
        consisting of a single token has trailing whitespace removed so that
        a pointer followed by stray blanks is still recognised.

        Side effect: a well-formed ``TAG <xtag> <uri>`` line registers the
        extension in ``g7specs.structure_specs``.

        Args:
            line: Raw line as read from the file.

        Returns:
            ``{"level", "xref", "tag", "value"}``, or ``None`` when the line
            is blank, has fewer than two fields, has a non-integer or
            negative level, or carries an xref without a tag. ``xref`` is
            the line's own cross-reference id if present, otherwise the
            pointer found in the value, otherwise ``None``.
        """
        line = line.lstrip('\ufeff').rstrip('\r\n')

        # Two splits only: the third field keeps the rest of the line verbatim.
        fields = line.split(maxsplit=2)
        if len(fields) < 2:
            return None  # blank, or not even "0 HEAD"

        # 1) Level
        try:
            level = int(fields[0])
        except ValueError:
            return None
        if level < 0:
            return None

        # 2) Optional xref, then the tag and the untouched remainder
        xref = None
        tag = fields[1]
        value = fields[2] if len(fields) > 2 else ""
        if tag.startswith('@') and tag.endswith('@'):
            xref = tag
            rest = value.split(maxsplit=1)
            if not rest:
                return None  # must have at least ["0", "@X@", "TAG"]
            tag = rest[0]
            value = rest[1] if len(rest) > 1 else ""

        # 3) Single-token payloads never carry meaningful trailing blanks
        trimmed = value.rstrip()
        if not any(ch.isspace() for ch in trimmed):
            value = trimmed

        # 4) A pointer payload supplies the xref when the line has none of
        #    its own (previously the tag was stored here by mistake).
        if xref is None and value.startswith('@') and value.endswith('@'):
            xref = value

        # 5) Extension tag declaration: "TAG <xtag> <uri>"
        if tag == 'TAG':
            declaration = value.split()
            # Anything other than exactly two tokens is left unregistered
            # instead of aborting the whole parse with a ValueError.
            if len(declaration) == 2:
                xtag, uri = declaration
                g7specs.structure_specs[xtag] = uri
                g7specs.structure_specs[uri] = {'label': 'Extension_' + xtag}

        return {
            "level": level,
            "xref": xref,
            "tag": tag,
            "value": value
        }

    def loadfile(self, filepath: str) -> None:
        """Read a GEDCOM file and replace ``records`` with its contents.

        Each line is parsed with :meth:`parse_gedcom_line`. A line whose tag
        equals ``g7specs.CONT`` is appended (after a newline) to the value of
        the structure one level above it; every other line becomes a
        ``GedcomStructure`` attached to the most recent structure one level
        above. Lines that cannot be parsed, or that have no structure to
        attach to, are logged on the ``importlog`` logger and skipped.

        Args:
            filepath: Path of the UTF-8 encoded file to read.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        log = get_logger('importlog')
        context: Dict[int, GedcomStructure] = {}  # level -> latest structure
        records: List[GedcomStructure] = []

        with open(filepath, 'r', encoding='utf8') as file:
            for lineno, raw in enumerate(file, start=1):
                record = Gedcom7.parse_gedcom_line(raw)
                if record is None:
                    log.error(f'skipping empty or malformed line at {lineno}: {raw!r}')
                    continue

                level = record["level"]
                parent = context.get(level - 1) if level > 0 else None

                if record["tag"] == g7specs.CONT:
                    if parent is None:
                        log.error(f'continuation without a preceding structure at {lineno}: {raw!r}')
                        continue
                    parent.value += "\n" + record["value"]
                    continue

                if level > 0 and parent is None:
                    log.error(f'no parent at level {level - 1} for line {lineno}: {raw!r}')
                    continue

                structure = GedcomStructure(
                    level=level,
                    tag=record["tag"],
                    xref=record["xref"],
                    text=record["value"],
                    parent=parent,
                    line_num=lineno
                )

                if level == 0:
                    records.append(structure)

                # Forget deeper levels of the previous branch so a later
                # level jump cannot attach to an unrelated, stale parent.
                for deeper in [lvl for lvl in context if lvl > level]:
                    del context[deeper]
                context[level] = structure

        self.records = records
        self._rebuild_index()  # build the tag index once, after loading
@@ -0,0 +1,94 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Dict, Any
4
+ import warnings
5
+ from . import Specification as g7
6
+
7
+ from typing import Dict, List,Optional,Any
8
+
9
+
10
# GEDCOM 7 term URIs collected under the name `gedcom_top_level_terms`.
# Every entry shares the https://gedcom.io/terms/v7/ prefix; order is kept.
gedcom_top_level_terms = [
    'https://gedcom.io/terms/v7/' + term
    for term in (
        'CONT',
        'record-FAM',
        'record-INDI',
        'record-SNOTE',
        'record-SUBM',
        'TRLR',
        'HEAD',
        'record-OBJE',
        'record-REPO',
        'record-SOUR',
    )
]
20
+
21
+
22
+
23
+
24
class GedcomStructure:
    """One GEDCOM line plus the substructures nested beneath it.

    Note: the child list is stored under the attribute name ``subtructures``
    (sic); the spelling is kept because other code may rely on it.
    """

    version = 'v7'

    def __init__(
        self,
        *,
        level: Optional[int] = None,
        xref: Optional[str] = None,
        tag: Optional[str] = None,
        pointer: Optional[bool] = None,
        text: Optional[str] = None,
        parent: Optional['GedcomStructure'] = None,
        line_num: Optional[int] = None
    ) -> None:
        """Create a GEDCOM structure node and attach it to ``parent``.

        Args:
            level: GEDCOM line level (0..n).
            xref: Optional cross-reference id (e.g., '@I1@').
            tag: GEDCOM tag (e.g., 'INDI', 'NAME').
            pointer: Truthy if this line is a pointer; ``None`` and other
                falsy values are stored as ``False``.
            text: Literal text payload for this line; stored as both
                ``text`` and ``value``.
            parent: Parent node in the structure tree, if any. The new node
                is appended to the parent's child list.
            line_num: Line number stored on the node as ``line_num``.
        """
        self.level = level
        self.xref = xref
        self.tag = tag
        self.text = text
        self.value = text
        self.line_num = line_num
        self.pointer = pointer or False
        # Created up front so the node is usable even if a later step raises.
        self.subtructures = []

        # Below level 0, a payload wrapped in '@' marks this line as a
        # pointer, and the payload replaces the xref.
        if self.level and self.level > 0 and text and text.startswith('@') and text.endswith('@'):
            self.pointer = True
            self.xref = text

        self.parent = parent if parent else None
        if self.parent and isinstance(self.parent, GedcomStructure):
            parent.subtructures.append(self)

        # Tags beginning with '_' are extension tags.
        self.extension = bool(tag) and tag.startswith('_')
        self.uri = g7.match_uri(tag, self.parent)
        self.label = g7.get_label(self.uri)

    def _as_dict_(self):
        """Return this node (and, recursively, its children) as a dict.

        The result has a single key, the label looked up for ``self.uri``,
        mapping to the node's fields; ``xref``, ``value`` and
        ``substructures`` are included only when non-empty.
        """
        as_dict = {'level': self.level}
        if self.xref:
            as_dict['xref'] = self.xref
        as_dict['tag'] = self.tag
        if self.value:
            as_dict['value'] = self.value
        if self.subtructures:
            as_dict['substructures'] = [child._as_dict_() for child in self.subtructures]
        return {g7.get_label(self.uri): as_dict}

    def __repr__(self):
        """Return a one-line debugging summary of this node."""
        ext = '(Ext)' if self.extension else ''
        # `!s` converts first, so a missing (None) tag can still be padded.
        return (
            "GedcomStructure("
            f"level: {self.level} tag={self.tag!s:<6} ({self.label}), {ext} "
            f"xref:{self.xref} pointer={self.pointer}, text='{self.value}', "
            f"uri={self.uri} subStructures: {len(self.subtructures)}"
            ")"
        )

    def __getitem__(self, index) -> List['GedcomStructure']:
        """Return the direct children whose tag equals ``index``."""
        return [child for child in self.subtructures if child.tag == index]
91
+
92
+
93
+
94
+