PyPI - gedcom-x - Versions diffs - 0.5__py3-none-any.whl → 0.5.2__py3-none-any.whl - Mend

gedcom-x 0.5__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{gedcom_x-0.5.dist-info → gedcom_x-0.5.2.dist-info}/METADATA +1 -1
gedcom_x-0.5.2.dist-info/RECORD +42 -0
gedcomx/Address.py +40 -11
gedcomx/Agent.py +129 -23
gedcomx/Attribution.py +38 -54
gedcomx/Conclusion.py +60 -45
gedcomx/Date.py +49 -8
gedcomx/Document.py +19 -9
gedcomx/Event.py +4 -4
gedcomx/EvidenceReference.py +2 -2
gedcomx/Exceptions.py +10 -0
gedcomx/Fact.py +70 -46
gedcomx/Gedcom.py +111 -37
gedcomx/GedcomX.py +405 -175
gedcomx/Gender.py +61 -8
gedcomx/Group.py +3 -3
gedcomx/Identifier.py +93 -10
gedcomx/Logging.py +19 -0
gedcomx/Name.py +67 -38
gedcomx/Note.py +5 -4
gedcomx/OnlineAccount.py +2 -2
gedcomx/Person.py +88 -33
gedcomx/PlaceDescription.py +22 -8
gedcomx/PlaceReference.py +7 -5
gedcomx/Relationship.py +19 -9
gedcomx/Resource.py +61 -0
gedcomx/Serialization.py +44 -1
gedcomx/SourceCitation.py +6 -1
gedcomx/SourceDescription.py +89 -72
gedcomx/SourceReference.py +25 -14
gedcomx/Subject.py +10 -8
gedcomx/TextValue.py +2 -1
gedcomx/URI.py +95 -61
gedcomx/Zip.py +1 -0
gedcomx/_Links.py +37 -0
gedcomx/__init__.py +4 -2
gedcomx/g7interop.py +205 -0
gedcom_x-0.5.dist-info/RECORD +0 -37
gedcomx/_Resource.py +0 -11
{gedcom_x-0.5.dist-info → gedcom_x-0.5.2.dist-info}/WHEEL +0 -0
{gedcom_x-0.5.dist-info → gedcom_x-0.5.2.dist-info}/top_level.txt +0 -0

gedcomx/Date.py CHANGED Viewed

@@ -1,29 +1,70 @@
 from typing import Optional
+from datetime import datetime, timezone
+from dateutil import parser
+import time
 class DateFormat:
     def __init__(self) -> None:
         pass
+class DateNormalization():
+    pass
 class Date:
     identifier = 'http://gedcomx.org/v1/Date'
     version = 'http://gedcomx.org/conceptual-model/v1'
-    def __init__(self, original: Optional[str],formal: Optional[str | DateFormat] = None) -> None:
+    def __init__(self, original: Optional[str],normalized: Optional[DateNormalization] = None ,formal: Optional[str | DateFormat] = None) -> None:
         self.orginal = original
         self.formal = formal
+        self.normalized: DateNormalization | None = normalized if normalized else None
     def _prop_dict(self):
         return {'original': self.orginal,
                 'formal': self.formal}
-# Date
-Date._from_json_ = classmethod(lambda cls, data: Date(
-    original=data.get('original'),
-    formal=data.get('formal')
-))
+    @classmethod
+    def _from_json_(obj,data):
+        original = data.get('original',None)
+        formal = data.get('formal',None)
+        return Date(original=original,formal=formal)
 Date._to_dict_ = lambda self: {
     'original': self.orginal,
-    'formal': self.formal}
+    'formal': self.formal}
+def date_to_timestamp(date_str: str, assume_utc_if_naive: bool = True, print_definition: bool = True):
+    """
+    Convert a date string of various formats into a Unix timestamp.
+    A "timestamp" refers to an instance of time, including values for year,
+    month, date, hour, minute, second, and timezone.
+    """
+    # Handle year ranges like "1894-1912" → pick first year
+    if "-" in date_str and date_str.count("-") == 1 and all(part.isdigit() for part in date_str.split("-")):
+        date_str = date_str.split("-")[0].strip()
+    # Parse date
+    dt = parser.parse(date_str)
+    # Ensure timezone awareness
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc if assume_utc_if_naive else datetime.now().astimezone().tzinfo)
+    # Normalize to UTC and compute timestamp
+    dt_utc = dt.astimezone(timezone.utc)
+    epoch = datetime(1970, 1, 1, tzinfo=timezone.utc)
+    ts = (dt_utc - epoch).total_seconds()
+    # Create ISO 8601 string with full date/time/timezone
+    full_timestamp_str = dt_utc.replace(microsecond=0).isoformat()
+    return ts, full_timestamp_str

gedcomx/Document.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from enum import Enum
-from typing import Optional
+from typing import Optional, List
 from gedcomx.Attribution import Attribution
-from gedcomx.Conclusion import ConfidenceLevel
+#from gedcomx.Conclusion import ConfidenceLevel
 from gedcomx.Note import Note
 from gedcomx.SourceReference import SourceReference
-from gedcomx.URI import URI
+from gedcomx.Resource import Resource
 from .Conclusion import Conclusion
@@ -33,10 +33,20 @@ class Document(Conclusion):
     identifier = 'http://gedcomx.org/v1/Document'
     version = 'http://gedcomx.org/conceptual-model/v1'
-    def __init__(self, id: str | None, lang: str | None, sources: SourceReference | None, analysis: URI | None, notes: Note | None, confidence: ConfidenceLevel | None, attribution: Attribution | None,
-                 type: Optional[DocumentType],
-                 extracted: Optional[bool], # Default to False
-                 textType: Optional[TextType],
-                 text: str,
+    def __init__(self, id: Optional[str] = None,
+                 lang: Optional[str] = None,
+                 sources: Optional[List[SourceReference]] = None,
+                 analysis: Optional[Resource] = None,
+                 notes: Optional[List[Note]] = None,
+                 confidence: Optional[object] = None, # ConfidenceLevel
+                 attribution: Optional[Attribution] = None,
+                 type: Optional[DocumentType] = None,
+                 extracted: Optional[bool] = None, # Default to False
+                 textType: Optional[TextType] = None,
+                 text: Optional[str] = None,
                  ) -> None:
-        super().__init__(id, lang, sources, analysis, notes, confidence, attribution)
+        super().__init__(id, lang, sources, analysis, notes, confidence, attribution)
+        self.type = type
+        self.extracted = extracted
+        self.textType = textType
+        self.text = text

gedcomx/Event.py CHANGED Viewed

@@ -11,7 +11,7 @@ from .Note import Note
 from .PlaceReference import PlaceReference
 from .SourceReference import SourceReference
 from .Subject import Subject
-from .URI import URI
+from .Resource import Resource
 class EventRoleType(Enum):
     Principal = "http://gedcomx.org/Principal"
@@ -37,11 +37,11 @@ class EventRole(Conclusion):
                  id: Optional[str] = None,
                  lang: Optional[str] = 'en',
                  sources: Optional[List[SourceReference]] = [],
-                 analysis: Optional[URI] = None,
+                 analysis: Optional[Resource] = None,
                  notes: Optional[List[Note]] = [],
                  confidence: Optional[ConfidenceLevel] = None,
                  attribution: Optional[Attribution] = None,
-                 person: URI = None,
+                 person: Resource = None,
                  type: Optional[EventRoleType] = None,
                  details: Optional[str] = None) -> None:
         super().__init__(id, lang, sources, analysis, notes, confidence, attribution)
@@ -180,7 +180,7 @@ class Event(Subject):
                  id: Optional[str] = None,
                  lang: Optional[str] = 'en',
                  sources: Optional[List[SourceReference]] = [],
-                 analysis: Optional[URI] = None,
+                 analysis: Optional[Resource] = None,
                  notes: Optional[List[Note]] = [],
                  confidence: Optional[ConfidenceLevel] = None,
                  attribution: Optional[Attribution] = None,

gedcomx/EvidenceReference.py CHANGED Viewed

@@ -1,11 +1,11 @@
 from typing import Optional
 from .Attribution import Attribution
-from .URI import URI
+from .Resource import Resource
 class EvidenceReference:
     identifier = 'http://gedcomx.org/v1/EvidenceReference'
     version = 'http://gedcomx.org/conceptual-model/v1'
-    def __init__(self, resource: URI, attribution: Optional[Attribution]) -> None:
+    def __init__(self, resource: Resource, attribution: Optional[Attribution]) -> None:
         pass

gedcomx/Exceptions.py ADDED Viewed

@@ -0,0 +1,10 @@
+class GedcomXError(Exception):
+    """Base for all app-specific errors."""
+class TagConversionError(GedcomXError):
+    def __init__(self, record,levelstack):
+        msg = f"Cannot convert: #{record.line} TAG: {record.tag} {record.xref if record.xref else ''} Value:{record.value} STACK: {type(levelstack[record.level-1]).__name__}"
+        super().__init__(msg)

gedcomx/Fact.py CHANGED Viewed

@@ -3,21 +3,27 @@ import re
 from datetime import datetime
 from enum import Enum
-from typing import List, Optional
+from typing import List, Optional, Dict, Any
 from .Attribution import Attribution
 from .Conclusion import ConfidenceLevel
+from .Document import Document
 from .Date import Date
 from .Note import Note
 from .PlaceReference import PlaceReference
 from .SourceReference import SourceReference
-from .URI import URI
+from .Serialization import Serialization
+from .Resource import Resource
 from .Conclusion import Conclusion
 from .Qualifier import Qualifier
 from enum import Enum
+from collections.abc import Sized
+from ._Links import _Link, _LinkList
 class FactType(Enum):
     # Person Fact Types
@@ -390,10 +396,10 @@ class Fact(Conclusion):
     version = 'http://gedcomx.org/conceptual-model/v1'
     def __init__(self,
-                 id: str = None,
+                 id: Optional[str] = None,
                  lang: str = 'en',
                  sources: Optional[List[SourceReference]] = [],
-                 analysis: URI = None,
+                 analysis: Optional[Resource | Document] = None,
                  notes: Optional[List[Note]] = [],
                  confidence: ConfidenceLevel = None,
                  attribution: Attribution = None,
@@ -401,62 +407,80 @@ class Fact(Conclusion):
                  date: Optional[Date] = None,
                  place: Optional[PlaceReference] = None,
                  value: Optional[str] = None,
-                 qualifiers  = []) -> None:       #qualifiers: Optional[List[FactQualifier]] = []) -> None:
-        super().__init__(id, lang, sources, analysis, notes, confidence, attribution)
+                 qualifiers: Optional[List[FactQualifier]] = None,
+                 links: Optional[_LinkList] = None):
+        super().__init__(id, lang, sources, analysis, notes, confidence, attribution, links=links)
         self.type = type
         self.date = date
         self.place = place
         self.value = value
-        self.qualifiers = qualifiers
+        self._qualifiers = qualifiers if qualifiers else []
     @property
-    def _as_dict_(self):
-        def _serialize(value):
-            if isinstance(value, (str, int, float, bool, type(None))):
-                return value
-            elif isinstance(value, dict):
-                return {k: _serialize(v) for k, v in value.items()}
-            elif isinstance(value, (list, tuple, set)):
-                return [_serialize(v) for v in value]
-            elif hasattr(value, "_as_dict_"):
-                return value._as_dict_
-            else:
-                return str("UKN " + value)  # fallback for unknown objects
+    def qualifiers(self) -> List[FactQualifier]:
+        return self._qualifiers # type: ignore
+    @qualifiers.setter
+    def qualifiers(self, value: List[FactQualifier]):
+        if (not isinstance(value, list)) or (not all(isinstance(item, FactQualifier) for item in value)):
+            raise ValueError("sources must be a list of GedcomRecord objects.")
+        self._qualifiers.extend(value)
+    @property
+    def _as_dict_(self):
+        fact_dict = super()._as_dict_
         # Only add Relationship-specific fields
-        fact_fields = {
+        fact_dict.update( {
             'type': self.type.value if self.type else None,
             'date': self.date._prop_dict() if self.date else None,
             'place': self.place._as_dict_ if self.place else None,
             'value': self.value,
             'qualifiers': [q.value for q in self.qualifiers] if self.qualifiers else []
-        }
-        # Serialize and exclude None values
-        for key, value in fact_fields.items():
-            if value is not None:
-                                fact_fields[key] = _serialize(value)
+        })
-        return fact_fields
+        return Serialization.serialize_dict(fact_dict)
+    @classmethod
+    def _from_json_(cls, data: Dict[str, Any]) -> 'Fact':
+        # Extract fields, no trailing commas!
+        id_          = data.get('id')
+        lang         = data.get('lang', 'en')
+        sources      = [SourceReference._from_json_(s) for s in data.get('sources',[])]
+        analysis     = (Resource._from_json_(data['analysis'])
+                        if data.get('analysis') else None)
+        notes        = [Note._from_json_(n) for n in data.get('notes',[])]
+        confidence   = (ConfidenceLevel._from_json_(data['confidence'])
+                        if data.get('confidence') else None)
+        attribution  = (Attribution._from_json_(data['attribution']) if data.get('attribution') else None)
+        fact_type    = (FactType.from_value(data['type'])
+                        if data.get('type') else None)
+        date         = (Date._from_json_(data['date'])
+                        if data.get('date') else None)
+        place        = (PlaceReference._from_json_(data['place'])
+                        if data.get('place') else None)
+        value        = data.get('value')
+        qualifiers   = [Qualifier._from_json_(q) for q in data.get('qualifiers', [])]
+        links = _LinkList._from_json_(data.get('links')) if data.get('links') else None
+        return cls(
+            id=id_,
+            lang=lang,
+            sources=sources,
+            analysis=analysis,
+            notes=notes,
+            confidence=confidence,
+            attribution=attribution,
+            type=fact_type,
+            date=date,
+            place=place,
+            value=value,
+            qualifiers=qualifiers,
+            links=links
+        )
-def ensure_list(val):
-    if val is None:
-        return []
-    return val if isinstance(val, list) else [val]
-# Fact
-Fact._from_json_ = classmethod(lambda cls, data: cls(
-    id=data.get('id'),
-    lang=data.get('lang', 'en'),
-    sources=[SourceReference._from_json_(s) for s in ensure_list(data.get('sources'))],
-    analysis=URI._from_json_(data['analysis']) if data.get('analysis') else None,
-    notes=[Note._from_json_(n) for n in ensure_list(data.get('notes'))],
-    confidence=ConfidenceLevel._from_json_(data['confidence']) if data.get('confidence') else None,
-    attribution=Attribution._from_json_(data['attribution']) if data.get('attribution') else None,
-    type=FactType.from_value(data['type']) if data.get('type') else None,
-    date=Date._from_json_(data['date']) if data.get('date') else None,
-    place=PlaceReference._from_json_(data['place']) if data.get('place') else None,
-    value=data.get('value'),
-    qualifiers=[Qualifier._from_json_(q) for q in ensure_list(data.get('qualifiers'))]
-))

gedcomx/Gedcom.py CHANGED Viewed

@@ -3,10 +3,23 @@
 import html
 import os
-from typing import List, Optional
+from typing import List, Optional, Tuple
+import re
 BOM = '\ufeff'
+GEDCOM7_LINE_RE = re.compile(
+    r"""^
+    (?P<level>\d+)                    # Level
+    (?:\s+@(?P<xref>[^@]+)@)?         # Optional record identifier
+    \s+(?P<tag>[A-Z0-9_-]+)           # Tag
+    (?:\s+(?P<value>.+))?             # Optional value (may be XREF)
+    $""",
+    re.VERBOSE
+)
+XREF_RE = re.compile(r'^@[^@]+@$')
 # Add hash table for XREF of Zero Recrods?
 nonzero = '[1-9]'
@@ -30,8 +43,9 @@ eol = '(\\\r(\\\n)?|\\\n)'
 line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
 class GedcomRecord():
-    def __init__(self,line_num=None,level=-1, tag='NONR', xref='', value=None) -> None:
-        self.line_num = line_num
+    def __init__(self,line_num: Optional[int] =None,level: int =-1, tag='NONR', xref: Optional[str] = None, value: Optional[str] = None) -> None:
+        self.line = line_num
         self._subRecords = []
         self.level = int(level)
         self.xref = xref
@@ -42,10 +56,10 @@ class GedcomRecord():
         self.parent = None
         self.root = None
-        if self.value.endswith('@') and self.value.startswith('@'):
-            self.xref = self.value.replace('@','')
-            if level > 0:
-                self.pointer = True
+        #if self.value and (self.value.endswith('@') and self.value.startswith('@')):
+        #    self.xref = self.value.replace('@','')
+        #    if level > 0:
+        #        self.pointer = True
     @property
     def _as_dict_(self):
@@ -67,7 +81,7 @@ class GedcomRecord():
             raise ValueError(f"SubRecord must be next level from this record (level:{self.level}, subRecord has level {record.level})")
     def recordOnly(self):
-        return GedcomRecord(line_num=self.line_num,level=self.level,tag=self.tag,value=self.value)
+        return GedcomRecord(line_num=self.line,level=self.level,tag=self.tag,value=self.value)
     def dump(self):
         record_dump = f"Level: {self.level}, tag: {self.tag}, value: {self.value}, subRecords: {len(self._subRecords)}\n"
@@ -76,7 +90,8 @@ class GedcomRecord():
         return record_dump
     def describe(self,subRecords: bool = False):
-        description = f"Line {self.line_num}: {'\t'* self.level} Level: {self.level}, tag: '{self.tag}', value: '{self.value}', subRecords: {len(self._subRecords)}"
+        level_str = '\t'* self.level
+        description = f"Line {self.line}: {level_str} Level: {self.level}, tag: '{self.tag}', xref={self.xref} value: '{self.value}', subRecords: {len(self._subRecords)}"
         if subRecords:
             for subRecord in self.subRecords():
                 description = description + '\n' + subRecord.describe(subRecords=True)
@@ -127,13 +142,25 @@ class GedcomRecord():
             yield from self._flatten_subrecords(subrecord)
 class Gedcom():
-    top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD']
-    # =========================================================
-    # 1. INITIALIZATION
-    # =========================================================
+    """
+    Object representing a Genealogy in legacy GEDCOM 5.x / 7 format.
+    Parameters
+    ----------
+    records : List[GedcomReord]
+        List of GedcomRecords to initialize the genealogy with
+    filepath : str
+        path to a GEDCOM (``*``.ged), if provided object will read, parse and initialize with records in the file.
+    Note
+    ----
+    **file_path** takes precidence over **records**.
+    If no arguments are provided, Gedcom Object will initialize with no records.
+    """
+    _top_level_tags = ['INDI', 'FAM', 'OBJE', 'SOUR', 'REPO', 'NOTE', 'HEAD','SNOTE']
     def __init__(self, records: Optional[List[GedcomRecord]] = None,filepath: str = None) -> None:
         if filepath:
             self.records = self._records_from_file(filepath)
@@ -141,34 +168,42 @@ class Gedcom():
             self.records: List[GedcomRecord] = records if records else []
         self._sources = []
         self._repositories = []
         self._individuals = []
         self._families = []
         self._objects = []
+        self._snotes = []
         if self.records:
             for record in self.records:
                 if record.tag == 'INDI':
-                    record.xref = record.value
                     self._individuals.append(record)
                 if record.tag == 'SOUR' and record.level == 0:
-                    record.xref = record.value
                     self._sources.append(record)
                 if record.tag == 'REPO' and record.level == 0:
-                    record.xref = record.value
+                    print(record.describe())
                     self._repositories.append(record)
                 if record.tag == 'FAM' and record.level == 0:
-                    record.xref = record.value
                     self._families.append(record)
                 if record.tag == 'OBJE' and record.level == 0:
-                    record.xref = record.value
                     self._objects.append(record)
+                if record.tag == 'SNOTE' and record.level == 0:
+                    record.xref = record.value
+                    self._snotes.append(record)
     # =========================================================
     # 2. PROPERTY ACCESSORS (GETTERS & SETTERS)
     # =========================================================
     @property
     def json(self):
         import json
@@ -215,6 +250,9 @@ class Gedcom():
     @property
     def repositories(self) -> List[GedcomRecord]:
+        """
+        List of **REPO** records found in the Genealogy
+        """
         return self._repositories
     @repositories.setter
@@ -253,18 +291,40 @@ class Gedcom():
             raise ValueError("objects must be a list of GedcomRecord objects.")
         self._objects = value
-    # =========================================================
-    # 3. METHODS
-    # =========================================================
-    def write(self):
+    def write(self) -> bool:
         """
         Method placeholder for writing GEDCOM files.
+        Raises
+        ------
+        NotImplementedError
+         writing to legacy GEDCOM file is not currently implimented.
         """
         raise NotImplementedError("Writing of GEDCOM files is not implemented.")
     @staticmethod
     def _records_from_file(filepath: str) -> List[GedcomRecord]:
+        def parse_gedcom7_line(line: str) -> Optional[Tuple[int, Optional[str], str, Optional[str], Optional[str]]]:
+            """
+            Parse a GEDCOM 7 line into: level, xref_id (record), tag, value, xref_value (if value is an @X@)
+            Returns:
+                (level, xref_id, tag, value, xref_value)
+            """
+            match = GEDCOM7_LINE_RE.match(line.strip())
+            if not match:
+                return None
+            level = int(match.group("level"))
+            xref_id = match.group("xref")
+            tag = match.group("tag")
+            value = match.group("value")
+            if value == 'None': value = None
+            xref_value = value.strip("@") if value and XREF_RE.match(value.strip()) else None
+            return level, xref_id, tag, value, xref_value
         extension = '.ged'
         if not os.path.exists(filepath):
@@ -280,6 +340,7 @@ class Gedcom():
             records = []
             record_map = {0: None, 1: None, 2: None, 3: None, 4: None, 5: None}
             for l, line in enumerate(lines):
                 if line.startswith(BOM):
                     line = line.lstrip(BOM)
@@ -295,18 +356,27 @@ class Gedcom():
                 if len(parts) == 3:
                     level, col2, col3 = parts
-                    if col3 in Gedcom.top_level_tags:
+                    if col3 in Gedcom._top_level_tags:
                         tag = col3
                         value = col2
                     else:
                         tag = col2
                         value = col3
                 else:
                     level, tag = parts
+                level, xref, tag, value, xref_value = parse_gedcom7_line(line)
+                if xref is None and xref_value is not None:
+                    xref = xref_value
+               # print(l, level, xref, tag, value, xref_value)
                 level = int(level)
-                new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, value=value)
+                new_record = GedcomRecord(line_num=l + 1, level=level, tag=tag, xref=xref,value=value)
                 if level == 0:
                     records.append(new_record)
                 else:
@@ -315,6 +385,7 @@ class Gedcom():
                     record_map[int(level) - 1].addSubRecord(new_record)
                 record_map[int(level)] = new_record
         return records if records else None
     @staticmethod
@@ -329,17 +400,20 @@ class Gedcom():
             Gedcom: An instance of the Gedcom class.
         """
         records = Gedcom._records_from_file(filepath)
         gedcom = Gedcom(records=records)
         return gedcom
+    def merge_with_file(self, file_path: str) -> bool:
+        """
+        Adds records from a valid (``*``.ged) file to the current Genealogy
+        Args:
+            filepath (str): The path to the GEDCOM file.
+        Returns:
+            bool: Indicates if merge was successful.
+        """
+        return True
-#
-#import re
-#filepath = r"C:\Users\User\Documents\PythonProjects\gedcomx\.ged_files\_DJC_ Nunda Cartwright Family.ged"
-#with open(filepath, 'r', encoding='utf-8') as file:
-#    string = file.read()
-#
-#for match in re.finditer(line, string):
-#    data = match.groupdict()
-#    print(data)
-#'''

gedcom-x 0.5__py3-none-any.whl → 0.5.2__py3-none-any.whl

gedcom-x 0.5__py3-none-any.whl → 0.5.2__py3-none-any.whl