gedcom-x 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/METADATA +1 -1
- gedcom_x-0.5.10.dist-info/RECORD +58 -0
- gedcomx/Extensions/rs10/rsLink.py +109 -59
- gedcomx/__init__.py +4 -1
- gedcomx/address.py +102 -16
- gedcomx/agent.py +81 -24
- gedcomx/attribution.py +52 -28
- gedcomx/conclusion.py +98 -46
- gedcomx/converter.py +209 -79
- gedcomx/coverage.py +10 -1
- gedcomx/date.py +42 -8
- gedcomx/document.py +37 -7
- gedcomx/event.py +77 -20
- gedcomx/evidence_reference.py +9 -0
- gedcomx/extensible.py +86 -0
- gedcomx/fact.py +53 -54
- gedcomx/gedcom.py +10 -0
- gedcomx/gedcom5x.py +30 -20
- gedcomx/gedcom7/GedcomStructure.py +1 -3
- gedcomx/gedcom7/__init__.py +2 -2
- gedcomx/gedcom7/{Gedcom7.py → gedcom7.py} +3 -3
- gedcomx/gedcom7/specification.py +4817 -0
- gedcomx/gedcomx.py +95 -93
- gedcomx/gender.py +21 -9
- gedcomx/group.py +9 -0
- gedcomx/identifier.py +47 -20
- gedcomx/logging_hub.py +19 -0
- gedcomx/mutations.py +10 -5
- gedcomx/name.py +74 -33
- gedcomx/note.py +50 -18
- gedcomx/online_account.py +9 -0
- gedcomx/person.py +46 -27
- gedcomx/place_description.py +54 -8
- gedcomx/place_reference.py +30 -8
- gedcomx/qualifier.py +19 -3
- gedcomx/relationship.py +55 -14
- gedcomx/resource.py +45 -18
- gedcomx/schemas.py +328 -0
- gedcomx/serialization.py +400 -421
- gedcomx/source_citation.py +16 -4
- gedcomx/source_description.py +181 -94
- gedcomx/source_reference.py +51 -16
- gedcomx/subject.py +59 -14
- gedcomx/textvalue.py +66 -12
- gedcomx/translation.py +3 -3
- gedcomx/uri.py +155 -3
- gedcom_x-0.5.8.dist-info/RECORD +0 -56
- gedcomx/gedcom7/Specification.py +0 -347
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/top_level.txt +0 -0
gedcomx/translation.py
CHANGED
@@ -334,7 +334,7 @@ class Translater():
|
|
334
334
|
print(f"Translated {len(self.gedcomx.agents)} 'REPO' records to Agents")
|
335
335
|
for source in self.gedcom.sources:
|
336
336
|
self.parse_record(source)
|
337
|
-
print(f"Translated {len(self.gedcomx.
|
337
|
+
print(f"Translated {len(self.gedcomx.sourceDescriptions)} 'SOUR' records to SourceDescription")
|
338
338
|
|
339
339
|
for object in self.gedcom.objects:
|
340
340
|
self.parse_record(object)
|
@@ -1089,8 +1089,8 @@ class Translater():
|
|
1089
1089
|
if record.xref and record.xref.strip() == '':
|
1090
1090
|
import_log.warning(f"SOUR points to nothing: {record.describe()}")
|
1091
1091
|
return False
|
1092
|
-
if self.gedcomx.
|
1093
|
-
gxobject = SourceReference(descriptionId=record.xref, description=self.gedcomx.
|
1092
|
+
if self.gedcomx.sourceDescriptions.byId(record.xref):
|
1093
|
+
gxobject = SourceReference(descriptionId=record.xref, description=self.gedcomx.sourceDescriptions.byId(record.xref))
|
1094
1094
|
else:
|
1095
1095
|
import_log.warning(f'Could not find source with id: {record.xref}')
|
1096
1096
|
source_description = SourceDescription(id=record.xref)
|
gedcomx/uri.py
CHANGED
@@ -2,6 +2,36 @@ from __future__ import annotations
|
|
2
2
|
from dataclasses import dataclass, field
|
3
3
|
from typing import Mapping, Sequence, Tuple, Union, Iterable
|
4
4
|
from urllib.parse import urlsplit, urlunsplit, urlencode, parse_qsl, SplitResult
|
5
|
+
from urllib.parse import urlunparse
|
6
|
+
|
7
|
+
"""
|
8
|
+
======================================================================
|
9
|
+
Project: Gedcom-X
|
10
|
+
File: uri.py
|
11
|
+
Author: David J. Cartwright
|
12
|
+
Purpose:
|
13
|
+
|
14
|
+
Created: 2025-08-25
|
15
|
+
Updated:
|
16
|
+
- 2025-09-03: _from_json_ refactor
|
17
|
+
|
18
|
+
======================================================================
|
19
|
+
"""
|
20
|
+
|
21
|
+
"""
|
22
|
+
======================================================================
|
23
|
+
GEDCOM Module Types
|
24
|
+
======================================================================
|
25
|
+
"""
|
26
|
+
from .logging_hub import hub, logging
|
27
|
+
"""
|
28
|
+
======================================================================
|
29
|
+
Logging
|
30
|
+
======================================================================
|
31
|
+
"""
|
32
|
+
log = logging.getLogger("gedcomx")
|
33
|
+
serial_log = "gedcomx.serialization"
|
34
|
+
#=====================================================================
|
5
35
|
|
6
36
|
_DEFAULT_SCHEME = "gedcomx"
|
7
37
|
|
@@ -19,8 +49,129 @@ def _encode_query(q: QueryLike) -> str:
|
|
19
49
|
return urlencode(list(q), doseq=True) # coerce iterable to a sequence
|
20
50
|
|
21
51
|
|
52
|
+
class URI():
|
53
|
+
def __init__(self,
|
54
|
+
|
55
|
+
target=None,
|
56
|
+
scheme: str | None = None,
|
57
|
+
authority: str | None = None,
|
58
|
+
path: str | None = None,
|
59
|
+
params: str | None = None,
|
60
|
+
query: str | None = None,
|
61
|
+
fragment: str | None = None,
|
62
|
+
value: str | None = None
|
63
|
+
) -> None:
|
64
|
+
|
65
|
+
self.target = target
|
66
|
+
|
67
|
+
self.scheme = scheme
|
68
|
+
self.authority = authority
|
69
|
+
self.path = path
|
70
|
+
self.params = params
|
71
|
+
self.query = query
|
72
|
+
self.fragment = fragment
|
73
|
+
|
74
|
+
self._value = value
|
75
|
+
|
76
|
+
if self._value:
|
77
|
+
s = urlsplit(self._value)
|
78
|
+
self.scheme = s.scheme or _DEFAULT_SCHEME
|
79
|
+
self.authority=s.netloc
|
80
|
+
self.path=s.path
|
81
|
+
self.query=s.query
|
82
|
+
self.fragment=s.fragment
|
83
|
+
|
84
|
+
if self.target is not None:
|
85
|
+
#log.debug(f"Creating URI from Target {target}, most likely for serialization")
|
86
|
+
if hasattr(self.target,'id'):
|
87
|
+
log.debug(f"'{target}.id' found {target.id}, using as fragment")
|
88
|
+
self.fragment = self.target.id
|
89
|
+
if hasattr(self.target,'uri'):
|
90
|
+
#log.debug(f"'{target}.uri' found, copying")
|
91
|
+
self._value = target.uri._value
|
92
|
+
self.scheme = target.uri.scheme
|
93
|
+
self.authority = target.uri.authority
|
94
|
+
self.path = target.uri.path
|
95
|
+
self.query = target.uri.query
|
96
|
+
self.fragment = target.uri.fragment
|
97
|
+
elif isinstance(target,URI):
|
98
|
+
#log.debug(f"'{target} is a URI, copying")
|
99
|
+
self._value = target._value
|
100
|
+
self.scheme = target.scheme
|
101
|
+
self.authority = target.authority
|
102
|
+
self.path = target.path
|
103
|
+
self.query = target.query
|
104
|
+
self.fragment = target.fragment
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
elif isinstance(self.target,str):
|
109
|
+
#log.warning(f"Creating a URI from target type {type(target)} with data: {target}.")
|
110
|
+
s = urlsplit(self.target)
|
111
|
+
self.scheme = s.scheme or _DEFAULT_SCHEME
|
112
|
+
self.authority=s.netloc
|
113
|
+
self.path=s.path
|
114
|
+
self.query=s.query
|
115
|
+
self.fragment=s.fragment
|
116
|
+
else:
|
117
|
+
#log.warning(f"Unable to create URI from target type {type(target)} with data: {target}.")
|
118
|
+
self._value = target
|
119
|
+
#log.info(f"self.scheme = {self.scheme} self.authority={self.authority} self.path={self.path} self.query={self.query} self.fragment={self.fragment}")
|
120
|
+
|
121
|
+
parts = [
|
122
|
+
self.scheme or "",
|
123
|
+
self.authority or "",
|
124
|
+
self.path or "",
|
125
|
+
self.params or "",
|
126
|
+
self.query or "",
|
127
|
+
self.fragment or "",
|
128
|
+
]
|
129
|
+
if not any(parts):
|
130
|
+
raise ValueError()
|
131
|
+
|
132
|
+
@property
|
133
|
+
def value(self) -> str:
|
134
|
+
parts = [
|
135
|
+
self.scheme or "",
|
136
|
+
self.authority or "",
|
137
|
+
self.path or "",
|
138
|
+
self.params or "",
|
139
|
+
self.query or "",
|
140
|
+
self.fragment or "",
|
141
|
+
]
|
142
|
+
if not any(parts):
|
143
|
+
return None
|
144
|
+
return str(urlunparse(parts))
|
145
|
+
|
146
|
+
def split(self) -> SplitResult:
|
147
|
+
return SplitResult(self.scheme, self.authority, self.path, self.query, self.fragment)
|
148
|
+
|
149
|
+
def __str__(self) -> str:
|
150
|
+
return urlunsplit(self.split())
|
151
|
+
|
152
|
+
def __repr__(self) -> str:
|
153
|
+
return (f"scheme = {self.scheme}, authority={self.authority}, path={self.path}, query={self.query}, fragment={self.fragment}")
|
154
|
+
|
155
|
+
@property
|
156
|
+
def _as_dict_(self):
|
157
|
+
return self.value or self._value
|
158
|
+
|
159
|
+
@property
|
160
|
+
def uri(self):
|
161
|
+
return self
|
162
|
+
|
163
|
+
@classmethod
|
164
|
+
def from_url(cls,url):
|
165
|
+
return cls(target=url)
|
166
|
+
|
167
|
+
@classmethod
|
168
|
+
def _from_json_(cls,data,context=None):
|
169
|
+
return cls(value=data)
|
170
|
+
|
171
|
+
|
172
|
+
|
22
173
|
@dataclass(slots=True)
|
23
|
-
class
|
174
|
+
class _URI:
|
24
175
|
scheme: str = field(default=_DEFAULT_SCHEME)
|
25
176
|
authority: str = field(default="")
|
26
177
|
path: str = field(default="")
|
@@ -79,9 +230,10 @@ class URI:
|
|
79
230
|
|
80
231
|
# Accepts {'resource': '...'} or a plain string
|
81
232
|
@classmethod
|
82
|
-
def
|
233
|
+
def _from_json_(cls, data: str | Mapping[str, object],context=None) -> URI:
|
234
|
+
return cls.from_parts(fragment="NOT IMPLIMENTED")
|
83
235
|
if isinstance(data, str):
|
84
|
-
return cls.
|
236
|
+
return cls.from_parts(fragment="NOT IMPLIMENTED")
|
85
237
|
if isinstance(data, Mapping):
|
86
238
|
raw = data.get("resource") or data.get("value") or ""
|
87
239
|
if isinstance(raw, str) and raw:
|
gedcom_x-0.5.8.dist-info/RECORD
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
gedcomx/Logging.py,sha256=vBDOjawVXc4tCge1laYjy6_2Ves-fnGzG0m6NnLZejE,624
|
2
|
-
gedcomx/TopLevelTypeCollection.py,sha256=p99i-O5LXiXe3GlC6jWuz4nH1TAcKxOLbe0VRxWbSFY,1495
|
3
|
-
gedcomx/Zip.py,sha256=lBxcv-Vip45884EHj56wZJJ5I36Q38UuHUidDxQBoS8,14
|
4
|
-
gedcomx/__init__.py,sha256=bQs2gL9GfLTFJbDppCIhxn8HTwuJUBo1PUjeEqQuHFE,1723
|
5
|
-
gedcomx/address.py,sha256=zwvsA6N6edR3FJ31kraGfv250e70TAsi2AB-grZTkF8,4865
|
6
|
-
gedcomx/agent.py,sha256=voUM3pxxr3DTTwZUCZgkqONAux2FsoIIJAErlEofbZ4,9321
|
7
|
-
gedcomx/attribution.py,sha256=cGvRyzofLSORzgtv7dFWAh8EKwWfd_I7oxpKeyqNBU0,3381
|
8
|
-
gedcomx/conclusion.py,sha256=yU0eul3rj6VBb5lFAPTAY9138x15yYQVjO6dN2gcdKo,9187
|
9
|
-
gedcomx/converter.py,sha256=yP3xw1vo0uVaM74_fyE51UQA0lq2eBzfcnasLyo-DfU,51508
|
10
|
-
gedcomx/coverage.py,sha256=E6Oa9O3ahbE81zTLgX2QQ-VbnLdzjWvqeRpY_KO2v_s,1837
|
11
|
-
gedcomx/date.py,sha256=rkvpVtziwvzsbGip8OkyeJXsi1spc4S6uWpRcNSSxCo,2344
|
12
|
-
gedcomx/document.py,sha256=iS3eANznPx8TxBytPa3CtwHS4QuPs80skQNdL-Rwis4,3519
|
13
|
-
gedcomx/event.py,sha256=YJhaWp42RCQdELIYzjZVrZKdb5E9qjs-MfUrujZLIbU,13324
|
14
|
-
gedcomx/evidence_reference.py,sha256=9Wo2iF8G8BIni5EvuqFj78KKbIBbSs1ZJQJ0pGiUoM4,348
|
15
|
-
gedcomx/exceptions.py,sha256=0OdPM3euhBMgX8o61ZwPuKeN8zPuSuuDcSBFflVGFqk,587
|
16
|
-
gedcomx/extensible_enum.py,sha256=DftCZLMBNul3C9hwh-rf0GE3SVdvylvyd5mt7bX_l6o,6535
|
17
|
-
gedcomx/fact.py,sha256=vYS4yweXGN6gX02sQXZbIiye_SLO3f3mRoCvKmgQ5po,24871
|
18
|
-
gedcomx/gedcom.py,sha256=l_BuLBynQacDtpLjhs2Afrabfiqt2Opoo_5_7myI7Z4,1709
|
19
|
-
gedcomx/gedcom5x.py,sha256=V4kLW-TJobJbaVRYPJVApMUYI2pPbEBHWetXQ-mwzbM,23138
|
20
|
-
gedcomx/gedcomx.py,sha256=dIbPas65wWcj3BeCqqRauE6WK2iGgjSj7olekF5H0pU,19019
|
21
|
-
gedcomx/gender.py,sha256=3om8xY-mjAx1lGUs9XZwNR5x-Dmqxvsh9NDcw8vy2K0,2698
|
22
|
-
gedcomx/group.py,sha256=8yuQuZb6XE56A3J57pqISPIsZwfY0zWyvQcBn0-3zLs,2531
|
23
|
-
gedcomx/identifier.py,sha256=_o821Y1dIkcC9knb4GeSD691o9Nj-A_bS2MHHNft--0,8492
|
24
|
-
gedcomx/logging_hub.py,sha256=f4z1r6WL3TdL1kg1eaCfCk7XEn9epCH5zUCXd1OT4mc,7793
|
25
|
-
gedcomx/mutations.py,sha256=3TSNZggB6kNsys7XLojgzwiHPNGnmz-XBK2Fnuqg6KA,7224
|
26
|
-
gedcomx/name.py,sha256=1DffrckDP0_A05bHDxV9KgMhGlXPIP5SNgtZQEA84Nw,18643
|
27
|
-
gedcomx/note.py,sha256=URgLdSmrSCSA9hx4eZzAUPDSWph0pMoUyRQp9qafGss,2547
|
28
|
-
gedcomx/online_account.py,sha256=zZliLAmrtQnPJH0z-UgfqBdKDhq8Y3eBbViKsJnRrkg,291
|
29
|
-
gedcomx/person.py,sha256=71zig0D708qGQmLhHZct9xAnp9uVdQ4E0sd7WFVwO5Y,8103
|
30
|
-
gedcomx/place_description.py,sha256=rXYQnYcyZemJtZnkMxqd8-4AXm8Ez_e09PvBQG_9ucc,5563
|
31
|
-
gedcomx/place_reference.py,sha256=sEo_8PIEeuO0VzBCYhORhxvN4opuGk4RweOlPTjTqVY,2112
|
32
|
-
gedcomx/qualifier.py,sha256=nM02EW8uEpZXUhejX9YTZk1sD0Why-_27RgUdJrZWkM,1778
|
33
|
-
gedcomx/relationship.py,sha256=D2qsgUZ-s3HJWI8IlSahissFVyfa6-YhSb6P0QIReVk,4265
|
34
|
-
gedcomx/resource.py,sha256=c1nID0mzDMNyq8vQWZmISLlGm765Ku_m6BJmPA9yanI,2674
|
35
|
-
gedcomx/serialization.py,sha256=RvrIQkAJJNmIk6qHomEbikkuudk9ei9Tc_C17xr-GJs,34015
|
36
|
-
gedcomx/source_citation.py,sha256=aW-lEb7bT9QU49GiBjJppFMBvtisR6fhVVuXjr5y4vQ,742
|
37
|
-
gedcomx/source_description.py,sha256=9rrFVgFXIPF-M2suRQmJy_-e3uOak2kE_Bc3ZAtwTTs,14973
|
38
|
-
gedcomx/source_reference.py,sha256=fvAvhi046d82gQ53v64moq9o11VulHQbZ1FPErjHDjw,5190
|
39
|
-
gedcomx/subject.py,sha256=AQkC2fZOdEIXjIfoKIh7bI3heqkupeCo5j6pFAdrbms,3084
|
40
|
-
gedcomx/textvalue.py,sha256=6B0wMxL0nigFNzhXZDhbTONvFGbnM2t2NcDZiZuu4Zw,1112
|
41
|
-
gedcomx/translation.py,sha256=nCs6jAXHBg9qCePcCnY0S2uw-c8GfEn7IJhHHmDVe6s,61398
|
42
|
-
gedcomx/uri.py,sha256=0k6NySntsZoyAeEkTthNM-fcd49_3hPDanaKXLaYYzA,4558
|
43
|
-
gedcomx/Extensions/__init__.py,sha256=MQzi_whzlxiLiknUNh10hG8OVrNqJE38l6n-AwCssx8,24
|
44
|
-
gedcomx/Extensions/rs10/__init__.py,sha256=nSHoZiD8hsCAyE-KyRTuWSLqSJSFh12kSz7hqilAMps,26
|
45
|
-
gedcomx/Extensions/rs10/rsLink.py,sha256=yZhd-XIqERhBD4w1-VsIFJ7IeR09W0JYK8fAbkIR0zs,4353
|
46
|
-
gedcomx/gedcom7/Exceptions.py,sha256=xeKr4x8b7r8pOqJ9yMpsCVTyxPeOlREDGgKoM5rX4U0,149
|
47
|
-
gedcomx/gedcom7/Gedcom7.py,sha256=i_g9W0qsZQYLMoD2sBCA13ibKRYFnSf4uj9-ix_tE4Q,5614
|
48
|
-
gedcomx/gedcom7/GedcomStructure.py,sha256=ZKNoEcXc41KdrCLPx-A8ohOU9VYmXAkFc4xuZExsBPw,3435
|
49
|
-
gedcomx/gedcom7/Specification.py,sha256=qIBe9wzL1GB0l0NyetS1ncbhz5C44b9nMyjAxHuqMt8,9245
|
50
|
-
gedcomx/gedcom7/__init__.py,sha256=8ELFZJ_j8RbRDKG9i6Sqb8s_CSxU9iM3bMN_0779rgI,679
|
51
|
-
gedcomx/gedcom7/g7interop.py,sha256=hSzwqeok2n7xziEvN2QiJY7bVCWrOnZIZWXubnkrv7w,9945
|
52
|
-
gedcomx/gedcom7/logger.py,sha256=QM1SySyh91UEhs90d2DMhH-s9qGF8XS8I8gr1eOcmfw,617
|
53
|
-
gedcom_x-0.5.8.dist-info/METADATA,sha256=E7gYz4fVbIoNVtFf9Mce6i5f4Nxj1mtKrpBNJnDqrps,4332
|
54
|
-
gedcom_x-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
55
|
-
gedcom_x-0.5.8.dist-info/top_level.txt,sha256=smVBF4nxSU-mzCd6idtRYTbYjPICMMi8pTqewEmqF8Y,8
|
56
|
-
gedcom_x-0.5.8.dist-info/RECORD,,
|
gedcomx/gedcom7/Specification.py
DELETED
@@ -1,347 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Dict, Any
|
3
|
-
import os
|
4
|
-
|
5
|
-
def load_spec(file_path: str) -> Dict[str, Any]:
|
6
|
-
"""
|
7
|
-
Load the JSON spec file into a Python dict.
|
8
|
-
|
9
|
-
:param file_path: Path to your spec.json
|
10
|
-
:return: A dict mapping each URI to its structure-definition dict.
|
11
|
-
"""
|
12
|
-
with open(file_path, "r", encoding="utf-8") as f:
|
13
|
-
return json.load(f)
|
14
|
-
|
15
|
-
SPEC_PATH = os.path.join(os.path.dirname(__file__), "spec.json")
|
16
|
-
structure_specs = load_spec(SPEC_PATH)
|
17
|
-
|
18
|
-
def get_substructures(key: str) -> Dict[str, Any]:
|
19
|
-
"""
|
20
|
-
Return the 'substructures' dict for the given key.
|
21
|
-
"""
|
22
|
-
struct = structure_specs.get(key)
|
23
|
-
if struct is None:
|
24
|
-
return {}
|
25
|
-
raise KeyError(f"No entry for key {key!r} in spec.json")
|
26
|
-
return struct.get("substructures", {})
|
27
|
-
|
28
|
-
def get_label(key: str) -> Dict[str, Any]:
|
29
|
-
"""
|
30
|
-
Return the label for the given key.
|
31
|
-
"""
|
32
|
-
struct = structure_specs.get(key)
|
33
|
-
if struct is None:
|
34
|
-
raise KeyError(f"No entry for key {key!r} in spec.json")
|
35
|
-
return 'None'
|
36
|
-
|
37
|
-
return struct.get("label", 'No Label')
|
38
|
-
|
39
|
-
def match_uri(tag: str,parent):
|
40
|
-
uri = None
|
41
|
-
if tag.startswith("_"):
|
42
|
-
uri = structure_specs.get(tag)
|
43
|
-
elif parent:
|
44
|
-
valid_substrutures = get_substructures(parent.uri)
|
45
|
-
uri = valid_substrutures.get(tag)
|
46
|
-
elif 'https://gedcom.io/terms/v7/record-' + tag in structure_specs.keys():
|
47
|
-
uri = 'https://gedcom.io/terms/v7/record-' + tag
|
48
|
-
elif 'https://gedcom.io/terms/v7/' + tag in structure_specs.keys():
|
49
|
-
uri = 'https://gedcom.io/terms/v7/' + tag
|
50
|
-
if uri == None:
|
51
|
-
raise ValueError(f'Could not get uri for tag: {tag}, parent: {parent}')
|
52
|
-
return uri
|
53
|
-
|
54
|
-
'''
|
55
|
-
MIT License
|
56
|
-
|
57
|
-
Copyright (c) 2022 David Straub
|
58
|
-
|
59
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
60
|
-
of this software and associated documentation files (the "Software"), to deal
|
61
|
-
in the Software without restriction, including without limitation the rights
|
62
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
63
|
-
copies of the Software, and to permit persons to whom the Software is
|
64
|
-
furnished to do so, subject to the following conditions:
|
65
|
-
|
66
|
-
The above copyright notice and this permission notice shall be included in all
|
67
|
-
copies or substantial portions of the Software.
|
68
|
-
|
69
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
70
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
71
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
72
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
73
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
74
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
75
|
-
SOFTWARE.
|
76
|
-
'''
|
77
|
-
# TODO: https://github.com/DavidMStraub
|
78
|
-
|
79
|
-
# GEDCOM 7 regex patterns thanks@DavidMStraub
|
80
|
-
|
81
|
-
# --- Common primitives ---
|
82
|
-
d = '\\ ' # GEDCOM delimiter (escaped space)
|
83
|
-
integer = '[0-9]+' # One or more digits
|
84
|
-
nonzero = '[1-9]' # Digits 1–9
|
85
|
-
|
86
|
-
# --- Duration units ---
|
87
|
-
years = f'{integer}y'
|
88
|
-
months = f'{integer}m'
|
89
|
-
weeks = f'{integer}w'
|
90
|
-
days = f'{integer}d'
|
91
|
-
|
92
|
-
# --- Age format ---
|
93
|
-
agebound = '[<>]' # Optional boundary indicator (less than, greater than)
|
94
|
-
ageduration = (
|
95
|
-
f'((?P<years>{years})({d}(?P<months1>{months}))?({d}(?P<weeks1>{weeks}))?'
|
96
|
-
f'({d}(?P<days1>{days}))?|(?P<months2>{months})({d}(?P<weeks2>{weeks}))?'
|
97
|
-
f'({d}(?P<days2>{days}))?|(?P<weeks3>{weeks})({d}(?P<days3>{days}))?|'
|
98
|
-
f'(?P<days4>{days}))'
|
99
|
-
)
|
100
|
-
age = f'((?P<agebound>{agebound}){d})?{ageduration}'
|
101
|
-
|
102
|
-
# --- Tags and Enums ---
|
103
|
-
underscore = '_'
|
104
|
-
ucletter = '[A-Z]'
|
105
|
-
tagchar = f'({ucletter}|[0-9]|{underscore})'
|
106
|
-
exttag = f'{underscore}({tagchar})+'
|
107
|
-
stdtag = f'{ucletter}({tagchar})*'
|
108
|
-
tag = f'({stdtag}|{exttag})'
|
109
|
-
enum = tag
|
110
|
-
|
111
|
-
# --- Dates ---
|
112
|
-
daterestrict = 'FROM|TO|BET|AND|BEF|AFT|ABT|CAL|EST'
|
113
|
-
calendar = f'(?!{daterestrict})(GREGORIAN|JULIAN|FRENCH_R|HEBREW|{exttag})'
|
114
|
-
day = integer
|
115
|
-
month = f'(?!{daterestrict})({stdtag}|{exttag})'
|
116
|
-
year = integer
|
117
|
-
epoch = f'(?!{daterestrict})(BCE|{exttag})'
|
118
|
-
|
119
|
-
date = f'({calendar}{d})?(({day}{d})?{month}{d})?{year}({d}{epoch})?'
|
120
|
-
|
121
|
-
# --- Date variants with captures ---
|
122
|
-
date_capture = (
|
123
|
-
f'((?P<calendar>{calendar}){d})?(((?P<day>{day}){d})?'
|
124
|
-
f'(?P<month>{month}){d})?(?P<year>{year})({d}(?P<epoch>{epoch}))?'
|
125
|
-
)
|
126
|
-
|
127
|
-
dateapprox = f'(?P<qualifier>ABT|CAL|EST){d}(?P<dateapprox>{date})'
|
128
|
-
dateexact = f'(?P<day>{day}){d}(?P<month>{month}){d}(?P<year>{year})'
|
129
|
-
dateperiod = f'((TO{d}(?P<todate1>{date}))?|FROM{d}(?P<fromdate>{date})({d}TO{d}(?P<todate2>{date}))?)'
|
130
|
-
daterange = f'(BET{d}(?P<between>{date}){d}AND{d}(?P<and>{date})|AFT{d}(?P<after>{date})|BEF{d}(?P<before>{date}))'
|
131
|
-
datevalue = f'({date}|{dateperiod}|{daterange}|{dateapprox})?'
|
132
|
-
|
133
|
-
# --- Media types ---
|
134
|
-
mt_char = "[ -!#-'*-+\\--.0-9A-Z^-~]"
|
135
|
-
mt_token = f'({mt_char})+'
|
136
|
-
mt_type = mt_token
|
137
|
-
mt_subtype = mt_token
|
138
|
-
mt_attribute = mt_token
|
139
|
-
mt_qtext = '[\t-\n -!#-\\[\\]-~]'
|
140
|
-
mt_qpair = '\\\\[\t-~]'
|
141
|
-
mt_qstring = f'"({mt_qtext}|{mt_qpair})*"'
|
142
|
-
mt_value = f'({mt_token}|{mt_qstring})'
|
143
|
-
mt_parameter = f'{mt_attribute}={mt_value}'
|
144
|
-
mediatype = f'{mt_type}/{mt_subtype}(;{mt_parameter})*'
|
145
|
-
|
146
|
-
# --- Line structure (GEDCOM record lines) ---
|
147
|
-
atsign = '@'
|
148
|
-
xref = f'{atsign}({tagchar})+{atsign}'
|
149
|
-
voidptr = '@VOID@'
|
150
|
-
pointer = f'(?P<pointer>{voidptr}|{xref})'
|
151
|
-
nonat = '[\t -?A-\\U0010ffff]'
|
152
|
-
noneol = '[\t -\\U0010ffff]'
|
153
|
-
linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
|
154
|
-
lineval = f'({pointer}|{linestr})'
|
155
|
-
|
156
|
-
level = f'(?P<level>0|{nonzero}[0-9]*)'
|
157
|
-
eol = '(\\\r(\\\n)?|\\\n)'
|
158
|
-
line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
|
159
|
-
|
160
|
-
# --- List formats ---
|
161
|
-
nocommasp = '[\t-\\x1d!-+\\--\\U0010ffff]'
|
162
|
-
nocomma = '[\t-+\\--\\U0010ffff]'
|
163
|
-
listitem = f'({nocommasp}|{nocommasp}({nocomma})*{nocommasp})?'
|
164
|
-
listdelim = f'({d})*,({d})*'
|
165
|
-
list = f'{listitem}({listdelim}{listitem})*'
|
166
|
-
list_enum = f'{enum}({listdelim}{enum})*'
|
167
|
-
list_text = list
|
168
|
-
|
169
|
-
# --- Names ---
|
170
|
-
namechar = '[ -.0-\\U0010ffff]'
|
171
|
-
namestr = f'({namechar})+'
|
172
|
-
personalname = f'({namestr}|({namestr})?/(?P<surname>{namestr})?/({namestr})?)'
|
173
|
-
|
174
|
-
# --- Time format ---
|
175
|
-
fraction = '[0-9]+'
|
176
|
-
second = '[012345][0-9]'
|
177
|
-
minute = '[012345][0-9]'
|
178
|
-
hour = '([0-9]|[01][0-9]|2[0123])'
|
179
|
-
time = f'(?P<hour>{hour}):(?P<minute>{minute})(:(?P<second>{second})(\\.(?P<fraction>{fraction}))?)?(?P<tz>Z)?'
|
180
|
-
|
181
|
-
# --- Text and special ---
|
182
|
-
anychar = '[\t-\\U0010ffff]'
|
183
|
-
text = f'({anychar})*'
|
184
|
-
special = text
|
185
|
-
|
186
|
-
# --- Boolean ---
|
187
|
-
boolean = 'Y'
|
188
|
-
|
189
|
-
# --- Banned Unicode Ranges ---
|
190
|
-
'''
|
191
|
-
banned = %x00-08 / %x0B-0C / %x0E-1F ; C0 other than LF CR and Tab
|
192
|
-
/ %x7F ; DEL
|
193
|
-
/ %x80-9F ; C1
|
194
|
-
/ %xD800-DFFF ; Surrogates
|
195
|
-
/ %xFFFE-FFFF ; invalid
|
196
|
-
; All other rules assume the absence of any banned characters
|
197
|
-
'''
|
198
|
-
banned = (
|
199
|
-
'[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f\\x7f\\x80-\\x9f\\ud800-\\udfff'
|
200
|
-
'\\ufffe-\\uffff]'
|
201
|
-
)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
# TAGS
|
207
|
-
CONT = "CONT"
|
208
|
-
HEAD = "HEAD"
|
209
|
-
ABBR = "ABBR"
|
210
|
-
ADDR = "ADDR"
|
211
|
-
ADOP = "ADOP"
|
212
|
-
ADR1 = "ADR1"
|
213
|
-
ADR2 = "ADR2"
|
214
|
-
ADR3 = "ADR3"
|
215
|
-
AGE = "AGE"
|
216
|
-
AGNC = "AGNC"
|
217
|
-
ALIA = "ALIA"
|
218
|
-
ANCI = "ANCI"
|
219
|
-
ANUL = "ANUL"
|
220
|
-
ASSO = "ASSO"
|
221
|
-
AUTH = "AUTH"
|
222
|
-
BAPL = "BAPL"
|
223
|
-
BAPM = "BAPM"
|
224
|
-
BARM = "BARM"
|
225
|
-
BASM = "BASM"
|
226
|
-
BIRT = "BIRT"
|
227
|
-
BLES = "BLES"
|
228
|
-
BURI = "BURI"
|
229
|
-
CALN = "CALN"
|
230
|
-
CAST = "CAST"
|
231
|
-
CAUS = "CAUS"
|
232
|
-
CENS = "CENS"
|
233
|
-
CHAN = "CHAN"
|
234
|
-
CHIL = "CHIL"
|
235
|
-
CHR = "CHR"
|
236
|
-
CHRA = "CHRA"
|
237
|
-
CITY = "CITY"
|
238
|
-
CONF = "CONF"
|
239
|
-
CONL = "CONL"
|
240
|
-
COPR = "COPR"
|
241
|
-
CORP = "CORP"
|
242
|
-
CREA = "CREA"
|
243
|
-
CREM = "CREM"
|
244
|
-
CROP = "CROP"
|
245
|
-
CTRY = "CTRY"
|
246
|
-
DATA = "DATA"
|
247
|
-
DATE = "DATE"
|
248
|
-
DEAT = "DEAT"
|
249
|
-
DESI = "DESI"
|
250
|
-
DEST = "DEST"
|
251
|
-
DIV = "DIV"
|
252
|
-
DIVF = "DIVF"
|
253
|
-
DSCR = "DSCR"
|
254
|
-
EDUC = "EDUC"
|
255
|
-
EMAIL = "EMAIL"
|
256
|
-
EMIG = "EMIG"
|
257
|
-
ENDL = "ENDL"
|
258
|
-
ENGA = "ENGA"
|
259
|
-
EVEN = "EVEN"
|
260
|
-
EXID = "EXID"
|
261
|
-
FACT = "FACT"
|
262
|
-
FAM = "FAM"
|
263
|
-
FAMC = "FAMC"
|
264
|
-
FAMS = "FAMS"
|
265
|
-
FAX = "FAX"
|
266
|
-
FCOM = "FCOM"
|
267
|
-
FILE = "FILE"
|
268
|
-
FORM = "FORM"
|
269
|
-
GEDC = "GEDC"
|
270
|
-
GIVN = "GIVN"
|
271
|
-
GRAD = "GRAD"
|
272
|
-
HEIGHT = "HEIGHT"
|
273
|
-
HUSB = "HUSB"
|
274
|
-
IDNO = "IDNO"
|
275
|
-
IMMI = "IMMI"
|
276
|
-
INDI = "INDI"
|
277
|
-
INIL = "INIL"
|
278
|
-
LANG = "LANG"
|
279
|
-
LATI = "LATI"
|
280
|
-
LEFT = "LEFT"
|
281
|
-
LONG = "LONG"
|
282
|
-
MAP = "MAP"
|
283
|
-
MARB = "MARB"
|
284
|
-
MARC = "MARC"
|
285
|
-
MARL = "MARL"
|
286
|
-
MARR = "MARR"
|
287
|
-
MARS = "MARS"
|
288
|
-
MEDI = "MEDI"
|
289
|
-
MIME = "MIME"
|
290
|
-
NAME = "NAME"
|
291
|
-
NATI = "NATI"
|
292
|
-
NATU = "NATU"
|
293
|
-
NCHI = "NCHI"
|
294
|
-
NICK = "NICK"
|
295
|
-
NMR = "NMR"
|
296
|
-
NO = "NO"
|
297
|
-
NOTE = "NOTE"
|
298
|
-
NPFX = "NPFX"
|
299
|
-
NSFX = "NSFX"
|
300
|
-
OBJE = "OBJE"
|
301
|
-
OCCU = "OCCU"
|
302
|
-
ORDN = "ORDN"
|
303
|
-
PAGE = "PAGE"
|
304
|
-
PEDI = "PEDI"
|
305
|
-
PHON = "PHON"
|
306
|
-
PHRASE = "PHRASE"
|
307
|
-
PLAC = "PLAC"
|
308
|
-
POST = "POST"
|
309
|
-
PROB = "PROB"
|
310
|
-
PROP = "PROP"
|
311
|
-
PUBL = "PUBL"
|
312
|
-
QUAY = "QUAY"
|
313
|
-
REFN = "REFN"
|
314
|
-
RELI = "RELI"
|
315
|
-
REPO = "REPO"
|
316
|
-
RESI = "RESI"
|
317
|
-
RESN = "RESN"
|
318
|
-
RETI = "RETI"
|
319
|
-
ROLE = "ROLE"
|
320
|
-
SCHMA = "SCHMA"
|
321
|
-
SDATE = "SDATE"
|
322
|
-
SEX = "SEX"
|
323
|
-
SLGC = "SLGC"
|
324
|
-
SLGS = "SLGS"
|
325
|
-
SNOTE = "SNOTE"
|
326
|
-
SOUR = "SOUR"
|
327
|
-
SPFX = "SPFX"
|
328
|
-
SSN = "SSN"
|
329
|
-
STAE = "STAE"
|
330
|
-
STAT = "STAT"
|
331
|
-
SUBM = "SUBM"
|
332
|
-
SURN = "SURN"
|
333
|
-
TAG = "TAG"
|
334
|
-
TEMP = "TEMP"
|
335
|
-
TEXT = "TEXT"
|
336
|
-
TIME = "TIME"
|
337
|
-
TITL = "TITL"
|
338
|
-
TOP = "TOP"
|
339
|
-
TRAN = "TRAN"
|
340
|
-
TRLR = "TRLR"
|
341
|
-
TYPE = "TYPE"
|
342
|
-
UID = "UID"
|
343
|
-
VERS = "VERS"
|
344
|
-
WIDTH = "WIDTH"
|
345
|
-
WIFE = "WIFE"
|
346
|
-
WILL = "WILL"
|
347
|
-
WWW = "WWW"
|
File without changes
|
File without changes
|