gedcom-x 0.5.8__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/METADATA +1 -1
  2. gedcom_x-0.5.10.dist-info/RECORD +58 -0
  3. gedcomx/Extensions/rs10/rsLink.py +109 -59
  4. gedcomx/__init__.py +4 -1
  5. gedcomx/address.py +102 -16
  6. gedcomx/agent.py +81 -24
  7. gedcomx/attribution.py +52 -28
  8. gedcomx/conclusion.py +98 -46
  9. gedcomx/converter.py +209 -79
  10. gedcomx/coverage.py +10 -1
  11. gedcomx/date.py +42 -8
  12. gedcomx/document.py +37 -7
  13. gedcomx/event.py +77 -20
  14. gedcomx/evidence_reference.py +9 -0
  15. gedcomx/extensible.py +86 -0
  16. gedcomx/fact.py +53 -54
  17. gedcomx/gedcom.py +10 -0
  18. gedcomx/gedcom5x.py +30 -20
  19. gedcomx/gedcom7/GedcomStructure.py +1 -3
  20. gedcomx/gedcom7/__init__.py +2 -2
  21. gedcomx/gedcom7/{Gedcom7.py → gedcom7.py} +3 -3
  22. gedcomx/gedcom7/specification.py +4817 -0
  23. gedcomx/gedcomx.py +95 -93
  24. gedcomx/gender.py +21 -9
  25. gedcomx/group.py +9 -0
  26. gedcomx/identifier.py +47 -20
  27. gedcomx/logging_hub.py +19 -0
  28. gedcomx/mutations.py +10 -5
  29. gedcomx/name.py +74 -33
  30. gedcomx/note.py +50 -18
  31. gedcomx/online_account.py +9 -0
  32. gedcomx/person.py +46 -27
  33. gedcomx/place_description.py +54 -8
  34. gedcomx/place_reference.py +30 -8
  35. gedcomx/qualifier.py +19 -3
  36. gedcomx/relationship.py +55 -14
  37. gedcomx/resource.py +45 -18
  38. gedcomx/schemas.py +328 -0
  39. gedcomx/serialization.py +400 -421
  40. gedcomx/source_citation.py +16 -4
  41. gedcomx/source_description.py +181 -94
  42. gedcomx/source_reference.py +51 -16
  43. gedcomx/subject.py +59 -14
  44. gedcomx/textvalue.py +66 -12
  45. gedcomx/translation.py +3 -3
  46. gedcomx/uri.py +155 -3
  47. gedcom_x-0.5.8.dist-info/RECORD +0 -56
  48. gedcomx/gedcom7/Specification.py +0 -347
  49. {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/WHEEL +0 -0
  50. {gedcom_x-0.5.8.dist-info → gedcom_x-0.5.10.dist-info}/top_level.txt +0 -0
gedcomx/translation.py CHANGED
@@ -334,7 +334,7 @@ class Translater():
334
334
  print(f"Translated {len(self.gedcomx.agents)} 'REPO' records to Agents")
335
335
  for source in self.gedcom.sources:
336
336
  self.parse_record(source)
337
- print(f"Translated {len(self.gedcomx.source_descriptions)} 'SOUR' records to SourceDescription")
337
+ print(f"Translated {len(self.gedcomx.sourceDescriptions)} 'SOUR' records to SourceDescription")
338
338
 
339
339
  for object in self.gedcom.objects:
340
340
  self.parse_record(object)
@@ -1089,8 +1089,8 @@ class Translater():
1089
1089
  if record.xref and record.xref.strip() == '':
1090
1090
  import_log.warning(f"SOUR points to nothing: {record.describe()}")
1091
1091
  return False
1092
- if self.gedcomx.source_descriptions.byId(record.xref):
1093
- gxobject = SourceReference(descriptionId=record.xref, description=self.gedcomx.source_descriptions.byId(record.xref))
1092
+ if self.gedcomx.sourceDescriptions.byId(record.xref):
1093
+ gxobject = SourceReference(descriptionId=record.xref, description=self.gedcomx.sourceDescriptions.byId(record.xref))
1094
1094
  else:
1095
1095
  import_log.warning(f'Could not find source with id: {record.xref}')
1096
1096
  source_description = SourceDescription(id=record.xref)
gedcomx/uri.py CHANGED
@@ -2,6 +2,36 @@ from __future__ import annotations
2
2
  from dataclasses import dataclass, field
3
3
  from typing import Mapping, Sequence, Tuple, Union, Iterable
4
4
  from urllib.parse import urlsplit, urlunsplit, urlencode, parse_qsl, SplitResult
5
+ from urllib.parse import urlunparse
6
+
7
+ """
8
+ ======================================================================
9
+ Project: Gedcom-X
10
+ File: uri.py
11
+ Author: David J. Cartwright
12
+ Purpose:
13
+
14
+ Created: 2025-08-25
15
+ Updated:
16
+ - 2025-09-03: _from_json_ refactor
17
+
18
+ ======================================================================
19
+ """
20
+
21
+ """
22
+ ======================================================================
23
+ GEDCOM Module Types
24
+ ======================================================================
25
+ """
26
+ from .logging_hub import hub, logging
27
+ """
28
+ ======================================================================
29
+ Logging
30
+ ======================================================================
31
+ """
32
+ log = logging.getLogger("gedcomx")
33
+ serial_log = "gedcomx.serialization"
34
+ #=====================================================================
5
35
 
6
36
  _DEFAULT_SCHEME = "gedcomx"
7
37
 
@@ -19,8 +49,129 @@ def _encode_query(q: QueryLike) -> str:
19
49
  return urlencode(list(q), doseq=True) # coerce iterable to a sequence
20
50
 
21
51
 
52
+ class URI():
53
+ def __init__(self,
54
+
55
+ target=None,
56
+ scheme: str | None = None,
57
+ authority: str | None = None,
58
+ path: str | None = None,
59
+ params: str | None = None,
60
+ query: str | None = None,
61
+ fragment: str | None = None,
62
+ value: str | None = None
63
+ ) -> None:
64
+
65
+ self.target = target
66
+
67
+ self.scheme = scheme
68
+ self.authority = authority
69
+ self.path = path
70
+ self.params = params
71
+ self.query = query
72
+ self.fragment = fragment
73
+
74
+ self._value = value
75
+
76
+ if self._value:
77
+ s = urlsplit(self._value)
78
+ self.scheme = s.scheme or _DEFAULT_SCHEME
79
+ self.authority=s.netloc
80
+ self.path=s.path
81
+ self.query=s.query
82
+ self.fragment=s.fragment
83
+
84
+ if self.target is not None:
85
+ #log.debug(f"Creating URI from Target {target}, most likely for serialization")
86
+ if hasattr(self.target,'id'):
87
+ log.debug(f"'{target}.id' found {target.id}, using as fragment")
88
+ self.fragment = self.target.id
89
+ if hasattr(self.target,'uri'):
90
+ #log.debug(f"'{target}.uri' found, copying")
91
+ self._value = target.uri._value
92
+ self.scheme = target.uri.scheme
93
+ self.authority = target.uri.authority
94
+ self.path = target.uri.path
95
+ self.query = target.uri.query
96
+ self.fragment = target.uri.fragment
97
+ elif isinstance(target,URI):
98
+ #log.debug(f"'{target} is a URI, copying")
99
+ self._value = target._value
100
+ self.scheme = target.scheme
101
+ self.authority = target.authority
102
+ self.path = target.path
103
+ self.query = target.query
104
+ self.fragment = target.fragment
105
+
106
+
107
+
108
+ elif isinstance(self.target,str):
109
+ #log.warning(f"Creating a URI from target type {type(target)} with data: {target}.")
110
+ s = urlsplit(self.target)
111
+ self.scheme = s.scheme or _DEFAULT_SCHEME
112
+ self.authority=s.netloc
113
+ self.path=s.path
114
+ self.query=s.query
115
+ self.fragment=s.fragment
116
+ else:
117
+ #log.warning(f"Unable to create URI from target type {type(target)} with data: {target}.")
118
+ self._value = target
119
+ #log.info(f"self.scheme = {self.scheme} self.authority={self.authority} self.path={self.path} self.query={self.query} self.fragment={self.fragment}")
120
+
121
+ parts = [
122
+ self.scheme or "",
123
+ self.authority or "",
124
+ self.path or "",
125
+ self.params or "",
126
+ self.query or "",
127
+ self.fragment or "",
128
+ ]
129
+ if not any(parts):
130
+ raise ValueError()
131
+
132
+ @property
133
+ def value(self) -> str:
134
+ parts = [
135
+ self.scheme or "",
136
+ self.authority or "",
137
+ self.path or "",
138
+ self.params or "",
139
+ self.query or "",
140
+ self.fragment or "",
141
+ ]
142
+ if not any(parts):
143
+ return None
144
+ return str(urlunparse(parts))
145
+
146
+ def split(self) -> SplitResult:
147
+ return SplitResult(self.scheme, self.authority, self.path, self.query, self.fragment)
148
+
149
+ def __str__(self) -> str:
150
+ return urlunsplit(self.split())
151
+
152
+ def __repr__(self) -> str:
153
+ return (f"scheme = {self.scheme}, authority={self.authority}, path={self.path}, query={self.query}, fragment={self.fragment}")
154
+
155
+ @property
156
+ def _as_dict_(self):
157
+ return self.value or self._value
158
+
159
+ @property
160
+ def uri(self):
161
+ return self
162
+
163
+ @classmethod
164
+ def from_url(cls,url):
165
+ return cls(target=url)
166
+
167
+ @classmethod
168
+ def _from_json_(cls,data,context=None):
169
+ return cls(value=data)
170
+
171
+
172
+
22
173
  @dataclass(slots=True)
23
- class URI:
174
+ class _URI:
24
175
  scheme: str = field(default=_DEFAULT_SCHEME)
25
176
  authority: str = field(default="")
26
177
  path: str = field(default="")
@@ -79,9 +230,10 @@ class URI:
79
230
 
80
231
  # Accepts {'resource': '...'} or a plain string
81
232
  @classmethod
82
- def from_jsonish(cls, data: str | Mapping[str, object]) -> URI:
233
+ def _from_json_(cls, data: str | Mapping[str, object],context=None) -> URI:
234
+ return cls.from_parts(fragment="NOT IMPLIMENTED")
83
235
  if isinstance(data, str):
84
- return cls.from_url(data)
236
+ return cls.from_parts(fragment="NOT IMPLIMENTED")
85
237
  if isinstance(data, Mapping):
86
238
  raw = data.get("resource") or data.get("value") or ""
87
239
  if isinstance(raw, str) and raw:
@@ -1,56 +0,0 @@
1
- gedcomx/Logging.py,sha256=vBDOjawVXc4tCge1laYjy6_2Ves-fnGzG0m6NnLZejE,624
2
- gedcomx/TopLevelTypeCollection.py,sha256=p99i-O5LXiXe3GlC6jWuz4nH1TAcKxOLbe0VRxWbSFY,1495
3
- gedcomx/Zip.py,sha256=lBxcv-Vip45884EHj56wZJJ5I36Q38UuHUidDxQBoS8,14
4
- gedcomx/__init__.py,sha256=bQs2gL9GfLTFJbDppCIhxn8HTwuJUBo1PUjeEqQuHFE,1723
5
- gedcomx/address.py,sha256=zwvsA6N6edR3FJ31kraGfv250e70TAsi2AB-grZTkF8,4865
6
- gedcomx/agent.py,sha256=voUM3pxxr3DTTwZUCZgkqONAux2FsoIIJAErlEofbZ4,9321
7
- gedcomx/attribution.py,sha256=cGvRyzofLSORzgtv7dFWAh8EKwWfd_I7oxpKeyqNBU0,3381
8
- gedcomx/conclusion.py,sha256=yU0eul3rj6VBb5lFAPTAY9138x15yYQVjO6dN2gcdKo,9187
9
- gedcomx/converter.py,sha256=yP3xw1vo0uVaM74_fyE51UQA0lq2eBzfcnasLyo-DfU,51508
10
- gedcomx/coverage.py,sha256=E6Oa9O3ahbE81zTLgX2QQ-VbnLdzjWvqeRpY_KO2v_s,1837
11
- gedcomx/date.py,sha256=rkvpVtziwvzsbGip8OkyeJXsi1spc4S6uWpRcNSSxCo,2344
12
- gedcomx/document.py,sha256=iS3eANznPx8TxBytPa3CtwHS4QuPs80skQNdL-Rwis4,3519
13
- gedcomx/event.py,sha256=YJhaWp42RCQdELIYzjZVrZKdb5E9qjs-MfUrujZLIbU,13324
14
- gedcomx/evidence_reference.py,sha256=9Wo2iF8G8BIni5EvuqFj78KKbIBbSs1ZJQJ0pGiUoM4,348
15
- gedcomx/exceptions.py,sha256=0OdPM3euhBMgX8o61ZwPuKeN8zPuSuuDcSBFflVGFqk,587
16
- gedcomx/extensible_enum.py,sha256=DftCZLMBNul3C9hwh-rf0GE3SVdvylvyd5mt7bX_l6o,6535
17
- gedcomx/fact.py,sha256=vYS4yweXGN6gX02sQXZbIiye_SLO3f3mRoCvKmgQ5po,24871
18
- gedcomx/gedcom.py,sha256=l_BuLBynQacDtpLjhs2Afrabfiqt2Opoo_5_7myI7Z4,1709
19
- gedcomx/gedcom5x.py,sha256=V4kLW-TJobJbaVRYPJVApMUYI2pPbEBHWetXQ-mwzbM,23138
20
- gedcomx/gedcomx.py,sha256=dIbPas65wWcj3BeCqqRauE6WK2iGgjSj7olekF5H0pU,19019
21
- gedcomx/gender.py,sha256=3om8xY-mjAx1lGUs9XZwNR5x-Dmqxvsh9NDcw8vy2K0,2698
22
- gedcomx/group.py,sha256=8yuQuZb6XE56A3J57pqISPIsZwfY0zWyvQcBn0-3zLs,2531
23
- gedcomx/identifier.py,sha256=_o821Y1dIkcC9knb4GeSD691o9Nj-A_bS2MHHNft--0,8492
24
- gedcomx/logging_hub.py,sha256=f4z1r6WL3TdL1kg1eaCfCk7XEn9epCH5zUCXd1OT4mc,7793
25
- gedcomx/mutations.py,sha256=3TSNZggB6kNsys7XLojgzwiHPNGnmz-XBK2Fnuqg6KA,7224
26
- gedcomx/name.py,sha256=1DffrckDP0_A05bHDxV9KgMhGlXPIP5SNgtZQEA84Nw,18643
27
- gedcomx/note.py,sha256=URgLdSmrSCSA9hx4eZzAUPDSWph0pMoUyRQp9qafGss,2547
28
- gedcomx/online_account.py,sha256=zZliLAmrtQnPJH0z-UgfqBdKDhq8Y3eBbViKsJnRrkg,291
29
- gedcomx/person.py,sha256=71zig0D708qGQmLhHZct9xAnp9uVdQ4E0sd7WFVwO5Y,8103
30
- gedcomx/place_description.py,sha256=rXYQnYcyZemJtZnkMxqd8-4AXm8Ez_e09PvBQG_9ucc,5563
31
- gedcomx/place_reference.py,sha256=sEo_8PIEeuO0VzBCYhORhxvN4opuGk4RweOlPTjTqVY,2112
32
- gedcomx/qualifier.py,sha256=nM02EW8uEpZXUhejX9YTZk1sD0Why-_27RgUdJrZWkM,1778
33
- gedcomx/relationship.py,sha256=D2qsgUZ-s3HJWI8IlSahissFVyfa6-YhSb6P0QIReVk,4265
34
- gedcomx/resource.py,sha256=c1nID0mzDMNyq8vQWZmISLlGm765Ku_m6BJmPA9yanI,2674
35
- gedcomx/serialization.py,sha256=RvrIQkAJJNmIk6qHomEbikkuudk9ei9Tc_C17xr-GJs,34015
36
- gedcomx/source_citation.py,sha256=aW-lEb7bT9QU49GiBjJppFMBvtisR6fhVVuXjr5y4vQ,742
37
- gedcomx/source_description.py,sha256=9rrFVgFXIPF-M2suRQmJy_-e3uOak2kE_Bc3ZAtwTTs,14973
38
- gedcomx/source_reference.py,sha256=fvAvhi046d82gQ53v64moq9o11VulHQbZ1FPErjHDjw,5190
39
- gedcomx/subject.py,sha256=AQkC2fZOdEIXjIfoKIh7bI3heqkupeCo5j6pFAdrbms,3084
40
- gedcomx/textvalue.py,sha256=6B0wMxL0nigFNzhXZDhbTONvFGbnM2t2NcDZiZuu4Zw,1112
41
- gedcomx/translation.py,sha256=nCs6jAXHBg9qCePcCnY0S2uw-c8GfEn7IJhHHmDVe6s,61398
42
- gedcomx/uri.py,sha256=0k6NySntsZoyAeEkTthNM-fcd49_3hPDanaKXLaYYzA,4558
43
- gedcomx/Extensions/__init__.py,sha256=MQzi_whzlxiLiknUNh10hG8OVrNqJE38l6n-AwCssx8,24
44
- gedcomx/Extensions/rs10/__init__.py,sha256=nSHoZiD8hsCAyE-KyRTuWSLqSJSFh12kSz7hqilAMps,26
45
- gedcomx/Extensions/rs10/rsLink.py,sha256=yZhd-XIqERhBD4w1-VsIFJ7IeR09W0JYK8fAbkIR0zs,4353
46
- gedcomx/gedcom7/Exceptions.py,sha256=xeKr4x8b7r8pOqJ9yMpsCVTyxPeOlREDGgKoM5rX4U0,149
47
- gedcomx/gedcom7/Gedcom7.py,sha256=i_g9W0qsZQYLMoD2sBCA13ibKRYFnSf4uj9-ix_tE4Q,5614
48
- gedcomx/gedcom7/GedcomStructure.py,sha256=ZKNoEcXc41KdrCLPx-A8ohOU9VYmXAkFc4xuZExsBPw,3435
49
- gedcomx/gedcom7/Specification.py,sha256=qIBe9wzL1GB0l0NyetS1ncbhz5C44b9nMyjAxHuqMt8,9245
50
- gedcomx/gedcom7/__init__.py,sha256=8ELFZJ_j8RbRDKG9i6Sqb8s_CSxU9iM3bMN_0779rgI,679
51
- gedcomx/gedcom7/g7interop.py,sha256=hSzwqeok2n7xziEvN2QiJY7bVCWrOnZIZWXubnkrv7w,9945
52
- gedcomx/gedcom7/logger.py,sha256=QM1SySyh91UEhs90d2DMhH-s9qGF8XS8I8gr1eOcmfw,617
53
- gedcom_x-0.5.8.dist-info/METADATA,sha256=E7gYz4fVbIoNVtFf9Mce6i5f4Nxj1mtKrpBNJnDqrps,4332
54
- gedcom_x-0.5.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
55
- gedcom_x-0.5.8.dist-info/top_level.txt,sha256=smVBF4nxSU-mzCd6idtRYTbYjPICMMi8pTqewEmqF8Y,8
56
- gedcom_x-0.5.8.dist-info/RECORD,,
@@ -1,347 +0,0 @@
1
- import json
2
- from typing import Dict, Any
3
- import os
4
-
5
- def load_spec(file_path: str) -> Dict[str, Any]:
6
- """
7
- Load the JSON spec file into a Python dict.
8
-
9
- :param file_path: Path to your spec.json
10
- :return: A dict mapping each URI to its structure-definition dict.
11
- """
12
- with open(file_path, "r", encoding="utf-8") as f:
13
- return json.load(f)
14
-
15
- SPEC_PATH = os.path.join(os.path.dirname(__file__), "spec.json")
16
- structure_specs = load_spec(SPEC_PATH)
17
-
18
- def get_substructures(key: str) -> Dict[str, Any]:
19
- """
20
- Return the 'substructures' dict for the given key.
21
- """
22
- struct = structure_specs.get(key)
23
- if struct is None:
24
- return {}
25
- raise KeyError(f"No entry for key {key!r} in spec.json")
26
- return struct.get("substructures", {})
27
-
28
- def get_label(key: str) -> Dict[str, Any]:
29
- """
30
- Return the label for the given key.
31
- """
32
- struct = structure_specs.get(key)
33
- if struct is None:
34
- raise KeyError(f"No entry for key {key!r} in spec.json")
35
- return 'None'
36
-
37
- return struct.get("label", 'No Label')
38
-
39
- def match_uri(tag: str,parent):
40
- uri = None
41
- if tag.startswith("_"):
42
- uri = structure_specs.get(tag)
43
- elif parent:
44
- valid_substrutures = get_substructures(parent.uri)
45
- uri = valid_substrutures.get(tag)
46
- elif 'https://gedcom.io/terms/v7/record-' + tag in structure_specs.keys():
47
- uri = 'https://gedcom.io/terms/v7/record-' + tag
48
- elif 'https://gedcom.io/terms/v7/' + tag in structure_specs.keys():
49
- uri = 'https://gedcom.io/terms/v7/' + tag
50
- if uri == None:
51
- raise ValueError(f'Could not get uri for tag: {tag}, parent: {parent}')
52
- return uri
53
-
54
- '''
55
- MIT License
56
-
57
- Copyright (c) 2022 David Straub
58
-
59
- Permission is hereby granted, free of charge, to any person obtaining a copy
60
- of this software and associated documentation files (the "Software"), to deal
61
- in the Software without restriction, including without limitation the rights
62
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
63
- copies of the Software, and to permit persons to whom the Software is
64
- furnished to do so, subject to the following conditions:
65
-
66
- The above copyright notice and this permission notice shall be included in all
67
- copies or substantial portions of the Software.
68
-
69
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
70
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
71
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
72
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
73
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
74
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
75
- SOFTWARE.
76
- '''
77
- # TODO: https://github.com/DavidMStraub
78
-
79
- # GEDCOM 7 regex patterns thanks@DavidMStraub
80
-
81
- # --- Common primitives ---
82
- d = '\\ ' # GEDCOM delimiter (escaped space)
83
- integer = '[0-9]+' # One or more digits
84
- nonzero = '[1-9]' # Digits 1–9
85
-
86
- # --- Duration units ---
87
- years = f'{integer}y'
88
- months = f'{integer}m'
89
- weeks = f'{integer}w'
90
- days = f'{integer}d'
91
-
92
- # --- Age format ---
93
- agebound = '[<>]' # Optional boundary indicator (less than, greater than)
94
- ageduration = (
95
- f'((?P<years>{years})({d}(?P<months1>{months}))?({d}(?P<weeks1>{weeks}))?'
96
- f'({d}(?P<days1>{days}))?|(?P<months2>{months})({d}(?P<weeks2>{weeks}))?'
97
- f'({d}(?P<days2>{days}))?|(?P<weeks3>{weeks})({d}(?P<days3>{days}))?|'
98
- f'(?P<days4>{days}))'
99
- )
100
- age = f'((?P<agebound>{agebound}){d})?{ageduration}'
101
-
102
- # --- Tags and Enums ---
103
- underscore = '_'
104
- ucletter = '[A-Z]'
105
- tagchar = f'({ucletter}|[0-9]|{underscore})'
106
- exttag = f'{underscore}({tagchar})+'
107
- stdtag = f'{ucletter}({tagchar})*'
108
- tag = f'({stdtag}|{exttag})'
109
- enum = tag
110
-
111
- # --- Dates ---
112
- daterestrict = 'FROM|TO|BET|AND|BEF|AFT|ABT|CAL|EST'
113
- calendar = f'(?!{daterestrict})(GREGORIAN|JULIAN|FRENCH_R|HEBREW|{exttag})'
114
- day = integer
115
- month = f'(?!{daterestrict})({stdtag}|{exttag})'
116
- year = integer
117
- epoch = f'(?!{daterestrict})(BCE|{exttag})'
118
-
119
- date = f'({calendar}{d})?(({day}{d})?{month}{d})?{year}({d}{epoch})?'
120
-
121
- # --- Date variants with captures ---
122
- date_capture = (
123
- f'((?P<calendar>{calendar}){d})?(((?P<day>{day}){d})?'
124
- f'(?P<month>{month}){d})?(?P<year>{year})({d}(?P<epoch>{epoch}))?'
125
- )
126
-
127
- dateapprox = f'(?P<qualifier>ABT|CAL|EST){d}(?P<dateapprox>{date})'
128
- dateexact = f'(?P<day>{day}){d}(?P<month>{month}){d}(?P<year>{year})'
129
- dateperiod = f'((TO{d}(?P<todate1>{date}))?|FROM{d}(?P<fromdate>{date})({d}TO{d}(?P<todate2>{date}))?)'
130
- daterange = f'(BET{d}(?P<between>{date}){d}AND{d}(?P<and>{date})|AFT{d}(?P<after>{date})|BEF{d}(?P<before>{date}))'
131
- datevalue = f'({date}|{dateperiod}|{daterange}|{dateapprox})?'
132
-
133
- # --- Media types ---
134
- mt_char = "[ -!#-'*-+\\--.0-9A-Z^-~]"
135
- mt_token = f'({mt_char})+'
136
- mt_type = mt_token
137
- mt_subtype = mt_token
138
- mt_attribute = mt_token
139
- mt_qtext = '[\t-\n -!#-\\[\\]-~]'
140
- mt_qpair = '\\\\[\t-~]'
141
- mt_qstring = f'"({mt_qtext}|{mt_qpair})*"'
142
- mt_value = f'({mt_token}|{mt_qstring})'
143
- mt_parameter = f'{mt_attribute}={mt_value}'
144
- mediatype = f'{mt_type}/{mt_subtype}(;{mt_parameter})*'
145
-
146
- # --- Line structure (GEDCOM record lines) ---
147
- atsign = '@'
148
- xref = f'{atsign}({tagchar})+{atsign}'
149
- voidptr = '@VOID@'
150
- pointer = f'(?P<pointer>{voidptr}|{xref})'
151
- nonat = '[\t -?A-\\U0010ffff]'
152
- noneol = '[\t -\\U0010ffff]'
153
- linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
154
- lineval = f'({pointer}|{linestr})'
155
-
156
- level = f'(?P<level>0|{nonzero}[0-9]*)'
157
- eol = '(\\\r(\\\n)?|\\\n)'
158
- line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'
159
-
160
- # --- List formats ---
161
- nocommasp = '[\t-\\x1d!-+\\--\\U0010ffff]'
162
- nocomma = '[\t-+\\--\\U0010ffff]'
163
- listitem = f'({nocommasp}|{nocommasp}({nocomma})*{nocommasp})?'
164
- listdelim = f'({d})*,({d})*'
165
- list = f'{listitem}({listdelim}{listitem})*'
166
- list_enum = f'{enum}({listdelim}{enum})*'
167
- list_text = list
168
-
169
- # --- Names ---
170
- namechar = '[ -.0-\\U0010ffff]'
171
- namestr = f'({namechar})+'
172
- personalname = f'({namestr}|({namestr})?/(?P<surname>{namestr})?/({namestr})?)'
173
-
174
- # --- Time format ---
175
- fraction = '[0-9]+'
176
- second = '[012345][0-9]'
177
- minute = '[012345][0-9]'
178
- hour = '([0-9]|[01][0-9]|2[0123])'
179
- time = f'(?P<hour>{hour}):(?P<minute>{minute})(:(?P<second>{second})(\\.(?P<fraction>{fraction}))?)?(?P<tz>Z)?'
180
-
181
- # --- Text and special ---
182
- anychar = '[\t-\\U0010ffff]'
183
- text = f'({anychar})*'
184
- special = text
185
-
186
- # --- Boolean ---
187
- boolean = 'Y'
188
-
189
- # --- Banned Unicode Ranges ---
190
- '''
191
- banned = %x00-08 / %x0B-0C / %x0E-1F ; C0 other than LF CR and Tab
192
- / %x7F ; DEL
193
- / %x80-9F ; C1
194
- / %xD800-DFFF ; Surrogates
195
- / %xFFFE-FFFF ; invalid
196
- ; All other rules assume the absence of any banned characters
197
- '''
198
- banned = (
199
- '[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f\\x7f\\x80-\\x9f\\ud800-\\udfff'
200
- '\\ufffe-\\uffff]'
201
- )
202
-
203
-
204
-
205
-
206
- # TAGS
207
- CONT = "CONT"
208
- HEAD = "HEAD"
209
- ABBR = "ABBR"
210
- ADDR = "ADDR"
211
- ADOP = "ADOP"
212
- ADR1 = "ADR1"
213
- ADR2 = "ADR2"
214
- ADR3 = "ADR3"
215
- AGE = "AGE"
216
- AGNC = "AGNC"
217
- ALIA = "ALIA"
218
- ANCI = "ANCI"
219
- ANUL = "ANUL"
220
- ASSO = "ASSO"
221
- AUTH = "AUTH"
222
- BAPL = "BAPL"
223
- BAPM = "BAPM"
224
- BARM = "BARM"
225
- BASM = "BASM"
226
- BIRT = "BIRT"
227
- BLES = "BLES"
228
- BURI = "BURI"
229
- CALN = "CALN"
230
- CAST = "CAST"
231
- CAUS = "CAUS"
232
- CENS = "CENS"
233
- CHAN = "CHAN"
234
- CHIL = "CHIL"
235
- CHR = "CHR"
236
- CHRA = "CHRA"
237
- CITY = "CITY"
238
- CONF = "CONF"
239
- CONL = "CONL"
240
- COPR = "COPR"
241
- CORP = "CORP"
242
- CREA = "CREA"
243
- CREM = "CREM"
244
- CROP = "CROP"
245
- CTRY = "CTRY"
246
- DATA = "DATA"
247
- DATE = "DATE"
248
- DEAT = "DEAT"
249
- DESI = "DESI"
250
- DEST = "DEST"
251
- DIV = "DIV"
252
- DIVF = "DIVF"
253
- DSCR = "DSCR"
254
- EDUC = "EDUC"
255
- EMAIL = "EMAIL"
256
- EMIG = "EMIG"
257
- ENDL = "ENDL"
258
- ENGA = "ENGA"
259
- EVEN = "EVEN"
260
- EXID = "EXID"
261
- FACT = "FACT"
262
- FAM = "FAM"
263
- FAMC = "FAMC"
264
- FAMS = "FAMS"
265
- FAX = "FAX"
266
- FCOM = "FCOM"
267
- FILE = "FILE"
268
- FORM = "FORM"
269
- GEDC = "GEDC"
270
- GIVN = "GIVN"
271
- GRAD = "GRAD"
272
- HEIGHT = "HEIGHT"
273
- HUSB = "HUSB"
274
- IDNO = "IDNO"
275
- IMMI = "IMMI"
276
- INDI = "INDI"
277
- INIL = "INIL"
278
- LANG = "LANG"
279
- LATI = "LATI"
280
- LEFT = "LEFT"
281
- LONG = "LONG"
282
- MAP = "MAP"
283
- MARB = "MARB"
284
- MARC = "MARC"
285
- MARL = "MARL"
286
- MARR = "MARR"
287
- MARS = "MARS"
288
- MEDI = "MEDI"
289
- MIME = "MIME"
290
- NAME = "NAME"
291
- NATI = "NATI"
292
- NATU = "NATU"
293
- NCHI = "NCHI"
294
- NICK = "NICK"
295
- NMR = "NMR"
296
- NO = "NO"
297
- NOTE = "NOTE"
298
- NPFX = "NPFX"
299
- NSFX = "NSFX"
300
- OBJE = "OBJE"
301
- OCCU = "OCCU"
302
- ORDN = "ORDN"
303
- PAGE = "PAGE"
304
- PEDI = "PEDI"
305
- PHON = "PHON"
306
- PHRASE = "PHRASE"
307
- PLAC = "PLAC"
308
- POST = "POST"
309
- PROB = "PROB"
310
- PROP = "PROP"
311
- PUBL = "PUBL"
312
- QUAY = "QUAY"
313
- REFN = "REFN"
314
- RELI = "RELI"
315
- REPO = "REPO"
316
- RESI = "RESI"
317
- RESN = "RESN"
318
- RETI = "RETI"
319
- ROLE = "ROLE"
320
- SCHMA = "SCHMA"
321
- SDATE = "SDATE"
322
- SEX = "SEX"
323
- SLGC = "SLGC"
324
- SLGS = "SLGS"
325
- SNOTE = "SNOTE"
326
- SOUR = "SOUR"
327
- SPFX = "SPFX"
328
- SSN = "SSN"
329
- STAE = "STAE"
330
- STAT = "STAT"
331
- SUBM = "SUBM"
332
- SURN = "SURN"
333
- TAG = "TAG"
334
- TEMP = "TEMP"
335
- TEXT = "TEXT"
336
- TIME = "TIME"
337
- TITL = "TITL"
338
- TOP = "TOP"
339
- TRAN = "TRAN"
340
- TRLR = "TRLR"
341
- TYPE = "TYPE"
342
- UID = "UID"
343
- VERS = "VERS"
344
- WIDTH = "WIDTH"
345
- WIFE = "WIFE"
346
- WILL = "WILL"
347
- WWW = "WWW"