gedcom-x 0.5.9__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gedcom_x-0.5.9.dist-info → gedcom_x-0.5.10.dist-info}/METADATA +1 -1
- {gedcom_x-0.5.9.dist-info → gedcom_x-0.5.10.dist-info}/RECORD +13 -11
- gedcomx/__init__.py +3 -0
- gedcomx/conclusion.py +1 -1
- gedcomx/extensible.py +86 -0
- gedcomx/gedcom7/GedcomStructure.py +1 -3
- gedcomx/gedcom7/__init__.py +1 -1
- gedcomx/gedcom7/gedcom7.py +3 -3
- gedcomx/gedcom7/specification.py +4817 -0
- gedcomx/person.py +2 -1
- gedcomx/schemas.py +328 -0
- gedcomx/gedcom7/Specification.py +0 -347
- {gedcom_x-0.5.9.dist-info → gedcom_x-0.5.10.dist-info}/WHEEL +0 -0
- {gedcom_x-0.5.9.dist-info → gedcom_x-0.5.10.dist-info}/top_level.txt +0 -0
gedcomx/person.py
CHANGED
@@ -26,6 +26,7 @@ from .conclusion import ConfidenceLevel
|
|
26
26
|
from .date import Date
|
27
27
|
from .evidence_reference import EvidenceReference
|
28
28
|
from .Extensions.rs10.rsLink import _rsLinks
|
29
|
+
from .extensible import Extensible
|
29
30
|
from .fact import Fact, FactType
|
30
31
|
from .gender import Gender, GenderType
|
31
32
|
from .identifier import IdentifierList
|
@@ -48,7 +49,7 @@ deserial_log = "degedcomx.serialization"
|
|
48
49
|
|
49
50
|
|
50
51
|
|
51
|
-
class Person(Subject):
|
52
|
+
class Person(Extensible,Subject):
|
52
53
|
"""A person in the system.
|
53
54
|
|
54
55
|
Args:
|
gedcomx/schemas.py
ADDED
@@ -0,0 +1,328 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
|
4
|
+
|
5
|
+
class Schema:
    """Registry of field-name -> type tables, keyed by GedcomX class name.

    ``field_type_table`` maps a class name (``"Person"``, ``"Fact"``, ...) to a
    dict whose keys are field names and whose values are the types recorded
    for those fields.  The built-in entries are installed by
    :meth:`_init_schema`; additional fields can be added afterwards with
    :meth:`register_extra`.
    """

    def __init__(self) -> None:
        # Start with an empty table so register_extra() can be called on a
        # fresh instance instead of failing with AttributeError.
        self.field_type_table: dict = {}

    def _init_schema(self):
        """Install the built-in GedcomX entries into ``field_type_table``.

        The table is *replaced*, not merged: anything registered before this
        call is discarded.

        The model classes are imported inside the method rather than at module
        level -- presumably to avoid circular imports between this module and
        the model modules (TODO confirm).
        """
        from .address import Address
        from .agent import Agent
        from .attribution import Attribution
        from .conclusion import ConfidenceLevel
        from .date import Date
        from .document import Document, DocumentType, TextType
        from .evidence_reference import EvidenceReference
        from .event import Event, EventType, EventRole, EventRoleType
        from .Extensions.rs10.rsLink import _rsLinks, rsLink
        from .fact import Fact, FactType, FactQualifier
        from .gender import Gender, GenderType
        from .identifier import IdentifierList, Identifier
        from .logging_hub import hub, ChannelConfig
        from .name import Name, NameType, NameForm, NamePart, NamePartType, NamePartQualifier
        from .note import Note
        from .online_account import OnlineAccount
        from .person import Person
        from .place_description import PlaceDescription
        from .place_reference import PlaceReference
        from .qualifier import Qualifier
        from .relationship import Relationship, RelationshipType
        from .resource import Resource
        from .source_description import SourceDescription, ResourceType, SourceCitation, Coverage
        from .source_reference import SourceReference
        from .textvalue import TextValue
        from .uri import URI

        self.field_type_table = {
            "Agent": {
                "id": str,
                "identifiers": IdentifierList,
                "names": List[TextValue],
                "homepage": URI,
                "openid": URI,
                "accounts": List[OnlineAccount],
                "emails": List[URI],
                "phones": List[URI],
                "addresses": List[Address],
                "person": object | Resource,  # intended Person | Resource
                "attribution": object,  # GEDCOM5/7 compatibility
                "uri": URI | Resource,
            },
            "Attribution": {
                "contributor": Resource,
                "modified": str,
                "changeMessage": str,
                "creator": Resource,
                "created": str,
            },
            "Conclusion": {
                "id": str,
                "lang": str,
                "sources": List["SourceReference"],
                "analysis": Document | Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                # NOTE(review): recorded as the string "Resource", not the
                # Resource class used elsewhere -- confirm consumers expect this.
                "uri": "Resource",
                "max_note_count": int,
                "links": _rsLinks,
            },
            "Date": {
                "original": str,
                "formal": str,
                "normalized": str,
            },
            "Document": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "type": DocumentType,
                "extracted": bool,
                "textType": TextType,
                "text": str,
            },
            "Event": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "extracted": bool,
                "evidence": List[EvidenceReference],
                "media": List[SourceReference],
                "identifiers": List[Identifier],
                "type": EventType,
                "date": Date,
                "place": PlaceReference,
                "roles": List[EventRole],
            },
            "EventRole": {
                # Key was previously misspelled "id:" (stray colon), so a
                # lookup of the "id" field never matched this entry.
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "person": Resource,
                "type": EventRoleType,
                "details": str,
            },
            "Fact": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource | Document,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "type": FactType,
                "date": Date,
                "place": PlaceReference,
                "value": str,
                "qualifiers": List[FactQualifier],
                "links": _rsLinks,
            },
            "GedcomX": {
                "persons": List[Person],
                "relationships": List[Relationship],
                "sourceDescriptions": List[SourceDescription],
                "agents": List[Agent],
                "places": List[PlaceDescription],
            },
            "Gender": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "type": GenderType,
            },
            "KnownSourceReference": {
                "name": str,
                "value": str,
            },
            "Name": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "type": NameType,
                "nameForms": List[NameForm],
                "date": Date,
            },
            "NameForm": {
                "lang": str,
                "fullText": str,
                "parts": List[NamePart],
            },
            "NamePart": {
                "type": NamePartType,
                "value": str,
                "qualifiers": List["NamePartQualifier"],  # kept as a forward reference
            },
            "Note": {
                "lang": str,
                "subject": str,
                "text": str,
                "attribution": Attribution,
            },
            "Person": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "extracted": bool,
                "evidence": List[EvidenceReference],
                "media": List[SourceReference],
                "identifiers": IdentifierList,
                "private": bool,
                "gender": Gender,
                "names": List[Name],
                "facts": List[Fact],
                "living": bool,
                "links": _rsLinks,
            },
            "PlaceDescription": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "extracted": bool,
                "evidence": List[EvidenceReference],
                "media": List[SourceReference],
                # NOTE(review): other entries use a bare IdentifierList here;
                # confirm List[IdentifierList] is intended.
                "identifiers": List[IdentifierList],
                "names": List[TextValue],
                "type": str,
                "place": URI,
                "jurisdiction": Resource,
                "latitude": float,
                "longitude": float,
                "temporalDescription": Date,
                "spatialDescription": Resource,
            },
            "PlaceReference": {
                "original": str,
                "description": URI,
            },
            "Qualifier": {
                "name": str,
                "value": str,
            },
            "_rsLinks": {
                "person": rsLink,
                "portrait": rsLink,
            },
            "rsLink": {
                "href": URI,
                "template": str,
                "type": str,
                "accept": str,
                "allow": str,
                "hreflang": str,
                "title": str,
            },
            "Relationship": {
                "id": str,
                "lang": str,
                "sources": List[SourceReference],
                "analysis": Resource,
                "notes": List[Note],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "extracted": bool,
                "evidence": List[EvidenceReference],
                "media": List[SourceReference],
                "identifiers": IdentifierList,
                "type": RelationshipType,
                "person1": Resource,
                "person2": Resource,
                "facts": List[Fact],
            },
            "Resource": {
                "resource": str,
                "resourceId": str,
            },
            "SourceDescription": {
                "id": str,
                "resourceType": ResourceType,
                "citations": List[SourceCitation],
                "mediaType": str,
                "about": URI,
                "mediator": Resource,
                "publisher": Resource,
                "authors": List[Resource],
                "sources": List[SourceReference],
                "analysis": Resource,
                "componentOf": SourceReference,
                "titles": List[TextValue],
                "notes": List[Note],
                "attribution": Attribution,
                "rights": List[Resource],
                "coverage": List[Coverage],
                "descriptions": List[TextValue],
                "identifiers": IdentifierList,
                "created": Date,
                "modified": Date,
                "published": Date,
                "repository": Agent,
                "max_note_count": int,
            },
            "SourceReference": {
                "description": Resource,
                "descriptionId": str,
                "attribution": Attribution,
                "qualifiers": List[Qualifier],
            },
            "Subject": {
                "id": str,
                "lang": str,
                "sources": List["SourceReference"],
                "analysis": Resource,
                "notes": List["Note"],
                "confidence": ConfidenceLevel,
                "attribution": Attribution,
                "extracted": bool,
                "evidence": List[EvidenceReference],
                "media": List[SourceReference],
                "identifiers": IdentifierList,
                "uri": Resource,
                "links": _rsLinks,
            },
            "TextValue": {
                "lang": str,
                "value": str,
            },
            "URI": {
                "value": str,
            },
        }

    def register_extra(self, cls, name, typ):
        """Record an additional field for ``cls`` in ``field_type_table``.

        Args:
            cls: Class whose ``__name__`` selects (or creates) the table entry.
            name: Field name to add.
            typ: Type to record for the field.

        Raises:
            ValueError: If ``name`` is already present in the entry for
                ``cls``; the existing registration is left untouched.
        """
        fields = self.field_type_table.setdefault(cls.__name__, {})
        if name in fields:
            raise ValueError(
                f"Field {name!r} is already registered for {cls.__name__}"
            )
        fields[name] = typ
|
326
|
+
|
327
|
+
# Module-level schema instance; its table of built-in entries is populated
# immediately, as a side effect of importing this module.
SCHEMA = Schema()
SCHEMA._init_schema()
|
gedcomx/gedcom7/Specification.py
DELETED
@@ -1,347 +0,0 @@
|
|
1
|
-
import json
|
2
|
-
from typing import Dict, Any
|
3
|
-
import os
|
4
|
-
|
5
|
-
def load_spec(file_path: str) -> Dict[str, Any]:
    """
    Read a UTF-8 encoded JSON spec file and return the decoded document.

    :param file_path: Location of the JSON spec file on disk.
    :return: The parsed JSON content (a dict mapping each URI to its
             structure-definition dict).
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    parsed = json.loads(raw_text)
    return parsed
|
14
|
-
|
15
|
-
# Path of the "spec.json" file located in the same directory as this module.
SPEC_PATH = os.path.join(os.path.dirname(__file__), "spec.json")
# Parsed once at import time; the lookup helpers below read from this dict.
structure_specs = load_spec(SPEC_PATH)
|
17
|
-
|
18
|
-
def get_substructures(key: str) -> Dict[str, Any]:
    """
    Return the 'substructures' dict of the spec entry stored under ``key``.

    :param key: Key to look up in ``structure_specs``.
    :return: The entry's 'substructures' value; an empty dict when the key
             is unknown or the entry has no 'substructures' member.
    """
    struct = structure_specs.get(key)
    if struct is None:
        # Unknown keys are treated as "no substructures" rather than an error.
        return {}
    return struct.get("substructures", {})
|
27
|
-
|
28
|
-
def get_label(key: str) -> str:
    """
    Return the label of the spec entry stored under ``key``.

    :param key: Key to look up in ``structure_specs``.
    :return: The entry's 'label' value, or ``'No Label'`` when the entry has
             no 'label' member.
    :raises KeyError: if ``key`` has no entry in ``structure_specs``.
    """
    struct = structure_specs.get(key)
    if struct is None:
        raise KeyError(f"No entry for key {key!r} in spec.json")

    return struct.get("label", 'No Label')
|
38
|
-
|
39
|
-
def match_uri(tag: str, parent):
    """
    Resolve a tag to the value/URI recorded for it in the loaded spec.

    Exactly one strategy is tried, chosen in this order (no fall-through):

    1. ``tag`` starts with ``_``: whatever ``structure_specs`` stores under
       the tag itself.
    2. ``parent`` is truthy: the value for ``tag`` in the substructures of
       ``parent.uri``.
    3. Otherwise: ``https://gedcom.io/terms/v7/record-<tag>`` if that key
       exists in ``structure_specs``, else ``https://gedcom.io/terms/v7/<tag>``
       if that key exists.

    :param tag: Tag text to resolve.
    :param parent: Enclosing structure (an object with a ``uri`` attribute),
                   or a falsy value when there is none.
    :return: The resolved value.
    :raises ValueError: if the selected strategy produced nothing.
    """
    base = 'https://gedcom.io/terms/v7/'
    uri = None
    if tag.startswith("_"):
        uri = structure_specs.get(tag)
    elif parent:
        uri = get_substructures(parent.uri).get(tag)
    else:
        # Record-level terms take precedence over plain terms.
        for candidate in (base + 'record-' + tag, base + tag):
            if candidate in structure_specs:
                uri = candidate
                break
    if uri is None:
        raise ValueError(f'Could not get uri for tag: {tag}, parent: {parent}')
    return uri
|
53
|
-
|
54
|
-
'''
|
55
|
-
MIT License
|
56
|
-
|
57
|
-
Copyright (c) 2022 David Straub
|
58
|
-
|
59
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
60
|
-
of this software and associated documentation files (the "Software"), to deal
|
61
|
-
in the Software without restriction, including without limitation the rights
|
62
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
63
|
-
copies of the Software, and to permit persons to whom the Software is
|
64
|
-
furnished to do so, subject to the following conditions:
|
65
|
-
|
66
|
-
The above copyright notice and this permission notice shall be included in all
|
67
|
-
copies or substantial portions of the Software.
|
68
|
-
|
69
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
70
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
71
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
72
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
73
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
74
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
75
|
-
SOFTWARE.
|
76
|
-
'''
|
77
|
-
# TODO: https://github.com/DavidMStraub
|
78
|
-
|
79
|
-
# GEDCOM 7 regex patterns thanks@DavidMStraub
|
80
|
-
|
81
|
-
# Regular-expression fragments, built bottom-up: each name below is a plain
# pattern string, and later patterns embed earlier ones through f-string
# interpolation.  Definition order therefore matters.

# --- Common primitives ---
d = '\\ '  # GEDCOM delimiter (escaped space)
integer = '[0-9]+'  # One or more digits
nonzero = '[1-9]'  # Digits 1–9

# --- Duration units ---
years = f'{integer}y'
months = f'{integer}m'
weeks = f'{integer}w'
days = f'{integer}d'

# --- Age format ---
agebound = '[<>]'  # Optional boundary indicator (less than, greater than)
# Four alternatives, one per leading unit (years / months / weeks / days).
# The numeric suffixes on the group names keep them unique across the
# alternatives.
ageduration = (
    f'((?P<years>{years})({d}(?P<months1>{months}))?({d}(?P<weeks1>{weeks}))?'
    f'({d}(?P<days1>{days}))?|(?P<months2>{months})({d}(?P<weeks2>{weeks}))?'
    f'({d}(?P<days2>{days}))?|(?P<weeks3>{weeks})({d}(?P<days3>{days}))?|'
    f'(?P<days4>{days}))'
)
age = f'((?P<agebound>{agebound}){d})?{ageduration}'

# --- Tags and Enums ---
underscore = '_'
ucletter = '[A-Z]'
tagchar = f'({ucletter}|[0-9]|{underscore})'
exttag = f'{underscore}({tagchar})+'  # leading underscore, then tag characters
stdtag = f'{ucletter}({tagchar})*'  # leading uppercase letter, then tag characters
tag = f'({stdtag}|{exttag})'
enum = tag  # same pattern as a tag

# --- Dates ---
daterestrict = 'FROM|TO|BET|AND|BEF|AFT|ABT|CAL|EST'
# The negative lookahead stops these keywords from being read as a
# calendar, month or epoch name.
calendar = f'(?!{daterestrict})(GREGORIAN|JULIAN|FRENCH_R|HEBREW|{exttag})'
day = integer
month = f'(?!{daterestrict})({stdtag}|{exttag})'
year = integer
epoch = f'(?!{daterestrict})(BCE|{exttag})'

date = f'({calendar}{d})?(({day}{d})?{month}{d})?{year}({d}{epoch})?'

# --- Date variants with captures ---
# Same shape as `date`, with each component in a named group.
date_capture = (
    f'((?P<calendar>{calendar}){d})?(((?P<day>{day}){d})?'
    f'(?P<month>{month}){d})?(?P<year>{year})({d}(?P<epoch>{epoch}))?'
)

dateapprox = f'(?P<qualifier>ABT|CAL|EST){d}(?P<dateapprox>{date})'
dateexact = f'(?P<day>{day}){d}(?P<month>{month}){d}(?P<year>{year})'
dateperiod = f'((TO{d}(?P<todate1>{date}))?|FROM{d}(?P<fromdate>{date})({d}TO{d}(?P<todate2>{date}))?)'
daterange = f'(BET{d}(?P<between>{date}){d}AND{d}(?P<and>{date})|AFT{d}(?P<after>{date})|BEF{d}(?P<before>{date}))'
datevalue = f'({date}|{dateperiod}|{daterange}|{dateapprox})?'

# --- Media types ---
mt_char = "[ -!#-'*-+\\--.0-9A-Z^-~]"
mt_token = f'({mt_char})+'
mt_type = mt_token
mt_subtype = mt_token
mt_attribute = mt_token
mt_qtext = '[\t-\n -!#-\\[\\]-~]'
mt_qpair = '\\\\[\t-~]'  # a backslash followed by one character
mt_qstring = f'"({mt_qtext}|{mt_qpair})*"'
mt_value = f'({mt_token}|{mt_qstring})'
mt_parameter = f'{mt_attribute}={mt_value}'
mediatype = f'{mt_type}/{mt_subtype}(;{mt_parameter})*'

# --- Line structure (GEDCOM record lines) ---
atsign = '@'
xref = f'{atsign}({tagchar})+{atsign}'
voidptr = '@VOID@'
pointer = f'(?P<pointer>{voidptr}|{xref})'
nonat = '[\t -?A-\\U0010ffff]'  # character range that skips '@'
noneol = '[\t -\\U0010ffff]'
# A line string may not start with a single '@'; a doubled '@@' is accepted.
linestr = f'(?P<linestr>({nonat}|{atsign}{atsign})({noneol})*)'
lineval = f'({pointer}|{linestr})'

level = f'(?P<level>0|{nonzero}[0-9]*)'  # "0" or a number without a leading zero
eol = '(\\\r(\\\n)?|\\\n)'
line = f'{level}{d}((?P<xref>{xref}){d})?(?P<tag>{tag})({d}{lineval})?{eol}'

# --- List formats ---
nocommasp = '[\t-\\x1d!-+\\--\\U0010ffff]'
nocomma = '[\t-+\\--\\U0010ffff]'
listitem = f'({nocommasp}|{nocommasp}({nocomma})*{nocommasp})?'
listdelim = f'({d})*,({d})*'
# NOTE: the name `list` rebinds the builtin of the same name in this scope.
list = f'{listitem}({listdelim}{listitem})*'
list_enum = f'{enum}({listdelim}{enum})*'
list_text = list

# --- Names ---
namechar = '[ -.0-\\U0010ffff]'  # character range that skips '/'
namestr = f'({namechar})+'
# Either a bare name string, or optional text around a /surname/ section.
personalname = f'({namestr}|({namestr})?/(?P<surname>{namestr})?/({namestr})?)'

# --- Time format ---
fraction = '[0-9]+'
second = '[012345][0-9]'
minute = '[012345][0-9]'
hour = '([0-9]|[01][0-9]|2[0123])'
time = f'(?P<hour>{hour}):(?P<minute>{minute})(:(?P<second>{second})(\\.(?P<fraction>{fraction}))?)?(?P<tz>Z)?'

# --- Text and special ---
anychar = '[\t-\\U0010ffff]'
text = f'({anychar})*'
special = text

# --- Boolean ---
boolean = 'Y'

# --- Banned Unicode Ranges ---
# The bare string below holds the grammar rule that `banned` encodes as a
# character class.
'''
banned = %x00-08 / %x0B-0C / %x0E-1F ; C0 other than LF CR and Tab
/ %x7F ; DEL
/ %x80-9F ; C1
/ %xD800-DFFF ; Surrogates
/ %xFFFE-FFFF ; invalid
; All other rules assume the absence of any banned characters
'''
banned = (
    '[\\x00-\\x08\\x0b-\\x0c\\x0e-\\x1f\\x7f\\x80-\\x9f\\ud800-\\udfff'
    '\\ufffe-\\uffff]'
)
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
# TAGS
# One constant per tag; each constant's value is the tag text itself, so the
# names can be used in place of bare string literals.
CONT = "CONT"
HEAD = "HEAD"
ABBR = "ABBR"
ADDR = "ADDR"
ADOP = "ADOP"
ADR1 = "ADR1"
ADR2 = "ADR2"
ADR3 = "ADR3"
AGE = "AGE"
AGNC = "AGNC"
ALIA = "ALIA"
ANCI = "ANCI"
ANUL = "ANUL"
ASSO = "ASSO"
AUTH = "AUTH"
BAPL = "BAPL"
BAPM = "BAPM"
BARM = "BARM"
BASM = "BASM"
BIRT = "BIRT"
BLES = "BLES"
BURI = "BURI"
CALN = "CALN"
CAST = "CAST"
CAUS = "CAUS"
CENS = "CENS"
CHAN = "CHAN"
CHIL = "CHIL"
CHR = "CHR"
CHRA = "CHRA"
CITY = "CITY"
CONF = "CONF"
CONL = "CONL"
COPR = "COPR"
CORP = "CORP"
CREA = "CREA"
CREM = "CREM"
CROP = "CROP"
CTRY = "CTRY"
DATA = "DATA"
DATE = "DATE"
DEAT = "DEAT"
DESI = "DESI"
DEST = "DEST"
DIV = "DIV"
DIVF = "DIVF"
DSCR = "DSCR"
EDUC = "EDUC"
EMAIL = "EMAIL"
EMIG = "EMIG"
ENDL = "ENDL"
ENGA = "ENGA"
EVEN = "EVEN"
EXID = "EXID"
FACT = "FACT"
FAM = "FAM"
FAMC = "FAMC"
FAMS = "FAMS"
FAX = "FAX"
FCOM = "FCOM"
FILE = "FILE"
FORM = "FORM"
GEDC = "GEDC"
GIVN = "GIVN"
GRAD = "GRAD"
HEIGHT = "HEIGHT"
HUSB = "HUSB"
IDNO = "IDNO"
IMMI = "IMMI"
INDI = "INDI"
INIL = "INIL"
LANG = "LANG"
LATI = "LATI"
LEFT = "LEFT"
LONG = "LONG"
MAP = "MAP"
MARB = "MARB"
MARC = "MARC"
MARL = "MARL"
MARR = "MARR"
MARS = "MARS"
MEDI = "MEDI"
MIME = "MIME"
NAME = "NAME"
NATI = "NATI"
NATU = "NATU"
NCHI = "NCHI"
NICK = "NICK"
NMR = "NMR"
NO = "NO"
NOTE = "NOTE"
NPFX = "NPFX"
NSFX = "NSFX"
OBJE = "OBJE"
OCCU = "OCCU"
ORDN = "ORDN"
PAGE = "PAGE"
PEDI = "PEDI"
PHON = "PHON"
PHRASE = "PHRASE"
PLAC = "PLAC"
POST = "POST"
PROB = "PROB"
PROP = "PROP"
PUBL = "PUBL"
QUAY = "QUAY"
REFN = "REFN"
RELI = "RELI"
REPO = "REPO"
RESI = "RESI"
RESN = "RESN"
RETI = "RETI"
ROLE = "ROLE"
SCHMA = "SCHMA"
SDATE = "SDATE"
SEX = "SEX"
SLGC = "SLGC"
SLGS = "SLGS"
SNOTE = "SNOTE"
SOUR = "SOUR"
SPFX = "SPFX"
SSN = "SSN"
STAE = "STAE"
STAT = "STAT"
SUBM = "SUBM"
SURN = "SURN"
TAG = "TAG"
TEMP = "TEMP"
TEXT = "TEXT"
TIME = "TIME"
TITL = "TITL"
TOP = "TOP"
TRAN = "TRAN"
TRLR = "TRLR"
TYPE = "TYPE"
UID = "UID"
VERS = "VERS"
WIDTH = "WIDTH"
WIFE = "WIFE"
WILL = "WILL"
WWW = "WWW"
|
File without changes
|
File without changes
|