interscript 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Bulgarian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters:
29
+ "\u0027": "" # '
30
+ "\u0410": "A" # А
31
+ "\u0411": "B" # Б
32
+ "\u0414": "D" # Д
33
+ "\u0401": "E" # Ё
34
+ "\u0415": "E" # Е
35
+ "\u042D": "E" # Э
36
+ "\u0424": "F" # Ф
37
+ "\u0413": "G" # Г
38
+ "\u0418": "I" # И
39
+ "\u0419": "I" # Й
40
+ "\u041A": "K" # К
41
+ "\u041B": "L" # Л
42
+ "\u041C": "M" # М
43
+ "\u041D": "N" # Н
44
+ "\u041E": "O" # О
45
+ "\u041F": "P" # П
46
+ "\u0420": "R" # Р
47
+ "\u0421": "S" # С
48
+ "\u0422": "T" # Т
49
+ "\u0423": "U" # У
50
+ "\u0412": "V" # В
51
+ "\u042B": "Y" # Ы
52
+ "\u0417": "Z" # З
53
+ "\u0427": "CH" # Ч
54
+ "\u042F": "IA" # Я
55
+ "\u042E": "IU" # Ю
56
+ "\u0425": "KH" # Х
57
+ "\u0428": "SH" # Ш
58
+ "\u0429": "SHT" # Щ
59
+ "\u0426": "TS" # Ц
60
+ "\u0416": "ZH" # Ж
61
+ "\u0490": "G" # Ґ
62
+ "\u040E": "U" # Ў
63
+ "\u046A": "U" # Ѫ
64
+ "\u0402": "D" # Ђ
65
+ "\u0405": "DZ" # Ѕ
66
+ "\u0408": "J" # Ј
67
+ "\u0409": "LJ" # Љ
68
+ "\u040A": "NJ" # Њ
69
+ "\u04BA": "C" # Һ
70
+ "\u040F": "DZ" # Џ
71
+ "\u0404": "IE" # Є
72
+ "\u0407": "I" # Ї
73
+ "\u0403": "G" # Ѓ
74
+
75
+
76
+ "\u0430": "a" # а
77
+ "\u0431": "b" # б
78
+ "\u0434": "d" # д
79
+ "\u0451": "e" # ё
80
+ "\u0435": "e" # e
81
+ "\u044D": "e" # э
82
+ "\u0444": "f" # ф
83
+ "\u0433": "g" # г
84
+ "\u0438": "i" # и
85
+ "\u0439": "i" # й
86
+ "\u043A": "k" # к
87
+ "\u043B": "l" # л
88
+ "\u043C": "m" # м
89
+ "\u043D": "n" # н
90
+ "\u043E": "o" # о
91
+ "\u043F": "p" # п
92
+ "\u0440": "r" # р
93
+ "\u0441": "s" # с
94
+ "\u0442": "t" # т
95
+ "\u0443": "u" # у (lowercase of U+0423, which maps to "U")
96
+ "\u0432": "v" # в
97
+ "\u044B": "y" # ы
98
+ "\u0437": "z" # з
99
+ "\u0447": "ch" # ч
100
+ "\u044F": "ia" # я
101
+ "\u044E": "iu" # ю (lowercase of U+042E, which maps to "IU")
102
+ "\u0445": "kh" # х
103
+ "\u0448": "sh" # ш
104
+ "\u0449": "sht" # щ
105
+ "\u0446": "ts" # ц
106
+ "\u0436": "zh" # ж
107
+ "\u0491": "g" # ґ
108
+ "\u045E": "u" # ў (lowercase of U+040E, which maps to "U")
109
+ "\u046B": "u" # ѫ (lowercase of U+046A, which maps to "U")
110
+ "\u0452": "d" # ђ
111
+ "\u0455": "dz" # ѕ
112
+ "\u0458": "j" # ј
113
+ "\u0459": "lj" # љ
114
+ "\u045A": "nj" # њ
115
+ "\u04BB": "c" # һ
116
+ "\u045F": "dz" # џ
117
+ "\u0454": "ie" # є
118
+ "\u0457": "i" # ї
119
+ "\u0453": "g" # ѓ
120
+
121
+
122
+
123
+
@@ -0,0 +1,101 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: gre
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Greek to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://en.wikipedia.org/wiki/Romanization_of_Greek
28
+
29
+ characters:
30
+ "\u0027": "" # ' (apostrophe is dropped)
31
+ "\u0386": "A" # Ά
32
+ "\u0391": "A" # Α
33
+ "\u0392": "B" # Β
34
+ "\u0393": "G" # Γ
35
+ "\u0394": "D" # Δ
36
+ "\u0395": "E" # Ε
37
+ "\u0396": "Z" # Ζ
38
+ "\u0397": "I" # Η
39
+ "\u0398": "TH" # Θ
40
+ "\u0399": "I" # Ι
41
+ "\u039A": "K" # Κ
42
+ "\u039B": "L" # Λ
43
+ "\u039C": "M" # Μ
44
+ "\u039D": "N" # Ν
45
+ "\u039E": "X" # Ξ
46
+ "\u039F": "O" # Ο
47
+ "\u03A0": "P" # Π
48
+ "\u03A1": "R" # Ρ (Greek rho romanizes as R; it only looks like Latin P)
49
+ "\u03A3": "S" # Σ
50
+ "\u03C2": "s" # ς (final sigma, lowercase only)
51
+ "\u03A4": "T" # Τ
52
+ "\u03A5": "Y" # Υ
53
+ "\u03A7": "CH" # Χ
54
+ "\u03A8": "PS" # Ψ
55
+ "\u03A9": "O" # Ω
56
+ "\u0388": "E" # Έ
57
+ "\u0389": "I" # Ή
58
+ "\u038A": "I" # Ί
59
+ "\u038C": "O" # Ό
60
+ "\u038E": "Y" # Ύ
61
+ "\u038F": "O" # Ώ
62
+ "\u03AA": "I" # Ϊ
63
+ "\u03AB": "Y" # Ϋ
64
+ "\u03A6": "F" # Φ (was missing from the map; F as in ELOT 743 / ISO 843)
65
+ "\u03AC": "a" # ά
66
+ "\u03B1": "a" # α
67
+ "\u03B2": "b" # β
68
+ "\u03B3": "g" # γ
69
+ "\u03B4": "d" # δ
70
+ "\u03B5": "e" # ε
71
+ "\u03B6": "z" # ζ
72
+ "\u03B7": "i" # η
73
+ "\u03B8": "th" # θ
74
+ "\u03B9": "i" # ι
75
+ "\u03BA": "k" # κ
76
+ "\u03BB": "l" # λ
77
+ "\u03BC": "m" # μ
78
+ "\u03BD": "n" # ν
79
+ "\u03BE": "x" # ξ
80
+ "\u03BF": "o" # ο
81
+ "\u03C0": "p" # π
82
+ "\u03C1": "r" # ρ (Greek rho romanizes as r; it only looks like Latin p)
83
+ "\u03C3": "s" # σ
84
+ "\u03C6": "f" # φ (was missing from the map; f as in ELOT 743 / ISO 843)
85
+ "\u03C4": "t" # τ
86
+ "\u03C5": "y" # υ
87
+ "\u03C7": "ch" # χ
88
+ "\u03C8": "ps" # ψ
89
+ "\u03C9": "o" # ω
90
+ "\u03AD": "e" # έ
91
+ "\u03AE": "i" # ή
92
+ "\u03AF": "i" # ί
93
+ "\u03CC": "o" # ό
94
+ "\u03CD": "y" # ύ
95
+ "\u03CE": "o" # ώ
96
+ "\u03CA": "i" # ϊ
97
+ "\u03CB": "y" # ϋ
98
+
99
+
100
+
101
+
@@ -0,0 +1,157 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: heb
5
+ source_script: Hebr
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Hebrew to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://en.wikipedia.org/wiki/Romanization_of_Hebrew
28
+
29
+ characters:
30
+ "\u0027": "" # ' (apostrophe is dropped)
31
+
32
+ # Consonants
33
+ "\u05D0": "" # א
34
+ "\u05D1": "V" # ב
35
+ "\u05D1\u05BC": "B" # בּ
36
+ "\uFB31": "B" # בּ
37
+ "\u05D2": "G" # ג
38
+ "\u05D2\u05BC": "G" # גּ
39
+ "\uFB32": "G" # גּ
40
+ "\u05D2\u05F3": "J" # ג׳
41
+ "\u05D3": "D" # ד
42
+ "\u05D3\u05BC": "D" # דּ
43
+ "\uFB33": "D" # דּ
44
+ "\u05D3\u05F3": "DH" # ד׳
45
+ "\u05D4": "H" # ה
46
+ "\u05D4\u05BC": "H" # הּ
47
+ "\uFB34": "H" # הּ
48
+ "\u05D5": "V" # ו
49
+ "\u05D5\u202C": "V" # ו followed by U+202C (pop directional formatting)
50
+ # "\u05D5\u05BC": "V" # וּ # duplicate key; mapped to "U" under Niqqud vowels
51
+ # "\uFB35": "V" # וּ # To vowels "U"
52
+ "\u05D6": "Z" # ז
53
+ "\u05D6\u05BC": "Z" # זּ
54
+ "\uFB36": "Z" # זּ
55
+ "\u05D6\u05F3": "ZH" # ז׳
56
+ "\u05D7": "CH" # ח
57
+ "\u05D8": "T" # ט
58
+ "\u05D8\u05BC": "T" # טּ
59
+ "\uFB38": "T" # טּ
60
+ "\u05D9": "Y" # י
61
+ "\u05D9\u05BC": "Y" # יּ
62
+ "\u05D9\u05BC\u202C": "Y" # יּ followed by U+202C
63
+
64
+ "\uFB39": "Y" # יּ
65
+ "\u05DB": "CH" # כ
66
+ "\u05DB\u05BC": "CH" # כּ
67
+ "\u05DB\u05BC\u202C": "CH" # כּ followed by U+202C
68
+
69
+ "\uFB3B": "CH" # כּ (precomposed form of U+05DB U+05BC; must map the same way)
70
+ "\u05DA": "CH" # ך
71
+ "\u05DA\u05BC": "CH" # ךּ
72
+ "\u05DA\u05BC\u202C": "CH" # ךּ followed by U+202C
73
+
74
+ "\uFB3A": "CH" # ךּ
75
+ "\u05DC": "L" # ל
76
+ "\u05DC\u05BC": "L" # לּ
77
+ "\uFB3C": "L" # לּ
78
+ "\u05DD": "M" # ם
79
+ "\u05DE": "M" # מ
80
+ "\u05DE\u05BC": "M" # מּ
81
+ "\uFB3E": "M" # מּ
82
+ "\u05DF": "N" # ן
83
+ "\u05E0": "N" # נ
84
+ "\u05E0\u05BC": "N" # נּ
85
+ "\uFB40": "N" # נּ
86
+ "\u05E1": "S" # ס
87
+ "\u05E1\u05BC": "S" # סּ
88
+ "\uFB41": "S" # סּ
89
+ "\u05E2": "" # ע
90
+ "\u05E3": "F" # ף
91
+ "\u05E3\u05BC": "P" # Possible problem u05BC # ףּ
92
+ "\uFB43": "P" # ףּ
93
+ "\u05E4": "F" # פ
94
+ "\u05E4\u05BC": "P" # פּ
95
+ "\uFB44": "P" # פּ
96
+ "\u05E5": "TZ" # ץ
97
+ "\u05E5\u05F3": "TSH" # Possible problem u05F3 # ץ׳
98
+ "\u05E6": "TZ" # צ
99
+ "\u05E6\u05BC": "TZ" # צּ
100
+ "\uFB46": "TZ" # צּ
101
+ "\u05E6\u05F3": "TSH" # Possible problem u05F3 # צ׳
102
+ "\u05E7": "Q" # ק
103
+ "\u05E7\u05BC": "Q" # קּ
104
+ "\uFB47": "Q" # קּ
105
+ "\u05E8": "R" # ר
106
+ "\u05E8\u05BC": "R" # רּ
107
+ "\uFB48": "R" # רּ
108
+ "\u05E9": "S" # ש
109
+ "\u05E9\u05BC": "S" # שּ
110
+ "\uFB49": "S" # שּ
111
+ "\u05E9\u05C2\u202C": "S" # שׂ followed by U+202C
112
+
113
+ "\uFB2B": "S" # שׂ
114
+ "\u05E9\u05C1": "SH" # שׁ
115
+ "\uFB2A": "SH" # שׁ
116
+ "\u05E9\u05BC\u05C2\u202C": "S" # שּׂ followed by U+202C
117
+
118
+ "\uFB2D": "S" # שּׂ
119
+ "\u05EA": "T" # ת
120
+ "\u05EA\u05BC": "T" # תּ
121
+ "\uFB4A": "T" # תּ
122
+ "\u05EA\u05F3": "T" # ת׳
123
+
124
+ # Niqqud vowels
125
+ "\u05B0": "E" # ( ְ )
126
+ "\u05B1": "E" # ( ֱ )
127
+ "\u05B2": "A" # ( ֲ )
128
+ "\u05B3": "O" # ( ֳ )
129
+ "\u05B4": "I" # ( ִ )
130
+ "\u05B5": "E" # ( ֵ )
131
+ "\u05B6": "E" # ( ֶ )
132
+ "\u05B7": "A" # ( ַ )
133
+ "\u05B8": "O" # ( ָ ) # It could be "A" too
134
+ "\u05B9": "O" # ( ֹ )
135
+ "\u05BB": "U" # ( ֻ )
136
+ "\u05D5\u05BC": "U" # ( וּ )
137
+ "\uFB35": "U" # ( וּ )
138
+
139
+ # Diphthongs
140
+ "\u05B5\u05D9": "EI" # ( ֵי )
141
+ "\u05B6\u05D9": "EI" # ( ֶי )
142
+ "\u05B7\u05D9": "AI" # ( ַי )
143
+ "\u05B7\u05D9\u05B0": "AI" # ( ַיְ )
144
+ "\u05B7\u05D9\u05B0\u202C": "AI" # ( ַיְ ) followed by U+202C
145
+ "\u05B8\u05D9": "AI" # ( ָי )
146
+ "\u05B8\u05D9\u202C": "AI" # ( ָי ) followed by U+202C
147
+ "\u05B8\u05D9\u05B0": "AI" # ( ָיְ )
148
+ "\u05B8\u05D9\u05B0\u202C": "AI" # ( ָיְ ) followed by U+202C
149
+ "\u05B9\u05D9": "OI" # ( ֹי )
150
+ "\u05B9\u05D9\u05B0": "OI" # ( ֹיְ )
151
+ "\u05B9\u05D9\u05B0\u202C": "OI" # ( ֹיְ ) followed by U+202C
152
+ "\u05BB\u05D9": "UI" # ( ֻי )
153
+ "\u05BB\u05D9\u05B0": "UI" # ( ֻיְ )
154
+ "\u05BB\u05D9\u05B0\u202C": "UI" # ( ֻיְ ) followed by U+202C
155
+ "\u05D5\u05BC\u05D9": "UI" # ( וּי )
156
+ "\u05D5\u05BC\u05D9\u05B0": "UI" # ( וּיְ )
157
+ "\u05D5\u05BC\u05D9\u05B0\u202C": "UI" # ( וּיְ ) followed by U+202C
@@ -0,0 +1,118 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Macedonian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters:
29
+ "\u0027": "" # '
30
+ "\u0410": "A" # А
31
+ "\u0411": "B" # Б
32
+ "\u0414": "D" # Д
33
+ "\u0401": "E" # Ё
34
+ "\u0415": "E" # Е
35
+ "\u042D": "E" # Э
36
+ "\u0424": "F" # Ф
37
+ "\u0413": "G" # Г
38
+ "\u0418": "I" # И
39
+ "\u0419": "I" # Й
40
+ "\u041A": "K" # К (plain Ka)
+ "\u040C": "KJ" # Ќ (Macedonian Kje; this is the letter that romanizes as KJ)
41
+ "\u041B": "L" # Л
42
+ "\u041C": "M" # М
43
+ "\u041D": "N" # Н
44
+ "\u041E": "O" # О
45
+ "\u041F": "P" # П
46
+ "\u0420": "R" # Р
47
+ "\u0421": "S" # С
48
+ "\u0422": "T" # Т
49
+ "\u0423": "U" # У
50
+ "\u0412": "V" # В
51
+ "\u042B": "Y" # Ы
52
+ "\u0417": "Z" # З
53
+ "\u0427": "CH" # Ч
54
+ "\u042F": "IA" # Я
55
+ "\u042E": "IU" # Ю
56
+ "\u0425": "H" # Х
57
+ "\u0428": "SH" # Ш
58
+ "\u0429": "SHCH" # Щ
59
+ "\u0426": "C" # Ц
60
+ "\u0416": "ZH" # Ж
61
+ "\u0490": "G" # Ґ
62
+ "\u040E": "U" # Ў
63
+ "\u046A": "U" # Ѫ
64
+ "\u0402": "D" # Ђ
65
+ "\u0405": "DZ" # Ѕ
66
+ "\u0408": "J" # Ј
67
+ "\u0409": "LJ" # Љ
68
+ "\u040A": "NJ" # Њ
69
+ "\u04BA": "C" # Һ
70
+ "\u040F": "DJ" # Џ
71
+ "\u0404": "IE" # Є
72
+ "\u0407": "I" # Ї
73
+ "\u0403": "GJ" # Ѓ
74
+
75
+ "\u0430": "a" # а
76
+ "\u0431": "b" # б
77
+ "\u0434": "d" # д
78
+ "\u0451": "e" # ё
79
+ "\u0435": "e" # e
80
+ "\u044D": "e" # э
81
+ "\u0444": "f" # ф
82
+ "\u0433": "g" # г
83
+ "\u0438": "i" # и
84
+ "\u0439": "i" # й
85
+ "\u043A": "k" # к (plain ka)
+ "\u045C": "kj" # ќ (Macedonian kje; this is the letter that romanizes as kj)
86
+ "\u043B": "l" # л
87
+ "\u043C": "m" # м
88
+ "\u043D": "n" # н
89
+ "\u043E": "o" # о
90
+ "\u043F": "p" # п
91
+ "\u0440": "r" # р
92
+ "\u0441": "s" # с
93
+ "\u0442": "t" # т
94
+ "\u0443": "u" # у (lowercase of U+0423, which maps to "U")
95
+ "\u0432": "v" # в
96
+ "\u044B": "y" # ы
97
+ "\u0437": "z" # з
98
+ "\u0447": "ch" # ч
99
+ "\u044F": "ia" # я
100
+ "\u044E": "iu" # ю (lowercase of U+042E, which maps to "IU")
101
+ "\u0445": "h" # х
102
+ "\u0448": "sh" # ш
103
+ "\u0449": "shch" # щ
104
+ "\u0446": "c" # ц
105
+ "\u0436": "zh" # ж
106
+ "\u0491": "g" # ґ
107
+ "\u045E": "u" # ў (lowercase of U+040E, which maps to "U")
108
+ "\u046B": "u" # ѫ (lowercase of U+046A, which maps to "U")
109
+ "\u0452": "d" # ђ
110
+ "\u0455": "dz" # ѕ
111
+ "\u0458": "j" # ј
112
+ "\u0459": "lj" # љ
113
+ "\u045A": "nj" # њ
114
+ "\u04BB": "c" # һ
115
+ "\u045F": "dj" # џ
116
+ "\u0454": "ie" # є
117
+ "\u0457": "i" # ї
118
+ "\u0453": "gj" # ѓ