interscript 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,123 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: bul
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Bulgarian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters:
29
+ "\u0027": "" # '
30
+ "\u0410": "A" # А
31
+ "\u0411": "B" # Б
32
+ "\u0414": "D" # Д
33
+ "\u0401": "E" # Ё
34
+ "\u0415": "E" # Е
35
+ "\u042D": "E" # Э
36
+ "\u0424": "F" # Ф
37
+ "\u0413": "G" # Г
38
+ "\u0418": "I" # И
39
+ "\u0419": "I" # Й
40
+ "\u041A": "K" # К
41
+ "\u041B": "L" # Л
42
+ "\u041C": "M" # М
43
+ "\u041D": "N" # Н
44
+ "\u041E": "O" # О
45
+ "\u041F": "P" # П
46
+ "\u0420": "R" # Р
47
+ "\u0421": "S" # С
48
+ "\u0422": "T" # Т
49
+ "\u0423": "U" # У
50
+ "\u0412": "V" # В
51
+ "\u042B": "Y" # Ы
52
+ "\u0417": "Z" # З
53
+ "\u0427": "CH" # Ч
54
+ "\u042F": "IA" # Я
55
+ "\u042E": "IU" # Ю
56
+ "\u0425": "KH" # Х
57
+ "\u0428": "SH" # Ш
58
+ "\u0429": "SHT" # Щ
59
+ "\u0426": "TS" # Ц
60
+ "\u0416": "ZH" # Ж
61
+ "\u0490": "G" # Ґ
62
+ "\u040E": "U" # Ў
63
+ "\u046A": "U" # Ѫ
64
+ "\u0402": "D" # Ђ
65
+ "\u0405": "DZ" # Ѕ
66
+ "\u0408": "J" # Ј
67
+ "\u0409": "LJ" # Љ
68
+ "\u040A": "NJ" # Њ
69
+ "\u04BA": "C" # Һ
70
+ "\u040F": "DZ" # Џ
71
+ "\u0404": "IE" # Є
72
+ "\u0407": "I" # Ї
73
+ "\u0403": "G" # Ѓ
74
+ # "\u0027" is already mapped to "" at the top of this table (duplicate key removed)
75
+
76
+ "\u0430": "a" # а
77
+ "\u0431": "b" # б
78
+ "\u0434": "d" # д
79
+ "\u0451": "e" # ё
80
+ "\u0435": "e" # e
81
+ "\u044D": "e" # э
82
+ "\u0444": "f" # ф
83
+ "\u0433": "g" # г
84
+ "\u0438": "i" # и
85
+ "\u0439": "i" # й
86
+ "\u043A": "k" # к
87
+ "\u043B": "l" # л
88
+ "\u043C": "m" # м
89
+ "\u043D": "n" # н
90
+ "\u043E": "o" # о
91
+ "\u043F": "p" # п
92
+ "\u0440": "r" # р
93
+ "\u0441": "s" # с
94
+ "\u0442": "t" # т
95
+ "\u0443": "u" # у
96
+ "\u0432": "v" # в
97
+ "\u044B": "y" # ы
98
+ "\u0437": "z" # з
99
+ "\u0447": "ch" # ч
100
+ "\u044F": "ia" # я
101
+ "\u044E": "iu" # ю
102
+ "\u0445": "kh" # х
103
+ "\u0448": "sh" # ш
104
+ "\u0449": "sht" # щ
105
+ "\u0446": "ts" # ц
106
+ "\u0436": "zh" # ж
107
+ "\u0491": "g" # ґ
108
+ "\u045E": "u" # ў
109
+ "\u046B": "u" # ѫ
110
+ "\u0452": "d" # ђ
111
+ "\u0455": "dz" # ѕ
112
+ "\u0458": "j" # ј
113
+ "\u0459": "lj" # љ
114
+ "\u045A": "nj" # њ
115
+ "\u04BB": "c" # һ
116
+ "\u045F": "dz" # џ
117
+ "\u0454": "ie" # є
118
+ "\u0457": "i" # ї
119
+ "\u0453": "g" # ѓ
120
+
121
+
122
+
123
+
@@ -0,0 +1,101 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: gre
5
+ source_script: Grek
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Greek to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://en.wikipedia.org/wiki/Romanization_of_Greek
28
+
29
+ characters:
30
+ "\u0027": "" # '
31
+ "\u0386": "A" # Ά
32
+ "\u0391": "A" # Α
33
+ "\u0392": "B" # Β
34
+ "\u0393": "G" # Γ
35
+ "\u0394": "D" # Δ
36
+ "\u0395": "E" # Ε
37
+ "\u0396": "Z" # Ζ
38
+ "\u0397": "I" # Η
39
+ "\u0398": "TH" # Θ
40
+ "\u0399": "I" # Ι
41
+ "\u039A": "K" # Κ
42
+ "\u039B": "L" # Λ
43
+ "\u039C": "M" # Μ
44
+ "\u039D": "N" # Ν
45
+ "\u039E": "X" # Ξ
46
+ "\u039F": "O" # Ο
47
+ "\u03A0": "P" # Π
48
+ "\u03A1": "R" # Ρ
49
+ "\u03A3": "S" # Σ
50
+ "\u03C2": "s" # ς
51
+ "\u03A4": "T" # Τ
52
+ "\u03A5": "Y" # Υ
53
+ "\u03A7": "CH" # Χ
54
+ "\u03A8": "PS" # Ψ
55
+ "\u03A9": "O" # Ω
56
+ "\u0388": "E" # Έ
57
+ "\u0389": "I" # Ή
58
+ "\u038A": "I" # Ί
59
+ "\u038C": "O" # Ό
60
+ "\u038E": "Y" # Ύ
61
+ "\u038F": "O" # Ώ
62
+ "\u03AA": "I" # Ϊ
63
+ "\u03AB": "Y" # Ϋ
64
+
65
+ "\u03AC": "a" # ά
66
+ "\u03B1": "a" # α
67
+ "\u03B2": "b" # β
68
+ "\u03B3": "g" # γ
69
+ "\u03B4": "d" # δ
70
+ "\u03B5": "e" # ε
71
+ "\u03B6": "z" # ζ
72
+ "\u03B7": "i" # η
73
+ "\u03B8": "th" # θ
74
+ "\u03B9": "i" # ι
75
+ "\u03BA": "k" # κ
76
+ "\u03BB": "l" # λ
77
+ "\u03BC": "m" # μ
78
+ "\u03BD": "n" # ν
79
+ "\u03BE": "x" # ξ
80
+ "\u03BF": "o" # ο
81
+ "\u03C0": "p" # π
82
+ "\u03C1": "r" # ρ
83
+ "\u03C3": "s" # σ
84
+
85
+ "\u03C4": "t" # τ
86
+ "\u03C5": "y" # υ
87
+ "\u03C7": "ch" # χ
88
+ "\u03C8": "ps" # ψ
89
+ "\u03C9": "o" # ω
90
+ "\u03AD": "e" # έ
91
+ "\u03AE": "i" # ή
92
+ "\u03AF": "i" # ί
93
+ "\u03CC": "o" # ό
94
+ "\u03CD": "y" # ύ
95
+ "\u03CE": "o" # ώ
96
+ "\u03CA": "i" # ϊ
97
+ "\u03CB": "y" # ϋ
98
+
99
+
100
+
101
+
@@ -0,0 +1,157 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: heb
5
+ source_script: Hebr
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Hebrew to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://en.wikipedia.org/wiki/Romanization_of_Hebrew
28
+
29
+ characters:
30
+ "\u0027": "" # '
31
+
32
+ # Consonants
33
+ "\u05D0": "" # א
34
+ "\u05D1": "V" # ב
35
+ "\u05D1\u05BC": "B" # בּ
36
+ "\uFB31": "B" # בּ
37
+ "\u05D2": "G" # ג
38
+ "\u05D2\u05BC": "G" # גּ
39
+ "\uFB32": "G" # גּ
40
+ "\u05D2\u05F3": "J" # ג׳
41
+ "\u05D3": "D" # ד
42
+ "\u05D3\u05BC": "D" # דּ
43
+ "\uFB33": "D" # דּ
44
+ "\u05D3\u05F3": "DH" # ד׳
45
+ "\u05D4": "H" # ה
46
+ "\u05D4\u05BC": "H" # הּ
47
+ "\uFB34": "H" # הּ
48
+ "\u05D5": "V" # ו
49
+ "\u05D5\u202C": "V" # ו
50
+ # "\u05D5\u05BC": "V" # וּ # Duplicate key: mapped to "U" under Niqqud vowels
51
+ # "\uFB35": "V" # וּ # To vowels "U"
52
+ "\u05D6": "Z" # ז
53
+ "\u05D6\u05BC": "Z" # זּ
54
+ "\uFB36": "Z" # זּ
55
+ "\u05D6\u05F3": "ZH" # ז׳
56
+ "\u05D7": "CH" # ח
57
+ "\u05D8": "T" # ט
58
+ "\u05D8\u05BC": "T" # טּ
59
+ "\uFB38": "T" # טּ
60
+ "\u05D9": "Y" # י
61
+ "\u05D9\u05BC": "Y" # יּ
62
+ "\u05D9\u05BC\u202C": "Y" # יּ
63
+
64
+ "\uFB39": "Y" # יּ
65
+ "\u05DB": "CH" # כ
66
+ "\u05DB\u05BC": "CH" # כּ
67
+ "\u05DB\u05BC\u202C": "CH" # כּ
68
+
69
+ "\uFB3B": "CH" # כּ
70
+ "\u05DA": "CH" # ך
71
+ "\u05DA\u05BC": "CH" # ךּ
72
+ "\u05DA\u05BC\u202C": "CH" # ךּ
73
+
74
+ "\uFB3A": "CH" # ךּ
75
+ "\u05DC": "L" # ל
76
+ "\u05DC\u05BC": "L" # לּ
77
+ "\uFB3C": "L" # לּ
78
+ "\u05DD": "M" # ם
79
+ "\u05DE": "M" # מ
80
+ "\u05DE\u05BC": "M" # מּ
81
+ "\uFB3E": "M" # מּ
82
+ "\u05DF": "N" # ן
83
+ "\u05E0": "N" # נ
84
+ "\u05E0\u05BC": "N" # נּ
85
+ "\uFB40": "N" # נּ
86
+ "\u05E1": "S" # ס
87
+ "\u05E1\u05BC": "S" # סּ
88
+ "\uFB41": "S" # סּ
89
+ "\u05E2": "" # ע
90
+ "\u05E3": "F" # ף
91
+ "\u05E3\u05BC": "P" # Possible problem u05BC # ףּ
92
+ "\uFB43": "P" # ףּ
93
+ "\u05E4": "F" # פ
94
+ "\u05E4\u05BC": "P" # פּ
95
+ "\uFB44": "P" # פּ
96
+ "\u05E5": "TZ" # ץ
97
+ "\u05E5\u05F3": "TSH" # Possible problem u05F3 # ץ׳
98
+ "\u05E6": "TZ" # צ
99
+ "\u05E6\u05BC": "TZ" # צּ
100
+ "\uFB46": "TZ" # צּ
101
+ "\u05E6\u05F3": "TSH" # Possible problem u05F3 # צ׳
102
+ "\u05E7": "Q" # ק
103
+ "\u05E7\u05BC": "Q" # קּ
104
+ "\uFB47": "Q" # קּ
105
+ "\u05E8": "R" # ר
106
+ "\u05E8\u05BC": "R" # רּ
107
+ "\uFB48": "R" # רּ
108
+ "\u05E9": "S" # ש
109
+ "\u05E9\u05BC": "S" # שּ
110
+ "\uFB49": "S" # שּ
111
+ "\u05E9\u05C2\u202C": "S" # שׂ
112
+
113
+ "\uFB2B": "S" # שׂ
114
+ "\u05E9\u05C1": "SH" # שׁ
115
+ "\uFB2A": "SH" # שׁ
116
+ "\u05E9\u05BC\u05C2\u202C": "S" # שּׂ
117
+
118
+ "\uFB2D": "S" # שּׂ
119
+ "\u05EA": "T" # ת
120
+ "\u05EA\u05BC": "T" # תּ
121
+ "\uFB4A": "T" # תּ
122
+ "\u05EA\u05F3": "T" # ת׳
123
+
124
+ # Niqqud vowels
125
+ "\u05B0": "E" # ( ְ )
126
+ "\u05B1": "E" # ( ֱ )
127
+ "\u05B2": "A" # ( ֲ )
128
+ "\u05B3": "O" # ( ֳ )
129
+ "\u05B4": "I" # ( ִ )
130
+ "\u05B5": "E" # ( ֵ )
131
+ "\u05B6": "E" # ( ֶ )
132
+ "\u05B7": "A" # ( ַ )
133
+ "\u05B8": "O" # ( ָ ) # It could be "A" too
134
+ "\u05B9": "O" # ( ֹ )
135
+ "\u05BB": "U" # ( ֻ )
136
+ "\u05D5\u05BC": "U" # ( וּ )
137
+ "\uFB35": "U" # ( וּ )
138
+
139
+ # Diphthongs
140
+ "\u05B5\u05D9": "EI" # ( ֵי )
141
+ "\u05B6\u05D9": "EI" # ( ֶי )
142
+ "\u05B7\u05D9": "AI" # ( ַי )
143
+ "\u05B7\u05D9\u05B0": "AI" # ( ַיְ )
144
+ "\u05B7\u05D9\u05B0\u202C": "AI" # ( ַיְ )
145
+ "\u05B8\u05D9": "AI" # ( ָי )
146
+ "\u05B8\u05D9\u202C": "AI" # ( ָי )
147
+ "\u05B8\u05D9\u05B0": "AI" # ( ָיְ )
148
+ "\u05B8\u05D9\u05B0\u202C": "AI" # ( ָיְ )
149
+ "\u05B9\u05D9": "OI" # ( ֹי )
150
+ "\u05B9\u05D9\u05B0": "OI" # ( ֹיְ )
151
+ "\u05B9\u05D9\u05B0\u202C": "OI" # ( ֹיְ )
152
+ "\u05BB\u05D9": "UI" # ( ֻי )
153
+ "\u05BB\u05D9\u05B0": "UI" # ( ֻיְ )
154
+ "\u05BB\u05D9\u05B0\u202C": "UI" # ( ֻיְ )
155
+ "\u05D5\u05BC\u05D9": "UI" # ( וּי )
156
+ "\u05D5\u05BC\u05D9\u05B0": "UI" # ( וּיְ )
157
+ "\u05D5\u05BC\u05D9\u05B0\u202C": "UI" # ( וּיְ )
@@ -0,0 +1,118 @@
1
+ ---
2
+ authority_id: icao
3
+ id: 9303
4
+ language: mkd
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: ICAO Doc 9303 Machine Readable Travel Documents Part 3 -- Macedonian Cyrillic to Latin
8
+ url: https://www.icao.int/publications/pages/publication.aspx?docnum=9303
9
+ creation_date: 2015
10
+ description: |
11
+ Part 3 defines specifications that are common to TD1, TD2 and TD3
12
+ size machine readable travel documents (MRTDs) including those
13
+ necessary for global interoperability using visual inspection and
14
+ machine readable (optical character recognition) means.
15
+
16
+ Since only Latin-alphabet characters are allowed in the VIZ, if
17
+ mandatory data elements are in a national language that does not use
18
+ the Latin alphabet, a transcription or transliteration shall also be
19
+ provided.
20
+
21
+ This document defines the transliteration mappings used to produce
22
+ this transcription or transliteration.
23
+ tests:
24
+ - source:
25
+ expected:
26
+ map:
27
+ # https://www.icao.int/publications/Documents/9303_p3_cons_en.pdf
28
+ characters:
29
+ "\u0027": "" # '
30
+ "\u0410": "A" # А
31
+ "\u0411": "B" # Б
32
+ "\u0414": "D" # Д
33
+ "\u0401": "E" # Ё
34
+ "\u0415": "E" # Е
35
+ "\u042D": "E" # Э
36
+ "\u0424": "F" # Ф
37
+ "\u0413": "G" # Г
38
+ "\u0418": "I" # И
39
+ "\u0419": "I" # Й
40
+ "\u041A": "K" # К (KJ is the ICAO value for Ќ U+040C, not for К)
41
+ "\u041B": "L" # Л
42
+ "\u041C": "M" # М
43
+ "\u041D": "N" # Н
44
+ "\u041E": "O" # О
45
+ "\u041F": "P" # П
46
+ "\u0420": "R" # Р
47
+ "\u0421": "S" # С
48
+ "\u0422": "T" # Т
49
+ "\u0423": "U" # У
50
+ "\u0412": "V" # В
51
+ "\u042B": "Y" # Ы
52
+ "\u0417": "Z" # З
53
+ "\u0427": "CH" # Ч
54
+ "\u042F": "IA" # Я
55
+ "\u042E": "IU" # Ю
56
+ "\u0425": "H" # Х
57
+ "\u0428": "SH" # Ш
58
+ "\u0429": "SHCH" # Щ
59
+ "\u0426": "C" # Ц
60
+ "\u0416": "ZH" # Ж
61
+ "\u0490": "G" # Ґ
62
+ "\u040E": "U" # Ў
63
+ "\u046A": "U" # Ѫ
64
+ "\u0402": "D" # Ђ
65
+ "\u0405": "DZ" # Ѕ
66
+ "\u0408": "J" # Ј
67
+ "\u0409": "LJ" # Љ
68
+ "\u040A": "NJ" # Њ
69
+ "\u04BA": "C" # Һ
70
+ "\u040F": "DJ" # Џ
71
+ "\u0404": "IE" # Є
72
+ "\u0407": "I" # Ї
73
+ "\u0403": "GJ" # Ѓ
74
+
75
+ "\u0430": "a" # а
76
+ "\u0431": "b" # б
77
+ "\u0434": "d" # д
78
+ "\u0451": "e" # ё
79
+ "\u0435": "e" # e
80
+ "\u044D": "e" # э
81
+ "\u0444": "f" # ф
82
+ "\u0433": "g" # г
83
+ "\u0438": "i" # и
84
+ "\u0439": "i" # й
85
+ "\u043A": "k" # к (kj is the ICAO value for ќ U+045C, not for к)
86
+ "\u043B": "l" # л
87
+ "\u043C": "m" # м
88
+ "\u043D": "n" # н
89
+ "\u043E": "o" # о
90
+ "\u043F": "p" # п
91
+ "\u0440": "r" # р
92
+ "\u0441": "s" # с
93
+ "\u0442": "t" # т
94
+ "\u0443": "u" # у
95
+ "\u0432": "v" # в
96
+ "\u044B": "y" # ы
97
+ "\u0437": "z" # з
98
+ "\u0447": "ch" # ч
99
+ "\u044F": "ia" # я
100
+ "\u044E": "iu" # ю
101
+ "\u0445": "h" # х
102
+ "\u0448": "sh" # ш
103
+ "\u0449": "shch" # щ
104
+ "\u0446": "c" # ц
105
+ "\u0436": "zh" # ж
106
+ "\u0491": "g" # ґ
107
+ "\u045E": "u" # ў
108
+ "\u046B": "u" # ѫ
109
+ "\u0452": "d" # ђ
110
+ "\u0455": "dz" # ѕ
111
+ "\u0458": "j" # ј
112
+ "\u0459": "lj" # љ
113
+ "\u045A": "nj" # њ
114
+ "\u04BB": "c" # һ
115
+ "\u045F": "dj" # џ
116
+ "\u0454": "ie" # є
117
+ "\u0457": "i" # ї
118
+ "\u0453": "gj" # ѓ