interscript-maps 2.4.1 → 2.4.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/interscript-maps.gemspec +1 -1
- data/interscript-maps.yaml +3 -2
- data/maps/{alalc-per-Arab-Latn-1997.imp → alalc-fas-Arab-Latn-1997.imp} +1 -1
- data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +1 -1
- data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +1 -1
- data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +1 -1
- data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +1 -1
- data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +1 -1
- data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +9 -9
- data/maps/mext-jpn-Hrkt-Latn-1954.imp +26 -24
- data/maps/un-ell-Grek-Latn-1987-phonetic.imp +4 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a497101e667c29fc9cec9896fe1b0f052187835c566803cbff4541a528155149
|
4
|
+
data.tar.gz: 8b58b2423fb8c6b57815841926e2e98f001a71a67776f8c705e6f98906292552
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbf2de534e2ef493dc7e8a081d008ebd66808f5b42bc75297dd91b9a9d7b4effd39dfc7b3c138701510949a88086aa400d1015908e1c1019593d797e9197d907
|
7
|
+
data.tar.gz: 87834f177ef2700a7527b40d3f162a78ca0150adfced50ddd5a2545a3ed8fddd5789e0fedff59948672db991a51166d9fbb0f7ad30bc7e845a0803e859a86fef
|
data/interscript-maps.gemspec
CHANGED
data/interscript-maps.yaml
CHANGED
@@ -19,14 +19,14 @@ secryst-models:
|
|
19
19
|
# Here we describe Rababa configs for the maps that are required
|
20
20
|
rababa-configs:
|
21
21
|
"200":
|
22
|
-
model: https://github.com/secryst/rababa-models/releases/download/0.1/
|
22
|
+
model: https://github.com/secryst/rababa-models/releases/download/0.1/diacritization_model_arabic.onnx
|
23
23
|
config:
|
24
24
|
session_name: base
|
25
25
|
text_encoder: ArabicEncoderWithStartSymbol
|
26
26
|
text_cleaner: valid_arabic_cleaners
|
27
27
|
max_len: 200
|
28
28
|
batch_size: 32
|
29
|
-
|
29
|
+
|
30
30
|
# This instruction denotes that the following maps are incompatible with certain
|
31
31
|
# compilers and platforms and shouldn't be tested or compiled in.
|
32
32
|
skip:
|
@@ -167,6 +167,7 @@ aliases:
|
|
167
167
|
alias_to: bgnpcgn-fao-Latn-Latn-1968
|
168
168
|
fas_Arab2Latn_ALA_1997:
|
169
169
|
description: Persian 1997 ALA-Library of Congress System
|
170
|
+
alias_to: alalc-fas-Arab-Latn-1997
|
170
171
|
fas_Arab2Latn_AMMI_1959:
|
171
172
|
description: Afghanistan Ministry of Mines and Industries Transliteration System
|
172
173
|
for Geographic Names in Afghanistan (Yaghubi)
|
@@ -47,7 +47,7 @@ metadata {
|
|
47
47
|
- Since maddah (آ), which is placed over alif (ا), nearly always occurs in word-initial position, no confusion results from the use of ā for alif maddah (آ) as well as for fatḩah alif (◌َا).
|
48
48
|
- The ligatures لا and لـا represent lām- alif, and should be romanized lā.
|
49
49
|
|
50
|
-
|
50
|
+
implementation_notes:
|
51
51
|
# TODO: These are not used
|
52
52
|
- Initial definite articles and prepositions should be capitalized and hyphens should not be used to connect parts of names, e.g., Ash Shāriqah and Tall al Laḩm.
|
53
53
|
- If any evidence is found for the use of the definite article in a name, the article should be used in the name chosen.
|
@@ -12,7 +12,7 @@ metadata {
|
|
12
12
|
Reversible transliteration standard, ELOT
|
13
13
|
|
14
14
|
notes:
|
15
|
-
- Transliteration standard (reversible): Clause 3.1, Table 1
|
15
|
+
- "Transliteration standard (reversible): Clause 3.1, Table 1"
|
16
16
|
}
|
17
17
|
|
18
18
|
# This map has been partially converted by the bin/maps_v1_to_v2 script
|
@@ -15,7 +15,7 @@ metadata {
|
|
15
15
|
or Modern Greek. Replaces ISO/R 843.
|
16
16
|
|
17
17
|
notes:
|
18
|
-
- Transliteration of Greek into Latin: Type 1, Clause 3 Table 1
|
18
|
+
- "Transliteration of Greek into Latin: Type 1, Clause 3 Table 1"
|
19
19
|
- Equivalent to elot-ell-Grek-Latn-743-2001-ts, the transliteration table of ELOT 743:2001
|
20
20
|
- Assuming that ou, au, eu transliterations are only intended for historical diphthongs /u, av, ev/, and that όυ, άυ, έυ are not to be transliterated as ou, au, eu
|
21
21
|
- Introduced casing to digamma and lunate sigma. (Casing was late introduction to character sets for those characters)
|
@@ -15,7 +15,7 @@ metadata {
|
|
15
15
|
or Modern Greek. Replaces ISO/R 843.
|
16
16
|
|
17
17
|
notes:
|
18
|
-
- Transliteration of Greek into Latin: Type 2, Clause 3 Table 2
|
18
|
+
- "Transliteration of Greek into Latin: Type 2, Clause 3 Table 2"
|
19
19
|
- Introduced casing to digamma, yot, and lunate sigma. (Casing was late introduction to character sets for those characters)
|
20
20
|
}
|
21
21
|
|
@@ -7,15 +7,15 @@ metadata {
|
|
7
7
|
name: Jyutping Cantonese Romanisation Scheme
|
8
8
|
url: https://lshk.org/jyutping
|
9
9
|
creation_date: 1993-12
|
10
|
-
description:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
10
|
+
description: |
|
11
|
+
The Linguistic Society of Hong Kong Cantonese Romanisation Scheme, or
|
12
|
+
known as Jyutping, was designed and proposed by the Linguistic Society of
|
13
|
+
Hong Kong in 1993. Jyutping is a new Cantonese romanization system which
|
14
|
+
has many advantages. It is multifunctional, systematic, user-friendly,
|
15
|
+
compatible with all possible modern Cantonese sounds, and solely based on
|
16
|
+
alphanumeric characters without any diacritics and strange symbols.
|
17
|
+
Jyutping can also be used as a Chinese computer input method. Its basic
|
18
|
+
principles are simple, easy to learn, and professional.
|
19
19
|
|
20
20
|
notes:
|
21
21
|
- One may need to parse the text in order to generate accurate
|
@@ -11,34 +11,36 @@ metadata {
|
|
11
11
|
adoption_date: 1954-12-09
|
12
12
|
# 昭和二十九年十二月九日
|
13
13
|
description:
|
14
|
-
|
15
|
-
国語を書き表わす場合に用いるローマ字のつづり方を次のように定める。
|
14
|
+
The spelling method for Roman characters used when writing Japanese language is as follows.
|
16
15
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
en: |
|
22
|
-
The spelling method for Roman characters used when writing Japanese language is as follows.
|
16
|
+
Preface
|
17
|
+
1. In general, when the language is written, the spelling shown in Table 1 shall be used.
|
18
|
+
2. The spelling methods listed in Table 2 can be used only when there is a situation that is difficult to change due to international relations or other conventional practices.
|
19
|
+
3. In either case of the preceding two paragraphs, the general introduction will apply.
|
23
20
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
21
|
+
original_description: |
|
22
|
+
国語を書き表わす場合に用いるローマ字のつづり方を次のように定める。
|
23
|
+
|
24
|
+
まえがき
|
25
|
+
1 一般に国語を書き表わす場合は、第1表に掲げたつづり方によるものとする。
|
26
|
+
2 国際的関係その他従来の慣例をにわかに改めがたい事情にある場合に限り、第2表に掲げたつづり方によつてもさしつかえない。
|
27
|
+
3 前二項のいずれの場合においても、おおむねそえがきを適用する。
|
28
28
|
|
29
29
|
notes:
|
30
|
-
-
|
31
|
-
|
32
|
-
-
|
33
|
-
|
34
|
-
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
-
|
39
|
-
|
40
|
-
-
|
41
|
-
|
30
|
+
- ン / ん is romanized always n in Kunrei-siki
|
31
|
+
- When it is necessary to separate the sound n from the vowel or y to follow, the apostrophe is added after the n.
|
32
|
+
- The clogged sound is represented by overlapping the first consonant characters.
|
33
|
+
- Long vowels are expressed in Kunrei-siki by placing a circumflex (^) over a vowel. In the case of capital letters, vowel characters may be arranged.
|
34
|
+
- The way of writing special sounds is free.
|
35
|
+
- Begin writing sentences and proper nouns with capital letters. Note that the beginning of nouns other than proper nouns may be written in capital letters.
|
36
|
+
|
37
|
+
original_notes:
|
38
|
+
- はねる音「ン」はすべてnと書く。
|
39
|
+
- はねる音を表わすnと次にくる母音字またはyとを切り離す必要がある場合には、nの次に’を入れる。
|
40
|
+
- つまる音は、最初の子音字を重ねて表わす。
|
41
|
+
- 長音は母音字の上に^をつけて表わす。なお、大文字の場合は母音字を並べてもよい。
|
42
|
+
- 特殊音の書き表わし方は自由とする。
|
43
|
+
- 文の書きはじめ、および固有名詞は語頭を大文字で書く。なお、固有名詞以外の名詞の語頭を大文字で書いてもよい。
|
42
44
|
}
|
43
45
|
|
44
46
|
tests {
|
@@ -17,12 +17,12 @@ metadata {
|
|
17
17
|
|
18
18
|
notes:
|
19
19
|
- Also included in ISO 843:1997, Annex B, Column 5, and ELOT 743:1982, column 5.
|
20
|
-
- Corrected obvious errors, which occur every time the table has reappeared: χ > x, x > ks, oï > oi.
|
21
|
-
- The vowels are taken from the specification, but some are controversial: /ɑ ɛ/ but /o/.
|
20
|
+
- "Corrected obvious errors, which occur every time the table has reappeared: χ > x, x > ks, oï > oi."
|
21
|
+
- "The vowels are taken from the specification, but some are controversial: /ɑ ɛ/ but /o/."
|
22
22
|
- Stress is not indicated. (To do so in IPA would require syllabification in preprocessing, since stress is positioned at syllable breaks)
|
23
23
|
- Followed specification in treating final μπ as b, but final ντ as nd. That distinction is dubious. (In ELOT 743:1982, both d and nd are erroneously marked as initial, and no final is given.)
|
24
|
-
- τζ is not correctly transcribed as dz: fixed
|
25
|
-
- not reducing geminated consonants: fixed
|
24
|
+
- "τζ is not correctly transcribed as dz: fixed"
|
25
|
+
- "not reducing geminated consonants: fixed"
|
26
26
|
}
|
27
27
|
|
28
28
|
tests {
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: interscript-maps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.4.1
|
4
|
+
version: 2.4.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
@@ -48,6 +48,7 @@ files:
|
|
48
48
|
- maps/alalc-div-Thaa-Latn-2012.imp
|
49
49
|
- maps/alalc-ell-Grek-Latn-1997.imp
|
50
50
|
- maps/alalc-ell-Grek-Latn-2010.imp
|
51
|
+
- maps/alalc-fas-Arab-Latn-1997.imp
|
51
52
|
- maps/alalc-guj-Gujr-Latn-1997.imp
|
52
53
|
- maps/alalc-guj-Gujr-Latn-2011.imp
|
53
54
|
- maps/alalc-hin-Deva-Latn-1997.imp
|
@@ -70,7 +71,6 @@ files:
|
|
70
71
|
- maps/alalc-ori-Orya-Latn-2011.imp
|
71
72
|
- maps/alalc-pan-Guru-Latn-1997.imp
|
72
73
|
- maps/alalc-pan-Guru-Latn-2011.imp
|
73
|
-
- maps/alalc-per-Arab-Latn-1997.imp
|
74
74
|
- maps/alalc-pli-Deva-Latn-2012.imp
|
75
75
|
- maps/alalc-pra-Deva-Latn-2012.imp
|
76
76
|
- maps/alalc-rus-Cyrl-Latn-1997.imp
|