interscript 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +1 -3
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +8 -3
  5. data/lib/interscript/fs.rb +27 -0
  6. data/lib/interscript/mapping.rb +3 -1
  7. data/lib/interscript/opal.rb +142 -3
  8. data/lib/interscript/opal/entrypoint.rb +8 -0
  9. data/lib/interscript/opal/exports.rb +11 -0
  10. data/lib/interscript/opal/maps.js.erb +2 -4
  11. data/lib/interscript/version.rb +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -5
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +104 -10
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +18 -3
  15. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  16. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  17. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  18. data/maps/alalc-hin-Deva-Latn-1997.yaml +102 -10
  19. data/maps/alalc-hin-Deva-Latn-2011.yaml +19 -1
  20. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  21. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  22. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  23. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  24. data/maps/alalc-pra-Deva-Latn-2012.yaml +2 -2
  25. data/maps/alalc-san-Deva-Latn-2012.yaml +78 -9
  26. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  27. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  28. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  29. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  30. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +3 -1
  31. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  32. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  33. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +1 -1
  34. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  35. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  36. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  37. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  38. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  39. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  40. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +2 -0
  41. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +87 -53
  42. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  43. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  44. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  45. data/maps/{bis-knd-Knda-Latn-13194-1991.yaml → bis-kan-Kana-Latn-13194-1991.yaml} +2 -2
  46. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +17 -2
  47. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  48. data/maps/{iso-kan-Knda-Latn-15919-2001.yaml → iso-kan-Kana-Latn-15919-2001.yaml} +1 -1
  49. data/maps/{mns-mon-Cyrl-Latn-5217-2012.yaml → masm-mon-Cyrl-Latn-5217-2012.yaml} +2 -2
  50. data/maps/{mns-mon-Latn-Cyrl-5217-2012.yaml → masm-mon-Latn-Cyrl-5217-2012.yaml} +1 -1
  51. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  52. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  53. data/maps/odni-ara-Arab-Latn-2015.yaml +20 -130
  54. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  55. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  56. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  57. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  58. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  59. data/maps/{odni-per-Arab-Latn-2015.yaml → odni-prs-Arab-Latn-2015.yaml} +0 -0
  60. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  61. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  62. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +4 -0
  63. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  64. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  65. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  66. data/maps/un-hin-Deva-Latn-2016.yaml +104 -10
  67. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  68. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  69. data/maps/un-mar-Deva-Latn-2016.yaml +24 -13
  70. data/maps/un-nep-Deva-Latn-1972.yaml +40 -121
  71. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  72. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  73. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  74. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  75. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  76. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  77. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  78. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  79. data/spec/interscript/filenames_spec.rb +6 -369
  80. data/spec/interscript_spec.rb +10 -2
  81. metadata +50 -7
  82. data/lib/interscript/opal/map_translate.rb +0 -7
@@ -0,0 +1,247 @@
1
+ ---
2
+ authority_id: ungegn
3
+ id: 1972
4
+ language: iso-639-2:ori
5
+ source_script: Orya
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES -- Oriya Romanization, 1972
8
+ url: http://www.eki.ee/wgrs/v2_2/rom1_or.pdf
9
+ creation_date: 1972
10
+ confirmation_date: 2003
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11), based on a report
13
+ prepared by D. N. Sharma. The note on the system was published in volume II of the
14
+ conference reports.
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic
17
+ products.
18
+
19
+ Oriya uses an alphasyllabic script whereby each character represents a syllable rather than one sound.
20
+ Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially) and in an
21
+ abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is mostly
22
+ reversible but there may exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters)
23
+ and consonants (combinations with subscript consonants vs. character sequences).
24
+
25
+ notes:
26
+ - Combinations with r as the first component are written with a special superscript symbol, e.g. ର୍କ rka.
27
+
28
+ tests:
29
+ - source: "ର୍କ"
30
+ expected: "rka"
31
+ - source: "ଓଡ଼ିଆ"
32
+ expected: "oṙiā"
33
+ - source: "ଓଡ଼ିଶା"
34
+ expected: "oṙishā"
35
+ - source: "ଭୁବନେଶ୍ୱର"
36
+ expected: "bhubaneshvara"
37
+ - source: "ଆଇପିଏଲ୍‌-୧୩: ଦିଲ୍ଲୀ କ୍ୟାପିଟାଲ୍ସକୁ ୮୮ ରନ୍‌ ପରାସ୍ତ କଲା ସନରାଇଜର୍ସ ହାଇଦ୍ରାବାଦ"
38
+ expected: "āipiel-13: dillī kyāpiṭālsaku 88 ran parāsta kalā sanarāijarsa hāidrābāda"
39
+ - source: "ପ୍ରେମ ସମ୍ପର୍କରେ ଭଟ୍ଟା: ରାଗରେ ପ୍ରେମିକାର ତଣ୍ଟି କାଟି ନିଜେ ବିଷ ପିଇଲା ପ୍ରେମିକ"
40
+ expected: "prema samparkare bhaṭṭā: rāgare premikāra taṇṭi kāṭi nije biṣha piilā premika"
41
+ - source: "ପ୍ରେମ ସମ୍ପର୍କରେ ଭଟ୍ଟା: ରାଗରେ ପ୍ରେମିକାର ତଣ୍ଟି କାଟି ନିଜେ ବିଷ ପିଇଲା ପ୍ରେମିକ"
42
+ expected: "prema samparkare bhaṭṭā: rāgare premikāra taṇṭi kāṭi nije biṣha piilā premika"
43
+ - source: "ହୋଟେଲ, ଲଜ୍‌ରେ ରୁମ୍‌ ମିଳୁନି: ନେତା‌ଙ୍କ ନାଁରେ ଆଗୁଆ ହୋଇଯାଇଛି ବୁକିଂ"
44
+ expected: "heāṭela, lajre rum miḷuni: netāṅka nāmre āguā heāiỵāichhhi bukiṃ"
45
+ - source: "ପର୍ଯ୍ୟଟକମାନଙ୍କ ନିମନ୍ତେ ନଭେମ୍ବର ୧ରୁ ଖୋଲିବ ଶିମିଳିପାଳ ଅଭୟାରଣ୍ୟ"
46
+ expected: "parỵyaṭakamānaṅka nimante nabhembara 1ru kholiba shimiḷipāḷa abhayāraṇya"
47
+ - source: "ପାରିବାରିକ ଅଶାନ୍ତିର କରୁଣ ପରିଣତି: କୂଅକୁ ଡେଇଁଲେ ମା’-ଝିଅ, ଝିଅ ମୃତ"
48
+ expected: "pāribārika ashāntira karuṇa pariṇati: kūaku ḍeimle mā’-jhia, jhia mṛta"
49
+ - source: "‘ଭ୍ରଷ୍ଟାଚାରର ବଂଶବାଦ’ ଏବେ ସାଜିଛି ଦେଶ ପାଇଁ ନୂଆ ସମସ୍ୟା; ପ୍ରଧାନମନ୍ତ୍ରୀ ମୋଦୀ"
50
+ expected: "‘bhraṣhṭāchārara baṃshabāda’ ebe sājichhhi desha pāim nūā samasyā; pradhānamantrī modī"
51
+ - source: "ପାହାଡ଼ି ଇଲାକାବାସୀଙ୍କ ଆଶାର ବତୀ ‘ପାର୍ବତୀ’"
52
+ expected: "pāhāṙi ilākābāsīṅka āshāra batī ‘pārbatī’"
53
+
54
+
55
+ map:
56
+
57
+ rules:
58
+ - pattern: ([କ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
59
+ result: 'k'
60
+ - pattern: ([ଖ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
61
+ result: 'kh'
62
+ - pattern: ([ଗ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
63
+ result: 'g'
64
+ - pattern: ([ଘ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
65
+ result: 'gh'
66
+ - pattern: ([ଙ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
67
+ result: 'ṅ'
68
+ - pattern: ([ଚ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
69
+ result: 'ch'
70
+ - pattern: ([ଛ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
71
+ result: 'chhh'
72
+ - pattern: ([ଜ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
73
+ result: 'j'
74
+ - pattern: ([ଝ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
75
+ result: 'jh'
76
+ - pattern: ([ଞ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
77
+ result: 'ñ'
78
+ - pattern: ([ଟ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
79
+ result: 'ṭ'
80
+ - pattern: ([ଠ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
81
+ result: 'ṭh'
82
+ - pattern: ([ଡ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
83
+ result: 'ḍ'
84
+ - pattern: ([ଡ଼]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
85
+ result: 'ṙ'
86
+ - pattern: ([ଢ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
87
+ result: 'ḍh'
88
+ - pattern: ([ଢ଼]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
89
+ result: 'ṙh'
90
+ - pattern: ([ଣ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
91
+ result: 'ṇ'
92
+ - pattern: ([ତ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
93
+ result: 't'
94
+ - pattern: ([ଥ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
95
+ result: 'th'
96
+ - pattern: ([ଦ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
97
+ result: 'd'
98
+ - pattern: ([ଧ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
99
+ result: 'dh'
100
+ - pattern: ([ନ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
101
+ result: 'n'
102
+ - pattern: ([ପ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
103
+ result: 'p'
104
+ - pattern: ([ଫ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
105
+ result: 'ph'
106
+ - pattern: ([ବ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
107
+ result: 'b'
108
+ - pattern: ([ଭ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
109
+ result: 'bh'
110
+ - pattern: ([ମ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
111
+ result: 'm'
112
+ - pattern: ([ଯ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
113
+ result: 'ỵ'
114
+ - pattern: ([ୟ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
115
+ result: 'y'
116
+ - pattern: ([ର]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
117
+ result: 'r'
118
+ - pattern: ([ଲ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
119
+ result: 'l'
120
+ - pattern: ([ଳ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
121
+ result: 'ḷ'
122
+ - pattern: ([ଶ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
123
+ result: 'sh'
124
+ - pattern: ([ଷ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
125
+ result: 'ṣh'
126
+ - pattern: ([ସ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
127
+ result: 's'
128
+ - pattern: ([ହ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
129
+ result: 'h'
130
+ - pattern: ([କ୍ଷ]=?)(?=[\u0b4d\u0b3e\u0b3f\u0b40\u0b41\u0b42\u0b43\u0b47\u0b48\u0b4b\u0b4c])
131
+ result: 'kṣh'
132
+
133
+ characters:
134
+ 'ଅ': 'a'
135
+ 'ଆ': 'ā'
136
+ 'ଇ': 'i'
137
+ 'ଈ': 'ī'
138
+ 'ଉ': 'u'
139
+ 'ଊ': 'ū'
140
+ 'ଋ': 'ṛ'
141
+ 'ୠ': 'ṝ'
142
+ 'ଌ': 'ḻ'
143
+ 'ଏ': 'e'
144
+ 'ଐ': 'ai'
145
+ 'ଓ': 'o'
146
+ 'ୱ': 'va'
147
+ 'ଔ': 'au'
148
+
149
+ # II. Consonants
150
+ # Gutturals
151
+ 'କ': 'ka'
152
+ 'ଖ': 'kha'
153
+ 'ଗ': 'ga'
154
+ 'ଘ': 'gha'
155
+ 'ଙ': 'ṅa'
156
+
157
+ # Palatals
158
+ 'ଚ': 'cha'
159
+ 'ଛ': 'chha'
160
+ 'ଜ': 'ja'
161
+ 'ଝ': 'jha'
162
+ 'ଞ': 'ña'
163
+
164
+ # Cerebrals
165
+ 'ଟ': 'ṭa'
166
+ 'ଠ': 'ṭha'
167
+ 'ଡ': 'ḍa'
168
+ 'ଡ଼': 'ṙa'
169
+ 'ଢ': 'ḍha'
170
+ 'ଢ଼': 'ṙha'
171
+ 'ଣ': 'ṇa'
172
+
173
+ # Dentals
174
+ 'ତ': 'ta'
175
+ 'ଥ': 'tha'
176
+ 'ଦ': 'da'
177
+ 'ଧ': 'dha'
178
+ 'ନ': 'na'
179
+
180
+ # Labials
181
+ 'ପ': 'pa'
182
+ 'ଫ': 'pha'
183
+ 'ବ': 'ba'
184
+ 'ଭ': 'bha'
185
+ 'ମ': 'ma'
186
+
187
+ # Semivowels
188
+ 'ଯ': 'ỵa'
189
+ 'ୟ': 'ya'
190
+ 'ର': 'ra'
191
+ 'ଲ': 'la'
192
+ 'ଳ': 'ḷa'
193
+
194
+ # Sibilants
195
+ 'ଶ': 'sha'
196
+ 'ଷ': 'ṣha'
197
+ 'ସ': 'sa'
198
+
199
+
200
+ # Aspirate
201
+ 'ହ': 'ha'
202
+
203
+ 'କ୍ଷ': 'kṣha'
204
+
205
+ # Chandrabindu
206
+ 'ଁ': 'm'
207
+
208
+ # Bisarga
209
+ 'ଃ': 'ḥ'
210
+
211
+ # Anusvāra
212
+ 'ଂ': 'ṃ'
213
+
214
+ # Medials # Needed for connecting consonants
215
+
216
+ 'ା': 'ā'
217
+ 'ି': 'i'
218
+ 'ୀ': 'ī'
219
+ 'ୁ': 'u'
220
+ 'ୂ': 'ū'
221
+ 'ୃ': 'ṛ'
222
+ 'େ': 'e'
223
+ 'ୈ': 'ai'
224
+ 'ୋ': 'o'
225
+ 'ୌ': 'au'
226
+
227
+ '्': ''
228
+ '୍': ''
229
+ '़': ''
230
+ '଼': ''
231
+ '।': '.'
232
+ "‍": ''# Used for joining
233
+ "‌": ''# Used for non joining
234
+
235
+ # Numbers
236
+
237
+ '୦': '0'
238
+ '୧': '1'
239
+ '୨': '2'
240
+ '୩': '3'
241
+ '୪': '4'
242
+ '୫': '5'
243
+ '୬': '6'
244
+ '୭': '7'
245
+ '୮': '8'
246
+ '୯': '9'
247
+
@@ -0,0 +1,402 @@
1
+ ---
2
+ authority_id: un
3
+ id: 1972
4
+ language: iso-639-2:pan
5
+ source_script: Guru
6
+ destination_script: Latn
7
+ name: REPORT ON THE CURRENT STATUS OF UNITED NATIONS ROMANIZATION SYSTEMS FOR GEOGRAPHICAL NAMES --Panjabi Romanization Version 4.0
8
+ url: https://www.eki.ee/wgrs/rom1_pa.htm
9
+ creation_date: 1972
10
+ confirmation_date: 2016
11
+ description: |
12
+ The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
13
+ based on a report prepared by D. N. Sharma. The tables and their corrections were published in volume
14
+ II of the conference reports [1, 2].
15
+
16
+ There is no evidence of the use of the system either in India or in international cartographic products.
17
+
18
+ Punjabi (Panjābī) in India uses an alphasyllabic script (Gurmukhi) whereby each character represents a syllable
19
+ rather than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
20
+ and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is mostly
21
+ reversible but there exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters) and
22
+ consonants (combinations with subscript consonants vs. character sequences).
23
+
24
+ References
25
+
26
+ Second United Nations Conference on the Standardization of Geographical Names. London, 10–31 May 1972. Vol. II. Technical papers.
27
+ United Nations. New York 1974, pp. 136–138.
28
+
29
+ Third United Nations Conference on the Standardization of Geographical Names. Athens, 17 August – 7 September 1977. Vol. II,
30
+ Technical papers, pp. 393 etc.
31
+
32
+
33
+
34
+ notes:
35
+ - |
36
+ These characters are used in combination with abbreviated vowel characters: ਉ u, ਊ ū, ਅ a, ਆ ā, ਐ ai, ਔ au, ਇ i, ਈ ī, ਏ e; exceptional variation: ਓ o.
37
+ - |
38
+ Dotted variants of the characters: ਸ਼ sha, ਖ਼ ḳha, ਗ਼ g̣a, ਜ਼ za, ਫ਼ fa.
39
+ - |
40
+ (ੰ) Used if it is preceded by short vowels (a, i, u) and ū, excluding the independent vowel character ਉ (u, ū), e.g. ਸੰਗ saṁg, ਸਿੰਗ siṁg, ਬੁੰਦਾ buṁdā, ਬੂੰਦ būṁd, ਇੰਜਨ iṁjan.
41
+ - |
42
+ (ਂ) Used in all other occasions, e.g. ਝੋਂਕਾ jhoṁkā.
43
+ - |
44
+ (ੱ) Marks doubling of the following consonant: ਨਿੱਕਾ nikkā, ਲੱਭਣਾ labhbhṇā, ਕੁੱਤਾ kuttā, ਹਿੱਸਾ hissā, ਲੱਮਾ lammā.
45
+ - |
46
+ Absence of the inherent vowel (-a) is not marked in the spelling in any way except for the combinations
47
+ with subscript characters and those which are doubled by ੱ (adhaka).
48
+
49
+ tests:
50
+ - source: "ਪੰਜਾਬ 'ਚ ਵਧ ਰਿਹਾ ਖ਼ੁਦਕੁਸ਼ੀਆਂ ਦਾ ਰੁਝਾਨ"
51
+ expected: "paṁzāba 'cha vadha rihā khaḳhudakusḳhīāṁ dā rujhāna"
52
+ - source: "ਲੱਖ ਤੋਂ ਪਾਰ ਪੁੱਜਾ ਸਰਗਰਮ ਕੇਸਾਂ ਦਾ ਅੰਕੜਾ, ਦਿੱਲੀ 'ਚ ਦੋ ਲੱਖ ਤੋਂ ਪਾਰ ਇਨਫੈਕਟਿਡ"
53
+ expected: "lakkha toṁ pāra puzzā sragarama kesāṁ dā aṁkaṙā, dillī 'cha do lakkha toṁ pāra inaphaikaṭiḍa"
54
+ - source: "ਪਰਿਵਾਰਕ ਸਮੱਸਿਆਵਾਂ ਅਤੇ ਵਿਆਹ ਵੀ ਹੈ ਹੋਰ ਅਹਿਮ ਕਾਰਨ"
55
+ expected: "parivāraka smassiāvāṁ ate viāh vī hai hora ahima kārana"
56
+ - source: "ਮਰਦਾਂ 'ਚ ਔਰਤਾਂ ਨਾਲੋਂ ਵੱਧ ਹੈ ਖ਼ੁਦਕੁਸ਼ੀ ਦਾ ਰੁਝਾਨ"
57
+ expected: "maradāṁ 'cha auratāṁ nāloṁ vaddha hai khaḳhudakusḳhī dā rujhāna"
58
+ - source: "ਰਾਸ਼ਟਰੀ ਪੱਧਰ 'ਤੇ ਪੰਜਾਬ ਦੀ ਸਥਿਤੀ ਕਾਫ਼ੀ ਸੂਬਿਆਂ ਤੋਂ ਬਿਹਤਰ"
59
+ expected: "rāsṭarī paddhara 'te paṁzāba dī sthitī kāphaḳhī sūbiāṁ toṁ bihtara"
60
+ - source: "ਚੀਨੀ ਸੈਨਾ ਨੇ ਲਾਪਤਾ ਅਰੁਣਾਚਲ ਦੇ 5 ਨੌਜਵਾਨਾਂ ਬਾਰੇ ਦੱਸਿਆ"
61
+ expected: "chīnī sainā ne lāpatā aruṇāchala de 5 naujavānāṁ bāre dassiā"
62
+ - source: "ਸਾਖਰਤਾ ਦੇ ਮਾਮਲੇ 'ਚ ਦੇਸ਼ 'ਚ 7ਵੇਂ ਨੰਬਰ 'ਤੇ ਪੰਜਾਬ"
63
+ expected: "sākharatā de māmale 'cha des 'cha 7veṁ naṁbara 'te paṁzāba"
64
+ - source: "ਦਿੱਲੀ ਕਮੇਟੀ ਦੇ ਮੈਂਬਰ ਸ਼ੰਟੀ ਨੇ ਅਕਾਲੀ ਦਲ ਤੋਂ ਦਿੱਤਾ ਅਸਤੀਫ਼ਾ"
65
+ expected: "dillī kameṭī de maiṁbara sṁṭī ne akālī dala toṁ dittā astīphaḳhā"
66
+ - source: "੧੦੨ ਹੋਰ ਕੋਰੋਨਾ ਪਾਜ਼ੀਟਿਵ ਮਰੀਜ਼ਾਂ ਦੀ ਪੁਸ਼ਟੀ, ਇਕ ਦੀ ਮੌਤ"
67
+ expected: "102 hora koronā pājaḳhīṭiva marījaḳhāṁ dī pusṭī, ika dī mauta"
68
+ - source: "ਸੜਕ ਹਾਦਸੇ ਦੌਰਾਨ ਇਕ ਦੀ ਮੌਤ"
69
+ expected: "sṙaka hādase daurāna ika dī mauta"
70
+
71
+ map:
72
+
73
+ rules:
74
+ - pattern: (?<!ੱ)([ਕ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
75
+ result: 'k'
76
+ - pattern: (?<!ੱ)([ਖ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
77
+ result: 'kh'
78
+ - pattern: (?<!ੱ)([ਖ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
79
+ result: 'ḳh'
80
+ - pattern: (?<!ੱ)([ਗ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
81
+ result: 'g'
82
+ - pattern: (?<!ੱ)([ਗ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
83
+ result: 'g̣'
84
+ - pattern: (?<!ੱ)([ਘ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
85
+ result: 'gh'
86
+ - pattern: (?<!ੱ)([ਙ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
87
+ result: 'ṅ'
88
+ - pattern: (?<!ੱ)([ਚ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
89
+ result: 'ch'
90
+ - pattern: (?<!ੱ)([ਛ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
91
+ result: 'chh'
92
+ - pattern: (?<!ੱ)([ਜ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
93
+ result: 'z'
94
+ - pattern: (?<!ੱ)([ਜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
95
+ result: 'j'
96
+ - pattern: (?<!ੱ)([ਝ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
97
+ result: 'jh'
98
+ - pattern: (?<!ੱ)([ਞ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
99
+ result: 'ñ'
100
+ - pattern: (?<!ੱ)([ਟ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
101
+ result: 'ṭ'
102
+ - pattern: (?<!ੱ)([ਠ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
103
+ result: 'ṭh'
104
+ - pattern: (?<!ੱ)([ਡ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
105
+ result: 'ḍ'
106
+ - pattern: (?<!ੱ)([ਢ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
107
+ result: 'ḍh'
108
+ - pattern: (?<!ੱ)([ਣ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
109
+ result: 'ṇ'
110
+ - pattern: (?<!ੱ)([ਤ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
111
+ result: 't'
112
+ - pattern: (?<!ੱ)([ਥ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
113
+ result: 'th'
114
+ - pattern: (?<!ੱ)([ਦ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
115
+ result: 'd'
116
+ - pattern: (?<!ੱ)([ਧ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
117
+ result: 'dh'
118
+ - pattern: (?<!ੱ)([ਨ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
119
+ result: 'n'
120
+ - pattern: (?<!ੱ)([ਪ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
121
+ result: 'p'
122
+ - pattern: (?<!ੱ)([ਫ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
123
+ result: 'ph'
124
+ - pattern: (?<!ੱ)([ਫ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
125
+ result: 'f'
126
+ - pattern: (?<!ੱ)([ਬ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
127
+ result: 'b'
128
+ - pattern: (?<!ੱ)([ਭ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
129
+ result: 'bh'
130
+ - pattern: (?<!ੱ)([ਮ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
131
+ result: 'm'
132
+ - pattern: (?<!ੱ)([ਯ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
133
+ result: 'y'
134
+ - pattern: (?<!ੱ)([ਰ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
135
+ result: 'r'
136
+ - pattern: (?<!ੱ)([ਲ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
137
+ result: 'l'
138
+ - pattern: (?<!ੱ)([ਲੵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
139
+ result: 'l'
140
+ - pattern: (?<!ੱ)([ਲ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
141
+ result: 'l'
142
+ - pattern: (?<!ੱ)([ਵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
143
+ result: 'v'
144
+ - pattern: (?<!ੱ)([ੜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
145
+ result: 'ṙ'
146
+ - pattern: (?<!ੱ)([ਸ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
147
+ result: 's'
148
+ - pattern: (?<!ੱ)([ਸ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
149
+ result: 'sh'
150
+ - pattern: (?<!ੱ)([ਹ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
151
+ result: 'h'
152
+
153
+ - pattern: (?<=ੱ)([ਕ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
154
+ result: 'kk'
155
+ - pattern: (?<=ੱ)([ਖ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
156
+ result: 'kkh'
157
+ - pattern: (?<=ੱ)([ਖ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
158
+ result: 'ḳḳh'
159
+ - pattern: (?<=ੱ)([ਗ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
160
+ result: 'gg'
161
+ - pattern: (?<=ੱ)([ਗ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
162
+ result: 'gg̣'
163
+ - pattern: (?<=ੱ)([ਘ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
164
+ result: 'ggh'
165
+ - pattern: (?<=ੱ)([ਙ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
166
+ result: 'ṅṅ'
167
+ - pattern: (?<=ੱ)([ਚ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
168
+ result: 'cch'
169
+ - pattern: (?<=ੱ)([ਛ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
170
+ result: 'cchh'
171
+ - pattern: (?<=ੱ)([ਜ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
172
+ result: 'zz'
173
+ - pattern: (?<=ੱ)([ਜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
174
+ result: 'jj'
175
+ - pattern: (?<=ੱ)([ਝ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
176
+ result: 'jjh'
177
+ - pattern: (?<=ੱ)([ਞ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
178
+ result: 'ññ'
179
+ - pattern: (?<=ੱ)([ਟ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
180
+ result: 'ṭṭ'
181
+ - pattern: (?<=ੱ)([ਠ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
182
+ result: 'ṭṭh'
183
+ - pattern: (?<=ੱ)([ਡ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
184
+ result: 'ḍḍ'
185
+ - pattern: (?<=ੱ)([ਢ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
186
+ result: 'ḍḍh'
187
+ - pattern: (?<=ੱ)([ਣ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
188
+ result: 'ṇṇ'
189
+ - pattern: (?<=ੱ)([ਤ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
190
+ result: 'tt'
191
+ - pattern: (?<=ੱ)([ਥ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
192
+ result: 'tth'
193
+ - pattern: (?<=ੱ)([ਦ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
194
+ result: 'dd'
195
+ - pattern: (?<=ੱ)([ਧ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
196
+ result: 'ddh'
197
+ - pattern: (?<=ੱ)([ਨ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
198
+ result: 'nn'
199
+ - pattern: (?<=ੱ)([ਪ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
200
+ result: 'pp'
201
+ - pattern: (?<=ੱ)([ਫ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
202
+ result: 'pph'
203
+ - pattern: (?<=ੱ)([ਫ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
204
+ result: 'ff'
205
+ - pattern: (?<=ੱ)([ਬ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
206
+ result: 'bb'
207
+ - pattern: (?<=ੱ)([ਭ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
208
+ result: 'bbh'
209
+ - pattern: (?<=ੱ)([ਮ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
210
+ result: 'mm'
211
+ - pattern: (?<=ੱ)([ਯ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
212
+ result: 'yy'
213
+ - pattern: (?<=ੱ)([ਰ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
214
+ result: 'rr'
215
+ - pattern: (?<=ੱ)([ਲ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
216
+ result: 'll'
217
+ - pattern: (?<=ੱ)([ਲੵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
218
+ result: 'll'
219
+ - pattern: (?<=ੱ)([ਲ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
220
+ result: 'll'
221
+ - pattern: (?<=ੱ)([ਵ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
222
+ result: 'vv'
223
+ - pattern: (?<=ੱ)([ੜ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
224
+ result: 'ṙṙ'
225
+ - pattern: (?<=ੱ)([ਸ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
226
+ result: 'ss'
227
+ - pattern: (?<=ੱ)([ਸ਼]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
228
+ result: 'ssh'
229
+ - pattern: (?<=ੱ)([ਹ]=?)(?=[\u0a3e\u0a3f\u0a40\u0a41\u0a42\u0a47\u0a48\u0a4b\u0a4c\u0a4d])
230
+ result: 'hh'
231
+
232
+ characters:
233
+
234
+ # I. Vowels and Diphthongs (see Note 1)
235
+ 'ਅ': 'a'
236
+ 'ਆ': 'ā'
237
+ 'ਇ': 'i'
238
+ 'ਈ': 'ī'
239
+ 'ਉ': 'u'
240
+ 'ਊ': 'ū'
241
+ 'ਏ': 'e'
242
+ 'ਐ': 'ai'
243
+ 'ਓ': 'o'
244
+ 'ਔ': 'au'
245
+
246
+ 'ਾ': "ā"
247
+ 'ਿ': "i"
248
+ 'ੀ': "ī"
249
+ 'ੁ': "u"
250
+ 'ੂ': "ū"
251
+ 'ੇ': "e"
252
+ 'ੈ': "ai"
253
+ 'ੋ': "o"
254
+ 'ੌ': "au"
255
+
256
+ # II. Consonants
257
+ 'ਕ': 'ka'
258
+ 'ਖ': 'kha'
259
+ 'ਖ਼': 'ḳha'
260
+ 'ਗ': 'ga'
261
+ 'ਗ਼': 'g̣a'
262
+ 'ਘ': 'gha'
263
+ 'ਙ': 'ṅa'
264
+ 'ਚ': 'cha'
265
+ 'ਛ': 'chha'
266
+ 'ਜ਼': 'za'
267
+ 'ਜ': 'ja'
268
+ 'ਝ': 'jha'
269
+ 'ਞ': 'ña'
270
+ 'ਟ': 'ṭa'
271
+ 'ਠ': 'ṭha'
272
+ 'ਡ': 'ḍa'
273
+ 'ਢ': 'ḍha'
274
+ 'ਣ': 'ṇa'
275
+ 'ਤ': 'ta'
276
+ 'ਥ': 'tha'
277
+ 'ਦ': 'da'
278
+ 'ਧ': 'dha'
279
+ 'ਨ': 'na'
280
+ 'ਪ': 'pa'
281
+ 'ਫ': 'pha'
282
+ 'ਫ਼': 'fa'
283
+ 'ਬ': 'ba'
284
+ 'ਭ': 'bha'
285
+ 'ਮ': 'ma'
286
+ 'ਯ': 'ya'
287
+ 'ਰ': 'ra'
288
+ 'ਲ': 'la'
289
+ 'ਲੵ': 'la'
290
+ 'ਲ਼': 'la'
291
+ 'ਵ': 'va'
292
+ 'ੜ': 'ṙa'
293
+ 'ਸ': 's'
294
+ 'ਸ਼': 'sha'
295
+ 'ਹ': 'h'
296
+ 'ਂ': 'ṁ'
297
+ 'ੰ': 'ṁ'
298
+
299
+ # Adhaka (ੱ): doubling of the following consonant [Note 5]
300
+ 'ੱਕ': 'kka'
301
+ 'ੱਖ': 'kkha'
302
+ 'ੱਖ਼': 'ḳḳha'
303
+ 'ੱਗ': 'gga'
304
+ 'ੱਗ਼': 'gg̣a'
305
+ 'ੱਘ': 'ggha'
306
+ 'ੱਙ': 'ṅṅa'
307
+ 'ੱਚ': 'ccha'
308
+ 'ੱਛ': 'cchha'
309
+ 'ੱਜ਼': 'zza'
310
+ 'ੱਜ': 'jja'
311
+ 'ੱਝ': 'jjha'
312
+ 'ੱਞ': 'ñña'
313
+ 'ੱਟ': 'ṭṭa'
314
+ 'ੱਠ': 'ṭṭha'
315
+ 'ੱਡ': 'ḍḍa'
316
+ 'ੱਢ': 'ḍḍha'
317
+ 'ੱਣ': 'ṇṇa'
318
+ 'ੱਤ': 'tta'
319
+ 'ੱਥ': 'ttha'
320
+ 'ੱਦ': 'dda'
321
+ 'ੱਧ': 'ddha'
322
+ 'ੱਨ': 'nna'
323
+ 'ੱਪ': 'ppa'
324
+ 'ੱਫ': 'ppha'
325
+ 'ੱਫ਼': 'ffa'
326
+ 'ੱਬ': 'bba'
327
+ 'ੱਭ': 'bbha'
328
+ 'ੱਮ': 'mma'
329
+ 'ੱਯ': 'yya'
330
+ 'ੱਰ': 'rra'
331
+ 'ੱਲ': 'lla'
332
+ 'ੱਲੵ': 'lla'
333
+ 'ੱਲ਼': 'lla'
334
+ 'ੱਵ': 'vva'
335
+ 'ੱੜ': 'ṙṙa'
336
+ 'ੱਸ': 'ss'
337
+ 'ੱਸ਼': 'ssha'
338
+ 'ੱਹ': 'hh'
339
+
340
+ # Adhaka (ੱ): doubling of the following consonant when it is followed by ੍ [Note 5]
341
+ 'ੱਕ੍': 'kk'
342
+ 'ੱਖ੍': 'kkh'
343
+ 'ੱਖ਼੍': 'ḳḳh'
344
+ 'ੱਗ੍': 'gg'
345
+ 'ੱਗ਼੍': 'gg̣'
346
+ 'ੱਘ੍': 'ggh'
347
+ 'ੱਙ੍': 'ṅṅ'
348
+ 'ੱਚ੍': 'cch'
349
+ 'ੱਛ੍': 'cchh'
350
+ 'ੱਜ਼੍': 'zz'
351
+ 'ੱਜ੍': 'jj'
352
+ 'ੱਝ੍': 'jjh'
353
+ 'ੱਞ੍': 'ññ'
354
+ 'ੱਟ੍': 'ṭṭ'
355
+ 'ੱਠ੍': 'ṭṭh'
356
+ 'ੱਡ੍': 'ḍḍ'
357
+ 'ੱਢ੍': 'ḍḍh'
358
+ 'ੱਣ੍': 'ṇṇ'
359
+ 'ੱਤ੍': 'tt'
360
+ 'ੱਥ੍': 'tth'
361
+ 'ੱਦ੍': 'dd'
362
+ 'ੱਧ੍': 'ddh'
363
+ 'ੱਨ੍': 'nn'
364
+ 'ੱਪ੍': 'pp'
365
+ 'ੱਫ੍': 'pph'
366
+ 'ੱਫ਼੍': 'ff'
367
+ 'ੱਬ੍': 'bb'
368
+ 'ੱਭ੍': 'bbh'
369
+ 'ੱਮ੍': 'mm'
370
+ 'ੱਯ੍': 'yy'
371
+ 'ੱਰ੍': 'rr'
372
+ 'ੱਲ੍': 'll'
373
+ 'ੱਲੵ੍': 'll'
374
+ 'ੱਲ਼੍': 'll'
375
+ 'ੱਵ੍': 'vv'
376
+ 'ੱੜ੍': 'ṙṙ'
377
+ 'ੱਸ੍': 'ss'
378
+ 'ੱਸ਼੍': 'ssh'
379
+ 'ੱਹ੍': 'hh'
380
+
381
+
382
+ # III. Subscript consonant characters
383
+ "੍ਹ": "-h"
384
+ "੍ਵ": "-v"
385
+ "੍ਰ": "-r"
386
+ "੍ਯ": "-y"
387
+
388
+ "੍": ""
389
+ "ੱ": ""
390
+ "਼": ""
391
+
392
+ # digits
393
+ '੦': '0'
394
+ '੧': '1'
395
+ '੨': '2'
396
+ '੩': '3'
397
+ '੪': '4'
398
+ '੫': '5'
399
+ '੬': '6'
400
+ '੭': '7'
401
+ '੮': '8'
402
+ '੯': '9'