tamil 0.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/tamil.rb +519 -0
  3. metadata +45 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: cd7f55b1244ad7f539d0d0211027171f2ea7acdf
4
+ data.tar.gz: 198b5958f12c7e363b3d86ed736ab82a4f496fb4
5
+ SHA512:
6
+ metadata.gz: c1d8bd9c3df20586ffb1f4e56a36e0bb949fe961d365781e770d371c1bd56957b0a208417c457d8f0a20c34278b994790ee6e7e6db570166344bab3b4cd8055b
7
+ data.tar.gz: 9abf039aadfdc6b8a95cc7646ee433ead2ac54d8e252c6df8283e612e7c15431b5bb810a06a4274028e378a2d3527b56531ad6cb5a6aacab47ae5bf0a97554f5
@@ -0,0 +1,519 @@
1
+ # encoding: UTF-8
2
+ # (C) 2015 Muthiah Annamalai <ezhillang@gmail.com>
3
+
4
# Error raised by the top-level +assert+ helper when its block evaluates to a
# falsy value.
class AssertionError < RuntimeError; end
6
+
7
# Minimal assertion helper: yields to the given block and raises
# AssertionError when the block's result is falsy (+nil+ or +false+).
# Returns +nil+ when the assertion holds.
def assert
  return if yield

  raise AssertionError
end
10
+
11
# Letter tables for the Tamil script plus helpers that split Unicode text
# into Tamil letters (a base character together with any vowel sign or pulli
# that follows it).
#
# The index-based accessors (+uyir+, +agaram+, +mei+, +uyirmei+) rely on the
# top-level +assert+ helper defined earlier in this file and therefore raise
# AssertionError for an out-of-range index.
module Tamil
  ## constants
  TA_ACCENT_LEN = 13 # size of @@accent_symbols: "" + 11 vowel signs + aytham
  TA_AYUDHA_LEN = 1
  TA_UYIR_LEN = 12
  TA_MEI_LEN = 18
  TA_AGARAM_LEN = 18
  TA_SANSKRIT_LEN = 6
  TA_UYIRMEI_LEN = 216
  TA_GRANTHA_UYIRMEI_LEN = 24 * 12
  TA_LETTERS_LEN = 247 + 6 * 12 + 22 + 4 - TA_AGARAM_LEN - 4 # 323

  # List of letters you can use
  # Consonants carrying the inherent "a" vowel (agaram).
  @@agaram_letters = ["க","ச","ட","த","ப","ற","ஞ","ங","ண","ந","ம","ன","ய","ர","ல","வ","ழ","ள"]
  AGARAM_LETTERS = @@agaram_letters.clone

  # Vowels (uyir) and the aytham.
  @@uyir_letters = ["அ","ஆ","இ","ஈ","உ","ஊ","எ","ஏ","ஐ","ஒ","ஓ","ஔ"]
  @@ayudha_letter = "ஃ"

  # Short (kuril) and long (nedil) vowels.
  @@kuril_letters = ["அ", "இ", "உ", "எ", "ஒ"]
  @@nedil_letters = ["ஆ", "ஈ", "ஊ", "ஏ", "ஓ"]

  # Pure consonants grouped by class.
  @@vallinam_letters = ["க்", "ச்", "ட்", "த்", "ப்", "ற்"]
  @@mellinam_letters = ["ங்", "ஞ்", "ண்", "ந்", "ம்", "ன்"]
  @@idayinam_letters = ["ய்", "ர்", "ல்", "வ்", "ழ்", "ள்"]

  # Pure consonants (mei), in the same order as @@agaram_letters.
  @@mei_letters = ["க்","ச்","ட்","த்","ப்","ற்","ஞ்","ங்","ண்","ந்","ம்","ன்","ய்","ர்","ல்","வ்","ழ்","ள்" ]

  # Vowel signs in the same order as @@uyir_letters; the leading "" stands
  # for the inherent vowel, and the aytham is listed last.
  @@accent_symbols = ["","ா","ி","ீ","ு","ூ","ெ","ே","ை","ொ","ோ","ௌ","ஃ"]
  @@pulli_symbols = ["்"]

  # Grantha (Sanskrit) consonants, with inherent vowel and in pure form.
  @@sanskrit_letters = ["ஶ","ஜ","ஷ", "ஸ","ஹ","க்ஷ"]
  @@sanskrit_mei_letters =["ஶ்","ஜ்","ஷ்", "ஸ்","ஹ்","க்ஷ்"]

  # Native consonants followed by the Grantha ones.
  @@grantha_mei_letters = @@mei_letters + @@sanskrit_mei_letters
  @@grantha_agaram_letters = @@agaram_letters + @@sanskrit_letters

  # Consonant-vowel compounds: 18 consonants x 12 vowels, one row per consonant.
  @@uyirmei_letters = [ "க" ,"கா" ,"கி" ,"கீ" ,"கு" ,"கூ" ,"கெ" ,"கே" ,"கை" ,"கொ" ,"கோ" ,"கௌ" ,
                        "ச" ,"சா" ,"சி" ,"சீ" ,"சு" ,"சூ" ,"செ" ,"சே" ,"சை" ,"சொ" ,"சோ" ,"சௌ" ,
                        "ட" ,"டா" ,"டி" ,"டீ" ,"டு" ,"டூ" ,"டெ" ,"டே" ,"டை" ,"டொ" ,"டோ" ,"டௌ",
                        "த" ,"தா" ,"தி" ,"தீ" ,"து" ,"தூ" ,"தெ" ,"தே" ,"தை" ,"தொ" ,"தோ" ,"தௌ",
                        "ப" ,"பா" ,"பி" ,"பீ" ,"பு" ,"பூ" ,"பெ" ,"பே" ,"பை" ,"பொ" ,"போ" ,"பௌ" ,
                        "ற" ,"றா" ,"றி" ,"றீ" ,"று" ,"றூ" ,"றெ" ,"றே" ,"றை" ,"றொ" ,"றோ" ,"றௌ",
                        "ஞ" ,"ஞா" ,"ஞி" ,"ஞீ" ,"ஞு" ,"ஞூ" ,"ஞெ" ,"ஞே" ,"ஞை" ,"ஞொ" ,"ஞோ" ,"ஞௌ" ,
                        "ங" ,"ஙா" ,"ஙி" ,"ஙீ" ,"ஙு" ,"ஙூ" ,"ஙெ" ,"ஙே" ,"ஙை" ,"ஙொ" ,"ஙோ" ,"ஙௌ" ,
                        "ண" ,"ணா" ,"ணி" ,"ணீ" ,"ணு" ,"ணூ" ,"ணெ" ,"ணே" ,"ணை" ,"ணொ" ,"ணோ" ,"ணௌ" ,
                        "ந" ,"நா" ,"நி" ,"நீ" ,"நு" ,"நூ" ,"நெ" ,"நே" ,"நை" ,"நொ" ,"நோ" ,"நௌ" ,
                        "ம" ,"மா" ,"மி" ,"மீ" ,"மு" ,"மூ" ,"மெ" ,"மே" ,"மை" ,"மொ" ,"மோ" ,"மௌ" ,
                        "ன" ,"னா" ,"னி" ,"னீ" ,"னு" ,"னூ" ,"னெ" ,"னே" ,"னை" ,"னொ" ,"னோ" ,"னௌ",
                        "ய" ,"யா" ,"யி" ,"யீ" ,"யு" ,"யூ" ,"யெ" ,"யே" ,"யை" ,"யொ" ,"யோ" ,"யௌ",
                        "ர" ,"ரா" ,"ரி" ,"ரீ" ,"ரு" ,"ரூ" ,"ரெ" ,"ரே" ,"ரை" ,"ரொ" ,"ரோ" ,"ரௌ",
                        "ல" ,"லா" ,"லி" ,"லீ" ,"லு" ,"லூ" ,"லெ" ,"லே" ,"லை" ,"லொ" ,"லோ" ,"லௌ" ,
                        "வ" ,"வா" ,"வி" ,"வீ" ,"வு" ,"வூ" ,"வெ" ,"வே" ,"வை" ,"வொ" ,"வோ" ,"வௌ" ,
                        "ழ" ,"ழா" ,"ழி" ,"ழீ" ,"ழு" ,"ழூ" ,"ழெ" ,"ழே" ,"ழை" ,"ழொ" ,"ழோ" ,"ழௌ" ,
                        "ள" ,"ளா" ,"ளி" ,"ளீ" ,"ளு" ,"ளூ" ,"ளெ" ,"ளே" ,"ளை" ,"ளொ" ,"ளோ" ,"ளௌ" ]

  # Grantha consonant-vowel compounds, built by appending each of the 12
  # vowel signs (the aytham entry of @@accent_symbols is skipped) to every
  # Grantha consonant.
  sanskrit_uyirmei = @@sanskrit_letters.flat_map do |consonant|
    @@accent_symbols.first(TA_UYIR_LEN).map { |sign| consonant + sign }
  end

  # Total Tamil letters in use, including Sanskrit letters. The original code
  # read this table in tamil_len without ever defining it (NameError). The
  # layout follows the reference table kept in the comments after this
  # module: vowels, aytham, pure consonants, the four Grantha pure consonants
  # ஜ் ஷ் ஸ் ஹ் (indexes 1..4 of @@sanskrit_mei_letters), native compounds,
  # then Grantha compounds -- TA_LETTERS_LEN (323) entries in all.
  @@tamil_letters = @@uyir_letters + [@@ayudha_letter] + @@mei_letters +
                    @@sanskrit_mei_letters[1, 4] + @@uyirmei_letters +
                    sanskrit_uyirmei

  # Splits +word+ into a list of Tamil/English letters.
  #
  # A vowel, the aytham or a consonant starts a new letter. A vowel sign, the
  # pulli or any other character above U+00FF is appended to the most recent
  # letter, or starts a letter of its own when no Tamil letter has been seen
  # yet. Characters up to and including U+00FF (ASCII / Latin-1) always stand
  # alone.
  #
  # word    - a String (anything responding to +length+ and integer +[]+).
  # Returns an Array of Strings; an empty word yields [].
  def self.get_letters(word)
    ta_letters = []
    # Becomes true once a Tamil (or other non-Latin) letter has been emitted,
    # i.e. once there is something a combining sign may attach to.
    not_empty = false

    (0...word.length).each do |idx|
      c = word[idx]
      starts_letter = @@uyir_letters.include?(c) ||
                      c == @@ayudha_letter ||
                      @@grantha_agaram_letters.include?(c)

      if starts_letter
        ta_letters << c
        not_empty = true
      elsif @@accent_symbols.include?(c)
        if not_empty
          ta_letters[-1] += c
        else
          # odd situation: a vowel sign with nothing before it
          ta_letters << c
          not_empty = true
        end
      elsif c <= "\u00FF"
        # Inclusive bound: the original "<" left U+00FF out of the Latin-1
        # range and glued it onto the preceding Tamil letter.
        ta_letters << c
      elsif not_empty
        # Pulli and other non-Latin characters attach to the previous letter.
        ta_letters[-1] += c
      else
        ta_letters << c
        not_empty = true
      end
    end

    ta_letters
  end

  ## length of the definitions

  # Number of entries in the vowel-sign table (13).
  def self.accent_len
    TA_ACCENT_LEN
  end

  # Number of aytham letters (1).
  def self.ayudha_len
    TA_AYUDHA_LEN
  end

  # Number of vowels (12).
  def self.uyir_len
    TA_UYIR_LEN
  end

  # Number of pure consonants (18).
  def self.mei_len
    TA_MEI_LEN
  end

  # Number of consonants with inherent vowel (18). Also asserts that the
  # table still agrees with the constant.
  def self.agaram_len
    assert { @@agaram_letters.length == TA_AGARAM_LEN }
    TA_AGARAM_LEN
  end

  # Number of native consonant-vowel compounds (216).
  def self.uyirmei_len
    TA_UYIRMEI_LEN
  end

  # Number of entries in the complete letter table, Grantha letters included.
  def self.tamil_len
    @@tamil_letters.length
  end

  ## access the letters

  # Returns the vowel at +idx+ (0-based).
  # Raises AssertionError unless 0 <= idx < uyir_len.
  def self.uyir(idx)
    assert { idx >= 0 && idx < uyir_len }
    # Was `Tamil::uyir_letters[idx]`, a call to a method that does not exist.
    @@uyir_letters[idx]
  end

  # Returns the consonant with inherent vowel at +idx+ (0-based).
  # Raises AssertionError unless 0 <= idx < agaram_len.
  def self.agaram(idx)
    assert { idx >= 0 && idx < agaram_len }
    @@agaram_letters[idx]
  end

  # Returns the pure consonant at +idx+ (0-based).
  # Raises AssertionError unless 0 <= idx < mei_len.
  def self.mei(idx)
    assert { idx >= 0 && idx < mei_len }
    @@mei_letters[idx]
  end

  # Returns the consonant-vowel compound at +idx+ (0-based, consonant-major:
  # idx = consonant_index * 12 + vowel_index).
  # Raises AssertionError unless 0 <= idx < uyirmei_len.
  def self.uyirmei(idx)
    assert { idx >= 0 && idx < uyirmei_len }
    @@uyirmei_letters[idx]
  end

end
164
+
165
+ # ## total tamil letters in use, including sanskrit letters
166
+ # tamil_letters = [
167
+
168
+ # ## /* Uyir */
169
+ # "அ","ஆ","இ", "ஈ","உ","ஊ","எ","ஏ","ஐ","ஒ","ஓ","ஔ",
170
+
171
+ # ##/* Ayuda Ezhuthu */
172
+ # "ஃ",
173
+
174
+ # ## /* Mei */
175
+ # "க்","ச்","ட்","த்","ப்","ற்","ஞ்","ங்","ண்","ந்","ம்","ன்","ய்","ர்","ல்","வ்","ழ்","ள்",
176
+
177
+ # ## /* Agaram */
178
+ # ## "க","ச","ட","த","ப","ற","ஞ","ங","ண","ந","ம","ன","ய","ர","ல","வ","ழ","ள",
179
+
180
+ # ## /* Sanskrit (Vada Mozhi) */
181
+ # ## "ஜ","ஷ", "ஸ","ஹ",
182
+
183
+ # ##/* Sanskrit (Mei) */
184
+ # "ஜ்","ஷ்", "ஸ்","ஹ்",
185
+
186
+ # ## /* Uyir Mei */
187
+ # "க" ,"கா" ,"கி" ,"கீ" ,"கு" ,"கூ" ,"கெ" ,"கே" ,"கை" ,"கொ" ,"கோ" ,"கௌ"
188
+ # ,"ச" ,"சா" ,"சி" ,"சீ" ,"சு" ,"சூ" ,"செ" ,"சே" ,"சை" ,"சொ" ,"சோ" ,"சௌ"
189
+ # ,"ட" ,"டா" ,"டி" ,"டீ" ,"டு" ,"டூ" ,"டெ" ,"டே" ,"டை" ,"டொ" ,"டோ" ,"டௌ"
190
+ # ,"த" ,"தா" ,"தி" ,"தீ" ,"து" ,"தூ" ,"தெ" ,"தே" ,"தை" ,"தொ" ,"தோ" ,"தௌ"
191
+ # ,"ப" ,"பா" ,"பி" ,"பீ" ,"பு" ,"பூ" ,"பெ" ,"பே" ,"பை" ,"பொ" ,"போ" ,"பௌ"
192
+ # ,"ற" ,"றா" ,"றி" ,"றீ" ,"று" ,"றூ" ,"றெ" ,"றே" ,"றை" ,"றொ" ,"றோ" ,"றௌ"
193
+ # ,"ஞ" ,"ஞா" ,"ஞி" ,"ஞீ" ,"ஞு" ,"ஞூ" ,"ஞெ" ,"ஞே" ,"ஞை" ,"ஞொ" ,"ஞோ" ,"ஞௌ"
194
+ # ,"ங" ,"ஙா" ,"ஙி" ,"ஙீ" ,"ஙு" ,"ஙூ" ,"ஙெ" ,"ஙே" ,"ஙை" ,"ஙொ" ,"ஙோ" ,"ஙௌ"
195
+ # ,"ண" ,"ணா" ,"ணி" ,"ணீ" ,"ணு" ,"ணூ" ,"ணெ" ,"ணே" ,"ணை" ,"ணொ" ,"ணோ" ,"ணௌ"
196
+ # ,"ந" ,"நா" ,"நி" ,"நீ" ,"நு" ,"நூ" ,"நெ" ,"நே" ,"நை" ,"நொ" ,"நோ" ,"நௌ"
197
+ # ,"ம" ,"மா" ,"மி" ,"மீ" ,"மு" ,"மூ" ,"மெ" ,"மே" ,"மை" ,"மொ" ,"மோ" ,"மௌ"
198
+ # ,"ன" ,"னா" ,"னி" ,"னீ" ,"னு" ,"னூ" ,"னெ" ,"னே" ,"னை" ,"னொ" ,"னோ" ,"னௌ"
199
+ # ,"ய" ,"யா" ,"யி" ,"யீ" ,"யு" ,"யூ" ,"யெ" ,"யே" ,"யை" ,"யொ" ,"யோ" ,"யௌ"
200
+ # ,"ர" ,"ரா" ,"ரி" ,"ரீ" ,"ரு" ,"ரூ" ,"ரெ" ,"ரே" ,"ரை" ,"ரொ" ,"ரோ" ,"ரௌ"
201
+ # ,"ல" ,"லா" ,"லி" ,"லீ" ,"லு" ,"லூ" ,"லெ" ,"லே" ,"லை" ,"லொ" ,"லோ" ,"லௌ"
202
+ # ,"வ" ,"வா" ,"வி" ,"வீ" ,"வு" ,"வூ" ,"வெ" ,"வே" ,"வை" ,"வொ" ,"வோ" ,"வௌ"
203
+ # ,"ழ" ,"ழா" ,"ழி" ,"ழீ" ,"ழு" ,"ழூ" ,"ழெ" ,"ழே" ,"ழை" ,"ழொ" ,"ழோ" ,"ழௌ"
204
+ # ,"ள" ,"ளா" ,"ளி" ,"ளீ" ,"ளு" ,"ளூ" ,"ளெ" ,"ளே" ,"ளை" ,"ளொ" ,"ளோ" ,"ளௌ"
205
+
206
+ # ##/* Sanskrit Uyir-Mei */
207
+ # ,"ஶ", "ஶா", "ஶி", "ஶீ", "ஶு", "ஶூ", "ஶெ", "ஶே", "ஶை", "ஶொ", "ஶோ", "ஶௌ"
208
+ # ,"ஜ" ,"ஜா" ,"ஜி" ,"ஜீ" ,"ஜு" ,"ஜூ" ,"ஜெ" ,"ஜே" ,"ஜை" ,"ஜொ" ,"ஜோ" ,"ஜௌ"
209
+ # ,"ஷ" ,"ஷா" ,"ஷி" ,"ஷீ" ,"ஷு" ,"ஷூ" ,"ஷெ" ,"ஷே" ,"ஷை" ,"ஷொ" ,"ஷோ" ,"ஷௌ"
210
+ # ,"ஸ" ,"ஸா" ,"ஸி" ,"ஸீ" ,"ஸு" ,"ஸூ" ,"ஸெ" ,"ஸே" ,"ஸை" ,"ஸொ" ,"ஸோ" ,"ஸௌ"
211
+ # ,"ஹ" ,"ஹா" ,"ஹி" ,"ஹீ" ,"ஹு" ,"ஹூ" ,"ஹெ" ,"ஹே" ,"ஹை" ,"ஹொ" ,"ஹோ" ,"ஹௌ"
212
+ # ,"க்ஷ" ,"க்ஷா" ,"க்ஷி" ,"க்ஷீ" ,"க்ஷு" ,"க்ஷூ" ,"க்ஷெ" ,"க்ஷே" ,"க்ஷை" ,"க்ஷொ" ,"க்ஷோ" ,"க்ஷௌ" ]
213
+
214
+ # grantha_uyirmei_letters = tamil_letters[tamil_letters.index("கா")-1:].clone()
215
+
216
+
217
+ # def uyirmei_constructed( mei_idx, uyir_idx):
218
+ # """ construct uyirmei letter given a mei index and a uyir index """
219
+ # idx,idy = mei_idx,uyir_idx
220
+ # assert ( idy >= 0 and idy < uyir_len() )
221
+ # assert ( idx >= 0 and idx < mei_len() )
222
+ # return agaram_letters[mei_idx]+accent_symbols[uyir_idx]
223
+
224
+ # def tamil( idx ):
225
+ # """ retrieve Tamil letter at canonical index from array utf8.tamil_letters """
226
+ # assert ( idx >= 0 and idx < tamil_len() )
227
+ # return tamil_letters[idx]
228
+
229
+ # # companion function to @tamil()
230
+ # def getidx(letter):
231
+ # for itr in range(0,tamil_len()):
232
+ # if tamil_letters[itr] == letter:
233
+ # return itr
234
+ # raise Exception("Cannot find letter in Tamil arichuvadi")
235
+
236
+ # ## useful part of the API:
237
+ # def istamil_prefix( word ):
238
+ # """ check if the given word has a tamil prefix. Returns
239
+ # either a True/False flag """
240
+ # for letter in tamil_letters:
241
+ # if ( word.find(letter) == 0 ):
242
+ # return True
243
+ # return False
244
+
245
+ # if not PYTHON3:
246
+ # is_tamil_unicode_predicate = lambda x: x >= unichr(2946) and x <= unichr(3066)
247
+ # else:
248
+ # is_tamil_unicode_predicate = lambda x: x >= chr(2946) and x <= chr(3066)
249
+ # def is_tamil_unicode( sequence ):
250
+ # # Ref: languagetool-office-extension/src/main/java/org/languagetool/openoffice/TamilDetector.java
251
+ # if type(sequence) is list:
252
+ # return list(map( is_tamil_unicode_predicate, sequence ))
253
+ # if len(sequence) > 1:
254
+ # return list(map( is_tamil_unicode_predicate, get_letters(sequence) ))
255
+ # return is_tamil_unicode_predicate( sequence )
256
+
257
+ # def all_tamil( word_in ):
258
+ # """ predicate checks if all letters of the input word are Tamil letters """
259
+ # if isinstance(word_in,list):
260
+ # word = word_in
261
+ # else:
262
+ # word = get_letters( word_in )
263
+ # return all( [(letter in tamil_letters) for letter in word] )
264
+
265
+ # def has_tamil( word ):
266
+ # """check if the word has any occurrence of any tamil letter """
267
+ # # list comprehension is not necessary - we bail at earliest
268
+ # for letters in tamil_letters:
269
+ # if ( word.find(letters) >= 0 ):
270
+ # return True
271
+ # return False
272
+
273
+ # def istamil( tchar ):
274
+ # """ check if the letter tchar is prefix of
275
+ # any of tamil-letter. It suggests we have a tamil identifier"""
276
+ # if (tchar in tamil_letters):
277
+ # return True
278
+ # return False
279
+
280
+ # def istamil_alnum( tchar ):
281
+ # """ check if the character is alphanumeric, or tamil.
282
+ # This saves time from running through istamil() check. """
283
+ # return ( tchar.isalnum( ) or istamil( tchar ) )
284
+
285
+ # def reverse_word( word ):
286
+ # """ reverse a Tamil word according to letters not unicode-points """
287
+ # op = get_letters( word )
288
+ # op.reverse()
289
+ # return "".join(op)
290
+
291
+ # ## find out if the letters like, "பொ" are written in canonical "ப + ொ"" graphemes then
292
+ # ## return True. If they are written like "ப + ெ + ா" then return False on first occurrence
293
+ # def is_normalized( text ):
294
+ # TLEN,idx = len(text),1
295
+ # kaal = "ா"
296
+ # sinna_kombu, periya_kombu = "ெ", "ே"
297
+ # kombugal = [sinna_kombu, periya_kombu]
298
+
299
+ # def predicate( last_letter, prev_letter):
300
+ # if ((last_letter == kaal) and (prev_letter in kombugal)):
301
+ # return True
302
+ # return False
303
+ # if TLEN < 2:
304
+ # return True
305
+ # elif TLEN == 2:
306
+ # if predicate( text[-1], text[-2] ):
307
+ # return False
308
+ # return True
309
+ # a = text[0]
310
+ # b = text[1]
311
+ # assert idx == 1
312
+ # while (idx < TLEN):
313
+ # if predicate(b,a):
314
+ # return False
315
+ # a=b
316
+ # idx = idx + 1
317
+ # if idx < TLEN:
318
+ # b=text[idx]
319
+ # # reached end and nothing tripped us
320
+ # return True
321
+
322
+ # def _make_set(args):
323
+ # if PYTHON3:
324
+ # return frozenset(args)
325
+ # return set(args)
326
+
327
+ # grantha_agaram_set = _make_set(grantha_agaram_letters)
328
+ # accent_symbol_set = _make_set(accent_symbols)
329
+ # uyir_letter_set = _make_set(uyir_letters)
330
+
331
+
332
+ # _all_symbols = copy( accent_symbols )
333
+ # _all_symbols.extend( pulli_symbols )
334
+ # all_symbol_set = _make_set(_all_symbols)
335
+
336
+ # # same as get_letters but use as iterable
337
+ # def get_letters_iterable( word ):
338
+ # """ splits the word into a character-list of tamil/english
339
+ # characters present in the stream """
340
+ # WLEN,idx = len(word),0
341
+
342
+ # while (idx < WLEN):
343
+ # c = word[idx]
344
+ # #print(idx,hex(ord(c)),len(ta_letters))
345
+ # if c in uyir_letter_set or c == ayudha_letter:
346
+ # idx = idx + 1
347
+ # yield c
348
+ # elif c in grantha_agaram_set:
349
+ # if idx + 1 < WLEN and word[idx+1] in all_symbol_set:
350
+ # c2 = word[idx+1]
351
+ # idx = idx + 2
352
+ # yield (c + c2)
353
+ # else:
354
+ # idx = idx + 1
355
+ # yield c
356
+ # else:
357
+ # idx = idx + 1
358
+ # yield c
359
+ # raise StopIteration
360
+
361
+ # def get_words(letters,tamil_only=False):
362
+ # return [ word for word in get_words_iterable(letters,tamil_only) ]
363
+
364
+ # def get_words_iterable( letters, tamil_only=False ):
365
+ # """ given a list of UTF-8 letters section them into words, grouping them at spaces """
366
+
367
+ # # correct algorithm for get-tamil-words
368
+ # buf = []
369
+ # for idx,let in enumerate(letters):
370
+ # if not let.isspace():
371
+ # if istamil(let) or (not tamil_only):
372
+ # buf.append( let )
373
+ # else:
374
+ # if len(buf) > 0:
375
+ # yield "".join( buf )
376
+ # buf = []
377
+ # if len(buf) > 0:
378
+ # yield "".join(buf)
379
+
380
+ # def get_tamil_words( letters ):
381
+ # """ return the list of Tamil-only words found in the given letters """
382
+ # return [word for word in get_words_iterable( letters, tamil_only = True )]
383
+
384
+ # if PYTHON3:
385
+ # def cmp( x, y):
386
+ # if x == y:
387
+ # return 0
388
+ # if x > y:
389
+ # return 1
390
+ # return -1
391
+
392
+ # # answer if word_a ranks ahead of, or at same level, as word_b in a Tamil dictionary order...
393
+ # # for use with Python : if a > 0
394
+ # def compare_words_lexicographic( word_a, word_b ):
395
+ # """ compare words in Tamil lexicographic order """
396
+ # # sanity check for words to be all Tamil
397
+ # if ( not all_tamil(word_a) ) or (not all_tamil(word_b)) :
398
+ # print("## ")
399
+ # print(word_a)
400
+ # print(word_b)
401
+ # print("Both operands need to be Tamil words")
402
+ # La = len(word_a)
403
+ # Lb = len(word_b)
404
+ # all_TA_letters = "".join(tamil_letters)
405
+ # for itr in range(0,min(La,Lb)):
406
+ # pos1 = all_TA_letters.find( word_a[itr] )
407
+ # pos2 = all_TA_letters.find( word_b[itr] )
408
+
409
+ # if pos1 != pos2 :
410
+ # #print not( pos1 > pos2), pos1, pos2
411
+ # return cmp(pos1, pos2)
412
+
413
+ # # result depends on if La is shorter than Lb, or 0 if La == Lb i.e. cmp
414
+ # return cmp(La,Lb)
415
+
416
+ # # return a list of ordered-pairs containing positions
417
+ # # that are common in word_a, and word_b; e.g.
418
+ # # தேடுக x தடங்கல் -> one common letter க [(2,3)]
419
+ # # சொல் x தேடுக -> no common letters []
420
+ # def word_intersection( word_a, word_b ):
421
+ # """ return a list of tuples where word_a, word_b intersect """
422
+ # positions = []
423
+ # word_a_letters = get_letters( word_a )
424
+ # word_b_letters = get_letters( word_b )
425
+ # for idx,wa in enumerate(word_a_letters):
426
+ # for idy,wb in enumerate(word_b_letters):
427
+ # if ( wa == wb ):
428
+ # positions.append( (idx, idy) )
429
+ # return positions
430
+
431
+ # def splitMeiUyir(uyirmei_char):
432
+ # """
433
+ # This function splits a uyirmei compound character into mei + uyir characters
433
+ # and returns them as a tuple.
435
+
436
+ # Input : It must be unicode tamil char.
437
+
438
+ # Written By : Arulalan.T
439
+ # Date : 22.09.2014
440
+
441
+ # """
442
+
443
+ # if not isinstance(uyirmei_char, PYTHON3 and str or unicode):
444
+ # raise ValueError("Passed input letter '%s' must be unicode, \
445
+ # not just string" % uyirmei_char)
446
+
447
+ # if uyirmei_char in mei_letters:
448
+ # return uyirmei_char
449
+
450
+ # if uyirmei_char in uyir_letters:
451
+ # return uyirmei_char
452
+
453
+ # if uyirmei_char not in grantha_uyirmei_letters:
454
+ # raise ValueError("Passed input letter '%s' is not tamil letter" % uyirmei_char)
455
+
456
+ # idx = grantha_uyirmei_letters.index(uyirmei_char)
457
+ # uyiridx = idx % 12
458
+ # meiidx = int((idx - uyiridx)/ 12)
459
+ # return (grantha_mei_letters[meiidx], uyir_letters[uyiridx])
460
+ # # end of def splitMeiUyir(uyirmei_char):
461
+
462
+ # def joinMeiUyir(mei_char, uyir_char):
463
+ # """
464
+ # This function joins a mei character and a uyir character, and returns a
465
+ # compound uyirmei unicode character.
466
+
467
+ # Inputs:
468
+ # mei_char : It must be unicode tamil mei char.
469
+ # uyir_char : It must be unicode tamil uyir char.
470
+
471
+ # Written By : Arulalan.T
472
+ # Date : 22.09.2014
473
+ # """
474
+ # if not isinstance(mei_char, PYTHON3 and str or unicode):
475
+ # raise ValueError("Passed input mei character '%s' must be unicode, \
476
+ # not just string" % mei_char)
477
+ # if not isinstance(uyir_char, PYTHON3 and str or unicode):
478
+ # raise ValueError("Passed input uyir character '%s' must be unicode, \
479
+ # not just string" % uyir_char)
480
+ # if mei_char not in grantha_mei_letters:
481
+ # raise ValueError("Passed input character '%s' is not a"
482
+ # "tamil mei character" % mei_char)
483
+ # if uyir_char not in uyir_letters:
484
+ # raise ValueError("Passed input character '%s' is not a"
485
+ # "tamil uyir character" % uyir_char)
486
+ # uyiridx = uyir_letters.index(uyir_char)
487
+ # meiidx = grantha_mei_letters.index(mei_char)
488
+ # # calculate uyirmei index
489
+ # uyirmeiidx = meiidx*12 + uyiridx
490
+ # return grantha_uyirmei_letters[uyirmeiidx]
491
+
492
+ # Tamil Letters
493
+ # அ ஆ இ ஈ உ ஊ எ ஏ ஐ ஒ ஓ ஔ ஃ
494
+ # க் ச் ட் த் ப் ற் ஞ் ங் ண் ந் ம் ன் ய் ர் ல் வ் ழ் ள் ஜ் ஷ் ஸ் ஹ்
495
+ # க ச ட த ப ற ஞ ங ண ந ம ன ய ர ல வ ழ ள ஜ ஷ ஸ ஹ
496
+ # க கா கி கீ கு கூ கெ கே கை கொ கோ கௌ
497
+ # ச சா சி சீ சு சூ செ சே சை சொ சோ சௌ
498
+ # ட டா டி டீ டு டூ டெ டே டை டொ டோ டௌ
499
+ # த தா தி தீ து தூ தெ தே தை தொ தோ தௌ
500
+ # ப பா பி பீ பு பூ பெ பே பை பொ போ பௌ
501
+ # ற றா றி றீ று றூ றெ றே றை றொ றோ றௌ
502
+ # ஞ ஞா ஞி ஞீ ஞு ஞூ ஞெ ஞே ஞை ஞொ ஞோ ஞௌ
503
+ # ங ஙா ஙி ஙீ ஙு ஙூ ஙெ ஙே ஙை ஙொ ஙோ ஙௌ
504
+ # ண ணா ணி ணீ ணு ணூ ணெ ணே ணை ணொ ணோ ணௌ
505
+ # ந நா நி நீ நு நூ நெ நே நை நொ நோ நௌ
506
+ # ம மா மி மீ மு மூ மெ மே மை மொ மோ மௌ
507
+ # ன னா னி னீ னு னூ னெ னே னை னொ னோ னௌ
508
+ # ய யா யி யீ யு யூ யெ யே யை யொ யோ யௌ
509
+ # ர ரா ரி ரீ ரு ரூ ரெ ரே ரை ரொ ரோ ரௌ
510
+ # ல லா லி லீ லு லூ லெ லே லை லொ லோ லௌ
511
+ # வ வா வி வீ வு வூ வெ வே வை வொ வோ வௌ
512
+ # ழ ழா ழி ழீ ழு ழூ ழெ ழே ழை ழொ ழோ ழௌ
513
+ # ள ளா ளி ளீ ளு ளூ ளெ ளே ளை ளொ ளோ ளௌ
514
+ # ஶ ஶா ஶி ஶீ ஶு ஶூ ஶெ ஶே ஶை ஶொ ஶோ ஶௌ
515
+ # ஜ ஜா ஜி ஜீ ஜு ஜூ ஜெ ஜே ஜை ஜொ ஜோ ஜௌ
516
+ # ஷ ஷா ஷி ஷீ ஷு ஷூ ஷெ ஷே ஷை ஷொ ஷோ ஷௌ
517
+ # ஸ ஸா ஸி ஸீ ஸு ஸூ ஸெ ஸே ஸை ஸொ ஸோ ஸௌ
518
+ # ஹ ஹா ஹி ஹீ ஹு ஹூ ஹெ ஹே ஹை ஹொ ஹோ ஹௌ
519
+ # க்ஷ க்ஷா க்ஷி க்ஷீ க்ஷு க்ஷூ க்ஷெ க்ஷே க்ஷை க்ஷொ க்ஷோ க்ஷௌ
metadata ADDED
@@ -0,0 +1,45 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tamil
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.11'
5
+ platform: ruby
6
+ authors:
7
+ - Muthu Annamalai
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-05-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: open-tamil project provides a ruby gem 'tamil' for working with Tamil
14
+ language text and NLP
15
+ email: ezhillang@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - lib/tamil.rb
21
+ homepage: http://ezhillang.org
22
+ licenses:
23
+ - MIT
24
+ metadata: {}
25
+ post_install_message:
26
+ rdoc_options: []
27
+ require_paths:
28
+ - lib
29
+ required_ruby_version: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ required_rubygems_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - '>='
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ requirements: []
40
+ rubyforge_project:
41
+ rubygems_version: 2.0.14
42
+ signing_key:
43
+ specification_version: 4
44
+ summary: open-tamil project provides a ruby gem 'tamil'
45
+ test_files: []