language 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,10 @@
1
+ = Release History
2
+
3
+ == 0.6.0 / 2010-05-29
4
+
5
+ This is the first release of Language, however the code was previously
6
+ released with the English project. Hence the current version of
7
+ Language matches the present version of English. Language combines
8
+ all the features previously part of English that are language
9
+ neutral or multi-lingual. It therefore provides a dependency for
10
+ the English library.
data/LICENSE ADDED
@@ -0,0 +1,23 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2009 Thomas Sawyer
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+
23
+
data/PROFILE ADDED
@@ -0,0 +1,23 @@
1
+ ---
2
+ title : Language
3
+ suite : rubyworks
4
+ summary: Language Support Library
5
+ license: MIT
6
+ authors: Thomas Sawyer
7
+ created: 2007-08-01
8
+
9
+ description:
10
+ Language is a support library for other language libraries.
11
+ While some of its contents are perfectly usable on their own,
12
+ most are generally intended to be subclassed and extended by
13
+ specific language modules, such as English.
14
+
15
+ resources:
16
+ homepage : http://rubyworks.github.com/language
17
+ development : http://github.com/rubyworks/language
18
+ repository : git://github.com/rubyworks/language.git
19
+ subscribe : rubyworks-mailinglist+subscribe@googlegroups.com
20
+
21
+ copyright:
22
+ Copyright (c) 2007 Thomas Sawyer
23
+
@@ -0,0 +1,38 @@
1
+ = Language
2
+
3
+ * home: http://rubyworks.github.com/language
4
+ * work: http://github.com/rubyworks/language
5
+
6
+ Language is a support library for other language libraries.
7
+ While some of its contents are perfectly usable on their own,
8
+ most are generally intended to be subclassed and extended by
9
+ specific language modules, such as English.
10
+
11
+
12
+ == SYNOPSIS
13
+
14
+ require 'language'
15
+
16
+ "How many words?".words #=> ['How', 'many', 'words']
17
+
18
+
19
+ == INSTALLATION
20
+
21
+ The usual Rubygems way:
22
+
23
+ $ gem install language
24
+
25
+
26
+ == COPYING
27
+
28
+ (MIT License)
29
+
30
+ Copyright (c) 2010 Thomas Sawyer
31
+
32
+ Language is distributed under the terms of the MIT license.
33
+
34
+ See LICENSE for details.
35
+
36
+ Some libraries are substantial derivatives of other persons'
37
+ work. Full copyright and licensing information is given
38
+ for those in the corresponding source files.
data/REQUIRE ADDED
@@ -0,0 +1,2 @@
1
+ development:
2
+ - syckle
data/VERSION ADDED
@@ -0,0 +1,5 @@
1
+ name : language
2
+ major: 0
3
+ minor: 6
4
+ patch: 0
5
+ date : 2010-05-29
@@ -0,0 +1,5 @@
1
+ require 'language/class'
2
+ require 'language/censor'
3
+ require 'language/words'
4
+ require 'language/mixin'
5
+
@@ -0,0 +1,97 @@
1
+ require 'language/class'
2
+
3
class Language

  # = Censor
  #
  # A reusable text filter. Register patterns, each with an optional
  # replacement block, then run strings through #filter. This is useful
  # for removing or replacing curse words or sensitive information in
  # user input.
  #
  #   censor = Language::Censor.new
  #   censor.word_rule(/damn/){ |w| 'darn' }
  #   censor.filter("damn it")  #=> "darn it"
  #
  class Censor

    # Default censor list. Every entry is registered as a word rule when
    # a new Censor is created. Returns an empty list here; subclasses can
    # override it to ship a stock vocabulary.
    def self.default_words
      []
    end

    # Arbitrary rules, as an ordered list of [pattern, edit] pairs.
    attr_reader :rules

    # Word-oriented rules, as an ordered list of [regexp, edit] pairs.
    attr_reader :word_rules

    # New Censor object, pre-loaded with a word rule for each entry of
    # the class's default_words.
    #
    def initialize
      @rules      = []
      @word_rules = []

      self.class.default_words.each do |word|
        word_rule(word)
      end
    end

    # Create new rule. A rule consists of a string or regexp to match
    # against and an optional block that receives the matched text and
    # returns its replacement. Without a block the match is removed.
    #
    # NOTE: The rules must be applied in order! So we cannot
    # use a hash because the ordering is not guaranteed. So
    # an array is used instead.
    #
    # Returns the updated list of rules.
    #
    def rule(match, &edit)
      # A plain proc, not a lambda: gsub yields the match to the block,
      # and a zero-arity lambda would raise ArgumentError on that.
      edit ||= proc { '' }
      @rules << [match, edit]
    end

    # Rules that apply only to whole words. This takes the pattern and
    # adds word boundary matches to either side.
    #
    #   filter.word_rule(/damn/){ |w| 'darn' }
    #
    # Is equivalent to the regular rule:
    #
    #   filter.rule(/\bdamn\b/){ |w| 'darn' }
    #
    # A String is taken literally (it is escaped before being embedded),
    # a Regexp is embedded as is.
    #
    # Returns the updated list of word rules.
    #
    def word_rule(match, &edit)
      edit ||= proc { '' }
      pattern = match.is_a?(Regexp) ? match : Regexp.escape(match.to_s)
      @word_rules << [/\b#{pattern}\b/, edit]
    end

    # Apply the set of rules to a string: first the arbitrary rules in
    # the order they were added, then the word rules in the order they
    # were added.
    #
    # Returns a new string; the argument is never modified.
    #
    def filter(string)
      (rules + word_rules).inject(string.dup) do |text, (match, edit)|
        # The edit must be handed over as a block; passing the proc as
        # the replacement argument raises TypeError.
        text.gsub(match, &edit)
      end
    end

    alias_method :apply, :filter

    # Is the string clear of any matching rules? Returns true when no
    # rule or word rule matches anywhere in the string.
    #
    # Note that running a filter does not necessarily clear a
    # string of all matches, since the filter could apply
    # edits that would also match the filter expressions.
    #
    def censored?(string)
      # String#index accepts both String and Regexp patterns and tests
      # for a match anywhere in the text, exactly like gsub does.
      matches.none? { |match| string.index(match) }
    end

    # The patterns of all rules followed by those of all word rules.
    #
    def matches
      (rules + word_rules).map { |match, _edit| match }
    end

  end

end
@@ -0,0 +1,159 @@
1
# Base class for language wrappers. A Language instance wraps a subject
# (a String, Array or Integer) in the instance variable @self. Other
# files of the library reopen this class to add the actual language
# functions.
class Language

  # Name of the core-extension method that returns the wrapper
  # ('lang' for this base class).
  def self.abbreviation
    'lang'
  end

  # The default language; falls back to the abbreviation when none has
  # been assigned.
  def self.default
    @default || abbreviation
  end

  # Assign the default language.
  def self.default=(lang)
    @default = lang
  end

  # The language currently in use; falls back to the default when none
  # has been assigned.
  def self.current
    @current || default
  end

  # Assign the current language.
  def self.current=(lang)
    @current = lang
  end

  # Wrap +string+ (any subject) in a new instance of this class.
  #
  # Earlier code also stored each wrapper in a hash keyed by the
  # subject's object_id. That hash was never read, so it only pinned
  # every wrapper and subject in memory forever. A fresh wrapper is
  # returned on every call, exactly as before, without the leak.
  def self.instance(string)
    new(string)
  end

  # Store the wrapped subject.
  def initialize(subject)
    @self = subject
  end

end

class String
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end

class Array
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end

class Integer
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end
61
+
62
+
63
+
64
+
65
+ =begin
66
+ module Language
67
+ extend self
68
+
69
+ # Subclass this in your specific language modules.
70
+ #
71
+ # class English::String < Language::String
72
+ #
73
+ class String < ::String
74
+
75
+ #
76
+ def self.language
77
+ Language
78
+ end
79
+
80
+ #
81
+ def self.instance(string)
82
+ @cache ||= {}
83
+ @cache[string.object_id] = new(string)
84
+ end
85
+
86
+ #
87
+ def initialize(string)
88
+ super()
89
+ replace(string)
90
+ end
91
+
92
+ def language
93
+ @_language ||= self.class.language
94
+ end
95
+
96
+ end
97
+
98
+ # TODO: We can't actually subclass Integer.
99
+ # But we can fake it. However we need to subclass
100
+ # it just so #is_a? works. However subclassing it causes
101
+ # the .new method not to exist, how to fix?
102
+ #
103
+ class Integer #< ::Integer
104
+ instance_methods{ |m| private m unless /^__/ =~ m.to_s }
105
+
106
+ #
107
+ def self.language
108
+ Language
109
+ end
110
+
111
+ #
112
+ def self.instance(integer)
113
+ @cache ||= {}
114
+ @cache[integer] = new(integer)
115
+ end
116
+
117
+ #
118
+ def initialize(integer)
119
+ @integer = integer
120
+ end
121
+
122
+ #
123
+ def to_i
124
+ @integer
125
+ end
126
+
127
+ #
128
+ def method_missing(s,*a,&b)
129
+ @integer.__send__(s,*a,&b)
130
+ end
131
+
132
+ #
133
+ def language
134
+ @_language ||= self.class.language
135
+ end
136
+ end
137
+
138
+ #
139
+ class Array < ::Array
140
+
141
+ #
142
+ def self.language
143
+ Language
144
+ end
145
+
146
+ #
147
+ def self.instance(array)
148
+ @cache ||= {}
149
+ @cache[array.object_id] = new(array)
150
+ end
151
+
152
+ def language
153
+ @_language ||= self.class.language
154
+ end
155
+ end
156
+
157
+ end
158
+ =end
159
+
@@ -0,0 +1,43 @@
1
+ require 'language/class'
2
+
3
class Language

  # Hashes of international ISO639-1 (2-letter) and ISO639-2 (3-letter)
  # language codes. Both tables are filled when this file is loaded,
  # from the fixed-width data file codes_iso639.txt that sits next to it.
  module Codes

    # Hash of ISO639 2-letter language codes => language description.
    ISO639_1 = {}

    # Hash of ISO639 3-letter language codes => language description.
    ISO639_2 = {}

    file = File.join(File.dirname(__FILE__), 'codes_iso639.txt')

    # NOTE(review): the data file appears to contain non-ASCII text
    # (e.g. "Volapuk" with an umlaut); confirm its encoding matches the
    # default external encoding the file is read with.
    File.readlines(file).each do |line|
      next if /^#/ =~ line

      # Drop only a real line terminator. Slicing off the last character
      # unconditionally cut the final letter of the description when the
      # last line of the file had no trailing newline.
      record = line.chomp

      # Blank lines carry no data; the column slices below would be nil.
      next if record.strip.empty?

      # Fixed-width columns: 3-letter code(s) from column 0,
      # 2-letter code(s) from column 9, description from column 15.
      # Alternative codes for one language are separated by '/'.
      codes3 = record[0, 7].to_s.strip.split('/')
      codes2 = record[9, 6].to_s.strip.split('/')
      desc   = record[15..-1].to_s.strip

      [[ISO639_1, codes2], [ISO639_2, codes3]].each do |table, codes|
        codes.each do |code|
          raise "Duplicate language code #{code}" if table.key?(code)
          table[code] = desc
        end
      end
    end

  end

end
43
+
@@ -0,0 +1,404 @@
1
+ abk ab Abkhazian
2
+ ace Achinese
3
+ ach Acoli
4
+ ada Adangme
5
+ aar aa Afar
6
+ afh Afrihili
7
+ afr af Afrikaans
8
+ afa Afro-Asiatic (Other)
9
+ aka Akan
10
+ akk Akkadian
11
+ alb/sqi sq Albanian
12
+ ale Aleut
13
+ alg Algonquian languages
14
+ tut Altaic (Other)
15
+ amh am Amharic
16
+ apa Apache languages
17
+ ara ar Arabic
18
+ arc Aramaic
19
+ arp Arapaho
20
+ arn Araucanian
21
+ arw Arawak
22
+ arm/hye hy Armenian
23
+ art Artificial (Other)
24
+ asm as Assamese
25
+ ath Athapascan languages
26
+ map Austronesian (Other)
27
+ ava Avaric
28
+ ave Avestan
29
+ awa Awadhi
30
+ aym ay Aymara
31
+ aze az Azerbaijani
32
+ nah Aztec
33
+ ban Balinese
34
+ bat Baltic (Other)
35
+ bal Baluchi
36
+ bam Bambara
37
+ bai Bamileke languages
38
+ bad Banda
39
+ bnt Bantu (Other)
40
+ bas Basa
41
+ bak ba Bashkir
42
+ baq/eus eu Basque
43
+ bej Beja
44
+ bem Bemba
45
+ ben bn Bengali
46
+ ber Berber (Other)
47
+ bho Bhojpuri
48
+ bih bh Bihari
49
+ bik Bikol
50
+ bin Bini
51
+ bis bi Bislama
52
+ bra Braj
53
+ bre br Breton
54
+ bug Buginese
55
+ bul bg Bulgarian
56
+ bua Buriat
57
+ bur/mya my Burmese
58
+ bel be Byelorussian
59
+ cad Caddo
60
+ car Carib
61
+ cat ca Catalan
62
+ cau Caucasian (Other)
63
+ ceb Cebuano
64
+ cel Celtic (Other)
65
+ cai Central American Indian (Other)
66
+ chg Chagatai
67
+ cha Chamorro
68
+ che Chechen
69
+ chr Cherokee
70
+ chy Cheyenne
71
+ chb Chibcha
72
+ chi/zho zh Chinese
73
+ chn Chinook jargon
74
+ cho Choctaw
75
+ chu Church Slavic
76
+ chv Chuvash
77
+ cop Coptic
78
+ cor Cornish
79
+ cos co Corsican
80
+ cre Cree
81
+ mus Creek
82
+ crp Creoles and Pidgins (Other)
83
+ cpe Creoles and Pidgins, English-based (Other)
84
+ cpf Creoles and Pidgins, French-based (Other)
85
+ cpp Creoles and Pidgins, Portuguese-based (Other)
86
+ cus Cushitic (Other)
87
+ hrv hr Croatian
88
+ ces/cze cs Czech
89
+ dak Dakota
90
+ dan da Danish
91
+ del Delaware
92
+ din Dinka
93
+ div Divehi
94
+ doi Dogri
95
+ dra Dravidian (Other)
96
+ dua Duala
97
+ dut/nla nl Dutch
98
+ dum Dutch, Middle (ca. 1050-1350)
99
+ dyu Dyula
100
+ dzo dz Dzongkha
101
+ efi Efik
102
+ egy Egyptian (Ancient)
103
+ eka Ekajuk
104
+ elx Elamite
105
+ eng en English
106
+ enm English, Middle (ca. 1100-1500)
107
+ ang English, Old (ca. 450-1100)
108
+ esk Eskimo (Other)
109
+ epo eo Esperanto
110
+ est et Estonian
111
+ ewe Ewe
112
+ ewo Ewondo
113
+ fan Fang
114
+ fat Fanti
115
+ fao fo Faroese
116
+ fij fj Fijian
117
+ fin fi Finnish
118
+ fiu Finno-Ugrian (Other)
119
+ fon Fon
120
+ fra/fre fr French
121
+ frm French, Middle (ca. 1400-1600)
122
+ fro French, Old (842- ca. 1400)
123
+ fry fy Frisian
124
+ ful Fulah
125
+ gaa Ga
126
+ gae/gdh Gaelic (Scots)
127
+ glg gl Gallegan
128
+ lug Ganda
129
+ gay Gayo
130
+ gez Geez
131
+ geo/kat ka Georgian
132
+ deu/ger de German
133
+ gmh German, Middle High (ca. 1050-1500)
134
+ goh German, Old High (ca. 750-1050)
135
+ gem Germanic (Other)
136
+ gil Gilbertese
137
+ gon Gondi
138
+ got Gothic
139
+ grb Grebo
140
+ grc Greek, Ancient (to 1453)
141
+ ell/gre el Greek, Modern (1453-)
142
+ kal kl Greenlandic
143
+ grn gn Guarani
144
+ guj gu Gujarati
145
+ hai Haida
146
+ hau ha Hausa
147
+ haw Hawaiian
148
+ heb he Hebrew
149
+ her Herero
150
+ hil Hiligaynon
151
+ him Himachali
152
+ hin hi Hindi
153
+ hmo Hiri Motu
154
+ hun hu Hungarian
155
+ hup Hupa
156
+ iba Iban
157
+ ice/isl is Icelandic
158
+ ibo Igbo
159
+ ijo Ijo
160
+ ilo Iloko
161
+ inc Indic (Other)
162
+ ine Indo-European (Other)
163
+ ind id Indonesian
164
+ ina ia Interlingua (International Auxiliary language Association)
165
+ ile Interlingue
166
+ iku iu Inuktitut
167
+ ipk ik Inupiak
168
+ ira Iranian (Other)
169
+ gai/iri ga Irish
170
+ sga Irish, Old (to 900)
171
+ mga Irish, Middle (900 - 1200)
172
+ iro Iroquoian languages
173
+ ita it Italian
174
+ jpn ja Japanese
175
+ jav jv Javanese
176
+ jrb Judeo-Arabic
177
+ jpr Judeo-Persian
178
+ kab Kabyle
179
+ kac Kachin
180
+ kam Kamba
181
+ kan kn Kannada
182
+ kau Kanuri
183
+ kaa Kara-Kalpak
184
+ kar Karen
185
+ kas ks Kashmiri
186
+ kaw Kawi
187
+ kaz kk Kazakh
188
+ kha Khasi
189
+ khm km Khmer
190
+ khi Khoisan (Other)
191
+ kho Khotanese
192
+ kik Kikuyu
193
+ kin rw Kinyarwanda
194
+ kir ky Kirghiz
195
+ kom Komi
196
+ kon Kongo
197
+ kok Konkani
198
+ kor ko Korean
199
+ kpe Kpelle
200
+ kro Kru
201
+ kua Kuanyama
202
+ kum Kumyk
203
+ kur ku Kurdish
204
+ kru Kurukh
205
+ kus Kusaie
206
+ kut Kutenai
207
+ lad Ladino
208
+ lah Lahnda
209
+ lam Lamba
210
+ oci oc Langue d'Oc (post 1500)
211
+ lao lo Lao
212
+ lat la Latin
213
+ lav lv Latvian
214
+ ltz Letzeburgesch
215
+ lez Lezghian
216
+ lin ln Lingala
217
+ lit lt Lithuanian
218
+ loz Lozi
219
+ lub Luba-Katanga
220
+ lui Luiseno
221
+ lun Lunda
222
+ luo Luo (Kenya and Tanzania)
223
+ mac/mke mk Macedonian
224
+ mad Madurese
225
+ mag Magahi
226
+ mai Maithili
227
+ mak Makasar
228
+ mlg mg Malagasy
229
+ may/msa ms Malay
230
+ mal ml Malayalam
231
+ mlt mt Maltese
232
+ man Mandingo
233
+ mni Manipuri
234
+ mno Manobo languages
235
+ max Manx
236
+ mao/mri mi Maori
237
+ mar mr Marathi
238
+ chm Mari
239
+ mah Marshall
240
+ mwr Marwari
241
+ mas Masai
242
+ myn Mayan languages
243
+ men Mende
244
+ mic Micmac
245
+ min Minangkabau
246
+ mis Miscellaneous (Other)
247
+ moh Mohawk
248
+ mol mo Moldavian
249
+ mkh Mon-Khmer (Other)
250
+ lol Mongo
251
+ mon mn Mongolian
252
+ mos Mossi
253
+ mul Multiple languages
254
+ mun Munda languages
255
+ nau na Nauru
256
+ nav Navajo
257
+ nde Ndebele, North
258
+ nbl Ndebele, South
259
+ ndo Ndongo
260
+ nep ne Nepali
261
+ new Newari
262
+ nic Niger-Kordofanian (Other)
263
+ ssa Nilo-Saharan (Other)
264
+ niu Niuean
265
+ non Norse, Old
266
+ nai North American Indian (Other)
267
+ nor no Norwegian
268
+ nno Norwegian (Nynorsk)
269
+ nub Nubian languages
270
+ nym Nyamwezi
271
+ nya Nyanja
272
+ nyn Nyankole
273
+ nyo Nyoro
274
+ nzi Nzima
275
+ oji Ojibwa
276
+ ori or Oriya
277
+ orm om Oromo
278
+ osa Osage
279
+ oss Ossetic
280
+ oto Otomian languages
281
+ pal Pahlavi
282
+ pau Palauan
283
+ pli Pali
284
+ pam Pampanga
285
+ pag Pangasinan
286
+ pan pa Panjabi
287
+ pap Papiamento
288
+ paa Papuan-Australian (Other)
289
+ fas/per fa Persian
290
+ peo Persian, Old (ca 600 - 400 B.C.)
291
+ phn Phoenician
292
+ pol pl Polish
293
+ pon Ponape
294
+ por pt Portuguese
295
+ pra Prakrit languages
296
+ pro Provencal, Old (to 1500)
297
+ pus ps Pushto
298
+ que qu Quechua
299
+ roh rm Rhaeto-Romance
300
+ raj Rajasthani
301
+ rar Rarotongan
302
+ roa Romance (Other)
303
+ ron/rum ro Romanian
304
+ rom Romany
305
+ run rn Rundi
306
+ rus ru Russian
307
+ sal Salishan languages
308
+ sam Samaritan Aramaic
309
+ smi Sami languages
310
+ smo sm Samoan
311
+ sad Sandawe
312
+ sag sg Sango
313
+ san sa Sanskrit
314
+ srd Sardinian
315
+ sco Scots
316
+ sel Selkup
317
+ sem Semitic (Other)
318
+ sr Serbian
319
+ scr sh Serbo-Croatian
320
+ srr Serer
321
+ shn Shan
322
+ sna sn Shona
323
+ sid Sidamo
324
+ bla Siksika
325
+ snd sd Sindhi
326
+ sin si Singhalese
327
+ sit Sino-Tibetan (Other)
328
+ sio Siouan languages
329
+ sla Slavic (Other)
330
+ ss Siswati
331
+ slk/slo sk Slovak
332
+ slv sl Slovenian
333
+ sog Sogdian
334
+ som so Somali
335
+ son Songhai
336
+ wen Sorbian languages
337
+ nso Sotho, Northern
338
+ sot st Sotho, Southern
339
+ sai South American Indian (Other)
340
+ esl/spa es Spanish
341
+ suk Sukuma
342
+ sux Sumerian
343
+ sun su Sundanese
344
+ sus Susu
345
+ swa sw Swahili
346
+ ssw Swazi
347
+ sve/swe sv Swedish
348
+ syr Syriac
349
+ tgl tl Tagalog
350
+ tah Tahitian
351
+ tgk tg Tajik
352
+ tmh Tamashek
353
+ tam ta Tamil
354
+ tat tt Tatar
355
+ tel te Telugu
356
+ ter Tereno
357
+ tha th Thai
358
+ bod/tib bo Tibetan
359
+ tig Tigre
360
+ tir ti Tigrinya
361
+ tem Timne
362
+ tiv Tivi
363
+ tli Tlingit
364
+ tog to Tonga (Nyasa)
365
+ ton Tonga (Tonga Islands)
366
+ tru Truk
367
+ tsi Tsimshian
368
+ tso ts Tsonga
369
+ tsn tn Tswana
370
+ tum Tumbuka
371
+ tur tr Turkish
372
+ ota Ottoman
373
+ tuk tk Turkmen
374
+ tyv Tuvinian
375
+ twi tw Twi
376
+ uga Ugaritic
377
+ uig ug Uighur
378
+ ukr uk Ukrainian
379
+ umb Umbundu
380
+ und Undetermined
381
+ urd ur Urdu
382
+ uzb uz Uzbek
383
+ vai Vai
384
+ ven Venda
385
+ vie vi Vietnamese
386
+ vol vo Volapük
387
+ vot Votic
388
+ wak Wakashan languages
389
+ wal Walamo
390
+ war Waray
391
+ was Washo
392
+ cym/wel cy Welsh
393
+ wol wo Wolof
394
+ xho xh Xhosa
395
+ sah Yakut
396
+ yao Yao
397
+ yap Yap
398
+ yid yi Yiddish
399
+ yor yo Yoruba
400
+ zap Zapotec
401
+ zen Zenaga
402
+ zha za Zhuang
403
+ zul zu Zulu
404
+ zun Zuni
@@ -0,0 +1 @@
1
+
@@ -0,0 +1 @@
1
+
@@ -0,0 +1,147 @@
1
+ require 'language/class'
2
+
3
class Language

  #= Matcher
  #
  # Matcher derives from Ruby Quiz #103, the DictionaryMatcher quiz.
  #
  # It stores a set of words in a byte-level trie. Every trie node also
  # carries a KMP-style failure link, so a text can be searched for any
  # of the words in a single pass. All positions handled by this class
  # are byte offsets.

  class Matcher

    # Number of distinct words that have been added.
    attr_reader :word_count

    # Contains the index matched, and the word matched. +index+ is the
    # byte offset at which the matched word starts in the searched text.
    MatchData = Struct.new(:index, :match) do
      def inspect
        "#{match.inspect}@#{index}"
      end
    end

    def inspect
      to_s
    end

    # Create a DictionaryMatcher with no words in it
    def initialize
      @trie = {}
      @word_count = 0
    end

    # Add a word to the DictionaryMatcher. Returns self, so calls can be
    # chained. Adding a word that is already present changes nothing.
    #
    # Raises ArgumentError for an empty word, which cannot be matched.
    def add(word)
      bytes = word.bytes.to_a
      raise ArgumentError, 'cannot add an empty word' if bytes.empty?

      # path[d] is the trie node reached after consuming d bytes.
      path = [@trie]
      bytes.each_with_index do |byte, depth|
        node = path.last
        node[:depth] = depth
        path << (node[byte] ||= {})
      end

      terminal = path.last
      terminal[:depth] = bytes.size
      @word_count += 1 unless terminal[:word]
      # Mark end of word. A symbol key cannot collide with a byte value,
      # unlike the integer 0 (which is also the NUL byte).
      terminal[:word] = true

      # Link every node on the path to the node it falls back to when
      # the next byte of the text does not continue the word.
      compute_failure_function(bytes).each_with_index do |fallback, depth|
        path[depth][:failure] = path[fallback] if fallback
      end

      self
    end

    alias << add

    # KMP failure table for +pattern+ (a String or an Array of bytes).
    # Entry q is the length of the longest proper prefix of the first q
    # bytes that is also a suffix of them. Entry 0 is nil: the root has
    # nowhere to fall back to.
    def compute_failure_function(pattern)
      bytes = pattern.respond_to?(:bytes) ? pattern.bytes.to_a : pattern
      table = [nil, 0]
      k = 0
      2.upto(bytes.size) do |q|
        k = table[k] while k > 0 && bytes[k] != bytes[q - 1]
        k += 1 if bytes[k] == bytes[q - 1]
        table[q] = k
      end
      table
    end
    private :compute_failure_function

    # Determine whether +word+ was previously <tt>add</tt>ed to the
    # Trie. Only exact words count: prefixes and extensions of stored
    # words are not included.
    def include?(word)
      node = @trie
      word.each_byte do |byte|
        node = node[byte]
        return false unless node
      end
      node[:word] ? true : false
    end

    # Determines whether one of the words in the DictionaryMatcher is a
    # substring of +text+. Returns the byte index at which the match
    # starts if found, +nil+ if not found.
    def =~(text)
      internal_match(text) { |md| return md.index }
      nil
    end

    # Determine whether one of the words in the DictionaryMatcher is a
    # substring of +text+. Returns a DictionaryMatcher::MatchData object
    # if found, +nil+ if not found.
    def match(text)
      internal_match(text) { |md| return md }
      nil
    end

    # Walk +string+ byte by byte and yield a MatchData for every word
    # found. After a match the search restarts from the root, so
    # overlapping matches are skipped.
    def internal_match(string)
      node = @trie
      string.each_byte.with_index do |byte, pos|
        nextnode = node[byte]

        # No edge for this byte: fall back along the failure links until
        # an edge exists or the root (which has no failure link) is
        # reached. The text position does not move while falling back.
        while nextnode.nil? && node[:failure]
          node = node[:failure]
          nextnode = node[byte]
        end

        next if nextnode.nil?

        if nextnode[:word]
          length = nextnode[:depth]
          start  = pos + 1 - length
          # The trie is byte-oriented, so slice by bytes as well.
          yield MatchData.new(start, string.byteslice(start, length))
          node = @trie
        else
          node = nextnode
        end
      end
    end
    private :internal_match

    # Scans +text+ for all occurrences of strings in the
    # DictionaryMatcher.
    # Overlapping matches are skipped (only the first one is yielded),
    # and when some strings in the DictionaryMatcher are substrings of
    # others, only the shortest match at a given position is found.
    #
    # Without a block, returns the array of MatchData objects. With a
    # block, yields each MatchData and returns an empty array.
    def scan(text, &block)
      matches = []
      block = lambda { |md| matches << md } unless block
      internal_match(text, &block)
      matches
    end

    # Case equality. Similar to =~.
    alias_method :===, :=~
  end

end
147
+
@@ -0,0 +1,32 @@
1
+ require 'language/class'
2
+
3
class Language

  # Forwards messages the receiver does not understand to its language
  # wrapper. The wrapper is obtained by calling the method named by
  # Language.current on the receiver (e.g. +lang+).
  module Mixin
    # Delegate +s+ to the language wrapper when the wrapper responds to
    # it; otherwise fall through to the normal NoMethodError.
    def method_missing(s, *a, &b)
      current = Language.current.to_sym

      # The wrapper accessor itself is missing, so there is nothing to
      # delegate to. Bailing out here also prevents endless recursion.
      return super if s == current

      lang = __send__(current)
      if lang && lang.respond_to?(s)
        lang.__send__(s, *a, &b)
      else
        super
      end
    end

    # Keep respond_to? and method in step with method_missing: report
    # true for every message the language wrapper would handle.
    def respond_to_missing?(name, include_private = false)
      current = Language.current.to_sym
      return super if name == current || !respond_to?(current, true)

      lang = __send__(current)
      lang && lang.respond_to?(name) ? true : super
    end
  end

end

class String
  include Language::Mixin
end

class Numeric
  include Language::Mixin
end

class Array
  include Language::Mixin
end
32
+
@@ -0,0 +1,152 @@
1
+ # This module characterizes the most common forms of orthography
2
+ # in computer systems -- words divided by spaces, paragraphs divided
3
+ # by blank lines, and so on.
4
+
5
+ require 'language/class'
6
+
7
class Language

  # Scan +string+ for +pattern+ and collect every match. When a block is
  # given it is called for each match: with the matched text only if the
  # block takes exactly one argument, otherwise with the matched text
  # and its character range in +string+.
  #
  # Returns the array of matched strings.
  def self.each_match(string, pattern, &yld)
    pieces = []
    string.scan(pattern) do
      # Inside the scan block the last match is the current one.
      found = Regexp.last_match
      pieces << found[0]
      next unless yld
      if yld.arity == 1
        yld.call(found[0])
      else
        yld.call(found[0], found.begin(0)...found.end(0))
      end
    end
    pieces
  end
  private_class_method :each_match

  # If block given, iterate through each word.
  #
  #   "a string".each_word { |word, range| ... }
  #
  # Returns an array of words.
  #
  #   "abc 123".words  #=> ["abc","123"]
  #
  # A word is a run of word characters, hyphens and apostrophes.
  #
  def self.words(string, &yld)
    each_match(string, /[-'\w]+/, &yld)
  end

  # If block given, iterate through each sentence (the block receives
  # the sentence and, optionally, its range).
  #
  # Returns an array of sentences. A sentence ends at a period that is
  # followed by whitespace or the end of the string; trailing text
  # without a period counts as a final sentence. Surrounding whitespace
  # is not part of the sentence.
  #
  #   Language.sentences("One. Two words. Three")
  #   #=> ["One.", "Two words.", "Three"]
  #
  def self.sentences(string, &yld)
    each_match(string, /\S.*?(?:\.(?=\s|\z)|(?=\s*\z))/m, &yld)
  end

  # If block given, iterate through each paragraph (the block receives
  # the paragraph and, optionally, its range).
  #
  # Returns an array of paragraphs. Paragraphs are separated by one or
  # more blank lines; surrounding whitespace is not part of the
  # paragraph.
  #
  #   Language.paragraphs("a\nb\n\nc d\n")  #=> ["a\nb", "c d"]
  #
  def self.paragraphs(string, &yld)
    each_match(string, /\S.*?(?=\n[ \t\r]*\n|\s*\z)/m, &yld)
  end

  # Word wrap a string not exceeding max width.
  #
  #   puts "this is a test".word_wrap(4)
  #
  # _produces_
  #
  #   this
  #   is a
  #   test
  #
  # CREDIT: Gavin Kistner
  # CREDIT: Dayne Broderson

  def self.word_wrap(string, col_width=79)
    # The width is interpolated into regular expressions below, so make
    # sure it really is an integer.
    width = Integer(col_width)
    # Break up unspaced runs longer than the width ...
    broken = string.gsub(/(\S{#{width}})(?=\S)/, '\1 ')
    # ... then end each line at the last whitespace that fits.
    broken.gsub(/(.{1,#{width}})(?:\s+|$)/, "\\1\n")
  end

  # TODO: This is an alternative from glue: worth providing?
  #
  # Enforces a maximum width of a string inside an
  # html container. If the string exceeds this maximum width
  # the string gets wrapped.
  #
  # Not really useful, better use the CSS overflow: hidden
  # functionality.
  #
  # === Input:
  # the string to be wrapped
  # the enforced width
  # the separator used for wrapping
  #
  # === Output:
  # the wrapped string
  #
  # === Example:
  #  text = "1111111111111111111111111111111111111111111"
  #  text = wrap(text, 10, " ")
  #  p text # => "1111111111 1111111111 1111111111"
  #
  # See the test cases to better understand the behaviour!
  #
  # def wrap(width = 20, separator = " ")
  #   re = /([^#{separator}]{1,#{width}})/
  #   scan(re).join(separator)
  # end

  # Words of the wrapped subject; see Language.words.
  def words(&blk)
    self.class.words(@self, &blk)
  end

  # Iterate through each word of the wrapped subject.
  def each_word(&blk)
    words(&blk)
  end

  # Sentences of the wrapped subject; see Language.sentences.
  def sentences(&blk)
    self.class.sentences(@self, &blk)
  end

  # Iterate through each sentence of the wrapped subject.
  def each_sentence(&blk)
    sentences(&blk)
  end

  # Paragraphs of the wrapped subject; see Language.paragraphs.
  def paragraphs(&blk)
    self.class.paragraphs(@self, &blk)
  end

  # The method was originally published under this misspelled name;
  # keep it available for existing callers.
  alias_method :paragrpahs, :paragraphs

  # Iterate through each paragraph of the wrapped subject.
  def each_paragraph(&blk)
    paragraphs(&blk)
  end

  # Word wrap the wrapped subject; see Language.word_wrap.
  def word_wrap(col_width=79)
    self.class.word_wrap(@self, col_width)
  end

  # As with #word_wrap, but modifies the string in place.
  def word_wrap!(col_width=79)
    @self.replace(word_wrap(col_width))
  end

end
152
+
metadata ADDED
@@ -0,0 +1,80 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: language
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 6
8
+ - 0
9
+ version: 0.6.0
10
+ platform: ruby
11
+ authors:
12
+ - Thomas Sawyer
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-05-28 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: Language is a support library for other language libraries. While some of its contents are perfectly usable on their own, most are generally intended to be subclassed and extended by specific language modules, such as English.
22
+ email:
23
+ executables: []
24
+
25
+ extensions: []
26
+
27
+ extra_rdoc_files:
28
+ - README.rdoc
29
+ files:
30
+ - lib/language/censor.rb
31
+ - lib/language/class.rb
32
+ - lib/language/codes.rb
33
+ - lib/language/codes_iso639.txt
34
+ - lib/language/current.rb
35
+ - lib/language/dsl.rb
36
+ - lib/language/matcher.rb
37
+ - lib/language/mixin.rb
38
+ - lib/language/words.rb
39
+ - lib/language.rb
40
+ - HISTORY.rdoc
41
+ - PROFILE
42
+ - LICENSE
43
+ - README.rdoc
44
+ - REQUIRE
45
+ - VERSION
46
+ has_rdoc: true
47
+ homepage: http://rubyworks.github.com/language
48
+ licenses: []
49
+
50
+ post_install_message:
51
+ rdoc_options:
52
+ - --title
53
+ - Language API
54
+ - --main
55
+ - README.rdoc
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
64
+ version: "0"
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ segments:
70
+ - 0
71
+ version: "0"
72
+ requirements: []
73
+
74
+ rubyforge_project: language
75
+ rubygems_version: 1.3.6
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Language Support Library
79
+ test_files: []
80
+