language 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.rdoc +10 -0
- data/LICENSE +23 -0
- data/PROFILE +23 -0
- data/README.rdoc +38 -0
- data/REQUIRE +2 -0
- data/VERSION +5 -0
- data/lib/language.rb +5 -0
- data/lib/language/censor.rb +97 -0
- data/lib/language/class.rb +159 -0
- data/lib/language/codes.rb +43 -0
- data/lib/language/codes_iso639.txt +404 -0
- data/lib/language/current.rb +1 -0
- data/lib/language/dsl.rb +1 -0
- data/lib/language/matcher.rb +147 -0
- data/lib/language/mixin.rb +32 -0
- data/lib/language/words.rb +152 -0
- metadata +80 -0
data/HISTORY.rdoc
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
= Release History
|
2
|
+
|
3
|
+
== 0.6.0 / 2010-05-29
|
4
|
+
|
5
|
+
This is the first release of Language, however the code was previously
|
6
|
+
released with the English project. Hence the current version of
|
7
|
+
Language matches the present version of English. Language combines
|
8
|
+
all the features previously part of English that are language
|
9
|
+
neutral or multi-lingual. It therefore provides a dependency for
|
10
|
+
the English library.
|
data/LICENSE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2009 Thomas Sawyer
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
|
23
|
+
|
data/PROFILE
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
---
|
2
|
+
title : Language
|
3
|
+
suite : rubyworks
|
4
|
+
summary: Language Support Library
|
5
|
+
license: MIT
|
6
|
+
authors: Thomas Sawyer
|
7
|
+
created: 2007-08-01
|
8
|
+
|
9
|
+
description:
|
10
|
+
Language is a support library for other language libraries.
|
11
|
+
While some of its contents are perfectly usable on their own,
|
12
|
+
most are generally intended to be subclassed and extended by
|
13
|
+
specific language modules, such as English.
|
14
|
+
|
15
|
+
resources:
|
16
|
+
homepage : http://rubyworks.github.com/language
|
17
|
+
development : http://github.com/rubyworks/language
|
18
|
+
respository : git://github.com/rubyworks/language.git
|
19
|
+
subscribe : rubyworks-mailinglist+subscribe@googlegroups.com
|
20
|
+
|
21
|
+
copyright:
|
22
|
+
Copyright (c) 2007 Thomas Sawyer
|
23
|
+
|
data/README.rdoc
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
= Language
|
2
|
+
|
3
|
+
* home: http://rubyworks.github.com/language
|
4
|
+
* work: http://github.com/rubyworks/language
|
5
|
+
|
6
|
+
Language is a support library for other language libraries.
|
7
|
+
While some of its contents are perfectly usable on their own,
|
8
|
+
most are generally intended to be subclassed and extended by
|
9
|
+
specific language modules, such as English.
|
10
|
+
|
11
|
+
|
12
|
+
== SYNOPSIS
|
13
|
+
|
14
|
+
require 'language'
|
15
|
+
|
16
|
+
"How many words?".words #=> ['How', 'many', 'words']
|
17
|
+
|
18
|
+
|
19
|
+
== INSTALLATION
|
20
|
+
|
21
|
+
The usual Rubygems way:
|
22
|
+
|
23
|
+
$ gem install language
|
24
|
+
|
25
|
+
|
26
|
+
== COPYING
|
27
|
+
|
28
|
+
(MIT License)
|
29
|
+
|
30
|
+
Copyright (c) 2010 Thomas Sawyer
|
31
|
+
|
32
|
+
Language is distributed under the terms of the MIT license.
|
33
|
+
|
34
|
+
See LICENSE for details.
|
35
|
+
|
36
|
+
Some libraries are substantial derivatives of other persons'
|
37
|
+
work. Full copyright and licensing information is given
|
38
|
+
for those in the corresponding source files.
|
data/REQUIRE
ADDED
data/VERSION
ADDED
data/lib/language.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
require 'language/class'
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
# = Censor
|
6
|
+
#
|
7
|
+
# This class allows one to define a reusable text filter.
|
8
|
+
# This is useful for removing or replacing curse words or
|
9
|
+
# sensitive information from user input.
|
10
|
+
|
11
|
+
class Censor

  # Default censor list. Each entry is registered through #word_rule when a
  # new instance is created; the base implementation supplies no words.
  def self.default_words
    []
  end

  # Arbitrary rules: an ordered list of [pattern, edit] pairs.
  attr_reader :rules

  # Word-oriented rules: an ordered list of [regexp, edit] pairs whose
  # regexp is wrapped in word boundaries.
  attr_reader :word_rules

  # New Censor object, pre-loaded with one word rule per default word.
  #
  def initialize
    @rules      = []
    @word_rules = []

    self.class.default_words.each { |word| word_rule(word) }
  end

  # Create new rule. A rule consists of a string or regexp to match
  # against, and an optional block that receives the matched text and
  # returns its replacement. Without a block the match is removed.
  #
  # NOTE: The rules must be applied in order! So we cannot
  # use a hash because the ordering is not guaranteed. So
  # an array is used instead.
  #
  def rule(match, &edit)
    # A plain proc (not a lambda) so the matched text yielded by gsub!
    # is silently ignored instead of raising an arity error.
    edit ||= proc { '' }
    @rules << [match, edit]
  end

  # Rules that apply only to words. This takes the regular
  # expression and adds word boundary matches to either side.
  #
  #   filter.word_rule(/damn/){ |w| 'darn' }
  #
  # Is equivalent to the regular rule:
  #
  #   filter.rule(/\bdamn\b/){ |w| 'darn' }
  #
  # A String is matched literally (it is escaped before being embedded),
  # mirroring how #rule treats strings.
  #
  def word_rule(match, &edit)
    edit ||= proc { '' }
    source = match.is_a?(Regexp) ? match : Regexp.escape(match.to_s)
    @word_rules << [/\b#{source}\b/, edit]
  end

  # Apply the set of rules (regular expression matches) to a string and
  # return the rewritten copy; the argument itself is not modified.
  #
  # All #rules are applied first, in the order they were added, followed
  # by all #word_rules in the order they were added.
  #
  def filter(string)
    rewritten_string = string.dup
    (rules + word_rules).each do |match, edit|
      # The edit must be passed as a block: gsub! only accepts a String
      # or Hash as a positional replacement.
      rewritten_string.gsub!(match, &edit)
    end
    rewritten_string
  end

  alias_method :apply, :filter

  # Is the string clear of any matching rules?
  #
  # Returns true when no rule pattern occurs in the string, false
  # otherwise. Regexp patterns are searched for; String patterns are
  # treated as substrings, the same way #filter treats them.
  #
  # Note that running a filter does not necessarily clear
  # a string of all matches, since the filter could apply
  # edits that would also match the filter expressions.
  #
  def censored?(string)
    matches.none? do |pattern|
      if pattern.is_a?(Regexp)
        pattern =~ string
      else
        string.include?(pattern.to_s)
      end
    end
  end

  # The patterns of every registered rule (regular rules first, then
  # word rules), without their edit blocks.
  #
  def matches
    (rules + word_rules).map { |match, _edit| match }
  end

end
|
96
|
+
|
97
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
class Language

  # Name of the method through which core objects reach this language
  # (see the String/Array/Integer extensions below).
  def self.abbreviation
    'lang'
  end

  # The default language; falls back to #abbreviation until one is assigned.
  def self.default
    @default || abbreviation
  end

  # Assign the default language.
  def self.default=(lang)
    @default = lang
  end

  # The language currently in effect; falls back to #default until one
  # is assigned.
  def self.current
    @current || default
  end

  # Assign the current language.
  def self.current=(lang)
    @current = lang
  end

  # Return the wrapper for the given subject, creating it on first use.
  #
  # Wrappers are memoized per receiving class, keyed by the subject's
  # object_id, so repeated calls for the same object return the same
  # wrapper instead of allocating a new one each time. The cache holds
  # each wrapper (and therefore its subject) for the life of the class.
  def self.instance(string)
    @cache ||= {}
    @cache[string.object_id] ||= new(string)
  end

  # Wrap +subject+; it is stored in @self for the language functions
  # to operate on.
  def initialize(subject)
    @self = subject
  end

end

class String
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end

class Array
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end

class Integer
  # Higher-order function to invoke Language functions.
  def lang
    Language.instance(self)
  end
end
|
61
|
+
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
=begin
|
66
|
+
module Language
|
67
|
+
extend self
|
68
|
+
|
69
|
+
# Subclass this in your specific language modules.
|
70
|
+
#
|
71
|
+
# class English::String < Language::String
|
72
|
+
#
|
73
|
+
class String < ::String
|
74
|
+
|
75
|
+
#
|
76
|
+
def self.language
|
77
|
+
Language
|
78
|
+
end
|
79
|
+
|
80
|
+
#
|
81
|
+
def self.instance(string)
|
82
|
+
@cache ||= {}
|
83
|
+
@cache[string.object_id] = new(string)
|
84
|
+
end
|
85
|
+
|
86
|
+
#
|
87
|
+
def initialize(string)
|
88
|
+
super()
|
89
|
+
replace(string)
|
90
|
+
end
|
91
|
+
|
92
|
+
def language
|
93
|
+
@_language ||= self.class.language
|
94
|
+
end
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
# TODO: We can't actually subclass Integer.
|
99
|
+
# But we can fake it. However we need to subclass
|
100
|
+
# it just so #is_a? works. However subclassing it causes
|
101
|
+
# the .new method not to exist, how to fix?
|
102
|
+
#
|
103
|
+
class Integer #< ::Integer
|
104
|
+
instance_methods{ |m| private m unless /^__/ =~ m.to_s }
|
105
|
+
|
106
|
+
#
|
107
|
+
def self.language
|
108
|
+
Language
|
109
|
+
end
|
110
|
+
|
111
|
+
#
|
112
|
+
def self.instance(integer)
|
113
|
+
@cache ||= {}
|
114
|
+
@cache[integer] = new(integer)
|
115
|
+
end
|
116
|
+
|
117
|
+
#
|
118
|
+
def initialize(integer)
|
119
|
+
@integer = integer
|
120
|
+
end
|
121
|
+
|
122
|
+
#
|
123
|
+
def to_i
|
124
|
+
@integer
|
125
|
+
end
|
126
|
+
|
127
|
+
#
|
128
|
+
def method_missing(s,*a,&b)
|
129
|
+
@integer.__send__(s,*a,&b)
|
130
|
+
end
|
131
|
+
|
132
|
+
#
|
133
|
+
def language
|
134
|
+
@_language ||= self.class.language
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
#
|
139
|
+
class Array < ::Array
|
140
|
+
|
141
|
+
#
|
142
|
+
def self.language
|
143
|
+
Language
|
144
|
+
end
|
145
|
+
|
146
|
+
#
|
147
|
+
def self.instance(array)
|
148
|
+
@cache ||= {}
|
149
|
+
@cache[array.object_id] = new(array)
|
150
|
+
end
|
151
|
+
|
152
|
+
def language
|
153
|
+
@_language ||= self.class.language
|
154
|
+
end
|
155
|
+
end
|
156
|
+
|
157
|
+
end
|
158
|
+
=end
|
159
|
+
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'language/class'
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
# A hash of International 2- and 3-letter ISO639-1 and ISO639-2 language codes.
|
6
|
+
module Codes

  # Hash of ISO639 2-letter language codes, mapping code to description.
  ISO639_1 = {}

  # Hash of ISO639 3-letter language codes, mapping code to description.
  ISO639_2 = {}

  # The table is read once, when this file is loaded, from the text file
  # that sits next to it.
  file = File.join(File.dirname(__FILE__), 'codes_iso639.txt')

  File.readlines(file).each do |line|
    next if /^#/ =~ line        # comment line
    next if line.strip.empty?   # blank line carries no codes

    # Fixed-width columns: 3-letter code(s), 2-letter code(s), description.
    # Alternative codes within a column are separated by '/'.
    # `to_s` guards against lines too short to reach a column, and the
    # description is taken to the end of the line and stripped, so a final
    # line without a trailing newline keeps its last character.
    codes3 = line[0, 7].to_s.strip.split('/')
    codes2 = line[9, 6].to_s.strip.split('/')
    desc   = line[15..-1].to_s.strip

    codes2.each do |code|
      if ISO639_1.key?(code)
        raise "Duplicate language code #{code}"
      end
      ISO639_1[code] = desc
    end

    codes3.each do |code|
      if ISO639_2.key?(code)
        raise "Duplicate language code #{code}"
      end
      ISO639_2[code] = desc
    end

  end

end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
@@ -0,0 +1,404 @@
|
|
1
|
+
abk ab Abkhazian
|
2
|
+
ace Achinese
|
3
|
+
ach Acoli
|
4
|
+
ada Adangme
|
5
|
+
aar aa Afar
|
6
|
+
afh Afrihili
|
7
|
+
afr af Afrikaans
|
8
|
+
afa Afro-Asiatic (Other)
|
9
|
+
aka Akan
|
10
|
+
akk Akkadian
|
11
|
+
alb/sqi sq Albanian
|
12
|
+
ale Aleut
|
13
|
+
alg Algonquian languages
|
14
|
+
tut Altaic (Other)
|
15
|
+
amh am Amharic
|
16
|
+
apa Apache languages
|
17
|
+
ara ar Arabic
|
18
|
+
arc Aramaic
|
19
|
+
arp Arapaho
|
20
|
+
arn Araucanian
|
21
|
+
arw Arawak
|
22
|
+
arm/hye hy Armenian
|
23
|
+
art Artificial (Other)
|
24
|
+
asm as Assamese
|
25
|
+
ath Athapascan languages
|
26
|
+
map Austronesian (Other)
|
27
|
+
ava Avaric
|
28
|
+
ave Avestan
|
29
|
+
awa Awadhi
|
30
|
+
aym ay Aymara
|
31
|
+
aze az Azerbaijani
|
32
|
+
nah Aztec
|
33
|
+
ban Balinese
|
34
|
+
bat Baltic (Other)
|
35
|
+
bal Baluchi
|
36
|
+
bam Bambara
|
37
|
+
bai Bamileke languages
|
38
|
+
bad Banda
|
39
|
+
bnt Bantu (Other)
|
40
|
+
bas Basa
|
41
|
+
bak ba Bashkir
|
42
|
+
baq/eus eu Basque
|
43
|
+
bej Beja
|
44
|
+
bem Bemba
|
45
|
+
ben bn Bengali
|
46
|
+
ber Berber (Other)
|
47
|
+
bho Bhojpuri
|
48
|
+
bih bh Bihari
|
49
|
+
bik Bikol
|
50
|
+
bin Bini
|
51
|
+
bis bi Bislama
|
52
|
+
bra Braj
|
53
|
+
bre br Breton
|
54
|
+
bug Buginese
|
55
|
+
bul bg Bulgarian
|
56
|
+
bua Buriat
|
57
|
+
bur/mya my Burmese
|
58
|
+
bel be Byelorussian
|
59
|
+
cad Caddo
|
60
|
+
car Carib
|
61
|
+
cat ca Catalan
|
62
|
+
cau Caucasian (Other)
|
63
|
+
ceb Cebuano
|
64
|
+
cel Celtic (Other)
|
65
|
+
cai Central American Indian (Other)
|
66
|
+
chg Chagatai
|
67
|
+
cha Chamorro
|
68
|
+
che Chechen
|
69
|
+
chr Cherokee
|
70
|
+
chy Cheyenne
|
71
|
+
chb Chibcha
|
72
|
+
chi/zho zh Chinese
|
73
|
+
chn Chinook jargon
|
74
|
+
cho Choctaw
|
75
|
+
chu Church Slavic
|
76
|
+
chv Chuvash
|
77
|
+
cop Coptic
|
78
|
+
cor Cornish
|
79
|
+
cos co Corsican
|
80
|
+
cre Cree
|
81
|
+
mus Creek
|
82
|
+
crp Creoles and Pidgins (Other)
|
83
|
+
cpe Creoles and Pidgins, English-based (Other)
|
84
|
+
cpf Creoles and Pidgins, French-based (Other)
|
85
|
+
cpp Creoles and Pidgins, Portuguese-based (Other)
|
86
|
+
cus Cushitic (Other)
|
87
|
+
hrv hr Croatian
|
88
|
+
ces/cze cs Czech
|
89
|
+
dak Dakota
|
90
|
+
dan da Danish
|
91
|
+
del Delaware
|
92
|
+
din Dinka
|
93
|
+
div Divehi
|
94
|
+
doi Dogri
|
95
|
+
dra Dravidian (Other)
|
96
|
+
dua Duala
|
97
|
+
dut/nla nl Dutch
|
98
|
+
dum Dutch, Middle (ca. 1050-1350)
|
99
|
+
dyu Dyula
|
100
|
+
dzo dz Dzongkha
|
101
|
+
efi Efik
|
102
|
+
egy Egyptian (Ancient)
|
103
|
+
eka Ekajuk
|
104
|
+
elx Elamite
|
105
|
+
eng en English
|
106
|
+
enm English, Middle (ca. 1100-1500)
|
107
|
+
ang English, Old (ca. 450-1100)
|
108
|
+
esk Eskimo (Other)
|
109
|
+
epo eo Esperanto
|
110
|
+
est et Estonian
|
111
|
+
ewe Ewe
|
112
|
+
ewo Ewondo
|
113
|
+
fan Fang
|
114
|
+
fat Fanti
|
115
|
+
fao fo Faroese
|
116
|
+
fij fj Fijian
|
117
|
+
fin fi Finnish
|
118
|
+
fiu Finno-Ugrian (Other)
|
119
|
+
fon Fon
|
120
|
+
fra/fre fr French
|
121
|
+
frm French, Middle (ca. 1400-1600)
|
122
|
+
fro French, Old (842- ca. 1400)
|
123
|
+
fry fy Frisian
|
124
|
+
ful Fulah
|
125
|
+
gaa Ga
|
126
|
+
gae/gdh Gaelic (Scots)
|
127
|
+
glg gl Gallegan
|
128
|
+
lug Ganda
|
129
|
+
gay Gayo
|
130
|
+
gez Geez
|
131
|
+
geo/kat ka Georgian
|
132
|
+
deu/ger de German
|
133
|
+
gmh German, Middle High (ca. 1050-1500)
|
134
|
+
goh German, Old High (ca. 750-1050)
|
135
|
+
gem Germanic (Other)
|
136
|
+
gil Gilbertese
|
137
|
+
gon Gondi
|
138
|
+
got Gothic
|
139
|
+
grb Grebo
|
140
|
+
grc Greek, Ancient (to 1453)
|
141
|
+
ell/gre el Greek, Modern (1453-)
|
142
|
+
kal kl Greenlandic
|
143
|
+
grn gn Guarani
|
144
|
+
guj gu Gujarati
|
145
|
+
hai Haida
|
146
|
+
hau ha Hausa
|
147
|
+
haw Hawaiian
|
148
|
+
heb he Hebrew
|
149
|
+
her Herero
|
150
|
+
hil Hiligaynon
|
151
|
+
him Himachali
|
152
|
+
hin hi Hindi
|
153
|
+
hmo Hiri Motu
|
154
|
+
hun hu Hungarian
|
155
|
+
hup Hupa
|
156
|
+
iba Iban
|
157
|
+
ice/isl is Icelandic
|
158
|
+
ibo Igbo
|
159
|
+
ijo Ijo
|
160
|
+
ilo Iloko
|
161
|
+
inc Indic (Other)
|
162
|
+
ine Indo-European (Other)
|
163
|
+
ind id Indonesian
|
164
|
+
ina ia Interlingua (International Auxiliary language Association)
|
165
|
+
ile Interlingue
|
166
|
+
iku iu Inuktitut
|
167
|
+
ipk ik Inupiak
|
168
|
+
ira Iranian (Other)
|
169
|
+
gai/iri ga Irish
|
170
|
+
sga Irish, Old (to 900)
|
171
|
+
mga Irish, Middle (900 - 1200)
|
172
|
+
iro Iroquoian languages
|
173
|
+
ita it Italian
|
174
|
+
jpn ja Japanese
|
175
|
+
jav jv Javanese
|
176
|
+
jrb Judeo-Arabic
|
177
|
+
jpr Judeo-Persian
|
178
|
+
kab Kabyle
|
179
|
+
kac Kachin
|
180
|
+
kam Kamba
|
181
|
+
kan kn Kannada
|
182
|
+
kau Kanuri
|
183
|
+
kaa Kara-Kalpak
|
184
|
+
kar Karen
|
185
|
+
kas ks Kashmiri
|
186
|
+
kaw Kawi
|
187
|
+
kaz kk Kazakh
|
188
|
+
kha Khasi
|
189
|
+
khm km Khmer
|
190
|
+
khi Khoisan (Other)
|
191
|
+
kho Khotanese
|
192
|
+
kik Kikuyu
|
193
|
+
kin rw Kinyarwanda
|
194
|
+
kir ky Kirghiz
|
195
|
+
kom Komi
|
196
|
+
kon Kongo
|
197
|
+
kok Konkani
|
198
|
+
kor ko Korean
|
199
|
+
kpe Kpelle
|
200
|
+
kro Kru
|
201
|
+
kua Kuanyama
|
202
|
+
kum Kumyk
|
203
|
+
kur ku Kurdish
|
204
|
+
kru Kurukh
|
205
|
+
kus Kusaie
|
206
|
+
kut Kutenai
|
207
|
+
lad Ladino
|
208
|
+
lah Lahnda
|
209
|
+
lam Lamba
|
210
|
+
oci oc Langue d'Oc (post 1500)
|
211
|
+
lao lo Lao
|
212
|
+
lat la Latin
|
213
|
+
lav lv Latvian
|
214
|
+
ltz Letzeburgesch
|
215
|
+
lez Lezghian
|
216
|
+
lin ln Lingala
|
217
|
+
lit lt Lithuanian
|
218
|
+
loz Lozi
|
219
|
+
lub Luba-Katanga
|
220
|
+
lui Luiseno
|
221
|
+
lun Lunda
|
222
|
+
luo Luo (Kenya and Tanzania)
|
223
|
+
mac/mke mk Macedonian
|
224
|
+
mad Madurese
|
225
|
+
mag Magahi
|
226
|
+
mai Maithili
|
227
|
+
mak Makasar
|
228
|
+
mlg mg Malagasy
|
229
|
+
may/msa ms Malay
|
230
|
+
mal Malayalam
|
231
|
+
mlt ml Maltese
|
232
|
+
man Mandingo
|
233
|
+
mni Manipuri
|
234
|
+
mno Manobo languages
|
235
|
+
max Manx
|
236
|
+
mao/mri mi Maori
|
237
|
+
mar mr Marathi
|
238
|
+
chm Mari
|
239
|
+
mah Marshall
|
240
|
+
mwr Marwari
|
241
|
+
mas Masai
|
242
|
+
myn Mayan languages
|
243
|
+
men Mende
|
244
|
+
mic Micmac
|
245
|
+
min Minangkabau
|
246
|
+
mis Miscellaneous (Other)
|
247
|
+
moh Mohawk
|
248
|
+
mol mo Moldavian
|
249
|
+
mkh Mon-Kmer (Other)
|
250
|
+
lol Mongo
|
251
|
+
mon mn Mongolian
|
252
|
+
mos Mossi
|
253
|
+
mul Multiple languages
|
254
|
+
mun Munda languages
|
255
|
+
nau na Nauru
|
256
|
+
nav Navajo
|
257
|
+
nde Ndebele, North
|
258
|
+
nbl Ndebele, South
|
259
|
+
ndo Ndongo
|
260
|
+
nep ne Nepali
|
261
|
+
new Newari
|
262
|
+
nic Niger-Kordofanian (Other)
|
263
|
+
ssa Nilo-Saharan (Other)
|
264
|
+
niu Niuean
|
265
|
+
non Norse, Old
|
266
|
+
nai North American Indian (Other)
|
267
|
+
nor no Norwegian
|
268
|
+
nno Norwegian (Nynorsk)
|
269
|
+
nub Nubian languages
|
270
|
+
nym Nyamwezi
|
271
|
+
nya Nyanja
|
272
|
+
nyn Nyankole
|
273
|
+
nyo Nyoro
|
274
|
+
nzi Nzima
|
275
|
+
oji Ojibwa
|
276
|
+
ori or Oriya
|
277
|
+
orm om Oromo
|
278
|
+
osa Osage
|
279
|
+
oss Ossetic
|
280
|
+
oto Otomian languages
|
281
|
+
pal Pahlavi
|
282
|
+
pau Palauan
|
283
|
+
pli Pali
|
284
|
+
pam Pampanga
|
285
|
+
pag Pangasinan
|
286
|
+
pan pa Panjabi
|
287
|
+
pap Papiamento
|
288
|
+
paa Papuan-Australian (Other)
|
289
|
+
fas/per fa Persian
|
290
|
+
peo Persian, Old (ca 600 - 400 B.C.)
|
291
|
+
phn Phoenician
|
292
|
+
pol pl Polish
|
293
|
+
pon Ponape
|
294
|
+
por pt Portuguese
|
295
|
+
pra Prakrit languages
|
296
|
+
pro Provencal, Old (to 1500)
|
297
|
+
pus ps Pushto
|
298
|
+
que qu Quechua
|
299
|
+
roh rm Rhaeto-Romance
|
300
|
+
raj Rajasthani
|
301
|
+
rar Rarotongan
|
302
|
+
roa Romance (Other)
|
303
|
+
ron/rum ro Romanian
|
304
|
+
rom Romany
|
305
|
+
run rn Rundi
|
306
|
+
rus ru Russian
|
307
|
+
sal Salishan languages
|
308
|
+
sam Samaritan Aramaic
|
309
|
+
smi Sami languages
|
310
|
+
smo sm Samoan
|
311
|
+
sad Sandawe
|
312
|
+
sag sg Sango
|
313
|
+
san sa Sanskrit
|
314
|
+
srd Sardinian
|
315
|
+
sco Scots
|
316
|
+
sel Selkup
|
317
|
+
sem Semitic (Other)
|
318
|
+
sr Serbian
|
319
|
+
scr sh Serbo-Croatian
|
320
|
+
srr Serer
|
321
|
+
shn Shan
|
322
|
+
sna sn Shona
|
323
|
+
sid Sidamo
|
324
|
+
bla Siksika
|
325
|
+
snd sd Sindhi
|
326
|
+
sin si Singhalese
|
327
|
+
sit Sino-Tibetan (Other)
|
328
|
+
sio Siouan languages
|
329
|
+
sla Slavic (Other)
|
330
|
+
ss Siswati
|
331
|
+
slk/slo sk Slovak
|
332
|
+
slv sl Slovenian
|
333
|
+
sog Sogdian
|
334
|
+
som so Somali
|
335
|
+
son Songhai
|
336
|
+
wen Sorbian languages
|
337
|
+
nso Sotho, Northern
|
338
|
+
sot st Sotho, Southern
|
339
|
+
sai South American Indian (Other)
|
340
|
+
esl/spa es Spanish
|
341
|
+
suk Sukuma
|
342
|
+
sux Sumerian
|
343
|
+
sun su Sudanese
|
344
|
+
sus Susu
|
345
|
+
swa sw Swahili
|
346
|
+
ssw Swazi
|
347
|
+
sve/swe sv Swedish
|
348
|
+
syr Syriac
|
349
|
+
tgl tl Tagalog
|
350
|
+
tah Tahitian
|
351
|
+
tgk tg Tajik
|
352
|
+
tmh Tamashek
|
353
|
+
tam ta Tamil
|
354
|
+
tat tt Tatar
|
355
|
+
tel te Telugu
|
356
|
+
ter Tereno
|
357
|
+
tha th Thai
|
358
|
+
bod/tib bo Tibetan
|
359
|
+
tig Tigre
|
360
|
+
tir ti Tigrinya
|
361
|
+
tem Timne
|
362
|
+
tiv Tivi
|
363
|
+
tli Tlingit
|
364
|
+
tog to Tonga (Nyasa)
|
365
|
+
ton Tonga (Tonga Islands)
|
366
|
+
tru Truk
|
367
|
+
tsi Tsimshian
|
368
|
+
tso ts Tsonga
|
369
|
+
tsn tn Tswana
|
370
|
+
tum Tumbuka
|
371
|
+
tur tr Turkish
|
372
|
+
ota Ottoman
|
373
|
+
tuk tk Turkmen
|
374
|
+
tyv Tuvinian
|
375
|
+
twi tw Twi
|
376
|
+
uga Ugaritic
|
377
|
+
uig ug Uighur
|
378
|
+
ukr uk Ukrainian
|
379
|
+
umb Umbundu
|
380
|
+
und Undetermined
|
381
|
+
urd ur Urdu
|
382
|
+
uzb uz Uzbek
|
383
|
+
vai Vai
|
384
|
+
ven Venda
|
385
|
+
vie vi Vietnamese
|
386
|
+
vol vo Volap�k
|
387
|
+
vot Votic
|
388
|
+
wak Wakashan languages
|
389
|
+
wal Walamo
|
390
|
+
war Waray
|
391
|
+
was Washo
|
392
|
+
cym/wel cy Welsh
|
393
|
+
wol wo Wolof
|
394
|
+
xho xh Xhosa
|
395
|
+
sah Yakut
|
396
|
+
yao Yao
|
397
|
+
yap Yap
|
398
|
+
yid yi Yiddish
|
399
|
+
yor yo Yoruba
|
400
|
+
zap Zapotec
|
401
|
+
zen Zenaga
|
402
|
+
zha za Zhuang
|
403
|
+
zul zu Zulu
|
404
|
+
zun Zuni
|
@@ -0,0 +1 @@
|
|
1
|
+
|
data/lib/language/dsl.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'language/class'
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
#= Matcher
|
6
|
+
#
|
7
|
+
# Matcher derives from Ruby Quiz #103, the DictionaryMatcher quiz.
|
8
|
+
|
9
|
+
class Matcher

  # Number of words added so far (duplicates are counted each time).
  attr_reader :word_count

  # Contains the index matched, and the word matched.
  #
  # +index+ is the byte offset of the LAST byte of the matched word
  # within the scanned text; +match+ is the matched word itself.
  class MatchData < Struct.new(:index,:match)
    def inspect
      "#{match.inspect}@#{index}"
    end
  end

  def inspect
    to_s
  end

  # Create a DictionaryMatcher with no words in it
  def initialize
    @trie = {}
    @word_count = 0
  end

  # Add a word to the DictionaryMatcher. Returns self so calls can be
  # chained. An empty word is ignored.
  #
  # The trie is keyed by byte. Besides byte keys, each node may carry
  # :depth (its distance from the root), :failure (the node to fall back
  # to on a mismatch) and :terminal (true when a word ends here). Symbol
  # keys are used for this bookkeeping so they can never collide with a
  # byte value.
  def add(word)
    bytes = word.bytes.to_a
    return self if bytes.empty?

    @word_count += 1
    container  = @trie
    containers = []

    bytes.each_with_index do |b, i|
      container[b] = {} unless container.has_key? b
      container[:depth] = i
      containers << container
      container = container[b]
    end
    containers << container

    container[:terminal] = true   # Mark end of word
    container[:depth]    = bytes.size

    # containers[q] is the node reached after q bytes of the word, which
    # lines up with entry q of the failure function.
    ff = compute_failure_function word
    ff.zip(containers).each do |pointto, node|
      node[:failure] = containers[pointto] if pointto
    end

    self
  end

  alias << add

  # Prefix function of +p+, computed over its bytes: entry q is the
  # length of the longest proper prefix of the first q bytes that is
  # also a suffix of them. Entry 0 is nil (the root has no fallback).
  def compute_failure_function p
    seq = p.bytes.to_a
    m   = seq.size
    pi  = [nil,0]
    k   = 0
    2.upto m do |q|
      k = pi[k] while k > 0 and seq[k] != seq[q-1]
      k = k + 1 if seq[k] == seq[q-1]
      pi[q] = k
    end
    pi
  end
  private :compute_failure_function

  # Determine whether +word+ was previously <tt>add</tt>ed to the
  # Trie. Returns true or false.
  def include?(word)
    container = @trie
    word.each_byte do |b|
      # Leaving the trie means the word was never added, even if a
      # shorter prefix of it was.
      return false unless container.has_key? b
      container = container[b]
    end
    container[:terminal] ? true : false
  end

  # Determines whether one of the words in the DictionaryMatcher is a
  # substring of +text+. Returns the index of the match (see
  # MatchData#index) if found, +nil+ if not found.
  def =~ text
    internal_match(text){|md| return md.index}
    nil
  end

  # Determine whether one of the words in the DictionaryMatcher is a
  # substring of +text+. Returns a DictionaryMatcher::MatchData object
  # if found, +nil+ if not found.
  def match text
    internal_match(text){|md| return md}
    nil
  end

  # Walk +string+ byte by byte through the trie, yielding a MatchData
  # for every word found. After a match the walk restarts at the root.
  def internal_match string
    node = @trie
    pos  = 0
    string.each_byte do |b|
      advance = false
      until advance
        nextnode = node[b]
        if not nextnode
          if node[:failure]
            # Fall back and retry the same byte.
            node = node[:failure]
          else
            advance = true
          end
        elsif nextnode[:terminal]
          depth = nextnode[:depth]
          yield MatchData.new(pos, string.byteslice(pos + 1 - depth, depth))
          advance = true
          node = @trie
        else
          advance = true
          node = nextnode
        end
      end
      # One position per input byte, however many fallbacks were taken.
      pos += 1
    end
  end
  private :internal_match

  # Scans +text+ for all occurrences of strings in the
  # DictionaryMatcher.
  # Overlapping matches are skipped (only the first one is yielded), and
  # when some strings in the
  # DictionaryMatcher are substrings of others, only the shortest match
  # at a given position is found.
  #
  # With a block, each MatchData is yielded and an empty array is
  # returned; without one, the array of MatchData objects is returned.
  def scan(text, &block)
    matches = []
    block = lambda{ |md| matches << md } unless block
    internal_match(text,&block)
    matches
  end

  # Case equality. Similar to =~.
  alias_method :===, :=~
end
|
145
|
+
|
146
|
+
end
|
147
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'language/class'
|
2
|
+
|
3
|
+
class Language
|
4
|
+
|
5
|
+
module Mixin
  # Forward an unknown method to the receiver's current-language wrapper
  # (the object returned by the method named by Language.current) when
  # that wrapper responds to it; otherwise fall through to the normal
  # NoMethodError handling.
  def method_missing(s,*a,&b)
    current = Language.current.to_sym

    # The accessor itself is missing, or the receiver has none: there is
    # nothing to delegate to. Bailing out here avoids recursing through
    # method_missing and reports the method that was actually called.
    return super(s,*a,&b) if s == current
    return super(s,*a,&b) unless respond_to?(current, true)

    lang = __send__(current)
    if lang && lang.respond_to?(s)
      lang.__send__(s,*a,&b)
    else
      super(s,*a,&b)
    end
  end

  # Keep respond_to? in agreement with method_missing: a method counts
  # as available when the current-language wrapper responds to it.
  def respond_to_missing?(s, include_private = false)
    current = Language.current.to_sym
    return super if s == current
    return super unless respond_to?(current, true)

    lang = __send__(current)
    (lang && lang.respond_to?(s)) ? true : super
  end
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
# Give strings, numbers and arrays the Language::Mixin method forwarding.
# `send` is used because Module#include is private on older Rubies.
[String, Numeric, Array].each do |core_class|
  core_class.send(:include, Language::Mixin)
end
|
32
|
+
|
@@ -0,0 +1,152 @@
|
|
1
|
+
# This module characterizes the most common forms of Orthography
|
2
|
+
# in computer systems -- words divided by spaces, paragraphs divided
|
3
|
+
# by blank lines, and so on.
|
4
|
+
|
5
|
+
require 'language/class'
|
6
|
+
|
7
|
+
class Language

  # Collect every match of +pattern+ in +string+, in order, and return
  # them as a flat array of strings. When a block is supplied it is
  # called for each match: with the matched text alone if the block
  # declares exactly one parameter, otherwise with the matched text and
  # the range of character offsets it occupies in +string+.
  def self.scan_units(string, pattern, &yld)
    units = []
    # The block form of String#scan exposes the MatchData of the match
    # being visited, so each range belongs to its own match.
    string.scan(pattern) do
      found = Regexp.last_match
      unit  = found[0]
      units << unit
      next unless yld
      if yld.arity == 1
        yld.call(unit)
      else
        yld.call(unit, found.begin(0)...found.end(0))
      end
    end
    units
  end
  private_class_method :scan_units

  # If block given, iterate through each word.
  #
  #   "a string".each_word { |word, range| ... }
  #
  # Returns an array of words.
  #
  #   "abc 123".words  #=> ["abc","123"]
  #
  def self.words(string, &yld)
    scan_units(string, /[-'\w]+/, &yld)
  end

  # As with .words, but for sentences. A sentence is the shortest run of
  # text ending in a period followed by a space, so trailing text that
  # does not end that way is not reported.
  #
  # Returns an array of sentences.
  #
  def self.sentences(string, &yld)
    scan_units(string, /.*?\.\ /, &yld)
  end

  # As with .words, but for paragraphs. A paragraph is the shortest run
  # of text ending in a newline followed by at least two further
  # whitespace characters.
  # NOTE(review): a single blank line ("\n\n") directly followed by text
  # does not satisfy this pattern -- confirm that is intended.
  #
  # Returns an array of paragraphs.
  #
  def self.paragraphs(string, &yld)
    scan_units(string, /.*?\n\s{2,}/, &yld)
  end

  # Word wrap a string not exceeding max width.
  #
  #   puts "this is a test".word_wrap(4)
  #
  # _produces_
  #
  #   this
  #   is a
  #   test
  #
  # Runs of non-space characters longer than +col_width+ are broken
  # first, so no output line exceeds the width.
  #
  # CREDIT: Gavin Kistner
  # CREDIT: Dayne Broderson

  def self.word_wrap(string, col_width=79)
    string = string.gsub( /(\S{#{col_width}})(?=\S)/, '\1 ' )
    string = string.gsub( /(.{1,#{col_width}})(?:\s+|$)/, "\\1\n" )
    string
  end

=begin
  # TODO: This is an alternative from glue: worth providing?
  #
  # Enforces a maximum width of a string inside an
  # html container. If the string exceeds this maximum width
  # the string gets wrapped.
  #
  # Not really useful, better use the CSS overflow: hidden
  # functionality.
  #
  # === Input:
  # the string to be wrapped
  # the enforced width
  # the separator used for wrapping
  #
  # === Output:
  # the wrapped string
  #
  # === Example:
  # text = "1111111111111111111111111111111111111111111"
  # text = wrap(text, 10, " ")
  # p text # => "1111111111 1111111111 1111111111"
  #
  # See the test cases to better understand the behaviour!

  # def wrap(width = 20, separator = " ")
  # re = /([^#{separator}]{1,#{width}})/
  # scan(re).join(separator)
  # end
=end

  # Words of the wrapped subject; see .words.
  def words(&blk)
    self.class.words(@self, &blk)
  end

  # Iterate over the words of the wrapped subject; see .words.
  def each_word(&blk)
    words(&blk)
  end

  # Sentences of the wrapped subject; see .sentences.
  def sentences(&blk)
    self.class.sentences(@self, &blk)
  end

  # Iterate over the sentences of the wrapped subject; see .sentences.
  def each_sentence(&blk)
    sentences(&blk)
  end

  # Paragraphs of the wrapped subject; see .paragraphs.
  def paragraphs(&blk)
    self.class.paragraphs(@self, &blk)
  end

  # The method was originally published under this misspelled name;
  # keep it working for existing callers.
  alias_method :paragrpahs, :paragraphs

  # Iterate over the paragraphs of the wrapped subject; see .paragraphs.
  def each_paragraph(&blk)
    paragraphs(&blk)
  end

  # Word wrap the wrapped subject; see .word_wrap.
  def word_wrap(col_width=79)
    self.class.word_wrap(@self, col_width)
  end

  # As with #word_wrap, but modifies the string in place.
  def word_wrap!(col_width=79)
    @self.replace(word_wrap(col_width))
  end

end
|
152
|
+
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: language
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 6
|
8
|
+
- 0
|
9
|
+
version: 0.6.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Thomas Sawyer
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-05-28 00:00:00 -04:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: Language is a support library for other language libraries. While some of its contents are perfectly usable on their own, most are generally intended to be subclassed and extended by specific language modules, such as English.
|
22
|
+
email:
|
23
|
+
executables: []
|
24
|
+
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files:
|
28
|
+
- README.rdoc
|
29
|
+
files:
|
30
|
+
- lib/language/censor.rb
|
31
|
+
- lib/language/class.rb
|
32
|
+
- lib/language/codes.rb
|
33
|
+
- lib/language/codes_iso639.txt
|
34
|
+
- lib/language/current.rb
|
35
|
+
- lib/language/dsl.rb
|
36
|
+
- lib/language/matcher.rb
|
37
|
+
- lib/language/mixin.rb
|
38
|
+
- lib/language/words.rb
|
39
|
+
- lib/language.rb
|
40
|
+
- HISTORY.rdoc
|
41
|
+
- PROFILE
|
42
|
+
- LICENSE
|
43
|
+
- README.rdoc
|
44
|
+
- REQUIRE
|
45
|
+
- VERSION
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: http://rubyworks.github.com/language
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options:
|
52
|
+
- --title
|
53
|
+
- Language API
|
54
|
+
- --main
|
55
|
+
- README.rdoc
|
56
|
+
require_paths:
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
64
|
+
version: "0"
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
segments:
|
70
|
+
- 0
|
71
|
+
version: "0"
|
72
|
+
requirements: []
|
73
|
+
|
74
|
+
rubyforge_project: language
|
75
|
+
rubygems_version: 1.3.6
|
76
|
+
signing_key:
|
77
|
+
specification_version: 3
|
78
|
+
summary: Language Support Library
|
79
|
+
test_files: []
|
80
|
+
|