markos_linguistics 1.0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +640 -0
- data/LICENSE +27 -0
- data/README +166 -0
- data/README.english +245 -0
- data/Rakefile +338 -0
- data/examples/generalize_sentence.rb +46 -0
- data/lib/linguistics.rb +366 -0
- data/lib/linguistics/en.rb +1728 -0
- data/lib/linguistics/en/infinitive.rb +1145 -0
- data/lib/linguistics/en/linkparser.rb +109 -0
- data/lib/linguistics/en/wordnet.rb +257 -0
- data/lib/linguistics/iso639.rb +461 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +434 -0
- data/rake/hg.rb +261 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +144 -0
- data/rake/publishing.rb +318 -0
- data/rake/rdoc.rb +30 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +668 -0
- data/rake/testing.rb +187 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +190 -0
- data/spec/linguistics/en_spec.rb +215 -0
- data/spec/linguistics/iso639_spec.rb +72 -0
- data/spec/linguistics_spec.rb +107 -0
- data/tests/en/infinitive.tests.rb +207 -0
- data/tests/en/inflect.tests.rb +1389 -0
- data/tests/en/lafcadio.tests.rb +77 -0
- data/tests/en/linkparser.tests.rb +42 -0
- data/tests/en/lprintf.tests.rb +77 -0
- data/tests/en/titlecase.tests.rb +73 -0
- data/tests/en/wordnet.tests.rb +95 -0
- metadata +121 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/en'
|
4
|
+
|
5
|
+
#
|
6
|
+
# This file contains the extensions to the Linguistics::EN module which provide
|
7
|
+
# support for the Ruby LinkParser module. LinkParser enables grammatical queries
|
8
|
+
# of English language sentences.
|
9
|
+
#
|
10
|
+
# == Synopsis
|
11
|
+
#
|
12
|
+
# # Test to see whether or not the link parser is loaded.
|
13
|
+
# Linguistics::EN.has_link_parser?
|
14
|
+
# # => true
|
15
|
+
#
|
16
|
+
# # Diagram the first linkage for a test sentence
|
17
|
+
# puts "he is a big dog".sentence.linkages.first.to_s
|
18
|
+
# +---O*---+
|
19
|
+
# | +--Ds--+
|
20
|
+
# +Ss+ | +-A-+
|
21
|
+
# | | | | |
|
22
|
+
# he is a big dog
|
23
|
+
#
|
24
|
+
# # Find the verb in the sentence
|
25
|
+
# "he is a big dog".en.sentence.verb.to_s
|
26
|
+
# # => "is"
|
27
|
+
#
|
28
|
+
# # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
29
|
+
# # given sentence.
|
30
|
+
# "he is a big dog".en.sentence.verb.infinitive
|
31
|
+
# # => "be"
|
32
|
+
#
|
33
|
+
# # Find the direct object of the sentence
|
34
|
+
# "he is a big dog".en.sentence.object.to_s
|
35
|
+
# # => "dog"
|
36
|
+
#
|
37
|
+
# # Combine WordNet + LinkParser to find the definition of the direct object of
|
38
|
+
# # the sentence
|
39
|
+
# "he is a big dog".en.sentence.object.gloss
|
40
|
+
# # => "a member of the genus Canis (probably descended from the common wolf) that
|
41
|
+
# has been domesticated by man since prehistoric times; occurs in many breeds;
|
42
|
+
# \"the dog barked all night\""
|
43
|
+
#
|
44
|
+
# == Authors
|
45
|
+
#
|
46
|
+
# * Martin Chase <stillflame@FaerieMUD.org>
|
47
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
48
|
+
#
|
49
|
+
# :include: LICENSE
|
50
|
+
#
|
51
|
+
#--
|
52
|
+
#
|
53
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
54
|
+
#
|
55
|
+
module Linguistics::EN
|
56
|
+
|
57
|
+
# State for the optional LinkParser integration, kept in instance variables of
# the enclosing module object:
#   @has_link_parser - true once "linkparser" has been required successfully
#   @lp_dict         - lazily-created dictionary (nil until first use)
#   @lp_error        - the LoadError raised by the require, if any
@has_link_parser = false
@lp_dict = nil
@lp_error = nil

# Try to load LinkParser. A missing library is not fatal here: the LoadError
# is saved in @lp_error instead of being propagated.
begin
  require "linkparser"
  @has_link_parser = true
rescue LoadError => err
  @lp_error = err
end
|
67
|
+
|
68
|
+
|
69
|
+
#################################################################
|
70
|
+
### M O D U L E M E T H O D S
|
71
|
+
#################################################################
|
72
|
+
class << self
|
73
|
+
|
74
|
+
### Returns +true+ if LinkParser was loaded okay
|
75
|
+
def has_link_parser?
  # Flag assigned when this file attempted to require "linkparser".
  @has_link_parser
end
|
76
|
+
|
77
|
+
### If #has_link_parser? returns +false+, this can be called to fetch the
|
78
|
+
### exception which was raised when trying to load LinkParser.
|
79
|
+
def lp_error
  # nil unless requiring "linkparser" raised a LoadError.
  @lp_error
end
|
80
|
+
|
81
|
+
### The instance of LinkParser used for all Linguistics LinkParser
|
82
|
+
### functions.
|
83
|
+
### Return the shared LinkParser::Dictionary, creating it with
### <tt>:verbosity => 0</tt> on first use and memoizing it in @lp_dict.
###
### Raises NotImplementedError (quoting the saved error's message) if an
### error was recorded in @lp_error.
def lp_dict
  if @lp_error
    raise NotImplementedError,
      "LinkParser functions are not loaded: %s" % @lp_error.message
  end

  @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
end
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
#################################################################
|
96
|
+
### L I N K P A R S E R I N T E R F A C E
|
97
|
+
#################################################################
|
98
|
+
|
99
|
+
###############
|
100
|
+
module_function
|
101
|
+
###############
|
102
|
+
|
103
|
+
### Return a LinkParser::Sentence for the stringified +obj+.
|
104
|
+
# Stringifies +obj+ with #to_s and hands it to #parse on the dictionary
# returned by Linguistics::EN::lp_dict; whatever #parse returns is the result.
# Any exception raised by lp_dict propagates to the caller.
def sentence( obj )
  Linguistics::EN::lp_dict.parse( obj.to_s )
end
|
107
|
+
module_function :sentence
|
108
|
+
|
109
|
+
end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/en'
|
4
|
+
|
5
|
+
# This file contains functions for finding relations for English words. It
|
6
|
+
# requires the Ruby-WordNet module to be installed; if it is not installed,
|
7
|
+
# calling the functions defined by this file will raise NotImplementedError
|
8
|
+
# exceptions. Requiring this file adds functions and constants to the
|
9
|
+
# Linguistics::EN module.
|
10
|
+
#
|
11
|
+
# == Synopsis
|
12
|
+
#
|
13
|
+
# # Test to be sure the WordNet module loaded okay.
|
14
|
+
# Linguistics::EN.has_wordnet?
|
15
|
+
# # => true
|
16
|
+
#
|
17
|
+
# # Fetch the default synset for the word "balance"
|
18
|
+
# "balance".synset
|
19
|
+
# # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
20
|
+
# (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
21
|
+
#
|
22
|
+
# # Fetch the synset for the first verb sense of "balance"
|
23
|
+
# "balance".en.synset( :verb )
|
24
|
+
# # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
25
|
+
# (verb): "bring into balance or equilibrium; "She has to balance work and her
|
26
|
+
# domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
27
|
+
# verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
28
|
+
#
|
29
|
+
# # Fetch the second noun sense
|
30
|
+
# "balance".en.synset( 2, :noun )
|
31
|
+
# # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
32
|
+
# on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
33
|
+
#
|
34
|
+
# # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
35
|
+
# "balance".en.synset( 2, :noun ).hypernyms
|
36
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
37
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
38
|
+
# hyponyms: 2)>]
|
39
|
+
#
|
40
|
+
# # A simpler way of doing the same thing:
|
41
|
+
# "balance".en.hypernyms( 2, :noun )
|
42
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
43
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
44
|
+
# hyponyms: 2)>]
|
45
|
+
#
|
46
|
+
# # Fetch the first hypernym's hypernyms
|
47
|
+
# "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
48
|
+
# # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
49
|
+
# measuring device (noun): "instrument that shows the extent or amount or quantity
|
50
|
+
# or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
51
|
+
#
|
52
|
+
# # Find the synset to which both the second noun sense of "balance" and the
|
53
|
+
# # default sense of "shovel" belong.
|
54
|
+
# ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
55
|
+
# # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
56
|
+
# artifact (or system of artifacts) that is instrumental in accomplishing some
|
57
|
+
# end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
58
|
+
#
|
59
|
+
# # Fetch just the words for the other kinds of "instruments"
|
60
|
+
# "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
61
|
+
# # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
62
|
+
# "extractor", "instrument of execution", "instrument of punishment", "measuring
|
63
|
+
# instrument", "measuring system", "measuring device", "medical instrument",
|
64
|
+
# "navigational instrument", "optical instrument", "plotter", "scientific
|
65
|
+
# instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
66
|
+
# "tracer", "weapon", "arm", "weapon system", "whip"]
|
67
|
+
#
|
68
|
+
#
|
69
|
+
# == Authors
|
70
|
+
#
|
71
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
72
|
+
#
|
73
|
+
# :include: LICENSE
|
74
|
+
#
|
75
|
+
# == Version
|
76
|
+
#
|
77
|
+
# $Id$
|
78
|
+
#
|
79
|
+
module Linguistics::EN
|
80
|
+
|
81
|
+
# State for the optional WordNet integration, kept in instance variables of
# the enclosing module object:
#   @has_wordnet - true once 'wordnet' has been required successfully
#   @wn_error    - the LoadError raised by the require, if any
#   @wn_lexicon  - lazily-created lexicon (nil until first use)
@has_wordnet = false
@wn_error = nil
@wn_lexicon = nil

# Try to load WordNet, saving the LoadError that occurs if the library is not
# available. The lexicon itself is not opened here.
begin
  require 'wordnet'
  @has_wordnet = true
rescue LoadError => err
  @wn_error = err
end
|
93
|
+
|
94
|
+
|
95
|
+
#################################################################
|
96
|
+
### M O D U L E M E T H O D S
|
97
|
+
#################################################################
|
98
|
+
class << self
|
99
|
+
|
100
|
+
### Returns +true+ if WordNet was loaded okay
|
101
|
+
def has_wordnet?
  # Flag assigned when this file attempted to require 'wordnet'.
  @has_wordnet
end
|
102
|
+
|
103
|
+
### If #has_wordnet? returns +false+, this can be called to fetch the
|
104
|
+
### exception which was raised when WordNet was loaded.
|
105
|
+
def wn_error
  # nil unless requiring 'wordnet' raised a LoadError.
  @wn_error
end
|
106
|
+
|
107
|
+
### The instance of the WordNet::Lexicon used for all Linguistics WordNet
|
108
|
+
### functions.
|
109
|
+
### Return the shared WordNet::Lexicon, creating it on first use and
### memoizing it in @wn_lexicon.
###
### Raises NotImplementedError (quoting the saved error's message) if an
### error was recorded in @wn_error.
def wn_lexicon
  if @wn_error
    raise NotImplementedError,
      "WordNet functions are not loaded: %s" % @wn_error.message
  end

  @wn_lexicon ||= WordNet::Lexicon.new
end
|
118
|
+
|
119
|
+
### Make a function that calls the method +meth+ on the synset of an input
|
120
|
+
### word.
|
121
|
+
### Define a singleton method named +meth+ on the receiver. The generated
### method accepts (word, pos, sense), looks the word up with #synset, and
### returns the result of sending +meth+ to the synset, or +nil+ when no
### synset was found. +sense+ defaults to 1.
###
### The generated method raises ArgumentError when called without a word
### (or with a nil/false word).
def def_synset_function( meth )
  # Define on the singleton class so the function is callable on the
  # receiver itself.
  (class << self; self; end).instance_eval do
    define_method( meth ) do |*args|
      word, pos, sense = *args
      raise ArgumentError, "wrong number of arguments (0 for 1)" unless word
      sense ||= 1

      syn = synset( word.to_s, pos, sense )
      syn.nil? ? nil : syn.send( meth )
    end
  end
end
|
134
|
+
end
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
#################################################################
|
139
|
+
### W O R D N E T I N T E R F A C E
|
140
|
+
#################################################################
|
141
|
+
|
142
|
+
###############
|
143
|
+
module_function
|
144
|
+
###############
|
145
|
+
|
146
|
+
### Look up the synset associated with the given word or collocation in the
|
147
|
+
### WordNet lexicon and return a WordNet::Synset object.
|
148
|
+
# Calling forms:
#   synset( word )              - sense 1, trying each part of speech in turn
#   synset( word, pos )         - sense 1 of the given part of speech
#   synset( word, pos, sense )  - the given sense of the given part of speech
#   synset( word, sense )       - the given sense, trying each part of speech
#   synset( word, sense, pos )  - sense-first form; a non-Integer third
#                                 argument is taken as the part of speech
#
# Returns the first truthy result of lex.lookup_synsets for the parts of
# speech tried; otherwise the result of the last lookup (nil or false).
def synset( word, pos=nil, sense=1 )
  lex = Linguistics::EN::wn_lexicon

  # An Integer in the +pos+ slot is a sense number. Integer (rather than
  # Fixnum, which newer Rubies no longer define) covers every Ruby version.
  if pos.is_a?( Integer )
    pos, sense = (sense.is_a?( Integer ) ? nil : sense), pos
  end

  postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
  syn = nil

  postries.each do |try_pos|
    syn = lex.lookup_synsets( word.to_s, try_pos, sense )
    break if syn
  end

  syn
end
|
163
|
+
|
164
|
+
|
165
|
+
### Look up all the synsets associated with the given word or collocation in
|
166
|
+
### the WordNet lexicon and return an Array of WordNet::Synset objects. If
|
167
|
+
### +pos+ is +nil+, return synsets for all parts of speech.
|
168
|
+
# Calls lex.lookup_synsets( word.to_s, pos ) once per part of speech tried
# (just +pos+ when given, otherwise :noun, :verb, :adjective, :adverb and
# :other in that order) and returns the results as one flat Array with nils
# removed.
def synsets( word, pos=nil )
  lex = Linguistics::EN::wn_lexicon
  postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

  found = postries.collect {|try_pos| lex.lookup_synsets( word.to_s, try_pos ) }
  found.flatten.compact
end
|
179
|
+
|
180
|
+
|
181
|
+
# Returns definitions and/or example sentences as a String.
|
182
|
+
def_synset_function :gloss
|
183
|
+
|
184
|
+
# Returns definitions and/or example sentences as an Array.
|
185
|
+
def_synset_function :glosses
|
186
|
+
|
187
|
+
# Return nouns or verbs that have the same hypernym as the receiver.
|
188
|
+
def_synset_function :coordinates
|
189
|
+
|
190
|
+
# Returns the Array of synonyms contained in the synset for the receiver.
|
191
|
+
def_synset_function :words
|
192
|
+
def_synset_function :synonyms
|
193
|
+
|
194
|
+
# Returns the name of the lexicographer file that contains the raw data for
|
195
|
+
# the receiver.
|
196
|
+
def_synset_function :lex_info
|
197
|
+
|
198
|
+
# :TODO: Finish these comments, and figure out how the hell to get the
|
199
|
+
# methods to show up in RDoc.
|
200
|
+
def_synset_function :frames
|
201
|
+
|
202
|
+
|
203
|
+
# Returns the synsets for the receiver's antonyms, if any. Ex:
|
204
|
+
# 'opaque'.en.synset.antonyms
|
205
|
+
# ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
|
206
|
+
# from cloudiness; allowing light to pass through; "clear water";
|
207
|
+
# "clear plastic bags"; "clear glass"; "the air is clear and clean""
|
208
|
+
# (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
|
209
|
+
# seeAlsos: 1)>]
|
210
|
+
def_synset_function :antonyms
|
211
|
+
|
212
|
+
def_synset_function :hypernyms
|
213
|
+
def_synset_function :instance_hypernyms
|
214
|
+
def_synset_function :entailment
|
215
|
+
def_synset_function :hyponyms
|
216
|
+
def_synset_function :instance_hyponyms
|
217
|
+
def_synset_function :causes
|
218
|
+
def_synset_function :verbgroups
|
219
|
+
def_synset_function :similar_to
|
220
|
+
def_synset_function :participles
|
221
|
+
def_synset_function :pertainyms
|
222
|
+
def_synset_function :attributes
|
223
|
+
def_synset_function :derived_from
|
224
|
+
def_synset_function :see_also
|
225
|
+
def_synset_function :functions
|
226
|
+
|
227
|
+
def_synset_function :meronyms
|
228
|
+
def_synset_function :member_meronyms
|
229
|
+
def_synset_function :stuff_meronyms
|
230
|
+
def_synset_function :portion_meronyms
|
231
|
+
def_synset_function :component_meronyms
|
232
|
+
def_synset_function :feature_meronyms
|
233
|
+
def_synset_function :phase_meronyms
|
234
|
+
def_synset_function :place_meronyms
|
235
|
+
|
236
|
+
def_synset_function :holonyms
|
237
|
+
def_synset_function :member_holonyms
|
238
|
+
def_synset_function :stuff_holonyms
|
239
|
+
def_synset_function :portion_holonyms
|
240
|
+
def_synset_function :component_holonyms
|
241
|
+
def_synset_function :feature_holonyms
|
242
|
+
def_synset_function :phase_holonyms
|
243
|
+
def_synset_function :place_holonyms
|
244
|
+
|
245
|
+
def_synset_function :domains
|
246
|
+
def_synset_function :category_domains
|
247
|
+
def_synset_function :region_domains
|
248
|
+
def_synset_function :usage_domains
|
249
|
+
|
250
|
+
def_synset_function :members
|
251
|
+
def_synset_function :category_members
|
252
|
+
def_synset_function :region_members
|
253
|
+
def_synset_function :usage_members
|
254
|
+
|
255
|
+
|
256
|
+
end # module Linguistics::EN
|
257
|
+
|
@@ -0,0 +1,461 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#coding: utf-8
|
3
|
+
|
4
|
+
#
|
5
|
+
# linguistics/iso639.rb - A hash of International 2- and 3-letter
|
6
|
+
# ISO639-1 and ISO639-2 language codes. Each entry has two keys:
|
7
|
+
#
|
8
|
+
# [<tt>:codes</tt>]
|
9
|
+
# All of the codes known for this language
|
10
|
+
# [<tt>:desc</tt>]
|
11
|
+
# The English-language description of the language.
|
12
|
+
#
|
13
|
+
# :include: LICENSE
|
14
|
+
#
|
15
|
+
#--
|
16
|
+
#
|
17
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
18
|
+
#
|
19
|
+
module Linguistics

  # Hash of ISO639 2- and 3-letter language codes
  LanguageCodes = {}

  ### Scan +lines+ (an Array of Strings, one per line) for a data section
  ### that starts after a line reading __END__ and stops at a line reading
  ### __END_DATA__, and register every language found there in +table+.
  ###
  ### Each data line is fixed-width: the first 15 columns hold one or more
  ### codes separated by "/" or whitespace, and the rest of the line is the
  ### description. Every code of a line maps to the same entry Hash with
  ### the keys :desc and :codes. Lines outside the data section, and data
  ### lines without any code, are ignored.
  ###
  ### Returns +table+. Raises a RuntimeError if a code is already present
  ### in +table+.
  def self.load_language_codes( lines, table=LanguageCodes )
    in_data_section = false

    lines.each do |line|
      case line
      when /^__END_DATA__$/
        in_data_section = false

      when /^__END__$/
        in_data_section = true

      else
        next unless in_data_section

        codes = line[0,15].split( %r{/|\s+} ).reject {|code| code.empty? }

        # Slice to the end of the line and let #strip drop the newline,
        # so a final line without one keeps its last character; #to_s
        # covers lines shorter than the code columns.
        desc = line[15..-1].to_s.strip

        entry = {
          :desc => desc,
          :codes => codes.dup,
        }

        codes.each do |code|
          if table.key?( code )
            raise "Duplicate language code #{code}: (#{table[code][:desc]})"
          end
          table[ code ] = entry
        end
      end
    end

    table
  end
  private_class_method :load_language_codes

  # Read through the source for this file, capturing everything
  # between __END__ and __END_DATA__ tokens.
  load_language_codes( File::readlines(__FILE__) )
end
|
55
|
+
|
56
|
+
__END__
|
57
|
+
abk ab Abkhazian
|
58
|
+
ace Achinese
|
59
|
+
ach Acoli
|
60
|
+
ada Adangme
|
61
|
+
aar aa Afar
|
62
|
+
afh Afrihili
|
63
|
+
afr af Afrikaans
|
64
|
+
afa Afro-Asiatic (Other)
|
65
|
+
aka Akan
|
66
|
+
akk Akkadian
|
67
|
+
alb/sqi sq Albanian
|
68
|
+
ale Aleut
|
69
|
+
alg Algonquian languages
|
70
|
+
tut Altaic (Other)
|
71
|
+
amh am Amharic
|
72
|
+
apa Apache languages
|
73
|
+
ara ar Arabic
|
74
|
+
arc Aramaic
|
75
|
+
arp Arapaho
|
76
|
+
arn Araucanian
|
77
|
+
arw Arawak
|
78
|
+
arm/hye hy Armenian
|
79
|
+
art Artificial (Other)
|
80
|
+
asm as Assamese
|
81
|
+
ath Athapascan languages
|
82
|
+
map Austronesian (Other)
|
83
|
+
ava Avaric
|
84
|
+
ave Avestan
|
85
|
+
awa Awadhi
|
86
|
+
aym ay Aymara
|
87
|
+
aze az Azerbaijani
|
88
|
+
nah Aztec
|
89
|
+
ban Balinese
|
90
|
+
bat Baltic (Other)
|
91
|
+
bal Baluchi
|
92
|
+
bam Bambara
|
93
|
+
bai Bamileke languages
|
94
|
+
bad Banda
|
95
|
+
bnt Bantu (Other)
|
96
|
+
bas Basa
|
97
|
+
bak ba Bashkir
|
98
|
+
baq/eus eu Basque
|
99
|
+
bej Beja
|
100
|
+
bem Bemba
|
101
|
+
ben bn Bengali
|
102
|
+
ber Berber (Other)
|
103
|
+
bho Bhojpuri
|
104
|
+
bih bh Bihari
|
105
|
+
bik Bikol
|
106
|
+
bin Bini
|
107
|
+
bis bi Bislama
|
108
|
+
bra Braj
|
109
|
+
bre br Breton
|
110
|
+
bug Buginese
|
111
|
+
bul bg Bulgarian
|
112
|
+
bua Buriat
|
113
|
+
bur/mya my Burmese
|
114
|
+
bel be Byelorussian
|
115
|
+
cad Caddo
|
116
|
+
car Carib
|
117
|
+
cat ca Catalan
|
118
|
+
cau Caucasian (Other)
|
119
|
+
ceb Cebuano
|
120
|
+
cel Celtic (Other)
|
121
|
+
cai Central American Indian (Other)
|
122
|
+
chg Chagatai
|
123
|
+
cha Chamorro
|
124
|
+
che Chechen
|
125
|
+
chr Cherokee
|
126
|
+
chy Cheyenne
|
127
|
+
chb Chibcha
|
128
|
+
chi/zho zh Chinese
|
129
|
+
chn Chinook jargon
|
130
|
+
cho Choctaw
|
131
|
+
chu Church Slavic
|
132
|
+
chv Chuvash
|
133
|
+
cop Coptic
|
134
|
+
cor Cornish
|
135
|
+
cos co Corsican
|
136
|
+
cre Cree
|
137
|
+
mus Creek
|
138
|
+
crp Creoles and Pidgins (Other)
|
139
|
+
cpe Creoles and Pidgins, English-based (Other)
|
140
|
+
cpf Creoles and Pidgins, French-based (Other)
|
141
|
+
cpp Creoles and Pidgins, Portuguese-based (Other)
|
142
|
+
cus Cushitic (Other)
|
143
|
+
hr Croatian
|
144
|
+
ces/cze cs Czech
|
145
|
+
dak Dakota
|
146
|
+
dan da Danish
|
147
|
+
del Delaware
|
148
|
+
din Dinka
|
149
|
+
div Divehi
|
150
|
+
doi Dogri
|
151
|
+
dra Dravidian (Other)
|
152
|
+
dua Duala
|
153
|
+
dut/nla nl Dutch
|
154
|
+
dum Dutch, Middle (ca. 1050-1350)
|
155
|
+
dyu Dyula
|
156
|
+
dzo dz Dzongkha
|
157
|
+
efi Efik
|
158
|
+
egy Egyptian (Ancient)
|
159
|
+
eka Ekajuk
|
160
|
+
elx Elamite
|
161
|
+
eng en English
|
162
|
+
enm English, Middle (ca. 1100-1500)
|
163
|
+
ang English, Old (ca. 450-1100)
|
164
|
+
esk Eskimo (Other)
|
165
|
+
epo eo Esperanto
|
166
|
+
est et Estonian
|
167
|
+
ewe Ewe
|
168
|
+
ewo Ewondo
|
169
|
+
fan Fang
|
170
|
+
fat Fanti
|
171
|
+
fao fo Faroese
|
172
|
+
fij fj Fijian
|
173
|
+
fin fi Finnish
|
174
|
+
fiu Finno-Ugrian (Other)
|
175
|
+
fon Fon
|
176
|
+
fra/fre fr French
|
177
|
+
frm French, Middle (ca. 1400-1600)
|
178
|
+
fro French, Old (842- ca. 1400)
|
179
|
+
fry fy Frisian
|
180
|
+
ful Fulah
|
181
|
+
gaa Ga
|
182
|
+
gae/gdh Gaelic (Scots)
|
183
|
+
glg gl Gallegan
|
184
|
+
lug Ganda
|
185
|
+
gay Gayo
|
186
|
+
gez Geez
|
187
|
+
geo/kat ka Georgian
|
188
|
+
deu/ger de German
|
189
|
+
gmh German, Middle High (ca. 1050-1500)
|
190
|
+
goh German, Old High (ca. 750-1050)
|
191
|
+
gem Germanic (Other)
|
192
|
+
gil Gilbertese
|
193
|
+
gon Gondi
|
194
|
+
got Gothic
|
195
|
+
grb Grebo
|
196
|
+
grc Greek, Ancient (to 1453)
|
197
|
+
ell/gre el Greek, Modern (1453-)
|
198
|
+
kal kl Greenlandic
|
199
|
+
grn gn Guarani
|
200
|
+
guj gu Gujarati
|
201
|
+
hai Haida
|
202
|
+
hau ha Hausa
|
203
|
+
haw Hawaiian
|
204
|
+
heb he Hebrew
|
205
|
+
her Herero
|
206
|
+
hil Hiligaynon
|
207
|
+
him Himachali
|
208
|
+
hin hi Hindi
|
209
|
+
hmo Hiri Motu
|
210
|
+
hun hu Hungarian
|
211
|
+
hup Hupa
|
212
|
+
iba Iban
|
213
|
+
ice/isl is Icelandic
|
214
|
+
ibo Igbo
|
215
|
+
ijo Ijo
|
216
|
+
ilo Iloko
|
217
|
+
inc Indic (Other)
|
218
|
+
ine Indo-European (Other)
|
219
|
+
ind id Indonesian
|
220
|
+
ina ia Interlingua (International Auxiliary language Association)
|
221
|
+
ile Interlingue
|
222
|
+
iku iu Inuktitut
|
223
|
+
ipk ik Inupiak
|
224
|
+
ira Iranian (Other)
|
225
|
+
gai/iri ga Irish
|
226
|
+
sga Irish, Old (to 900)
|
227
|
+
mga Irish, Middle (900 - 1200)
|
228
|
+
iro Iroquoian languages
|
229
|
+
ita it Italian
|
230
|
+
jpn ja Japanese
|
231
|
+
jav/jaw jv/jw Javanese
|
232
|
+
jrb Judeo-Arabic
|
233
|
+
jpr Judeo-Persian
|
234
|
+
kab Kabyle
|
235
|
+
kac Kachin
|
236
|
+
kam Kamba
|
237
|
+
kan kn Kannada
|
238
|
+
kau Kanuri
|
239
|
+
kaa Kara-Kalpak
|
240
|
+
kar Karen
|
241
|
+
kas ks Kashmiri
|
242
|
+
kaw Kawi
|
243
|
+
kaz kk Kazakh
|
244
|
+
kha Khasi
|
245
|
+
khm km Khmer
|
246
|
+
khi Khoisan (Other)
|
247
|
+
kho Khotanese
|
248
|
+
kik Kikuyu
|
249
|
+
kin rw Kinyarwanda
|
250
|
+
kir ky Kirghiz
|
251
|
+
kom Komi
|
252
|
+
kon Kongo
|
253
|
+
kok Konkani
|
254
|
+
kor ko Korean
|
255
|
+
kpe Kpelle
|
256
|
+
kro Kru
|
257
|
+
kua Kuanyama
|
258
|
+
kum Kumyk
|
259
|
+
kur ku Kurdish
|
260
|
+
kru Kurukh
|
261
|
+
kus Kusaie
|
262
|
+
kut Kutenai
|
263
|
+
lad Ladino
|
264
|
+
lah Lahnda
|
265
|
+
lam Lamba
|
266
|
+
oci oc Langue d'Oc (post 1500)
|
267
|
+
lao lo Lao
|
268
|
+
lat la Latin
|
269
|
+
lav lv Latvian
|
270
|
+
ltz Letzeburgesch
|
271
|
+
lez Lezghian
|
272
|
+
lin ln Lingala
|
273
|
+
lit lt Lithuanian
|
274
|
+
loz Lozi
|
275
|
+
lub Luba-Katanga
|
276
|
+
lui Luiseno
|
277
|
+
lun Lunda
|
278
|
+
luo Luo (Kenya and Tanzania)
|
279
|
+
mac/mke mk Macedonian
|
280
|
+
mad Madurese
|
281
|
+
mag Magahi
|
282
|
+
mai Maithili
|
283
|
+
mak Makasar
|
284
|
+
mlg mg Malagasy
|
285
|
+
may/msa ms Malay
|
286
|
+
mal Malayalam
|
287
|
+
mlt ml Maltese
|
288
|
+
man Mandingo
|
289
|
+
mni Manipuri
|
290
|
+
mno Manobo languages
|
291
|
+
max Manx
|
292
|
+
mao/mri mi Maori
|
293
|
+
mar mr Marathi
|
294
|
+
chm Mari
|
295
|
+
mah Marshall
|
296
|
+
mwr Marwari
|
297
|
+
mas Masai
|
298
|
+
myn Mayan languages
|
299
|
+
men Mende
|
300
|
+
mic Micmac
|
301
|
+
min Minangkabau
|
302
|
+
mis Miscellaneous (Other)
|
303
|
+
moh Mohawk
|
304
|
+
mol mo Moldavian
|
305
|
+
mkh Mon-Kmer (Other)
|
306
|
+
lol Mongo
|
307
|
+
mon mn Mongolian
|
308
|
+
mos Mossi
|
309
|
+
mul Multiple languages
|
310
|
+
mun Munda languages
|
311
|
+
nau na Nauru
|
312
|
+
nav Navajo
|
313
|
+
nde Ndebele, North
|
314
|
+
nbl Ndebele, South
|
315
|
+
ndo Ndongo
|
316
|
+
nep ne Nepali
|
317
|
+
new Newari
|
318
|
+
nic Niger-Kordofanian (Other)
|
319
|
+
ssa Nilo-Saharan (Other)
|
320
|
+
niu Niuean
|
321
|
+
non Norse, Old
|
322
|
+
nai North American Indian (Other)
|
323
|
+
nor no Norwegian
|
324
|
+
nno Norwegian (Nynorsk)
|
325
|
+
nub Nubian languages
|
326
|
+
nym Nyamwezi
|
327
|
+
nya Nyanja
|
328
|
+
nyn Nyankole
|
329
|
+
nyo Nyoro
|
330
|
+
nzi Nzima
|
331
|
+
oji Ojibwa
|
332
|
+
ori or Oriya
|
333
|
+
orm om Oromo
|
334
|
+
osa Osage
|
335
|
+
oss Ossetic
|
336
|
+
oto Otomian languages
|
337
|
+
pal Pahlavi
|
338
|
+
pau Palauan
|
339
|
+
pli Pali
|
340
|
+
pam Pampanga
|
341
|
+
pag Pangasinan
|
342
|
+
pan pa Panjabi
|
343
|
+
pap Papiamento
|
344
|
+
paa Papuan-Australian (Other)
|
345
|
+
fas/per fa Persian
|
346
|
+
peo Persian, Old (ca 600 - 400 B.C.)
|
347
|
+
phn Phoenician
|
348
|
+
pol pl Polish
|
349
|
+
pon Ponape
|
350
|
+
por pt Portuguese
|
351
|
+
pra Prakrit languages
|
352
|
+
pro Provencal, Old (to 1500)
|
353
|
+
pus ps Pushto
|
354
|
+
que qu Quechua
|
355
|
+
roh rm Rhaeto-Romance
|
356
|
+
raj Rajasthani
|
357
|
+
rar Rarotongan
|
358
|
+
roa Romance (Other)
|
359
|
+
ron/rum ro Romanian
|
360
|
+
rom Romany
|
361
|
+
run rn Rundi
|
362
|
+
rus ru Russian
|
363
|
+
sal Salishan languages
|
364
|
+
sam Samaritan Aramaic
|
365
|
+
smi Sami languages
|
366
|
+
smo sm Samoan
|
367
|
+
sad Sandawe
|
368
|
+
sag sg Sango
|
369
|
+
san sa Sanskrit
|
370
|
+
srd Sardinian
|
371
|
+
sco Scots
|
372
|
+
sel Selkup
|
373
|
+
sem Semitic (Other)
|
374
|
+
sr Serbian
|
375
|
+
scr sh Serbo-Croatian
|
376
|
+
srr Serer
|
377
|
+
shn Shan
|
378
|
+
sna sn Shona
|
379
|
+
sid Sidamo
|
380
|
+
bla Siksika
|
381
|
+
snd sd Sindhi
|
382
|
+
sin si Singhalese
|
383
|
+
sit Sino-Tibetan (Other)
|
384
|
+
sio Siouan languages
|
385
|
+
sla Slavic (Other)
|
386
|
+
ss Siswati
|
387
|
+
slk/slo sk Slovak
|
388
|
+
slv sl Slovenian
|
389
|
+
sog Sogdian
|
390
|
+
som so Somali
|
391
|
+
son Songhai
|
392
|
+
wen Sorbian languages
|
393
|
+
nso Sotho, Northern
|
394
|
+
sot st Sotho, Southern
|
395
|
+
sai South American Indian (Other)
|
396
|
+
esl/spa es Spanish
|
397
|
+
suk Sukuma
|
398
|
+
sux Sumerian
|
399
|
+
sun su Sudanese
|
400
|
+
sus Susu
|
401
|
+
swa sw Swahili
|
402
|
+
ssw Swazi
|
403
|
+
sve/swe sv Swedish
|
404
|
+
syr Syriac
|
405
|
+
tgl tl Tagalog
|
406
|
+
tah Tahitian
|
407
|
+
tgk tg Tajik
|
408
|
+
tmh Tamashek
|
409
|
+
tam ta Tamil
|
410
|
+
tat tt Tatar
|
411
|
+
tel te Telugu
|
412
|
+
ter Tereno
|
413
|
+
tha th Thai
|
414
|
+
bod/tib bo Tibetan
|
415
|
+
tig Tigre
|
416
|
+
tir ti Tigrinya
|
417
|
+
tem Timne
|
418
|
+
tiv Tivi
|
419
|
+
tli Tlingit
|
420
|
+
tog to Tonga (Nyasa)
|
421
|
+
ton Tonga (Tonga Islands)
|
422
|
+
tru Truk
|
423
|
+
tsi Tsimshian
|
424
|
+
tso ts Tsonga
|
425
|
+
tsn tn Tswana
|
426
|
+
tum Tumbuka
|
427
|
+
tur tr Turkish
|
428
|
+
ota Turkish, Ottoman (1500 - 1928)
|
429
|
+
tuk tk Turkmen
|
430
|
+
tyv Tuvinian
|
431
|
+
twi tw Twi
|
432
|
+
uga Ugaritic
|
433
|
+
uig ug Uighur
|
434
|
+
ukr uk Ukrainian
|
435
|
+
umb Umbundu
|
436
|
+
und Undetermined
|
437
|
+
urd ur Urdu
|
438
|
+
uzb uz Uzbek
|
439
|
+
vai Vai
|
440
|
+
ven Venda
|
441
|
+
vie vi Vietnamese
|
442
|
+
vol vo Volapük
|
443
|
+
vot Votic
|
444
|
+
wak Wakashan languages
|
445
|
+
wal Walamo
|
446
|
+
war Waray
|
447
|
+
was Washo
|
448
|
+
cym/wel cy Welsh
|
449
|
+
wol wo Wolof
|
450
|
+
xho xh Xhosa
|
451
|
+
sah Yakut
|
452
|
+
yao Yao
|
453
|
+
yap Yap
|
454
|
+
yid yi Yiddish
|
455
|
+
yor yo Yoruba
|
456
|
+
zap Zapotec
|
457
|
+
zen Zenaga
|
458
|
+
zha za Zhuang
|
459
|
+
zul zu Zulu
|
460
|
+
zun Zuni
|
461
|
+
__END_DATA__
|