linguistics 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +640 -0
- data/LICENSE +27 -0
- data/README +166 -0
- data/README.english +245 -0
- data/Rakefile +338 -0
- data/examples/generalize_sentence.rb +46 -0
- data/lib/linguistics.rb +366 -0
- data/lib/linguistics/en.rb +1728 -0
- data/lib/linguistics/en/infinitive.rb +1145 -0
- data/lib/linguistics/en/linkparser.rb +109 -0
- data/lib/linguistics/en/wordnet.rb +257 -0
- data/lib/linguistics/iso639.rb +461 -0
- data/rake/191_compat.rb +26 -0
- data/rake/dependencies.rb +76 -0
- data/rake/helpers.rb +434 -0
- data/rake/hg.rb +261 -0
- data/rake/manual.rb +782 -0
- data/rake/packaging.rb +144 -0
- data/rake/publishing.rb +318 -0
- data/rake/rdoc.rb +30 -0
- data/rake/style.rb +62 -0
- data/rake/svn.rb +668 -0
- data/rake/testing.rb +187 -0
- data/rake/verifytask.rb +64 -0
- data/rake/win32.rb +190 -0
- data/spec/linguistics/en_spec.rb +215 -0
- data/spec/linguistics/iso639_spec.rb +72 -0
- data/spec/linguistics_spec.rb +107 -0
- data/tests/en/infinitive.tests.rb +207 -0
- data/tests/en/inflect.tests.rb +1389 -0
- data/tests/en/lafcadio.tests.rb +77 -0
- data/tests/en/linkparser.tests.rb +42 -0
- data/tests/en/lprintf.tests.rb +77 -0
- data/tests/en/titlecase.tests.rb +73 -0
- data/tests/en/wordnet.tests.rb +95 -0
- metadata +107 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/en'
|
4
|
+
|
5
|
+
#
|
6
|
+
# This file contains the extensions to the Linguistics::EN module which provide
|
7
|
+
# support for the Ruby LinkParser module. LinkParser enables grammatic queries
|
8
|
+
# of English language sentences.
|
9
|
+
#
|
10
|
+
# == Synopsis
|
11
|
+
#
|
12
|
+
# # Test to see whether or not the link parser is loaded.
|
13
|
+
# Linguistics::EN.has_link_parser?
|
14
|
+
# # => true
|
15
|
+
#
|
16
|
+
# # Diagram the first linkage for a test sentence
|
17
|
+
# puts "he is a big dog".sentence.linkages.first.to_s
|
18
|
+
# +---O*---+
|
19
|
+
# | +--Ds--+
|
20
|
+
# +Ss+ | +-A-+
|
21
|
+
# | | | | |
|
22
|
+
# he is a big dog
|
23
|
+
#
|
24
|
+
# # Find the verb in the sentence
|
25
|
+
# "he is a big dog".en.sentence.verb.to_s
|
26
|
+
# # => "is"
|
27
|
+
#
|
28
|
+
# # Combined infinitive + LinkParser: Find the infinitive form of the verb of the
|
29
|
+
# given sentence.
|
30
|
+
# "he is a big dog".en.sentence.verb.infinitive
|
31
|
+
# # => "be"
|
32
|
+
#
|
33
|
+
# # Find the direct object of the sentence
|
34
|
+
# "he is a big dog".en.sentence.object.to_s
|
35
|
+
# # => "dog"
|
36
|
+
#
|
37
|
+
# # Combine WordNet + LinkParser to find the definition of the direct object of
|
38
|
+
# # the sentence
|
39
|
+
# "he is a big dog".en.sentence.object.gloss
|
40
|
+
# # => "a member of the genus Canis (probably descended from the common wolf) that
|
41
|
+
# has been domesticated by man since prehistoric times; occurs in many breeds;
|
42
|
+
# \"the dog barked all night\""
|
43
|
+
#
|
44
|
+
# == Authors
|
45
|
+
#
|
46
|
+
# * Martin Chase <stillflame@FaerieMUD.org>
|
47
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
48
|
+
#
|
49
|
+
# :include: LICENSE
|
50
|
+
#
|
51
|
+
#--
|
52
|
+
#
|
53
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
54
|
+
#
|
55
|
+
module Linguistics::EN

  # Load state for the optional LinkParser library. The dictionary is created
  # lazily by ::lp_dict; @lp_error holds the LoadError if the require failed.
  @lp_dict  = nil
  @lp_error = nil
  @has_link_parser =
    begin
      require "linkparser"
      true
    rescue LoadError => load_failure
      @lp_error = load_failure
      false
    end


  #################################################################
  ###	M O D U L E   M E T H O D S
  #################################################################
  class << self

    ### Returns +true+ if LinkParser was loaded okay
    def has_link_parser?
      @has_link_parser
    end

    ### If #has_link_parser? returns +false+, this can be called to fetch the
    ### exception which was raised when trying to load LinkParser.
    def lp_error
      @lp_error
    end

    ### The instance of LinkParser used for all Linguistics LinkParser
    ### functions. It is built on first use and reused afterwards. Raises
    ### NotImplementedError if the library could not be loaded.
    def lp_dict
      if (failure = @lp_error)
        message = "LinkParser functions are not loaded: %s" % failure.message
        raise NotImplementedError, message
      end

      @lp_dict ||= LinkParser::Dictionary.new( :verbosity => 0 )
    end
  end


  #################################################################
  ###	L I N K P A R S E R   I N T E R F A C E
  #################################################################

  ###############
  module_function
  ###############

  ### Return a LinkParser::Sentence for the stringified +obj+.
  def sentence( obj )
    text = obj.to_s
    Linguistics::EN.lp_dict.parse( text )
  end

end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'linguistics/en'
|
4
|
+
|
5
|
+
# This file contains functions for finding relations for English words. It
|
6
|
+
# requires the Ruby-WordNet module to be installed; if it is not installed,
|
7
|
+
# calling the functions defined by this file will raise NotImplementedError
|
8
|
+
# exceptions. Requiring this file adds functions and constants to the
|
9
|
+
# Linguistics::EN module.
|
10
|
+
#
|
11
|
+
# == Synopsis
|
12
|
+
#
|
13
|
+
# # Test to be sure the WordNet module loaded okay.
|
14
|
+
# Linguistics::EN.has_wordnet?
|
15
|
+
# # => true
|
16
|
+
#
|
17
|
+
# # Fetch the default synset for the word "balance"
|
18
|
+
# "balance".synset
|
19
|
+
# # => #<WordNet::Synset:0x40376844 balance (noun): "a state of equilibrium"
|
20
|
+
# (derivations: 3, antonyms: 1, hypernyms: 1, hyponyms: 3)>
|
21
|
+
#
|
22
|
+
# # Fetch the synset for the first verb sense of "balance"
|
23
|
+
# "balance".en.synset( :verb )
|
24
|
+
# # => #<WordNet::Synset:0x4033f448 balance, equilibrate, equilibrize, equilibrise
|
25
|
+
# (verb): "bring into balance or equilibrium; "She has to balance work and her
|
26
|
+
# domestic duties"; "balance the two weights"" (derivations: 7, antonyms: 1,
|
27
|
+
# verbGroups: 2, hypernyms: 1, hyponyms: 5)>
|
28
|
+
#
|
29
|
+
# # Fetch the second noun sense
|
30
|
+
# "balance".en.synset( 2, :noun )
|
31
|
+
# # => #<WordNet::Synset:0x404ebb24 balance (noun): "a scale for weighing; depends
|
32
|
+
# on pull of gravity" (hypernyms: 1, hyponyms: 5)>
|
33
|
+
#
|
34
|
+
# # Fetch the second noun sense's hypernyms (more-general words, like a superclass)
|
35
|
+
# "balance".en.synset( 2, :noun ).hypernyms
|
36
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
37
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
38
|
+
# hyponyms: 2)>]
|
39
|
+
#
|
40
|
+
# # A simpler way of doing the same thing:
|
41
|
+
# "balance".en.hypernyms( 2, :noun )
|
42
|
+
# # => [#<WordNet::Synset:0x404e5620 scale, weighing machine (noun): "a measuring
|
43
|
+
# instrument for weighing; shows amount of mass" (derivations: 2, hypernyms: 1,
|
44
|
+
# hyponyms: 2)>]
|
45
|
+
#
|
46
|
+
# # Fetch the first hypernym's hypernyms
|
47
|
+
# "balance".en.synset( 2, :noun ).hypernyms.first.hypernyms
|
48
|
+
# # => [#<WordNet::Synset:0x404c60b8 measuring instrument, measuring system,
|
49
|
+
# measuring device (noun): "instrument that shows the extent or amount or quantity
|
50
|
+
# or degree of something" (hypernyms: 1, hyponyms: 83)>]
|
51
|
+
#
|
52
|
+
# # Find the synset to which both the second noun sense of "balance" and the
|
53
|
+
# # default sense of "shovel" belong.
|
54
|
+
# ("balance".en.synset( 2, :noun ) | "shovel".en.synset)
|
55
|
+
# # => #<WordNet::Synset:0x40473da4 instrumentality, instrumentation (noun): "an
|
56
|
+
# artifact (or system of artifacts) that is instrumental in accomplishing some
|
57
|
+
# end" (derivations: 1, hypernyms: 1, hyponyms: 13)>
|
58
|
+
#
|
59
|
+
# # Fetch just the words for the other kinds of "instruments"
|
60
|
+
# "instrument".en.hyponyms.collect {|synset| synset.words}.flatten
|
61
|
+
# # => ["analyzer", "analyser", "cautery", "cauterant", "drafting instrument",
|
62
|
+
# "extractor", "instrument of execution", "instrument of punishment", "measuring
|
63
|
+
# instrument", "measuring system", "measuring device", "medical instrument",
|
64
|
+
# "navigational instrument", "optical instrument", "plotter", "scientific
|
65
|
+
# instrument", "sonograph", "surveying instrument", "surveyor's instrument",
|
66
|
+
# "tracer", "weapon", "arm", "weapon system", "whip"]
|
67
|
+
#
|
68
|
+
#
|
69
|
+
# == Authors
|
70
|
+
#
|
71
|
+
# * Michael Granger <ged@FaerieMUD.org>
|
72
|
+
#
|
73
|
+
# :include: LICENSE
|
74
|
+
#
|
75
|
+
# == Version
|
76
|
+
#
|
77
|
+
# $Id$
|
78
|
+
#
|
79
|
+
module Linguistics::EN

  @has_wordnet = false
  @wn_error    = nil
  @wn_lexicon  = nil

  # Load WordNet if possible, saving the error that occurs if anything goes
  # wrong. The lexicon itself is opened lazily by ::wn_lexicon.
  begin
    require 'wordnet'
    @has_wordnet = true
  rescue LoadError => err
    @wn_error = err
  end


  #################################################################
  ###	M O D U L E   M E T H O D S
  #################################################################
  class << self

    ### Returns +true+ if WordNet was loaded okay
    def has_wordnet? ; @has_wordnet; end

    ### If #has_wordnet? returns +false+, this can be called to fetch the
    ### exception which was raised when WordNet was loaded.
    def wn_error ; @wn_error; end

    ### The instance of the WordNet::Lexicon used for all Linguistics WordNet
    ### functions. It is created on first use and reused afterwards. Raises
    ### NotImplementedError if the WordNet library could not be loaded.
    def wn_lexicon
      if @wn_error
        raise NotImplementedError,
          "WordNet functions are not loaded: %s" %
          @wn_error.message
      end

      @wn_lexicon ||= WordNet::Lexicon::new
    end

    ### Make a function that calls the method +meth+ on the synset of an input
    ### word. The generated function takes <tt>(word, pos, sense)</tt>, looks
    ### the synset up via ::synset, and returns +nil+ if no synset was found.
    def def_synset_function( meth )
      # define_method is private on older Rubies, so it is invoked from
      # inside the singleton class via instance_eval.
      (class << self; self; end).instance_eval do
        define_method( meth ) {|*args|
          word, pos, sense = *args
          raise ArgumentError,
            "wrong number of arguments (0 for 1)" unless word
          sense ||= 1

          syn = synset( word.to_s, pos, sense )
          return syn.nil? ? nil : syn.send( meth )
        }
      end
    end
  end



  #################################################################
  ###	W O R D N E T   I N T E R F A C E
  #################################################################

  ###############
  module_function
  ###############

  ### Look up the synset associated with the given word or collocation in the
  ### WordNet lexicon and return a WordNet::Synset object.
  ###
  ### +pos+ is the part of speech to search; if it is +nil+, each part of
  ### speech is tried in turn and the first hit is returned. +sense+ is the
  ### sense number. The two may also be given in the documented
  ### <tt>synset( 2, :noun )</tt> order: an Integer in the +pos+ slot is
  ### taken as the sense, and a non-Integer in the +sense+ slot is then taken
  ### as the part of speech.
  def synset( word, pos=nil, sense=1 )
    lex = Linguistics::EN::wn_lexicon

    # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2. When the
    # sense number arrives first, keep the part of speech that followed it
    # instead of discarding it.
    if pos.is_a?( Integer )
      pos, sense = (sense.is_a?( Integer ) ? nil : sense), pos
    end

    postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]
    syn = nil

    # The block parameter has its own name so that it does not shadow (or,
    # on Ruby 1.8, overwrite) the +pos+ argument.
    postries.each do |try_pos|
      syn = lex.lookup_synsets( word.to_s, try_pos, sense )
      break if syn
    end

    return syn
  end


  ### Look up all the synsets associated with the given word or collocation in
  ### the WordNet lexicon and return an Array of WordNet::Synset objects. If
  ### +pos+ is +nil+, return synsets for all parts of speech.
  def synsets( word, pos=nil )
    lex = Linguistics::EN::wn_lexicon
    postries = pos ? [pos] : [:noun, :verb, :adjective, :adverb, :other]

    syns = postries.collect {|try_pos|
      lex.lookup_synsets( word.to_s, try_pos )
    }

    return syns.flatten.compact
  end


  # Returns definitions and/or example sentences as a String.
  def_synset_function :gloss

  # Returns definitions and/or example sentences as an Array.
  def_synset_function :glosses

  # Return nouns or verbs that have the same hypernym as the receiver.
  def_synset_function :coordinates

  # Returns the Array of synonyms contained in the synset for the receiver.
  def_synset_function :words
  def_synset_function :synonyms

  # Returns the name of the lexicographer file that contains the raw data for
  # the receiver.
  def_synset_function :lex_info

  # :TODO: Finish these comments, and figure out how the hell to get the
  # methods to show up in RDoc.
  def_synset_function :frames


  # Returns the synsets for the receiver's antonyms, if any. Ex:
  # 'opaque'.en.synset.antonyms
  #   ==> [#<WordNet::Synset:0x010ca614/454927 clear (adjective): "free
  #        from cloudiness; allowing light to pass through; "clear water";
  #        "clear plastic bags"; "clear glass"; "the air is clear and clean""
  #        (similarTos: 6, attributes: 1, derivations: 2, antonyms: 1,
  #        seeAlsos: 1)>]
  def_synset_function :antonyms

  def_synset_function :hypernyms
  def_synset_function :instance_hypernyms
  def_synset_function :entailment
  def_synset_function :hyponyms
  def_synset_function :instance_hyponyms
  def_synset_function :causes
  def_synset_function :verbgroups
  def_synset_function :similar_to
  def_synset_function :participles
  def_synset_function :pertainyms
  def_synset_function :attributes
  def_synset_function :derived_from
  def_synset_function :see_also
  def_synset_function :functions

  def_synset_function :meronyms
  def_synset_function :member_meronyms
  def_synset_function :stuff_meronyms
  def_synset_function :portion_meronyms
  def_synset_function :component_meronyms
  def_synset_function :feature_meronyms
  def_synset_function :phase_meronyms
  def_synset_function :place_meronyms

  def_synset_function :holonyms
  def_synset_function :member_holonyms
  def_synset_function :stuff_holonyms
  def_synset_function :portion_holonyms
  def_synset_function :component_holonyms
  def_synset_function :feature_holonyms
  def_synset_function :phase_holonyms
  def_synset_function :place_holonyms

  def_synset_function :domains
  def_synset_function :category_domains
  def_synset_function :region_domains
  def_synset_function :usage_domains

  def_synset_function :members
  def_synset_function :category_members
  def_synset_function :region_members
  def_synset_function :usage_members


end # module Linguistics::EN
|
257
|
+
|
@@ -0,0 +1,461 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
#coding: utf-8
|
3
|
+
|
4
|
+
#
|
5
|
+
# linguistics/iso639.rb - A hash of International 2- and 3-letter
|
6
|
+
# ISO639-1 and ISO639-2 language codes. Each entry has two keys:
|
7
|
+
#
|
8
|
+
# [<tt>:codes</tt>]
|
9
|
+
# All of the codes known for this language
|
10
|
+
# [<tt>:desc</tt>]
|
11
|
+
# The English-language description of the language.
|
12
|
+
#
|
13
|
+
# :include: LICENSE
|
14
|
+
#
|
15
|
+
#--
|
16
|
+
#
|
17
|
+
# Please see the file LICENSE in the base directory for licensing details.
|
18
|
+
#
|
19
|
+
module Linguistics

  # Hash of ISO639 2- and 3-letter language codes. Every code known for a
  # language maps to the same entry Hash, which has two keys: <tt>:codes</tt>
  # (all of the codes for that language) and <tt>:desc</tt> (its
  # English-language description).
  LanguageCodes = {}

  # Read through the source for this file, capturing everything between the
  # end-of-program marker line and the __END_DATA__ line that follows it.
  # Each data line holds the codes (separated by "/" or whitespace) in its
  # first 15 columns and the description in the remainder.
  in_data_section = false
  File.readlines( __FILE__ ).each do |line|
    case line
    when /^__END_DATA__$/
      in_data_section = false

    when /^__END__$/
      in_data_section = true

    else
      next unless in_data_section
      # Tolerate blank lines in the table instead of failing on a nil
      # description.
      next if line.strip.empty?

      codes = line[0, 15].split( %r{/|\s+} ).reject {|code| code.empty? }
      # Slice to the end of the line and let #strip drop the newline, so a
      # final line without a trailing newline keeps its last character.
      desc = line[15..-1].to_s.strip

      entry = {
        :desc  => desc,
        :codes => codes.dup,
      }

      codes.each do |code|
        if LanguageCodes.key?( code )
          raise "Duplicate language code #{code}: " \
            "(#{LanguageCodes[code][:desc]})"
        end
        LanguageCodes[ code ] = entry
      end
    end
  end
end
|
55
|
+
|
56
|
+
__END__
|
57
|
+
abk ab Abkhazian
|
58
|
+
ace Achinese
|
59
|
+
ach Acoli
|
60
|
+
ada Adangme
|
61
|
+
aar aa Afar
|
62
|
+
afh Afrihili
|
63
|
+
afr af Afrikaans
|
64
|
+
afa Afro-Asiatic (Other)
|
65
|
+
aka Akan
|
66
|
+
akk Akkadian
|
67
|
+
alb/sqi sq Albanian
|
68
|
+
ale Aleut
|
69
|
+
alg Algonquian languages
|
70
|
+
tut Altaic (Other)
|
71
|
+
amh am Amharic
|
72
|
+
apa Apache languages
|
73
|
+
ara ar Arabic
|
74
|
+
arc Aramaic
|
75
|
+
arp Arapaho
|
76
|
+
arn Araucanian
|
77
|
+
arw Arawak
|
78
|
+
arm/hye hy Armenian
|
79
|
+
art Artificial (Other)
|
80
|
+
asm as Assamese
|
81
|
+
ath Athapascan languages
|
82
|
+
map Austronesian (Other)
|
83
|
+
ava Avaric
|
84
|
+
ave Avestan
|
85
|
+
awa Awadhi
|
86
|
+
aym ay Aymara
|
87
|
+
aze az Azerbaijani
|
88
|
+
nah Aztec
|
89
|
+
ban Balinese
|
90
|
+
bat Baltic (Other)
|
91
|
+
bal Baluchi
|
92
|
+
bam Bambara
|
93
|
+
bai Bamileke languages
|
94
|
+
bad Banda
|
95
|
+
bnt Bantu (Other)
|
96
|
+
bas Basa
|
97
|
+
bak ba Bashkir
|
98
|
+
baq/eus eu Basque
|
99
|
+
bej Beja
|
100
|
+
bem Bemba
|
101
|
+
ben bn Bengali
|
102
|
+
ber Berber (Other)
|
103
|
+
bho Bhojpuri
|
104
|
+
bih bh Bihari
|
105
|
+
bik Bikol
|
106
|
+
bin Bini
|
107
|
+
bis bi Bislama
|
108
|
+
bra Braj
|
109
|
+
bre br Breton
|
110
|
+
bug Buginese
|
111
|
+
bul bg Bulgarian
|
112
|
+
bua Buriat
|
113
|
+
bur/mya my Burmese
|
114
|
+
bel be Byelorussian
|
115
|
+
cad Caddo
|
116
|
+
car Carib
|
117
|
+
cat ca Catalan
|
118
|
+
cau Caucasian (Other)
|
119
|
+
ceb Cebuano
|
120
|
+
cel Celtic (Other)
|
121
|
+
cai Central American Indian (Other)
|
122
|
+
chg Chagatai
|
123
|
+
cha Chamorro
|
124
|
+
che Chechen
|
125
|
+
chr Cherokee
|
126
|
+
chy Cheyenne
|
127
|
+
chb Chibcha
|
128
|
+
chi/zho zh Chinese
|
129
|
+
chn Chinook jargon
|
130
|
+
cho Choctaw
|
131
|
+
chu Church Slavic
|
132
|
+
chv Chuvash
|
133
|
+
cop Coptic
|
134
|
+
cor Cornish
|
135
|
+
cos co Corsican
|
136
|
+
cre Cree
|
137
|
+
mus Creek
|
138
|
+
crp Creoles and Pidgins (Other)
|
139
|
+
cpe Creoles and Pidgins, English-based (Other)
|
140
|
+
cpf Creoles and Pidgins, French-based (Other)
|
141
|
+
cpp Creoles and Pidgins, Portuguese-based (Other)
|
142
|
+
cus Cushitic (Other)
|
143
|
+
hr Croatian
|
144
|
+
ces/cze cs Czech
|
145
|
+
dak Dakota
|
146
|
+
dan da Danish
|
147
|
+
del Delaware
|
148
|
+
din Dinka
|
149
|
+
div Divehi
|
150
|
+
doi Dogri
|
151
|
+
dra Dravidian (Other)
|
152
|
+
dua Duala
|
153
|
+
dut/nla nl Dutch
|
154
|
+
dum Dutch, Middle (ca. 1050-1350)
|
155
|
+
dyu Dyula
|
156
|
+
dzo dz Dzongkha
|
157
|
+
efi Efik
|
158
|
+
egy Egyptian (Ancient)
|
159
|
+
eka Ekajuk
|
160
|
+
elx Elamite
|
161
|
+
eng en English
|
162
|
+
enm English, Middle (ca. 1100-1500)
|
163
|
+
ang English, Old (ca. 450-1100)
|
164
|
+
esk Eskimo (Other)
|
165
|
+
epo eo Esperanto
|
166
|
+
est et Estonian
|
167
|
+
ewe Ewe
|
168
|
+
ewo Ewondo
|
169
|
+
fan Fang
|
170
|
+
fat Fanti
|
171
|
+
fao fo Faroese
|
172
|
+
fij fj Fijian
|
173
|
+
fin fi Finnish
|
174
|
+
fiu Finno-Ugrian (Other)
|
175
|
+
fon Fon
|
176
|
+
fra/fre fr French
|
177
|
+
frm French, Middle (ca. 1400-1600)
|
178
|
+
fro French, Old (842- ca. 1400)
|
179
|
+
fry fy Frisian
|
180
|
+
ful Fulah
|
181
|
+
gaa Ga
|
182
|
+
gae/gdh Gaelic (Scots)
|
183
|
+
glg gl Gallegan
|
184
|
+
lug Ganda
|
185
|
+
gay Gayo
|
186
|
+
gez Geez
|
187
|
+
geo/kat ka Georgian
|
188
|
+
deu/ger de German
|
189
|
+
gmh German, Middle High (ca. 1050-1500)
|
190
|
+
goh German, Old High (ca. 750-1050)
|
191
|
+
gem Germanic (Other)
|
192
|
+
gil Gilbertese
|
193
|
+
gon Gondi
|
194
|
+
got Gothic
|
195
|
+
grb Grebo
|
196
|
+
grc Greek, Ancient (to 1453)
|
197
|
+
ell/gre el Greek, Modern (1453-)
|
198
|
+
kal kl Greenlandic
|
199
|
+
grn gn Guarani
|
200
|
+
guj gu Gujarati
|
201
|
+
hai Haida
|
202
|
+
hau ha Hausa
|
203
|
+
haw Hawaiian
|
204
|
+
heb he Hebrew
|
205
|
+
her Herero
|
206
|
+
hil Hiligaynon
|
207
|
+
him Himachali
|
208
|
+
hin hi Hindi
|
209
|
+
hmo Hiri Motu
|
210
|
+
hun hu Hungarian
|
211
|
+
hup Hupa
|
212
|
+
iba Iban
|
213
|
+
ice/isl is Icelandic
|
214
|
+
ibo Igbo
|
215
|
+
ijo Ijo
|
216
|
+
ilo Iloko
|
217
|
+
inc Indic (Other)
|
218
|
+
ine Indo-European (Other)
|
219
|
+
ind id Indonesian
|
220
|
+
ina ia Interlingua (International Auxiliary language Association)
|
221
|
+
ile Interlingue
|
222
|
+
iku iu Inuktitut
|
223
|
+
ipk ik Inupiak
|
224
|
+
ira Iranian (Other)
|
225
|
+
gai/iri ga Irish
|
226
|
+
sga Irish, Old (to 900)
|
227
|
+
mga Irish, Middle (900 - 1200)
|
228
|
+
iro Iroquoian languages
|
229
|
+
ita it Italian
|
230
|
+
jpn ja Japanese
|
231
|
+
jav/jaw jv/jw Javanese
|
232
|
+
jrb Judeo-Arabic
|
233
|
+
jpr Judeo-Persian
|
234
|
+
kab Kabyle
|
235
|
+
kac Kachin
|
236
|
+
kam Kamba
|
237
|
+
kan kn Kannada
|
238
|
+
kau Kanuri
|
239
|
+
kaa Kara-Kalpak
|
240
|
+
kar Karen
|
241
|
+
kas ks Kashmiri
|
242
|
+
kaw Kawi
|
243
|
+
kaz kk Kazakh
|
244
|
+
kha Khasi
|
245
|
+
khm km Khmer
|
246
|
+
khi Khoisan (Other)
|
247
|
+
kho Khotanese
|
248
|
+
kik Kikuyu
|
249
|
+
kin rw Kinyarwanda
|
250
|
+
kir ky Kirghiz
|
251
|
+
kom Komi
|
252
|
+
kon Kongo
|
253
|
+
kok Konkani
|
254
|
+
kor ko Korean
|
255
|
+
kpe Kpelle
|
256
|
+
kro Kru
|
257
|
+
kua Kuanyama
|
258
|
+
kum Kumyk
|
259
|
+
kur ku Kurdish
|
260
|
+
kru Kurukh
|
261
|
+
kus Kusaie
|
262
|
+
kut Kutenai
|
263
|
+
lad Ladino
|
264
|
+
lah Lahnda
|
265
|
+
lam Lamba
|
266
|
+
oci oc Langue d'Oc (post 1500)
|
267
|
+
lao lo Lao
|
268
|
+
lat la Latin
|
269
|
+
lav lv Latvian
|
270
|
+
ltz Letzeburgesch
|
271
|
+
lez Lezghian
|
272
|
+
lin ln Lingala
|
273
|
+
lit lt Lithuanian
|
274
|
+
loz Lozi
|
275
|
+
lub Luba-Katanga
|
276
|
+
lui Luiseno
|
277
|
+
lun Lunda
|
278
|
+
luo Luo (Kenya and Tanzania)
|
279
|
+
mac/mke mk Macedonian
|
280
|
+
mad Madurese
|
281
|
+
mag Magahi
|
282
|
+
mai Maithili
|
283
|
+
mak Makasar
|
284
|
+
mlg mg Malagasy
|
285
|
+
may/msa ms Malay
|
286
|
+
mal ml Malayalam
|
287
|
+
mlt mt Maltese
|
288
|
+
man Mandingo
|
289
|
+
mni Manipuri
|
290
|
+
mno Manobo languages
|
291
|
+
max Manx
|
292
|
+
mao/mri mi Maori
|
293
|
+
mar mr Marathi
|
294
|
+
chm Mari
|
295
|
+
mah Marshall
|
296
|
+
mwr Marwari
|
297
|
+
mas Masai
|
298
|
+
myn Mayan languages
|
299
|
+
men Mende
|
300
|
+
mic Micmac
|
301
|
+
min Minangkabau
|
302
|
+
mis Miscellaneous (Other)
|
303
|
+
moh Mohawk
|
304
|
+
mol mo Moldavian
|
305
|
+
mkh Mon-Kmer (Other)
|
306
|
+
lol Mongo
|
307
|
+
mon mn Mongolian
|
308
|
+
mos Mossi
|
309
|
+
mul Multiple languages
|
310
|
+
mun Munda languages
|
311
|
+
nau na Nauru
|
312
|
+
nav Navajo
|
313
|
+
nde Ndebele, North
|
314
|
+
nbl Ndebele, South
|
315
|
+
ndo Ndongo
|
316
|
+
nep ne Nepali
|
317
|
+
new Newari
|
318
|
+
nic Niger-Kordofanian (Other)
|
319
|
+
ssa Nilo-Saharan (Other)
|
320
|
+
niu Niuean
|
321
|
+
non Norse, Old
|
322
|
+
nai North American Indian (Other)
|
323
|
+
nor no Norwegian
|
324
|
+
nno Norwegian (Nynorsk)
|
325
|
+
nub Nubian languages
|
326
|
+
nym Nyamwezi
|
327
|
+
nya Nyanja
|
328
|
+
nyn Nyankole
|
329
|
+
nyo Nyoro
|
330
|
+
nzi Nzima
|
331
|
+
oji Ojibwa
|
332
|
+
ori or Oriya
|
333
|
+
orm om Oromo
|
334
|
+
osa Osage
|
335
|
+
oss Ossetic
|
336
|
+
oto Otomian languages
|
337
|
+
pal Pahlavi
|
338
|
+
pau Palauan
|
339
|
+
pli Pali
|
340
|
+
pam Pampanga
|
341
|
+
pag Pangasinan
|
342
|
+
pan pa Panjabi
|
343
|
+
pap Papiamento
|
344
|
+
paa Papuan-Australian (Other)
|
345
|
+
fas/per fa Persian
|
346
|
+
peo Persian, Old (ca 600 - 400 B.C.)
|
347
|
+
phn Phoenician
|
348
|
+
pol pl Polish
|
349
|
+
pon Ponape
|
350
|
+
por pt Portuguese
|
351
|
+
pra Prakrit languages
|
352
|
+
pro Provencal, Old (to 1500)
|
353
|
+
pus ps Pushto
|
354
|
+
que qu Quechua
|
355
|
+
roh rm Rhaeto-Romance
|
356
|
+
raj Rajasthani
|
357
|
+
rar Rarotongan
|
358
|
+
roa Romance (Other)
|
359
|
+
ron/rum ro Romanian
|
360
|
+
rom Romany
|
361
|
+
run rn Rundi
|
362
|
+
rus ru Russian
|
363
|
+
sal Salishan languages
|
364
|
+
sam Samaritan Aramaic
|
365
|
+
smi Sami languages
|
366
|
+
smo sm Samoan
|
367
|
+
sad Sandawe
|
368
|
+
sag sg Sango
|
369
|
+
san sa Sanskrit
|
370
|
+
srd Sardinian
|
371
|
+
sco Scots
|
372
|
+
sel Selkup
|
373
|
+
sem Semitic (Other)
|
374
|
+
sr Serbian
|
375
|
+
scr sh Serbo-Croatian
|
376
|
+
srr Serer
|
377
|
+
shn Shan
|
378
|
+
sna sn Shona
|
379
|
+
sid Sidamo
|
380
|
+
bla Siksika
|
381
|
+
snd sd Sindhi
|
382
|
+
sin si Singhalese
|
383
|
+
sit Sino-Tibetan (Other)
|
384
|
+
sio Siouan languages
|
385
|
+
sla Slavic (Other)
|
386
|
+
ss Siswati
|
387
|
+
slk/slo sk Slovak
|
388
|
+
slv sl Slovenian
|
389
|
+
sog Sogdian
|
390
|
+
som so Somali
|
391
|
+
son Songhai
|
392
|
+
wen Sorbian languages
|
393
|
+
nso Sotho, Northern
|
394
|
+
sot st Sotho, Southern
|
395
|
+
sai South American Indian (Other)
|
396
|
+
esl/spa es Spanish
|
397
|
+
suk Sukuma
|
398
|
+
sux Sumerian
|
399
|
+
sun su Sundanese
|
400
|
+
sus Susu
|
401
|
+
swa sw Swahili
|
402
|
+
ssw Swazi
|
403
|
+
sve/swe sv Swedish
|
404
|
+
syr Syriac
|
405
|
+
tgl tl Tagalog
|
406
|
+
tah Tahitian
|
407
|
+
tgk tg Tajik
|
408
|
+
tmh Tamashek
|
409
|
+
tam ta Tamil
|
410
|
+
tat tt Tatar
|
411
|
+
tel te Telugu
|
412
|
+
ter Tereno
|
413
|
+
tha th Thai
|
414
|
+
bod/tib bo Tibetan
|
415
|
+
tig Tigre
|
416
|
+
tir ti Tigrinya
|
417
|
+
tem Timne
|
418
|
+
tiv Tivi
|
419
|
+
tli Tlingit
|
420
|
+
tog to Tonga (Nyasa)
|
421
|
+
ton Tonga (Tonga Islands)
|
422
|
+
tru Truk
|
423
|
+
tsi Tsimshian
|
424
|
+
tso ts Tsonga
|
425
|
+
tsn tn Tswana
|
426
|
+
tum Tumbuka
|
427
|
+
tur tr Turkish
|
428
|
+
ota Turkish, Ottoman (1500 - 1928)
|
429
|
+
tuk tk Turkmen
|
430
|
+
tyv Tuvinian
|
431
|
+
twi tw Twi
|
432
|
+
uga Ugaritic
|
433
|
+
uig ug Uighur
|
434
|
+
ukr uk Ukrainian
|
435
|
+
umb Umbundu
|
436
|
+
und Undetermined
|
437
|
+
urd ur Urdu
|
438
|
+
uzb uz Uzbek
|
439
|
+
vai Vai
|
440
|
+
ven Venda
|
441
|
+
vie vi Vietnamese
|
442
|
+
vol vo Volapük
|
443
|
+
vot Votic
|
444
|
+
wak Wakashan languages
|
445
|
+
wal Walamo
|
446
|
+
war Waray
|
447
|
+
was Washo
|
448
|
+
cym/wel cy Welsh
|
449
|
+
wol wo Wolof
|
450
|
+
xho xh Xhosa
|
451
|
+
sah Yakut
|
452
|
+
yao Yao
|
453
|
+
yap Yap
|
454
|
+
yid yi Yiddish
|
455
|
+
yor yo Yoruba
|
456
|
+
zap Zapotec
|
457
|
+
zen Zenaga
|
458
|
+
zha za Zhuang
|
459
|
+
zul zu Zulu
|
460
|
+
zun Zuni
|
461
|
+
__END_DATA__
|