traject 3.1.0.rc1 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +26 -0
- data/CHANGES.md +46 -0
- data/README.md +3 -1
- data/doc/settings.md +5 -1
- data/doc/xml.md +10 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/indexer.rb +12 -4
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +15 -12
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +8 -1
- data/lib/traject/oai_pmh_nokogiri_reader.rb +9 -3
- data/lib/traject/solr_json_writer.rb +58 -17
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +51 -0
- data/test/delimited_writer_test.rb +14 -16
- data/test/indexer/class_level_configuration_test.rb +23 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/nokogiri_reader_test.rb +10 -0
- data/test/solr_json_writer_test.rb +65 -0
- data/test/test_support/date_resort_to_264.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +32 -23
- data/.travis.yml +0 -16
@@ -115,9 +115,15 @@ module Traject
|
|
115
115
|
# @return [HTTP::Client] from http.rb gem
|
116
116
|
def http_client
|
117
117
|
@http_client ||= begin
|
118
|
-
|
119
|
-
|
120
|
-
|
118
|
+
client = nil
|
119
|
+
|
120
|
+
if HTTP::VERSION.split(".").first.to_i > 3
|
121
|
+
client = HTTP.timeout(timeout)
|
122
|
+
else
|
123
|
+
# timeout settings on http.rb 3.x are a bit of a mess.
|
124
|
+
# https://github.com/httprb/http/issues/488
|
125
|
+
client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
|
126
|
+
end
|
121
127
|
|
122
128
|
if settings["oai_pmh.try_gzip"]
|
123
129
|
client = client.use(:auto_inflate).headers("accept-encoding" => "gzip;q=1.0, identity;q=0.5")
|
@@ -41,10 +41,12 @@ require 'concurrent' # for atomic_fixnum
|
|
41
41
|
#
|
42
42
|
# ## Relevant settings
|
43
43
|
#
|
44
|
-
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into
|
44
|
+
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into.
|
45
|
+
# (Can include embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
45
46
|
#
|
46
47
|
# * solr.update_url: The actual update url. If unset, we'll first see if
|
47
|
-
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update"
|
48
|
+
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update". (Can include
|
49
|
+
# embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
48
50
|
#
|
49
51
|
# * solr_writer.batch_size: How big a batch to send to solr. Default is 100.
|
50
52
|
# My tests indicate that this setting doesn't change overall index speed by a ton.
|
@@ -101,12 +103,17 @@ class Traject::SolrJsonWriter
|
|
101
103
|
def initialize(argSettings)
|
102
104
|
@settings = Traject::Indexer::Settings.new(argSettings)
|
103
105
|
|
106
|
+
|
104
107
|
# Set max errors
|
105
108
|
@max_skipped = (@settings['solr_writer.max_skipped'] || DEFAULT_MAX_SKIPPED).to_i
|
106
109
|
if @max_skipped < 0
|
107
110
|
@max_skipped = nil
|
108
111
|
end
|
109
112
|
|
113
|
+
|
114
|
+
# Figure out where to send updates, and if with basic auth
|
115
|
+
@solr_update_url, basic_auth_user, basic_auth_password = self.determine_solr_update_url
|
116
|
+
|
110
117
|
@http_client = if @settings["solr_json_writer.http_client"]
|
111
118
|
@settings["solr_json_writer.http_client"]
|
112
119
|
else
|
@@ -114,6 +121,11 @@ class Traject::SolrJsonWriter
|
|
114
121
|
if @settings["solr_writer.http_timeout"]
|
115
122
|
client.connect_timeout = client.receive_timeout = client.send_timeout = @settings["solr_writer.http_timeout"]
|
116
123
|
end
|
124
|
+
|
125
|
+
if basic_auth_user || basic_auth_password
|
126
|
+
client.set_auth(@solr_update_url, basic_auth_user, basic_auth_password)
|
127
|
+
end
|
128
|
+
|
117
129
|
client
|
118
130
|
end
|
119
131
|
|
@@ -137,13 +149,11 @@ class Traject::SolrJsonWriter
|
|
137
149
|
# this the new default writer.
|
138
150
|
@commit_on_close = (settings["solr_writer.commit_on_close"] || settings["solrj_writer.commit_on_close"]).to_s == "true"
|
139
151
|
|
140
|
-
# Figure out where to send updates
|
141
|
-
@solr_update_url = self.determine_solr_update_url
|
142
152
|
|
143
153
|
@solr_update_args = settings["solr_writer.solr_update_args"]
|
144
154
|
@commit_solr_update_args = settings["solr_writer.commit_solr_update_args"]
|
145
155
|
|
146
|
-
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
156
|
+
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' #{"(with HTTP basic auth)" if basic_auth_user || basic_auth_password}in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
147
157
|
end
|
148
158
|
|
149
159
|
|
@@ -185,6 +195,9 @@ class Traject::SolrJsonWriter
|
|
185
195
|
# @param [Array<Traject::Indexer::Context>] an array of contexts
|
186
196
|
def send_batch(batch)
|
187
197
|
return if batch.empty?
|
198
|
+
|
199
|
+
logger.debug("#{self.class.name}: sending batch of #{batch.size} to Solr")
|
200
|
+
|
188
201
|
json_package = JSON.generate(batch.map { |c| c.output_hash })
|
189
202
|
|
190
203
|
begin
|
@@ -209,12 +222,15 @@ class Traject::SolrJsonWriter
|
|
209
222
|
# Send a single context to Solr, logging an error if need be
|
210
223
|
# @param [Traject::Indexer::Context] c The context whose document you want to send
|
211
224
|
def send_single(c)
|
225
|
+
logger.debug("#{self.class.name}: sending single record to Solr: #{c.output_hash}")
|
226
|
+
|
212
227
|
json_package = JSON.generate([c.output_hash])
|
213
228
|
begin
|
214
|
-
|
229
|
+
post_url = solr_update_url_with_query(@solr_update_args)
|
230
|
+
resp = @http_client.post post_url, json_package, "Content-type" => "application/json"
|
215
231
|
|
216
232
|
unless resp.status == 200
|
217
|
-
raise BadHttpResponse.new("Unexpected HTTP response status #{resp.status}", resp)
|
233
|
+
raise BadHttpResponse.new("Unexpected HTTP response status #{resp.status} from POST #{post_url}", resp)
|
218
234
|
end
|
219
235
|
|
220
236
|
# Catch Timeouts and network errors -- as well as non-200 http responses --
|
@@ -234,7 +250,7 @@ class Traject::SolrJsonWriter
|
|
234
250
|
if @max_skipped and skipped_record_count > @max_skipped
|
235
251
|
# re-raising in rescue means the last encountered error will be available as #cause
|
236
252
|
# on raised exception, a feature in ruby 2.1+.
|
237
|
-
raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting")
|
253
|
+
raise MaxSkippedRecordsExceeded.new("#{self.class.name}: Exceeded maximum number of skipped records (#{@max_skipped}): aborting: #{exception.message}")
|
238
254
|
end
|
239
255
|
end
|
240
256
|
end
|
@@ -255,6 +271,8 @@ class Traject::SolrJsonWriter
|
|
255
271
|
# There is no built-in way to direct a record to be deleted from an indexing config
|
256
272
|
# file at the moment, this is just a loose method on the writer.
|
257
273
|
def delete(id)
|
274
|
+
logger.debug("#{self.class.name}: Sending delete to Solr for #{id}")
|
275
|
+
|
258
276
|
json_package = {delete: id}
|
259
277
|
resp = @http_client.post solr_update_url_with_query(@solr_update_args), JSON.generate(json_package), "Content-type" => "application/json"
|
260
278
|
if resp.status != 200
|
@@ -262,6 +280,13 @@ class Traject::SolrJsonWriter
|
|
262
280
|
end
|
263
281
|
end
|
264
282
|
|
283
|
+
# Send a delete all query.
|
284
|
+
#
|
285
|
+
# This method takes no params and will not automatically commit the deletes.
|
286
|
+
# @example @writer.delete_all!
|
287
|
+
def delete_all!
|
288
|
+
delete(query: "*:*")
|
289
|
+
end
|
265
290
|
|
266
291
|
# Get the logger from the settings, or default to an effectively null logger
|
267
292
|
def logger
|
@@ -282,14 +307,16 @@ class Traject::SolrJsonWriter
|
|
282
307
|
@thread_pool.maybe_in_thread_pool { send_batch(batch) }
|
283
308
|
end
|
284
309
|
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
310
|
+
if @thread_pool_size && @thread_pool_size > 0
|
311
|
+
# Wait for shutdown, and time it.
|
312
|
+
logger.debug "#{self.class.name}: Shutting down thread pool, waiting if needed..."
|
313
|
+
elapsed = @thread_pool.shutdown_and_wait
|
314
|
+
if elapsed > 60
|
315
|
+
logger.warn "Waited #{elapsed} seconds for all threads, you may want to increase solr_writer.thread_pool (currently #{@settings["solr_writer.thread_pool"]})"
|
316
|
+
end
|
317
|
+
logger.debug "#{self.class.name}: Thread pool shutdown complete"
|
318
|
+
logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0
|
290
319
|
end
|
291
|
-
logger.debug "#{self.class.name}: Thread pool shutdown complete"
|
292
|
-
logger.warn "#{self.class.name}: #{skipped_record_count} skipped records" if skipped_record_count > 0
|
293
320
|
|
294
321
|
# check again now that we've waited, there could still be some
|
295
322
|
# that didn't show up before.
|
@@ -345,13 +372,27 @@ class Traject::SolrJsonWriter
|
|
345
372
|
end
|
346
373
|
|
347
374
|
|
348
|
-
# Relatively complex logic to determine if we have a valid URL and what it is
|
375
|
+
# Relatively complex logic to determine if we have a valid URL and what it is,
|
376
|
+
# and if we have basic_auth info
|
377
|
+
#
|
378
|
+
# Empties out user and password embedded in URI returned, to help avoid logging it.
|
379
|
+
#
|
380
|
+
# @return [Array] [update_url, basic_auth_user, basic_auth_password]
|
349
381
|
def determine_solr_update_url
|
350
|
-
if settings['solr.update_url']
|
382
|
+
url = if settings['solr.update_url']
|
351
383
|
check_solr_update_url(settings['solr.update_url'])
|
352
384
|
else
|
353
385
|
derive_solr_update_url_from_solr_url(settings['solr.url'])
|
354
386
|
end
|
387
|
+
|
388
|
+
parsed_uri = URI.parse(url)
|
389
|
+
user_from_uri, password_from_uri = parsed_uri.user, parsed_uri.password
|
390
|
+
parsed_uri.user, parsed_uri.password = nil, nil
|
391
|
+
|
392
|
+
basic_auth_user = @settings["solr_writer.basic_auth_user"] || user_from_uri
|
393
|
+
basic_auth_password = @settings["solr_writer.basic_auth_password"] || password_from_uri
|
394
|
+
|
395
|
+
return [parsed_uri.to_s, basic_auth_user, basic_auth_password]
|
355
396
|
end
|
356
397
|
|
357
398
|
|
data/lib/traject/version.rb
CHANGED
@@ -10,18 +10,21 @@ ady: Adygei
|
|
10
10
|
afa: Afroasiatic (Other)
|
11
11
|
afh: Afrihili (Artificial language)
|
12
12
|
afr: Afrikaans
|
13
|
-
|
13
|
+
ain: Ainu
|
14
|
+
ajm: Aljamía
|
14
15
|
aka: Akan
|
15
16
|
akk: Akkadian
|
16
17
|
alb: Albanian
|
17
18
|
ale: Aleut
|
18
19
|
alg: Algonquian (Other)
|
20
|
+
alt: Altai
|
19
21
|
amh: Amharic
|
20
|
-
ang: English, Old (ca. 450-1100)
|
22
|
+
ang: "English, Old (ca. 450-1100)"
|
23
|
+
anp: Angika
|
21
24
|
apa: Apache languages
|
22
25
|
ara: Arabic
|
23
26
|
arc: Aramaic
|
24
|
-
arg: Aragonese
|
27
|
+
arg: Aragonese
|
25
28
|
arm: Armenian
|
26
29
|
arn: Mapuche
|
27
30
|
arp: Arapaho
|
@@ -36,7 +39,7 @@ ave: Avestan
|
|
36
39
|
awa: Awadhi
|
37
40
|
aym: Aymara
|
38
41
|
aze: Azerbaijani
|
39
|
-
bad: Banda
|
42
|
+
bad: Banda languages
|
40
43
|
bai: Bamileke languages
|
41
44
|
bak: Bashkir
|
42
45
|
bal: Baluchi
|
@@ -51,7 +54,7 @@ bem: Bemba
|
|
51
54
|
ben: Bengali
|
52
55
|
ber: Berber (Other)
|
53
56
|
bho: Bhojpuri
|
54
|
-
bih: Bihari
|
57
|
+
bih: Bihari (Other)
|
55
58
|
bik: Bikol
|
56
59
|
bin: Edo
|
57
60
|
bis: Bislama
|
@@ -65,6 +68,7 @@ bua: Buriat
|
|
65
68
|
bug: Bugis
|
66
69
|
bul: Bulgarian
|
67
70
|
bur: Burmese
|
71
|
+
byn: Bilin
|
68
72
|
cad: Caddo
|
69
73
|
cai: Central American Indian (Other)
|
70
74
|
cam: Khmer
|
@@ -78,7 +82,7 @@ chb: Chibcha
|
|
78
82
|
che: Chechen
|
79
83
|
chg: Chagatai
|
80
84
|
chi: Chinese
|
81
|
-
chk:
|
85
|
+
chk: Chuukese
|
82
86
|
chm: Mari
|
83
87
|
chn: Chinook jargon
|
84
88
|
cho: Choctaw
|
@@ -88,15 +92,17 @@ chu: Church Slavic
|
|
88
92
|
chv: Chuvash
|
89
93
|
chy: Cheyenne
|
90
94
|
cmc: Chamic languages
|
95
|
+
cnr: Montenegrin
|
91
96
|
cop: Coptic
|
92
97
|
cor: Cornish
|
93
98
|
cos: Corsican
|
94
|
-
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
-
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
-
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
99
|
+
cpe: "Creoles and Pidgins, English-based (Other)"
|
100
|
+
cpf: "Creoles and Pidgins, French-based (Other)"
|
101
|
+
cpp: "Creoles and Pidgins, Portuguese-based (Other)"
|
97
102
|
cre: Cree
|
98
103
|
crh: Crimean Tatar
|
99
104
|
crp: Creoles and Pidgins (Other)
|
105
|
+
csb: Kashubian
|
100
106
|
cus: Cushitic (Other)
|
101
107
|
cze: Czech
|
102
108
|
dak: Dakota
|
@@ -104,14 +110,15 @@ dan: Danish
|
|
104
110
|
dar: Dargwa
|
105
111
|
day: Dayak
|
106
112
|
del: Delaware
|
107
|
-
den:
|
113
|
+
den: Slavey
|
108
114
|
dgr: Dogrib
|
109
115
|
din: Dinka
|
110
116
|
div: Divehi
|
111
117
|
doi: Dogri
|
112
118
|
dra: Dravidian (Other)
|
119
|
+
dsb: Lower Sorbian
|
113
120
|
dua: Duala
|
114
|
-
dum: Dutch, Middle (ca. 1050-1350)
|
121
|
+
dum: "Dutch, Middle (ca. 1050-1350)"
|
115
122
|
dut: Dutch
|
116
123
|
dyu: Dyula
|
117
124
|
dzo: Dzongkha
|
@@ -120,7 +127,7 @@ egy: Egyptian
|
|
120
127
|
eka: Ekajuk
|
121
128
|
elx: Elamite
|
122
129
|
eng: English
|
123
|
-
enm: English, Middle (1100-1500)
|
130
|
+
enm: "English, Middle (1100-1500)"
|
124
131
|
epo: Esperanto
|
125
132
|
esk: Eskimo languages
|
126
133
|
esp: Esperanto
|
@@ -133,18 +140,21 @@ fao: Faroese
|
|
133
140
|
far: Faroese
|
134
141
|
fat: Fanti
|
135
142
|
fij: Fijian
|
143
|
+
fil: Filipino
|
136
144
|
fin: Finnish
|
137
145
|
fiu: Finno-Ugrian (Other)
|
138
146
|
fon: Fon
|
139
147
|
fre: French
|
140
148
|
fri: Frisian
|
141
|
-
frm: French, Middle (ca.
|
142
|
-
fro: French, Old (ca. 842-
|
149
|
+
frm: "French, Middle (ca. 1300-1600)"
|
150
|
+
fro: "French, Old (ca. 842-1300)"
|
151
|
+
frr: North Frisian
|
152
|
+
frs: East Frisian
|
143
153
|
fry: Frisian
|
144
154
|
ful: Fula
|
145
155
|
fur: Friulian
|
146
|
-
gaa:
|
147
|
-
gae: Scottish
|
156
|
+
gaa: Gã
|
157
|
+
gae: Scottish Gaelic
|
148
158
|
gag: Galician
|
149
159
|
gal: Oromo
|
150
160
|
gay: Gayo
|
@@ -158,15 +168,16 @@ gla: Scottish Gaelic
|
|
158
168
|
gle: Irish
|
159
169
|
glg: Galician
|
160
170
|
glv: Manx
|
161
|
-
gmh: German, Middle High (ca. 1050-1500)
|
162
|
-
goh: German, Old High (ca. 750-1050)
|
171
|
+
gmh: "German, Middle High (ca. 1050-1500)"
|
172
|
+
goh: "German, Old High (ca. 750-1050)"
|
163
173
|
gon: Gondi
|
164
174
|
gor: Gorontalo
|
165
175
|
got: Gothic
|
166
176
|
grb: Grebo
|
167
|
-
grc: Greek, Ancient (to 1453)
|
168
|
-
gre: Greek, Modern (1453-
|
177
|
+
grc: "Greek, Ancient (to 1453)"
|
178
|
+
gre: "Greek, Modern (1453-)"
|
169
179
|
grn: Guarani
|
180
|
+
gsw: Swiss German
|
170
181
|
gua: Guarani
|
171
182
|
guj: Gujarati
|
172
183
|
gwi: Gwich'in
|
@@ -177,11 +188,13 @@ haw: Hawaiian
|
|
177
188
|
heb: Hebrew
|
178
189
|
her: Herero
|
179
190
|
hil: Hiligaynon
|
180
|
-
him:
|
191
|
+
him: Western Pahari languages
|
181
192
|
hin: Hindi
|
182
193
|
hit: Hittite
|
183
194
|
hmn: Hmong
|
184
195
|
hmo: Hiri Motu
|
196
|
+
hrv: Croatian
|
197
|
+
hsb: Upper Sorbian
|
185
198
|
hun: Hungarian
|
186
199
|
hup: Hupa
|
187
200
|
iba: Iban
|
@@ -205,16 +218,17 @@ iri: Irish
|
|
205
218
|
iro: Iroquoian (Other)
|
206
219
|
ita: Italian
|
207
220
|
jav: Javanese
|
221
|
+
jbo: Lojban (Artificial language)
|
208
222
|
jpn: Japanese
|
209
223
|
jpr: Judeo-Persian
|
210
224
|
jrb: Judeo-Arabic
|
211
225
|
kaa: Kara-Kalpak
|
212
226
|
kab: Kabyle
|
213
227
|
kac: Kachin
|
214
|
-
kal:
|
228
|
+
kal: Kalâtdlisut
|
215
229
|
kam: Kamba
|
216
230
|
kan: Kannada
|
217
|
-
kar: Karen
|
231
|
+
kar: Karen languages
|
218
232
|
kas: Kashmiri
|
219
233
|
kau: Kanuri
|
220
234
|
kaw: Kawi
|
@@ -232,19 +246,21 @@ kok: Konkani
|
|
232
246
|
kom: Komi
|
233
247
|
kon: Kongo
|
234
248
|
kor: Korean
|
235
|
-
kos:
|
249
|
+
kos: Kosraean
|
236
250
|
kpe: Kpelle
|
237
|
-
|
251
|
+
krc: Karachay-Balkar
|
252
|
+
krl: Karelian
|
253
|
+
kro: Kru (Other)
|
238
254
|
kru: Kurukh
|
239
255
|
kua: Kuanyama
|
240
256
|
kum: Kumyk
|
241
257
|
kur: Kurdish
|
242
258
|
kus: Kusaie
|
243
|
-
kut:
|
259
|
+
kut: Kootenai
|
244
260
|
lad: Ladino
|
245
|
-
lah:
|
246
|
-
lam: Lamba
|
247
|
-
lan: Occitan (post
|
261
|
+
lah: Lahndā
|
262
|
+
lam: Lamba (Zambia and Congo)
|
263
|
+
lan: Occitan (post 1500)
|
248
264
|
lao: Lao
|
249
265
|
lap: Sami
|
250
266
|
lat: Latin
|
@@ -255,11 +271,11 @@ lin: Lingala
|
|
255
271
|
lit: Lithuanian
|
256
272
|
lol: Mongo-Nkundu
|
257
273
|
loz: Lozi
|
258
|
-
ltz:
|
274
|
+
ltz: Luxembourgish
|
259
275
|
lua: Luba-Lulua
|
260
276
|
lub: Luba-Katanga
|
261
277
|
lug: Ganda
|
262
|
-
lui:
|
278
|
+
lui: Luiseño
|
263
279
|
lun: Lunda
|
264
280
|
luo: Luo (Kenya and Tanzania)
|
265
281
|
lus: Lushai
|
@@ -274,12 +290,13 @@ man: Mandingo
|
|
274
290
|
mao: Maori
|
275
291
|
map: Austronesian (Other)
|
276
292
|
mar: Marathi
|
277
|
-
mas:
|
293
|
+
mas: Maasai
|
278
294
|
max: Manx
|
279
295
|
may: Malay
|
296
|
+
mdf: Moksha
|
280
297
|
mdr: Mandar
|
281
298
|
men: Mende
|
282
|
-
mga: Irish, Middle (ca. 1100-1550)
|
299
|
+
mga: "Irish, Middle (ca. 1100-1550)"
|
283
300
|
mic: Micmac
|
284
301
|
min: Minangkabau
|
285
302
|
mis: Miscellaneous languages
|
@@ -293,12 +310,14 @@ mno: Manobo languages
|
|
293
310
|
moh: Mohawk
|
294
311
|
mol: Moldavian
|
295
312
|
mon: Mongolian
|
296
|
-
mos:
|
313
|
+
mos: Mooré
|
297
314
|
mul: Multiple languages
|
298
315
|
mun: Munda (Other)
|
299
316
|
mus: Creek
|
317
|
+
mwl: Mirandese
|
300
318
|
mwr: Marwari
|
301
319
|
myn: Mayan languages
|
320
|
+
myv: Erzya
|
302
321
|
nah: Nahuatl
|
303
322
|
nai: North American Indian (Other)
|
304
323
|
nap: Neapolitan Italian
|
@@ -314,12 +333,14 @@ nia: Nias
|
|
314
333
|
nic: Niger-Kordofanian (Other)
|
315
334
|
niu: Niuean
|
316
335
|
nno: Norwegian (Nynorsk)
|
317
|
-
nob: Norwegian (
|
336
|
+
nob: Norwegian (Bokmål)
|
318
337
|
nog: Nogai
|
319
338
|
non: Old Norse
|
320
339
|
nor: Norwegian
|
340
|
+
nqo: N'Ko
|
321
341
|
nso: Northern Sotho
|
322
342
|
nub: Nubian languages
|
343
|
+
nwc: "Newari, Old"
|
323
344
|
nya: Nyanja
|
324
345
|
nym: Nyamwezi
|
325
346
|
nyn: Nyankole
|
@@ -331,7 +352,7 @@ ori: Oriya
|
|
331
352
|
orm: Oromo
|
332
353
|
osa: Osage
|
333
354
|
oss: Ossetic
|
334
|
-
ota: Turkish, Ottoman
|
355
|
+
ota: "Turkish, Ottoman"
|
335
356
|
oto: Otomian languages
|
336
357
|
paa: Papuan (Other)
|
337
358
|
pag: Pangasinan
|
@@ -346,10 +367,10 @@ phi: Philippine (Other)
|
|
346
367
|
phn: Phoenician
|
347
368
|
pli: Pali
|
348
369
|
pol: Polish
|
349
|
-
pon:
|
370
|
+
pon: Pohnpeian
|
350
371
|
por: Portuguese
|
351
372
|
pra: Prakrit languages
|
352
|
-
pro:
|
373
|
+
pro: Provençal (to 1500)
|
353
374
|
pus: Pushto
|
354
375
|
que: Quechua
|
355
376
|
raj: Rajasthani
|
@@ -360,6 +381,7 @@ roh: Raeto-Romance
|
|
360
381
|
rom: Romani
|
361
382
|
rum: Romanian
|
362
383
|
run: Rundi
|
384
|
+
rup: Aromanian
|
363
385
|
rus: Russian
|
364
386
|
sad: Sandawe
|
365
387
|
sag: Sango (Ubangi Creole)
|
@@ -372,11 +394,12 @@ sao: Samoan
|
|
372
394
|
sas: Sasak
|
373
395
|
sat: Santali
|
374
396
|
scc: Serbian
|
397
|
+
scn: Sicilian Italian
|
375
398
|
sco: Scots
|
376
399
|
scr: Croatian
|
377
400
|
sel: Selkup
|
378
401
|
sem: Semitic (Other)
|
379
|
-
sga: Irish, Old (to 1100)
|
402
|
+
sga: "Irish, Old (to 1100)"
|
380
403
|
sgn: Sign languages
|
381
404
|
shn: Shan
|
382
405
|
sho: Shona
|
@@ -404,6 +427,8 @@ son: Songhai
|
|
404
427
|
sot: Sotho
|
405
428
|
spa: Spanish
|
406
429
|
srd: Sardinian
|
430
|
+
srn: Sranan
|
431
|
+
srp: Serbian
|
407
432
|
srr: Serer
|
408
433
|
ssa: Nilo-Saharan (Other)
|
409
434
|
sso: Sotho
|
@@ -415,7 +440,8 @@ sux: Sumerian
|
|
415
440
|
swa: Swahili
|
416
441
|
swe: Swedish
|
417
442
|
swz: Swazi
|
418
|
-
|
443
|
+
syc: Syriac
|
444
|
+
syr: "Syriac, Modern"
|
419
445
|
tag: Tagalog
|
420
446
|
tah: Tahitian
|
421
447
|
tai: Tai (Other)
|
@@ -431,10 +457,11 @@ tgk: Tajik
|
|
431
457
|
tgl: Tagalog
|
432
458
|
tha: Thai
|
433
459
|
tib: Tibetan
|
434
|
-
tig:
|
460
|
+
tig: Tigré
|
435
461
|
tir: Tigrinya
|
436
462
|
tiv: Tiv
|
437
463
|
tkl: Tokelauan
|
464
|
+
tlh: Klingon (Artificial language)
|
438
465
|
tli: Tlingit
|
439
466
|
tmh: Tamashek
|
440
467
|
tog: Tonga (Nyasa)
|
@@ -464,17 +491,17 @@ uzb: Uzbek
|
|
464
491
|
vai: Vai
|
465
492
|
ven: Venda
|
466
493
|
vie: Vietnamese
|
467
|
-
vol:
|
494
|
+
vol: Volapük
|
468
495
|
vot: Votic
|
469
496
|
wak: Wakashan languages
|
470
|
-
wal:
|
497
|
+
wal: Wolayta
|
471
498
|
war: Waray
|
472
|
-
was:
|
499
|
+
was: Washoe
|
473
500
|
wel: Welsh
|
474
|
-
wen: Sorbian
|
501
|
+
wen: Sorbian (Other)
|
475
502
|
wln: Walloon
|
476
503
|
wol: Wolof
|
477
|
-
xal:
|
504
|
+
xal: Oirat
|
478
505
|
xho: Xhosa
|
479
506
|
yao: Yao (Africa)
|
480
507
|
yap: Yapese
|
@@ -482,9 +509,11 @@ yid: Yiddish
|
|
482
509
|
yor: Yoruba
|
483
510
|
ypk: Yupik languages
|
484
511
|
zap: Zapotec
|
512
|
+
zbl: Blissymbolics
|
485
513
|
zen: Zenaga
|
486
514
|
zha: Zhuang
|
487
|
-
znd: Zande
|
515
|
+
znd: Zande languages
|
488
516
|
zul: Zulu
|
489
517
|
zun: Zuni
|
490
|
-
# zxx:
|
518
|
+
# zxx: No linguistic content
|
519
|
+
zza: Zaza
|