traject 3.1.0 → 3.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/CHANGES.md +46 -0
- data/README.md +18 -2
- data/doc/settings.md +5 -1
- data/doc/xml.md +12 -0
- data/examples/marc/tiny.xml +35 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/debug_writer.rb +1 -1
- data/lib/traject/indexer.rb +12 -4
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +15 -12
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +10 -1
- data/lib/traject/oai_pmh_nokogiri_reader.rb +9 -3
- data/lib/traject/solr_json_writer.rb +38 -7
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +52 -0
- data/test/debug_writer_test.rb +13 -0
- data/test/delimited_writer_test.rb +14 -16
- data/test/indexer/class_level_configuration_test.rb +23 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/indexer/read_write_test.rb +14 -3
- data/test/nokogiri_reader_test.rb +10 -0
- data/test/solr_json_writer_test.rb +65 -0
- data/test/test_support/date_resort_to_264.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +31 -21
- data/.travis.yml +0 -16
@@ -26,9 +26,15 @@ module Traject
|
|
26
26
|
# Make sure to avoid text content that was all blank, which is "between the children"
|
27
27
|
# whitespace.
|
28
28
|
result = result.collect do |n|
|
29
|
-
n.
|
30
|
-
|
31
|
-
|
29
|
+
if n.kind_of?(Nokogiri::XML::Attr)
|
30
|
+
# attribute value
|
31
|
+
n.value
|
32
|
+
else
|
33
|
+
# text from node
|
34
|
+
n.xpath('.//text()').collect(&:text).tap do |arr|
|
35
|
+
arr.reject! { |s| s =~ (/\A\s+\z/) }
|
36
|
+
end.join(" ")
|
37
|
+
end
|
32
38
|
end
|
33
39
|
else
|
34
40
|
# just put all matches in accumulator as Nokogiri::XML::Node's
|
@@ -2,9 +2,9 @@ require 'traject/marc_extractor_spec'
|
|
2
2
|
|
3
3
|
module Traject
|
4
4
|
# MarcExtractor is a class for extracting lists of strings from a MARC::Record,
|
5
|
-
# according to specifications. See
|
6
|
-
# string arguments used to specify extraction. See #initialize for
|
7
|
-
# that can be set controlling extraction.
|
5
|
+
# according to specifications. See Traject::MarcExtractor::Spec for description
|
6
|
+
# of string arguments used to specify extraction. See #initialize for
|
7
|
+
# options that can be set controlling extraction.
|
8
8
|
#
|
9
9
|
# Examples:
|
10
10
|
#
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
1
3
|
module Traject
|
2
4
|
# A Traject reader which reads XML, and yields zero to many Nokogiri::XML::Document
|
3
5
|
# objects as source records in the traject pipeline.
|
@@ -21,6 +23,9 @@ module Traject
|
|
21
23
|
# If you need to use namespaces here, you need to have them registered with
|
22
24
|
# `nokogiri.default_namespaces`. If your source docs use namespaces, you DO need
|
23
25
|
# to use them in your each_record_xpath.
|
26
|
+
# * nokogiri.strict_mode: if set to `true` or `"true"`, ask Nokogiri to parse in 'strict'
|
27
|
+
# mode, it will raise a `Nokogiri::XML::SyntaxError` if the XML is not well-formed, instead
|
28
|
+
# of trying to take its best-guess correction. https://nokogiri.org/tutorials/ensuring_well_formed_markup.html
|
24
29
|
# * nokogiri_reader.extra_xpath_hooks: Experimental in progress, see below.
|
25
30
|
#
|
26
31
|
# ## nokogiri_reader.extra_xpath_hooks: For handling nodes outside of your each_record_xpath
|
@@ -87,7 +92,11 @@ module Traject
|
|
87
92
|
end
|
88
93
|
|
89
94
|
def each
|
90
|
-
|
95
|
+
config_proc = if settings["nokogiri.strict_mode"]
|
96
|
+
proc { |config| config.strict }
|
97
|
+
end
|
98
|
+
|
99
|
+
whole_input_doc = Nokogiri::XML.parse(input_stream, &config_proc)
|
91
100
|
|
92
101
|
if each_record_xpath
|
93
102
|
whole_input_doc.xpath(each_record_xpath, default_namespaces).each do |matching_node|
|
@@ -115,9 +115,15 @@ module Traject
|
|
115
115
|
# @return [HTTP::Client] from http.rb gem
|
116
116
|
def http_client
|
117
117
|
@http_client ||= begin
|
118
|
-
|
119
|
-
|
120
|
-
|
118
|
+
client = nil
|
119
|
+
|
120
|
+
if HTTP::VERSION.split(".").first.to_i > 3
|
121
|
+
client = HTTP.timeout(timeout)
|
122
|
+
else
|
123
|
+
# timeout settings on http.rb 3.x are a bit of a mess.
|
124
|
+
# https://github.com/httprb/http/issues/488
|
125
|
+
client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
|
126
|
+
end
|
121
127
|
|
122
128
|
if settings["oai_pmh.try_gzip"]
|
123
129
|
client = client.use(:auto_inflate).headers("accept-encoding" => "gzip;q=1.0, identity;q=0.5")
|
@@ -41,10 +41,12 @@ require 'concurrent' # for atomic_fixnum
|
|
41
41
|
#
|
42
42
|
# ## Relevant settings
|
43
43
|
#
|
44
|
-
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into
|
44
|
+
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into.
|
45
|
+
# (Can include embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
45
46
|
#
|
46
47
|
# * solr.update_url: The actual update url. If unset, we'll first see if
|
47
|
-
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update"
|
48
|
+
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update". (Can include
|
49
|
+
# embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
48
50
|
#
|
49
51
|
# * solr_writer.batch_size: How big a batch to send to solr. Default is 100.
|
50
52
|
# My tests indicate that this setting doesn't change overall index speed by a ton.
|
@@ -101,12 +103,17 @@ class Traject::SolrJsonWriter
|
|
101
103
|
def initialize(argSettings)
|
102
104
|
@settings = Traject::Indexer::Settings.new(argSettings)
|
103
105
|
|
106
|
+
|
104
107
|
# Set max errors
|
105
108
|
@max_skipped = (@settings['solr_writer.max_skipped'] || DEFAULT_MAX_SKIPPED).to_i
|
106
109
|
if @max_skipped < 0
|
107
110
|
@max_skipped = nil
|
108
111
|
end
|
109
112
|
|
113
|
+
|
114
|
+
# Figure out where to send updates, and if with basic auth
|
115
|
+
@solr_update_url, basic_auth_user, basic_auth_password = self.determine_solr_update_url
|
116
|
+
|
110
117
|
@http_client = if @settings["solr_json_writer.http_client"]
|
111
118
|
@settings["solr_json_writer.http_client"]
|
112
119
|
else
|
@@ -114,6 +121,11 @@ class Traject::SolrJsonWriter
|
|
114
121
|
if @settings["solr_writer.http_timeout"]
|
115
122
|
client.connect_timeout = client.receive_timeout = client.send_timeout = @settings["solr_writer.http_timeout"]
|
116
123
|
end
|
124
|
+
|
125
|
+
if basic_auth_user || basic_auth_password
|
126
|
+
client.set_auth(@solr_update_url, basic_auth_user, basic_auth_password)
|
127
|
+
end
|
128
|
+
|
117
129
|
client
|
118
130
|
end
|
119
131
|
|
@@ -137,13 +149,11 @@ class Traject::SolrJsonWriter
|
|
137
149
|
# this the new default writer.
|
138
150
|
@commit_on_close = (settings["solr_writer.commit_on_close"] || settings["solrj_writer.commit_on_close"]).to_s == "true"
|
139
151
|
|
140
|
-
# Figure out where to send updates
|
141
|
-
@solr_update_url = self.determine_solr_update_url
|
142
152
|
|
143
153
|
@solr_update_args = settings["solr_writer.solr_update_args"]
|
144
154
|
@commit_solr_update_args = settings["solr_writer.commit_solr_update_args"]
|
145
155
|
|
146
|
-
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
156
|
+
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' #{"(with HTTP basic auth)" if basic_auth_user || basic_auth_password}in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
147
157
|
end
|
148
158
|
|
149
159
|
|
@@ -270,6 +280,13 @@ class Traject::SolrJsonWriter
|
|
270
280
|
end
|
271
281
|
end
|
272
282
|
|
283
|
+
# Send a delete all query.
|
284
|
+
#
|
285
|
+
# This method takes no params and will not automatically commit the deletes.
|
286
|
+
# @example @writer.delete_all!
|
287
|
+
def delete_all!
|
288
|
+
delete(query: "*:*")
|
289
|
+
end
|
273
290
|
|
274
291
|
# Get the logger from the settings, or default to an effectively null logger
|
275
292
|
def logger
|
@@ -355,13 +372,27 @@ class Traject::SolrJsonWriter
|
|
355
372
|
end
|
356
373
|
|
357
374
|
|
358
|
-
# Relatively complex logic to determine if we have a valid URL and what it is
|
375
|
+
# Relatively complex logic to determine if we have a valid URL and what it is,
|
376
|
+
# and if we have basic_auth info
|
377
|
+
#
|
378
|
+
# Empties out user and password embedded in URI returned, to help avoid logging it.
|
379
|
+
#
|
380
|
+
# @return [update_url, basic_auth_user, basic_auth_password]
|
359
381
|
def determine_solr_update_url
|
360
|
-
if settings['solr.update_url']
|
382
|
+
url = if settings['solr.update_url']
|
361
383
|
check_solr_update_url(settings['solr.update_url'])
|
362
384
|
else
|
363
385
|
derive_solr_update_url_from_solr_url(settings['solr.url'])
|
364
386
|
end
|
387
|
+
|
388
|
+
parsed_uri = URI.parse(url)
|
389
|
+
user_from_uri, password_from_uri = parsed_uri.user, parsed_uri.password
|
390
|
+
parsed_uri.user, parsed_uri.password = nil, nil
|
391
|
+
|
392
|
+
basic_auth_user = @settings["solr_writer.basic_auth_user"] || user_from_uri
|
393
|
+
basic_auth_password = @settings["solr_writer.basic_auth_password"] || password_from_uri
|
394
|
+
|
395
|
+
return [parsed_uri.to_s, basic_auth_user, basic_auth_password]
|
365
396
|
end
|
366
397
|
|
367
398
|
|
data/lib/traject/version.rb
CHANGED
@@ -10,18 +10,21 @@ ady: Adygei
|
|
10
10
|
afa: Afroasiatic (Other)
|
11
11
|
afh: Afrihili (Artificial language)
|
12
12
|
afr: Afrikaans
|
13
|
-
|
13
|
+
ain: Ainu
|
14
|
+
ajm: Aljamía
|
14
15
|
aka: Akan
|
15
16
|
akk: Akkadian
|
16
17
|
alb: Albanian
|
17
18
|
ale: Aleut
|
18
19
|
alg: Algonquian (Other)
|
20
|
+
alt: Altai
|
19
21
|
amh: Amharic
|
20
|
-
ang: English, Old (ca. 450-1100)
|
22
|
+
ang: "English, Old (ca. 450-1100)"
|
23
|
+
anp: Angika
|
21
24
|
apa: Apache languages
|
22
25
|
ara: Arabic
|
23
26
|
arc: Aramaic
|
24
|
-
arg: Aragonese
|
27
|
+
arg: Aragonese
|
25
28
|
arm: Armenian
|
26
29
|
arn: Mapuche
|
27
30
|
arp: Arapaho
|
@@ -36,7 +39,7 @@ ave: Avestan
|
|
36
39
|
awa: Awadhi
|
37
40
|
aym: Aymara
|
38
41
|
aze: Azerbaijani
|
39
|
-
bad: Banda
|
42
|
+
bad: Banda languages
|
40
43
|
bai: Bamileke languages
|
41
44
|
bak: Bashkir
|
42
45
|
bal: Baluchi
|
@@ -51,7 +54,7 @@ bem: Bemba
|
|
51
54
|
ben: Bengali
|
52
55
|
ber: Berber (Other)
|
53
56
|
bho: Bhojpuri
|
54
|
-
bih: Bihari
|
57
|
+
bih: Bihari (Other)
|
55
58
|
bik: Bikol
|
56
59
|
bin: Edo
|
57
60
|
bis: Bislama
|
@@ -65,6 +68,7 @@ bua: Buriat
|
|
65
68
|
bug: Bugis
|
66
69
|
bul: Bulgarian
|
67
70
|
bur: Burmese
|
71
|
+
byn: Bilin
|
68
72
|
cad: Caddo
|
69
73
|
cai: Central American Indian (Other)
|
70
74
|
cam: Khmer
|
@@ -78,7 +82,7 @@ chb: Chibcha
|
|
78
82
|
che: Chechen
|
79
83
|
chg: Chagatai
|
80
84
|
chi: Chinese
|
81
|
-
chk:
|
85
|
+
chk: Chuukese
|
82
86
|
chm: Mari
|
83
87
|
chn: Chinook jargon
|
84
88
|
cho: Choctaw
|
@@ -88,15 +92,17 @@ chu: Church Slavic
|
|
88
92
|
chv: Chuvash
|
89
93
|
chy: Cheyenne
|
90
94
|
cmc: Chamic languages
|
95
|
+
cnr: Montenegrin
|
91
96
|
cop: Coptic
|
92
97
|
cor: Cornish
|
93
98
|
cos: Corsican
|
94
|
-
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
-
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
-
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
99
|
+
cpe: "Creoles and Pidgins, English-based (Other)"
|
100
|
+
cpf: "Creoles and Pidgins, French-based (Other)"
|
101
|
+
cpp: "Creoles and Pidgins, Portuguese-based (Other)"
|
97
102
|
cre: Cree
|
98
103
|
crh: Crimean Tatar
|
99
104
|
crp: Creoles and Pidgins (Other)
|
105
|
+
csb: Kashubian
|
100
106
|
cus: Cushitic (Other)
|
101
107
|
cze: Czech
|
102
108
|
dak: Dakota
|
@@ -104,14 +110,15 @@ dan: Danish
|
|
104
110
|
dar: Dargwa
|
105
111
|
day: Dayak
|
106
112
|
del: Delaware
|
107
|
-
den:
|
113
|
+
den: Slavey
|
108
114
|
dgr: Dogrib
|
109
115
|
din: Dinka
|
110
116
|
div: Divehi
|
111
117
|
doi: Dogri
|
112
118
|
dra: Dravidian (Other)
|
119
|
+
dsb: Lower Sorbian
|
113
120
|
dua: Duala
|
114
|
-
dum: Dutch, Middle (ca. 1050-1350)
|
121
|
+
dum: "Dutch, Middle (ca. 1050-1350)"
|
115
122
|
dut: Dutch
|
116
123
|
dyu: Dyula
|
117
124
|
dzo: Dzongkha
|
@@ -120,7 +127,7 @@ egy: Egyptian
|
|
120
127
|
eka: Ekajuk
|
121
128
|
elx: Elamite
|
122
129
|
eng: English
|
123
|
-
enm: English, Middle (1100-1500)
|
130
|
+
enm: "English, Middle (1100-1500)"
|
124
131
|
epo: Esperanto
|
125
132
|
esk: Eskimo languages
|
126
133
|
esp: Esperanto
|
@@ -133,18 +140,21 @@ fao: Faroese
|
|
133
140
|
far: Faroese
|
134
141
|
fat: Fanti
|
135
142
|
fij: Fijian
|
143
|
+
fil: Filipino
|
136
144
|
fin: Finnish
|
137
145
|
fiu: Finno-Ugrian (Other)
|
138
146
|
fon: Fon
|
139
147
|
fre: French
|
140
148
|
fri: Frisian
|
141
|
-
frm: French, Middle (ca.
|
142
|
-
fro: French, Old (ca. 842-
|
149
|
+
frm: "French, Middle (ca. 1300-1600)"
|
150
|
+
fro: "French, Old (ca. 842-1300)"
|
151
|
+
frr: North Frisian
|
152
|
+
frs: East Frisian
|
143
153
|
fry: Frisian
|
144
154
|
ful: Fula
|
145
155
|
fur: Friulian
|
146
|
-
gaa:
|
147
|
-
gae: Scottish
|
156
|
+
gaa: Gã
|
157
|
+
gae: Scottish Gaelix
|
148
158
|
gag: Galician
|
149
159
|
gal: Oromo
|
150
160
|
gay: Gayo
|
@@ -158,15 +168,16 @@ gla: Scottish Gaelic
|
|
158
168
|
gle: Irish
|
159
169
|
glg: Galician
|
160
170
|
glv: Manx
|
161
|
-
gmh: German, Middle High (ca. 1050-1500)
|
162
|
-
goh: German, Old High (ca. 750-1050)
|
171
|
+
gmh: "German, Middle High (ca. 1050-1500)"
|
172
|
+
goh: "German, Old High (ca. 750-1050)"
|
163
173
|
gon: Gondi
|
164
174
|
gor: Gorontalo
|
165
175
|
got: Gothic
|
166
176
|
grb: Grebo
|
167
|
-
grc: Greek, Ancient (to 1453)
|
168
|
-
gre: Greek, Modern (1453-
|
177
|
+
grc: "Greek, Ancient (to 1453)"
|
178
|
+
gre: "Greek, Modern (1453-)"
|
169
179
|
grn: Guarani
|
180
|
+
gsw: Swiss German
|
170
181
|
gua: Guarani
|
171
182
|
guj: Gujarati
|
172
183
|
gwi: Gwich'in
|
@@ -177,11 +188,13 @@ haw: Hawaiian
|
|
177
188
|
heb: Hebrew
|
178
189
|
her: Herero
|
179
190
|
hil: Hiligaynon
|
180
|
-
him:
|
191
|
+
him: Western Pahari languages
|
181
192
|
hin: Hindi
|
182
193
|
hit: Hittite
|
183
194
|
hmn: Hmong
|
184
195
|
hmo: Hiri Motu
|
196
|
+
hrv: Croatian
|
197
|
+
hsb: Upper Sorbian
|
185
198
|
hun: Hungarian
|
186
199
|
hup: Hupa
|
187
200
|
iba: Iban
|
@@ -205,16 +218,17 @@ iri: Irish
|
|
205
218
|
iro: Iroquoian (Other)
|
206
219
|
ita: Italian
|
207
220
|
jav: Javanese
|
221
|
+
jbo: Lojban (Artificial language)
|
208
222
|
jpn: Japanese
|
209
223
|
jpr: Judeo-Persian
|
210
224
|
jrb: Judeo-Arabic
|
211
225
|
kaa: Kara-Kalpak
|
212
226
|
kab: Kabyle
|
213
227
|
kac: Kachin
|
214
|
-
kal:
|
228
|
+
kal: Kalâtdlisut
|
215
229
|
kam: Kamba
|
216
230
|
kan: Kannada
|
217
|
-
kar: Karen
|
231
|
+
kar: Karen languages
|
218
232
|
kas: Kashmiri
|
219
233
|
kau: Kanuri
|
220
234
|
kaw: Kawi
|
@@ -232,19 +246,21 @@ kok: Konkani
|
|
232
246
|
kom: Komi
|
233
247
|
kon: Kongo
|
234
248
|
kor: Korean
|
235
|
-
kos:
|
249
|
+
kos: Kosraean
|
236
250
|
kpe: Kpelle
|
237
|
-
|
251
|
+
krc: Karachay-Balkar
|
252
|
+
krl: Karelian
|
253
|
+
kro: Kru (Other)
|
238
254
|
kru: Kurukh
|
239
255
|
kua: Kuanyama
|
240
256
|
kum: Kumyk
|
241
257
|
kur: Kurdish
|
242
258
|
kus: Kusaie
|
243
|
-
kut:
|
259
|
+
kut: Kootenai
|
244
260
|
lad: Ladino
|
245
|
-
lah:
|
246
|
-
lam: Lamba
|
247
|
-
lan: Occitan (post
|
261
|
+
lah: Lahndā
|
262
|
+
lam: Lamba (Zambia and Congo)
|
263
|
+
lan: Occitan (post 1500)
|
248
264
|
lao: Lao
|
249
265
|
lap: Sami
|
250
266
|
lat: Latin
|
@@ -255,11 +271,11 @@ lin: Lingala
|
|
255
271
|
lit: Lithuanian
|
256
272
|
lol: Mongo-Nkundu
|
257
273
|
loz: Lozi
|
258
|
-
ltz:
|
274
|
+
ltz: Luxembourgish
|
259
275
|
lua: Luba-Lulua
|
260
276
|
lub: Luba-Katanga
|
261
277
|
lug: Ganda
|
262
|
-
lui:
|
278
|
+
lui: Luiseño
|
263
279
|
lun: Lunda
|
264
280
|
luo: Luo (Kenya and Tanzania)
|
265
281
|
lus: Lushai
|
@@ -274,12 +290,13 @@ man: Mandingo
|
|
274
290
|
mao: Maori
|
275
291
|
map: Austronesian (Other)
|
276
292
|
mar: Marathi
|
277
|
-
mas:
|
293
|
+
mas: Maasai
|
278
294
|
max: Manx
|
279
295
|
may: Malay
|
296
|
+
mdf: Moksha
|
280
297
|
mdr: Mandar
|
281
298
|
men: Mende
|
282
|
-
mga: Irish, Middle (ca. 1100-1550)
|
299
|
+
mga: "Irish, Middle (ca. 1100-1550)"
|
283
300
|
mic: Micmac
|
284
301
|
min: Minangkabau
|
285
302
|
mis: Miscellaneous languages
|
@@ -293,12 +310,14 @@ mno: Manobo languages
|
|
293
310
|
moh: Mohawk
|
294
311
|
mol: Moldavian
|
295
312
|
mon: Mongolian
|
296
|
-
mos:
|
313
|
+
mos: Mooré
|
297
314
|
mul: Multiple languages
|
298
315
|
mun: Munda (Other)
|
299
316
|
mus: Creek
|
317
|
+
mwl: Mirandese
|
300
318
|
mwr: Marwari
|
301
319
|
myn: Mayan languages
|
320
|
+
myv: Erzya
|
302
321
|
nah: Nahuatl
|
303
322
|
nai: North American Indian (Other)
|
304
323
|
nap: Neapolitan Italian
|
@@ -314,12 +333,14 @@ nia: Nias
|
|
314
333
|
nic: Niger-Kordofanian (Other)
|
315
334
|
niu: Niuean
|
316
335
|
nno: Norwegian (Nynorsk)
|
317
|
-
nob: Norwegian (
|
336
|
+
nob: Norwegian (Bokmål)
|
318
337
|
nog: Nogai
|
319
338
|
non: Old Norse
|
320
339
|
nor: Norwegian
|
340
|
+
nqo: N'Ko
|
321
341
|
nso: Northern Sotho
|
322
342
|
nub: Nubian languages
|
343
|
+
nwc: "Newari, Old"
|
323
344
|
nya: Nyanja
|
324
345
|
nym: Nyamwezi
|
325
346
|
nyn: Nyankole
|
@@ -331,7 +352,7 @@ ori: Oriya
|
|
331
352
|
orm: Oromo
|
332
353
|
osa: Osage
|
333
354
|
oss: Ossetic
|
334
|
-
ota: Turkish, Ottoman
|
355
|
+
ota: "Turkish, Ottoman"
|
335
356
|
oto: Otomian languages
|
336
357
|
paa: Papuan (Other)
|
337
358
|
pag: Pangasinan
|
@@ -346,10 +367,10 @@ phi: Philippine (Other)
|
|
346
367
|
phn: Phoenician
|
347
368
|
pli: Pali
|
348
369
|
pol: Polish
|
349
|
-
pon:
|
370
|
+
pon: Pohnpeian
|
350
371
|
por: Portuguese
|
351
372
|
pra: Prakrit languages
|
352
|
-
pro:
|
373
|
+
pro: Provençal (to 1500)
|
353
374
|
pus: Pushto
|
354
375
|
que: Quechua
|
355
376
|
raj: Rajasthani
|
@@ -360,6 +381,7 @@ roh: Raeto-Romance
|
|
360
381
|
rom: Romani
|
361
382
|
rum: Romanian
|
362
383
|
run: Rundi
|
384
|
+
rup: Aromanian
|
363
385
|
rus: Russian
|
364
386
|
sad: Sandawe
|
365
387
|
sag: Sango (Ubangi Creole)
|
@@ -372,11 +394,12 @@ sao: Samoan
|
|
372
394
|
sas: Sasak
|
373
395
|
sat: Santali
|
374
396
|
scc: Serbian
|
397
|
+
scn: Sicilian Italian
|
375
398
|
sco: Scots
|
376
399
|
scr: Croatian
|
377
400
|
sel: Selkup
|
378
401
|
sem: Semitic (Other)
|
379
|
-
sga: Irish, Old (to 1100)
|
402
|
+
sga: "Irish, Old (to 1100)"
|
380
403
|
sgn: Sign languages
|
381
404
|
shn: Shan
|
382
405
|
sho: Shona
|
@@ -404,6 +427,8 @@ son: Songhai
|
|
404
427
|
sot: Sotho
|
405
428
|
spa: Spanish
|
406
429
|
srd: Sardinian
|
430
|
+
srn: Sranan
|
431
|
+
srp: Serbian
|
407
432
|
srr: Serer
|
408
433
|
ssa: Nilo-Saharan (Other)
|
409
434
|
sso: Sotho
|
@@ -415,7 +440,8 @@ sux: Sumerian
|
|
415
440
|
swa: Swahili
|
416
441
|
swe: Swedish
|
417
442
|
swz: Swazi
|
418
|
-
|
443
|
+
syc: Syriac
|
444
|
+
syr: "Syriac, Modern"
|
419
445
|
tag: Tagalog
|
420
446
|
tah: Tahitian
|
421
447
|
tai: Tai (Other)
|
@@ -431,10 +457,11 @@ tgk: Tajik
|
|
431
457
|
tgl: Tagalog
|
432
458
|
tha: Thai
|
433
459
|
tib: Tibetan
|
434
|
-
tig:
|
460
|
+
tig: Tigré
|
435
461
|
tir: Tigrinya
|
436
462
|
tiv: Tiv
|
437
463
|
tkl: Tokelauan
|
464
|
+
tlh: Klingon (Artificial language)
|
438
465
|
tli: Tlingit
|
439
466
|
tmh: Tamashek
|
440
467
|
tog: Tonga (Nyasa)
|
@@ -464,17 +491,17 @@ uzb: Uzbek
|
|
464
491
|
vai: Vai
|
465
492
|
ven: Venda
|
466
493
|
vie: Vietnamese
|
467
|
-
vol:
|
494
|
+
vol: Volapük
|
468
495
|
vot: Votic
|
469
496
|
wak: Wakashan languages
|
470
|
-
wal:
|
497
|
+
wal: Wolayta
|
471
498
|
war: Waray
|
472
|
-
was:
|
499
|
+
was: Washoe
|
473
500
|
wel: Welsh
|
474
|
-
wen: Sorbian
|
501
|
+
wen: Sorbian (Other)
|
475
502
|
wln: Walloon
|
476
503
|
wol: Wolof
|
477
|
-
xal:
|
504
|
+
xal: Oirat
|
478
505
|
xho: Xhosa
|
479
506
|
yao: Yao (Africa)
|
480
507
|
yap: Yapese
|
@@ -482,9 +509,11 @@ yid: Yiddish
|
|
482
509
|
yor: Yoruba
|
483
510
|
ypk: Yupik languages
|
484
511
|
zap: Zapotec
|
512
|
+
zbl: Blissymbolics
|
485
513
|
zen: Zenaga
|
486
514
|
zha: Zhuang
|
487
|
-
znd: Zande
|
515
|
+
znd: Zande languages
|
488
516
|
zul: Zulu
|
489
517
|
zun: Zuni
|
490
|
-
# zxx:
|
518
|
+
# zxx: No linguistic content
|
519
|
+
zza: Zaza
|