traject 3.1.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/CHANGES.md +46 -0
- data/README.md +18 -2
- data/doc/settings.md +5 -1
- data/doc/xml.md +12 -0
- data/examples/marc/tiny.xml +35 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/debug_writer.rb +1 -1
- data/lib/traject/indexer.rb +12 -4
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +15 -12
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +10 -1
- data/lib/traject/oai_pmh_nokogiri_reader.rb +9 -3
- data/lib/traject/solr_json_writer.rb +38 -7
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +52 -0
- data/test/debug_writer_test.rb +13 -0
- data/test/delimited_writer_test.rb +14 -16
- data/test/indexer/class_level_configuration_test.rb +23 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/indexer/read_write_test.rb +14 -3
- data/test/nokogiri_reader_test.rb +10 -0
- data/test/solr_json_writer_test.rb +65 -0
- data/test/test_support/date_resort_to_264.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +31 -21
- data/.travis.yml +0 -16
@@ -26,9 +26,15 @@ module Traject
|
|
26
26
|
# Make sure to avoid text content that was all blank, which is "between the children"
|
27
27
|
# whitespace.
|
28
28
|
result = result.collect do |n|
|
29
|
-
n.
|
30
|
-
|
31
|
-
|
29
|
+
if n.kind_of?(Nokogiri::XML::Attr)
|
30
|
+
# attribute value
|
31
|
+
n.value
|
32
|
+
else
|
33
|
+
# text from node
|
34
|
+
n.xpath('.//text()').collect(&:text).tap do |arr|
|
35
|
+
arr.reject! { |s| s =~ (/\A\s+\z/) }
|
36
|
+
end.join(" ")
|
37
|
+
end
|
32
38
|
end
|
33
39
|
else
|
34
40
|
# just put all matches in accumulator as Nokogiri::XML::Node's
|
@@ -2,9 +2,9 @@ require 'traject/marc_extractor_spec'
|
|
2
2
|
|
3
3
|
module Traject
|
4
4
|
# MarcExtractor is a class for extracting lists of strings from a MARC::Record,
|
5
|
-
# according to specifications. See
|
6
|
-
# string arguments used to specify extraction. See #initialize for
|
7
|
-
# that can be set controlling extraction.
|
5
|
+
# according to specifications. See Traject::MarcExtractor::Spec for description
|
6
|
+
# of string arguments used to specify extraction. See #initialize for
|
7
|
+
# options that can be set controlling extraction.
|
8
8
|
#
|
9
9
|
# Examples:
|
10
10
|
#
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
1
3
|
module Traject
|
2
4
|
# A Traject reader which reads XML, and yields zero to many Nokogiri::XML::Document
|
3
5
|
# objects as source records in the traject pipeline.
|
@@ -21,6 +23,9 @@ module Traject
|
|
21
23
|
# If you need to use namespaces here, you need to have them registered with
|
22
24
|
# `nokogiri.default_namespaces`. If your source docs use namespaces, you DO need
|
23
25
|
# to use them in your each_record_xpath.
|
26
|
+
# * nokogiri.strict_mode: if set to `true` or `"true"`, ask Nokogiri to parse in 'strict'
|
27
|
+
# mode, it will raise a `Nokogiri::XML::SyntaxError` if the XML is not well-formed, instead
|
28
|
+
# of trying to take its best-guess correction. https://nokogiri.org/tutorials/ensuring_well_formed_markup.html
|
24
29
|
# * nokogiri_reader.extra_xpath_hooks: Experimental in progress, see below.
|
25
30
|
#
|
26
31
|
# ## nokogiri_reader.extra_xpath_hooks: For handling nodes outside of your each_record_xpath
|
@@ -87,7 +92,11 @@ module Traject
|
|
87
92
|
end
|
88
93
|
|
89
94
|
def each
|
90
|
-
|
95
|
+
config_proc = if settings["nokogiri.strict_mode"]
|
96
|
+
proc { |config| config.strict }
|
97
|
+
end
|
98
|
+
|
99
|
+
whole_input_doc = Nokogiri::XML.parse(input_stream, &config_proc)
|
91
100
|
|
92
101
|
if each_record_xpath
|
93
102
|
whole_input_doc.xpath(each_record_xpath, default_namespaces).each do |matching_node|
|
@@ -115,9 +115,15 @@ module Traject
|
|
115
115
|
# @returns [HTTP::Client] from http.rb gem
|
116
116
|
def http_client
|
117
117
|
@http_client ||= begin
|
118
|
-
|
119
|
-
|
120
|
-
|
118
|
+
client = nil
|
119
|
+
|
120
|
+
if HTTP::VERSION.split(".").first.to_i > 3
|
121
|
+
client = HTTP.timeout(timeout)
|
122
|
+
else
|
123
|
+
# timeout settings on http.rb 3.x are a bit of a mess.
|
124
|
+
# https://github.com/httprb/http/issues/488
|
125
|
+
client = HTTP.timeout(:global, write: timeout / 3, connect: timeout / 3, read: timeout / 3)
|
126
|
+
end
|
121
127
|
|
122
128
|
if settings["oai_pmh.try_gzip"]
|
123
129
|
client = client.use(:auto_inflate).headers("accept-encoding" => "gzip;q=1.0, identity;q=0.5")
|
@@ -41,10 +41,12 @@ require 'concurrent' # for atomic_fixnum
|
|
41
41
|
#
|
42
42
|
# ## Relevant settings
|
43
43
|
#
|
44
|
-
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into
|
44
|
+
# * solr.url (optional if solr.update_url is set) The URL to the solr core to index into.
|
45
|
+
# (Can include embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
45
46
|
#
|
46
47
|
# * solr.update_url: The actual update url. If unset, we'll first see if
|
47
|
-
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update"
|
48
|
+
# "#{solr.url}/update/json" exists, and if not use "#{solr.url}/update". (Can include
|
49
|
+
# embedded HTTP basic auth as eg `http://user:pass@host/solr`)
|
48
50
|
#
|
49
51
|
# * solr_writer.batch_size: How big a batch to send to solr. Default is 100.
|
50
52
|
# My tests indicate that this setting doesn't change overall index speed by a ton.
|
@@ -101,12 +103,17 @@ class Traject::SolrJsonWriter
|
|
101
103
|
def initialize(argSettings)
|
102
104
|
@settings = Traject::Indexer::Settings.new(argSettings)
|
103
105
|
|
106
|
+
|
104
107
|
# Set max errors
|
105
108
|
@max_skipped = (@settings['solr_writer.max_skipped'] || DEFAULT_MAX_SKIPPED).to_i
|
106
109
|
if @max_skipped < 0
|
107
110
|
@max_skipped = nil
|
108
111
|
end
|
109
112
|
|
113
|
+
|
114
|
+
# Figure out where to send updates, and if with basic auth
|
115
|
+
@solr_update_url, basic_auth_user, basic_auth_password = self.determine_solr_update_url
|
116
|
+
|
110
117
|
@http_client = if @settings["solr_json_writer.http_client"]
|
111
118
|
@settings["solr_json_writer.http_client"]
|
112
119
|
else
|
@@ -114,6 +121,11 @@ class Traject::SolrJsonWriter
|
|
114
121
|
if @settings["solr_writer.http_timeout"]
|
115
122
|
client.connect_timeout = client.receive_timeout = client.send_timeout = @settings["solr_writer.http_timeout"]
|
116
123
|
end
|
124
|
+
|
125
|
+
if basic_auth_user || basic_auth_password
|
126
|
+
client.set_auth(@solr_update_url, basic_auth_user, basic_auth_password)
|
127
|
+
end
|
128
|
+
|
117
129
|
client
|
118
130
|
end
|
119
131
|
|
@@ -137,13 +149,11 @@ class Traject::SolrJsonWriter
|
|
137
149
|
# this the new default writer.
|
138
150
|
@commit_on_close = (settings["solr_writer.commit_on_close"] || settings["solrj_writer.commit_on_close"]).to_s == "true"
|
139
151
|
|
140
|
-
# Figure out where to send updates
|
141
|
-
@solr_update_url = self.determine_solr_update_url
|
142
152
|
|
143
153
|
@solr_update_args = settings["solr_writer.solr_update_args"]
|
144
154
|
@commit_solr_update_args = settings["solr_writer.commit_solr_update_args"]
|
145
155
|
|
146
|
-
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
156
|
+
logger.info(" #{self.class.name} writing to '#{@solr_update_url}' #{"(with HTTP basic auth)" if basic_auth_user || basic_auth_password}in batches of #{@batch_size} with #{@thread_pool_size} bg threads")
|
147
157
|
end
|
148
158
|
|
149
159
|
|
@@ -270,6 +280,13 @@ class Traject::SolrJsonWriter
|
|
270
280
|
end
|
271
281
|
end
|
272
282
|
|
283
|
+
# Send a delete all query.
|
284
|
+
#
|
285
|
+
# This method takes no params and will not automatically commit the deletes.
|
286
|
+
# @example @writer.delete_all!
|
287
|
+
def delete_all!
|
288
|
+
delete(query: "*:*")
|
289
|
+
end
|
273
290
|
|
274
291
|
# Get the logger from the settings, or default to an effectively null logger
|
275
292
|
def logger
|
@@ -355,13 +372,27 @@ class Traject::SolrJsonWriter
|
|
355
372
|
end
|
356
373
|
|
357
374
|
|
358
|
-
# Relatively complex logic to determine if we have a valid URL and what it is
|
375
|
+
# Relatively complex logic to determine if we have a valid URL and what it is,
|
376
|
+
# and if we have basic_auth info
|
377
|
+
#
|
378
|
+
# Empties out user and password embedded in URI returned, to help avoid logging it.
|
379
|
+
#
|
380
|
+
# @returns [update_url, basic_auth_user, basic_auth_password]
|
359
381
|
def determine_solr_update_url
|
360
|
-
if settings['solr.update_url']
|
382
|
+
url = if settings['solr.update_url']
|
361
383
|
check_solr_update_url(settings['solr.update_url'])
|
362
384
|
else
|
363
385
|
derive_solr_update_url_from_solr_url(settings['solr.url'])
|
364
386
|
end
|
387
|
+
|
388
|
+
parsed_uri = URI.parse(url)
|
389
|
+
user_from_uri, password_from_uri = parsed_uri.user, parsed_uri.password
|
390
|
+
parsed_uri.user, parsed_uri.password = nil, nil
|
391
|
+
|
392
|
+
basic_auth_user = @settings["solr_writer.basic_auth_user"] || user_from_uri
|
393
|
+
basic_auth_password = @settings["solr_writer.basic_auth_password"] || password_from_uri
|
394
|
+
|
395
|
+
return [parsed_uri.to_s, basic_auth_user, basic_auth_password]
|
365
396
|
end
|
366
397
|
|
367
398
|
|
data/lib/traject/version.rb
CHANGED
@@ -10,18 +10,21 @@ ady: Adygei
|
|
10
10
|
afa: Afroasiatic (Other)
|
11
11
|
afh: Afrihili (Artificial language)
|
12
12
|
afr: Afrikaans
|
13
|
-
|
13
|
+
ain: Ainu
|
14
|
+
ajm: Aljamía
|
14
15
|
aka: Akan
|
15
16
|
akk: Akkadian
|
16
17
|
alb: Albanian
|
17
18
|
ale: Aleut
|
18
19
|
alg: Algonquian (Other)
|
20
|
+
alt: Altai
|
19
21
|
amh: Amharic
|
20
|
-
ang: English, Old (ca. 450-1100)
|
22
|
+
ang: "English, Old (ca. 450-1100)"
|
23
|
+
anp: Angika
|
21
24
|
apa: Apache languages
|
22
25
|
ara: Arabic
|
23
26
|
arc: Aramaic
|
24
|
-
arg: Aragonese
|
27
|
+
arg: Aragonese
|
25
28
|
arm: Armenian
|
26
29
|
arn: Mapuche
|
27
30
|
arp: Arapaho
|
@@ -36,7 +39,7 @@ ave: Avestan
|
|
36
39
|
awa: Awadhi
|
37
40
|
aym: Aymara
|
38
41
|
aze: Azerbaijani
|
39
|
-
bad: Banda
|
42
|
+
bad: Banda languages
|
40
43
|
bai: Bamileke languages
|
41
44
|
bak: Bashkir
|
42
45
|
bal: Baluchi
|
@@ -51,7 +54,7 @@ bem: Bemba
|
|
51
54
|
ben: Bengali
|
52
55
|
ber: Berber (Other)
|
53
56
|
bho: Bhojpuri
|
54
|
-
bih: Bihari
|
57
|
+
bih: Bihari (Other)
|
55
58
|
bik: Bikol
|
56
59
|
bin: Edo
|
57
60
|
bis: Bislama
|
@@ -65,6 +68,7 @@ bua: Buriat
|
|
65
68
|
bug: Bugis
|
66
69
|
bul: Bulgarian
|
67
70
|
bur: Burmese
|
71
|
+
byn: Bilin
|
68
72
|
cad: Caddo
|
69
73
|
cai: Central American Indian (Other)
|
70
74
|
cam: Khmer
|
@@ -78,7 +82,7 @@ chb: Chibcha
|
|
78
82
|
che: Chechen
|
79
83
|
chg: Chagatai
|
80
84
|
chi: Chinese
|
81
|
-
chk:
|
85
|
+
chk: Chuukese
|
82
86
|
chm: Mari
|
83
87
|
chn: Chinook jargon
|
84
88
|
cho: Choctaw
|
@@ -88,15 +92,17 @@ chu: Church Slavic
|
|
88
92
|
chv: Chuvash
|
89
93
|
chy: Cheyenne
|
90
94
|
cmc: Chamic languages
|
95
|
+
cnr: Montenegrin
|
91
96
|
cop: Coptic
|
92
97
|
cor: Cornish
|
93
98
|
cos: Corsican
|
94
|
-
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
-
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
-
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
99
|
+
cpe: "Creoles and Pidgins, English-based (Other)"
|
100
|
+
cpf: "Creoles and Pidgins, French-based (Other)"
|
101
|
+
cpp: "Creoles and Pidgins, Portuguese-based (Other)"
|
97
102
|
cre: Cree
|
98
103
|
crh: Crimean Tatar
|
99
104
|
crp: Creoles and Pidgins (Other)
|
105
|
+
csb: Kashubian
|
100
106
|
cus: Cushitic (Other)
|
101
107
|
cze: Czech
|
102
108
|
dak: Dakota
|
@@ -104,14 +110,15 @@ dan: Danish
|
|
104
110
|
dar: Dargwa
|
105
111
|
day: Dayak
|
106
112
|
del: Delaware
|
107
|
-
den:
|
113
|
+
den: Slavey
|
108
114
|
dgr: Dogrib
|
109
115
|
din: Dinka
|
110
116
|
div: Divehi
|
111
117
|
doi: Dogri
|
112
118
|
dra: Dravidian (Other)
|
119
|
+
dsb: Lower Sorbian
|
113
120
|
dua: Duala
|
114
|
-
dum: Dutch, Middle (ca. 1050-1350)
|
121
|
+
dum: "Dutch, Middle (ca. 1050-1350)"
|
115
122
|
dut: Dutch
|
116
123
|
dyu: Dyula
|
117
124
|
dzo: Dzongkha
|
@@ -120,7 +127,7 @@ egy: Egyptian
|
|
120
127
|
eka: Ekajuk
|
121
128
|
elx: Elamite
|
122
129
|
eng: English
|
123
|
-
enm: English, Middle (1100-1500)
|
130
|
+
enm: "English, Middle (1100-1500)"
|
124
131
|
epo: Esperanto
|
125
132
|
esk: Eskimo languages
|
126
133
|
esp: Esperanto
|
@@ -133,18 +140,21 @@ fao: Faroese
|
|
133
140
|
far: Faroese
|
134
141
|
fat: Fanti
|
135
142
|
fij: Fijian
|
143
|
+
fil: Filipino
|
136
144
|
fin: Finnish
|
137
145
|
fiu: Finno-Ugrian (Other)
|
138
146
|
fon: Fon
|
139
147
|
fre: French
|
140
148
|
fri: Frisian
|
141
|
-
frm: French, Middle (ca.
|
142
|
-
fro: French, Old (ca. 842-
|
149
|
+
frm: "French, Middle (ca. 1300-1600)"
|
150
|
+
fro: "French, Old (ca. 842-1300)"
|
151
|
+
frr: North Frisian
|
152
|
+
frs: East Frisian
|
143
153
|
fry: Frisian
|
144
154
|
ful: Fula
|
145
155
|
fur: Friulian
|
146
|
-
gaa:
|
147
|
-
gae: Scottish
|
156
|
+
gaa: Gã
|
157
|
+
gae: Scottish Gaelic
|
148
158
|
gag: Galician
|
149
159
|
gal: Oromo
|
150
160
|
gay: Gayo
|
@@ -158,15 +168,16 @@ gla: Scottish Gaelic
|
|
158
168
|
gle: Irish
|
159
169
|
glg: Galician
|
160
170
|
glv: Manx
|
161
|
-
gmh: German, Middle High (ca. 1050-1500)
|
162
|
-
goh: German, Old High (ca. 750-1050)
|
171
|
+
gmh: "German, Middle High (ca. 1050-1500)"
|
172
|
+
goh: "German, Old High (ca. 750-1050)"
|
163
173
|
gon: Gondi
|
164
174
|
gor: Gorontalo
|
165
175
|
got: Gothic
|
166
176
|
grb: Grebo
|
167
|
-
grc: Greek, Ancient (to 1453)
|
168
|
-
gre: Greek, Modern (1453-
|
177
|
+
grc: "Greek, Ancient (to 1453)"
|
178
|
+
gre: "Greek, Modern (1453-)"
|
169
179
|
grn: Guarani
|
180
|
+
gsw: Swiss German
|
170
181
|
gua: Guarani
|
171
182
|
guj: Gujarati
|
172
183
|
gwi: Gwich'in
|
@@ -177,11 +188,13 @@ haw: Hawaiian
|
|
177
188
|
heb: Hebrew
|
178
189
|
her: Herero
|
179
190
|
hil: Hiligaynon
|
180
|
-
him:
|
191
|
+
him: Western Pahari languages
|
181
192
|
hin: Hindi
|
182
193
|
hit: Hittite
|
183
194
|
hmn: Hmong
|
184
195
|
hmo: Hiri Motu
|
196
|
+
hrv: Croatian
|
197
|
+
hsb: Upper Sorbian
|
185
198
|
hun: Hungarian
|
186
199
|
hup: Hupa
|
187
200
|
iba: Iban
|
@@ -205,16 +218,17 @@ iri: Irish
|
|
205
218
|
iro: Iroquoian (Other)
|
206
219
|
ita: Italian
|
207
220
|
jav: Javanese
|
221
|
+
jbo: Lojban (Artificial language)
|
208
222
|
jpn: Japanese
|
209
223
|
jpr: Judeo-Persian
|
210
224
|
jrb: Judeo-Arabic
|
211
225
|
kaa: Kara-Kalpak
|
212
226
|
kab: Kabyle
|
213
227
|
kac: Kachin
|
214
|
-
kal:
|
228
|
+
kal: Kalâtdlisut
|
215
229
|
kam: Kamba
|
216
230
|
kan: Kannada
|
217
|
-
kar: Karen
|
231
|
+
kar: Karen languages
|
218
232
|
kas: Kashmiri
|
219
233
|
kau: Kanuri
|
220
234
|
kaw: Kawi
|
@@ -232,19 +246,21 @@ kok: Konkani
|
|
232
246
|
kom: Komi
|
233
247
|
kon: Kongo
|
234
248
|
kor: Korean
|
235
|
-
kos:
|
249
|
+
kos: Kosraean
|
236
250
|
kpe: Kpelle
|
237
|
-
|
251
|
+
krc: Karachay-Balkar
|
252
|
+
krl: Karelian
|
253
|
+
kro: Kru (Other)
|
238
254
|
kru: Kurukh
|
239
255
|
kua: Kuanyama
|
240
256
|
kum: Kumyk
|
241
257
|
kur: Kurdish
|
242
258
|
kus: Kusaie
|
243
|
-
kut:
|
259
|
+
kut: Kootenai
|
244
260
|
lad: Ladino
|
245
|
-
lah:
|
246
|
-
lam: Lamba
|
247
|
-
lan: Occitan (post
|
261
|
+
lah: Lahndā
|
262
|
+
lam: Lamba (Zambia and Congo)
|
263
|
+
lan: Occitan (post 1500)
|
248
264
|
lao: Lao
|
249
265
|
lap: Sami
|
250
266
|
lat: Latin
|
@@ -255,11 +271,11 @@ lin: Lingala
|
|
255
271
|
lit: Lithuanian
|
256
272
|
lol: Mongo-Nkundu
|
257
273
|
loz: Lozi
|
258
|
-
ltz:
|
274
|
+
ltz: Luxembourgish
|
259
275
|
lua: Luba-Lulua
|
260
276
|
lub: Luba-Katanga
|
261
277
|
lug: Ganda
|
262
|
-
lui:
|
278
|
+
lui: Luiseño
|
263
279
|
lun: Lunda
|
264
280
|
luo: Luo (Kenya and Tanzania)
|
265
281
|
lus: Lushai
|
@@ -274,12 +290,13 @@ man: Mandingo
|
|
274
290
|
mao: Maori
|
275
291
|
map: Austronesian (Other)
|
276
292
|
mar: Marathi
|
277
|
-
mas:
|
293
|
+
mas: Maasai
|
278
294
|
max: Manx
|
279
295
|
may: Malay
|
296
|
+
mdf: Moksha
|
280
297
|
mdr: Mandar
|
281
298
|
men: Mende
|
282
|
-
mga: Irish, Middle (ca. 1100-1550)
|
299
|
+
mga: "Irish, Middle (ca. 1100-1550)"
|
283
300
|
mic: Micmac
|
284
301
|
min: Minangkabau
|
285
302
|
mis: Miscellaneous languages
|
@@ -293,12 +310,14 @@ mno: Manobo languages
|
|
293
310
|
moh: Mohawk
|
294
311
|
mol: Moldavian
|
295
312
|
mon: Mongolian
|
296
|
-
mos:
|
313
|
+
mos: Mooré
|
297
314
|
mul: Multiple languages
|
298
315
|
mun: Munda (Other)
|
299
316
|
mus: Creek
|
317
|
+
mwl: Mirandese
|
300
318
|
mwr: Marwari
|
301
319
|
myn: Mayan languages
|
320
|
+
myv: Erzya
|
302
321
|
nah: Nahuatl
|
303
322
|
nai: North American Indian (Other)
|
304
323
|
nap: Neapolitan Italian
|
@@ -314,12 +333,14 @@ nia: Nias
|
|
314
333
|
nic: Niger-Kordofanian (Other)
|
315
334
|
niu: Niuean
|
316
335
|
nno: Norwegian (Nynorsk)
|
317
|
-
nob: Norwegian (
|
336
|
+
nob: Norwegian (Bokmål)
|
318
337
|
nog: Nogai
|
319
338
|
non: Old Norse
|
320
339
|
nor: Norwegian
|
340
|
+
nqo: N'Ko
|
321
341
|
nso: Northern Sotho
|
322
342
|
nub: Nubian languages
|
343
|
+
nwc: "Newari, Old"
|
323
344
|
nya: Nyanja
|
324
345
|
nym: Nyamwezi
|
325
346
|
nyn: Nyankole
|
@@ -331,7 +352,7 @@ ori: Oriya
|
|
331
352
|
orm: Oromo
|
332
353
|
osa: Osage
|
333
354
|
oss: Ossetic
|
334
|
-
ota: Turkish, Ottoman
|
355
|
+
ota: "Turkish, Ottoman"
|
335
356
|
oto: Otomian languages
|
336
357
|
paa: Papuan (Other)
|
337
358
|
pag: Pangasinan
|
@@ -346,10 +367,10 @@ phi: Philippine (Other)
|
|
346
367
|
phn: Phoenician
|
347
368
|
pli: Pali
|
348
369
|
pol: Polish
|
349
|
-
pon:
|
370
|
+
pon: Pohnpeian
|
350
371
|
por: Portuguese
|
351
372
|
pra: Prakrit languages
|
352
|
-
pro:
|
373
|
+
pro: Provençal (to 1500)
|
353
374
|
pus: Pushto
|
354
375
|
que: Quechua
|
355
376
|
raj: Rajasthani
|
@@ -360,6 +381,7 @@ roh: Raeto-Romance
|
|
360
381
|
rom: Romani
|
361
382
|
rum: Romanian
|
362
383
|
run: Rundi
|
384
|
+
rup: Aromanian
|
363
385
|
rus: Russian
|
364
386
|
sad: Sandawe
|
365
387
|
sag: Sango (Ubangi Creole)
|
@@ -372,11 +394,12 @@ sao: Samoan
|
|
372
394
|
sas: Sasak
|
373
395
|
sat: Santali
|
374
396
|
scc: Serbian
|
397
|
+
scn: Sicilian Italian
|
375
398
|
sco: Scots
|
376
399
|
scr: Croatian
|
377
400
|
sel: Selkup
|
378
401
|
sem: Semitic (Other)
|
379
|
-
sga: Irish, Old (to 1100)
|
402
|
+
sga: "Irish, Old (to 1100)"
|
380
403
|
sgn: Sign languages
|
381
404
|
shn: Shan
|
382
405
|
sho: Shona
|
@@ -404,6 +427,8 @@ son: Songhai
|
|
404
427
|
sot: Sotho
|
405
428
|
spa: Spanish
|
406
429
|
srd: Sardinian
|
430
|
+
srn: Sranan
|
431
|
+
srp: Serbian
|
407
432
|
srr: Serer
|
408
433
|
ssa: Nilo-Saharan (Other)
|
409
434
|
sso: Sotho
|
@@ -415,7 +440,8 @@ sux: Sumerian
|
|
415
440
|
swa: Swahili
|
416
441
|
swe: Swedish
|
417
442
|
swz: Swazi
|
418
|
-
|
443
|
+
syc: Syriac
|
444
|
+
syr: "Syriac, Modern"
|
419
445
|
tag: Tagalog
|
420
446
|
tah: Tahitian
|
421
447
|
tai: Tai (Other)
|
@@ -431,10 +457,11 @@ tgk: Tajik
|
|
431
457
|
tgl: Tagalog
|
432
458
|
tha: Thai
|
433
459
|
tib: Tibetan
|
434
|
-
tig:
|
460
|
+
tig: Tigré
|
435
461
|
tir: Tigrinya
|
436
462
|
tiv: Tiv
|
437
463
|
tkl: Tokelauan
|
464
|
+
tlh: Klingon (Artificial language)
|
438
465
|
tli: Tlingit
|
439
466
|
tmh: Tamashek
|
440
467
|
tog: Tonga (Nyasa)
|
@@ -464,17 +491,17 @@ uzb: Uzbek
|
|
464
491
|
vai: Vai
|
465
492
|
ven: Venda
|
466
493
|
vie: Vietnamese
|
467
|
-
vol:
|
494
|
+
vol: Volapük
|
468
495
|
vot: Votic
|
469
496
|
wak: Wakashan languages
|
470
|
-
wal:
|
497
|
+
wal: Wolayta
|
471
498
|
war: Waray
|
472
|
-
was:
|
499
|
+
was: Washoe
|
473
500
|
wel: Welsh
|
474
|
-
wen: Sorbian
|
501
|
+
wen: Sorbian (Other)
|
475
502
|
wln: Walloon
|
476
503
|
wol: Wolof
|
477
|
-
xal:
|
504
|
+
xal: Oirat
|
478
505
|
xho: Xhosa
|
479
506
|
yao: Yao (Africa)
|
480
507
|
yap: Yapese
|
@@ -482,9 +509,11 @@ yid: Yiddish
|
|
482
509
|
yor: Yoruba
|
483
510
|
ypk: Yupik languages
|
484
511
|
zap: Zapotec
|
512
|
+
zbl: Blissymbolics
|
485
513
|
zen: Zenaga
|
486
514
|
zha: Zhuang
|
487
|
-
znd: Zande
|
515
|
+
znd: Zande languages
|
488
516
|
zul: Zulu
|
489
517
|
zun: Zuni
|
490
|
-
# zxx:
|
518
|
+
# zxx: No linguistic content
|
519
|
+
zza: Zaza
|