whatlanguage 1.0.6 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +41 -0
- data/Gemfile +2 -2
- data/LICENSE.txt +42 -9
- data/README.md +42 -83
- data/Rakefile +9 -3
- data/lib/whatlanguage/languages.rb +180 -0
- data/lib/whatlanguage/trigrams.json +1 -0
- data/lib/whatlanguage/version.rb +3 -1
- data/lib/whatlanguage.rb +251 -72
- data/whatlanguage.gemspec +24 -13
- metadata +39 -43
- data/.gitignore +0 -17
- data/History.txt +0 -20
- data/Manifest.txt +0 -19
- data/build_filter.rb +0 -9
- data/build_lang_from_wordlists.rb +0 -13
- data/copyright-en +0 -243
- data/example.rb +0 -51
- data/lang/arabic.lang +0 -0
- data/lang/danish.lang +0 -0
- data/lang/dutch.lang +0 -0
- data/lang/english.lang +0 -0
- data/lang/farsi.lang +0 -0
- data/lang/finnish.lang +0 -0
- data/lang/french.lang +0 -0
- data/lang/german.lang +0 -0
- data/lang/greek.lang +0 -0
- data/lang/hebrew.lang +0 -0
- data/lang/hungarian.lang +0 -0
- data/lang/italian.lang +0 -0
- data/lang/korean.lang +0 -0
- data/lang/norwegian.lang +0 -0
- data/lang/pinyin.lang +0 -0
- data/lang/polish.lang +0 -0
- data/lang/portuguese.lang +0 -0
- data/lang/russian.lang +0 -0
- data/lang/spanish.lang +0 -0
- data/lang/swedish.lang +0 -0
- data/lib/whatlanguage/bitfield.rb +0 -64
- data/lib/whatlanguage/bloominsimple.rb +0 -88
- data/lib/whatlanguage/string.rb +0 -11
- data/test/test_whatlanguage.rb +0 -129
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '0939c1109ab44fd32e9e611c20696ee315a980877c0f04d5990fe42f51c17201'
|
|
4
|
+
data.tar.gz: e5002ebde89b8c6acfa313355d2aa588b31cbb7db27870690bbeae761ecaeb2c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2a06fa18163cee6de9278291856c1b04c633880be1a2816476ce4b47b03c374242c9eb0ea012e1fb04322de753d45492a9ca96856ec70582ea1287fb19034c2e
|
|
7
|
+
data.tar.gz: e57243d9fccc9b5c43e5da591db26085d2eedd7776a4bc3e6dfd9b8ddcfcc59112de6309f63a41f0b2f38bc171bce2d80e638b2c3a8645230657aeb643f852ef
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 2.0.0 / 2026-06-10
|
|
4
|
+
|
|
5
|
+
- Rewrote the detection engine from scratch. Bloom-filter dictionary lookups are
|
|
6
|
+
replaced by a script router plus character-trigram out-of-place scoring (a pure-Ruby
|
|
7
|
+
port of whatlang/Franc, using public-domain UDHR-derived profiles).
|
|
8
|
+
- Expanded from 20 to 160+ languages across 20+ scripts.
|
|
9
|
+
- Removed the ~5 MB of binary `.lang` filter files and the wordlist corpus; the model
|
|
10
|
+
is now a single ~220 KB JSON file. No dependencies beyond the standard library.
|
|
11
|
+
- Requires Ruby 3.0 or newer.
|
|
12
|
+
- Added `#detect`, `#ranked`, and `#score_hash` for callers that need result metadata
|
|
13
|
+
or runner-up scores. `#scores` and `#process_text` remain as compatibility aliases.
|
|
14
|
+
- Added class-level convenience methods: `WhatLanguage.detect`, `.language`,
|
|
15
|
+
`.language_iso`, `.ranked`, `.score_hash`, and `.languages`.
|
|
16
|
+
- Added `only:` for configured detectors, replacing positional language
|
|
17
|
+
selection as the documented API.
|
|
18
|
+
- Removed the `String#language` and `String#language_iso` monkeypatch.
|
|
19
|
+
- Short shared-script fragments now return `nil` by default; pass `min_chars: 0`
|
|
20
|
+
to preserve the previous best-effort behavior.
|
|
21
|
+
- Removed internals: `BloominSimple`, `BitField`, the filter-build scripts.
|
|
22
|
+
|
|
23
|
+
## 1.0.6 / 2016-01-28
|
|
24
|
+
|
|
25
|
+
- Minor test fixes and tweaks
|
|
26
|
+
- New release taking into account a handful of pull requests
|
|
27
|
+
|
|
28
|
+
## 1.0.5 / 2013-10-05
|
|
29
|
+
|
|
30
|
+
- Many more languages supported
|
|
31
|
+
|
|
32
|
+
## 1.0.4 / 2013-03-07
|
|
33
|
+
|
|
34
|
+
## 1.0.1 / 2008-08-22
|
|
35
|
+
|
|
36
|
+
- Public release
|
|
37
|
+
- Removed wordlists from distribution to reduce size
|
|
38
|
+
|
|
39
|
+
## 1.0.0 / 2007-07-02
|
|
40
|
+
|
|
41
|
+
- First version with pre-built English, French, and Spanish filters
|
data/Gemfile
CHANGED
data/LICENSE.txt
CHANGED
|
@@ -1,10 +1,43 @@
|
|
|
1
|
-
Copyright (c) 2008-2013 Peter Cooper
|
|
2
|
-
|
|
3
1
|
MIT License
|
|
4
2
|
|
|
3
|
+
Ruby code is Copyright (c) 2007-2026 Peter Cooper
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
|
+
a copy of this software and associated documentation files (the
|
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
|
11
|
+
the following conditions:
|
|
12
|
+
|
|
13
|
+
The above copyright notice and this permission notice shall be
|
|
14
|
+
included in all copies or substantial portions of the Software.
|
|
15
|
+
|
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
lib/whatlanguage/trigrams.json is licensed as per the https://github.com/greyblake/whatlang-rs project.
|
|
27
|
+
|
|
28
|
+
Its license is as follows:
|
|
29
|
+
|
|
30
|
+
(The MIT License)
|
|
31
|
+
|
|
32
|
+
Copyright (c) 2017 Sergey Potapov <blake131313@gmail.com>
|
|
33
|
+
Copyright (c) 2014 Titus Wormer <tituswormer@gmail.com>
|
|
34
|
+
Copyright (c) 2008 Kent S Johnson
|
|
35
|
+
Copyright (c) 2006 Jacob R Rideout <kde@jacobrideout.net>
|
|
36
|
+
Copyright (c) 2004 Maciej Ceglowski
|
|
37
|
+
|
|
5
38
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
6
39
|
a copy of this software and associated documentation files (the
|
|
7
|
-
|
|
40
|
+
'Software'), to deal in the Software without restriction, including
|
|
8
41
|
without limitation the rights to use, copy, modify, merge, publish,
|
|
9
42
|
distribute, sublicense, and/or sell copies of the Software, and to
|
|
10
43
|
permit persons to whom the Software is furnished to do so, subject to
|
|
@@ -13,10 +46,10 @@ the following conditions:
|
|
|
13
46
|
The above copyright notice and this permission notice shall be
|
|
14
47
|
included in all copies or substantial portions of the Software.
|
|
15
48
|
|
|
16
|
-
THE SOFTWARE IS PROVIDED
|
|
49
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
17
50
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
51
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
52
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
53
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
54
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
55
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
|
@@ -1,114 +1,73 @@
|
|
|
1
1
|
# whatlanguage
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Pure Ruby natural language detection for 160+ languages.
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
```ruby
|
|
6
|
+
require 'whatlanguage'
|
|
6
7
|
|
|
7
|
-
|
|
8
|
+
WhatLanguage.language("Que linguagem é essa? É uma pergunta sobre a língua portuguesa.")
|
|
9
|
+
# => :portuguese
|
|
10
|
+
```
|
|
8
11
|
|
|
9
|
-
|
|
12
|
+
- `gem install whatlanguage`
|
|
13
|
+
- No runtime dependencies.
|
|
14
|
+
- Supports 20+ writing systems.
|
|
15
|
+
- Ships a compact ~220 KB trigram model.
|
|
16
|
+
- Requires Ruby 3.0+
|
|
17
|
+
- Best on sentence-length text or longer.
|
|
10
18
|
|
|
11
|
-
|
|
19
|
+
## How it works
|
|
12
20
|
|
|
13
|
-
|
|
21
|
+
Detection happens in two stages. First, the dominant Unicode script is detected; scripts used by a single supported language (Greek, Korean, Thai, Japanese using Hiragana/Katakana) resolve immediately. For scripts shared by several languages (e.g. Latin, Cyrillic, Arabic, Hebrew), the text's trigrams are ranked by frequency and compared against the candidate language profiles.
|
|
14
22
|
|
|
15
|
-
|
|
23
|
+
The trigram profiles are vendored from [whatlang](https://github.com/greyblake/whatlang-rs), a port of [Franc](https://github.com/wooorm/franc), whose models are built from the public-domain UDHR corpus (see Credits). The model is a ~220 KB JSON file.
|
|
16
24
|
|
|
17
|
-
|
|
25
|
+
> [!IMPORTANT]
|
|
26
|
+
> v2.0 has many breaking changes because the entire library has been rewritten, though the core `WhatLanguage.language` API remains similar. Versions 1.0.6 and earlier (the 2007-2025 run of the library) used a Bloom-filter technique and shipped ~5 MB of binary files to handle ~20 languages. Version 2.0 is more accurate, faster, and supports more languages from a single ~220 KB JSON file :-)
|
|
18
27
|
|
|
19
|
-
|
|
20
|
-
require 'whatlanguage/string'
|
|
21
|
-
|
|
22
|
-
texts = []
|
|
23
|
-
texts << %q{Deux autres personnes ont été arrêtées durant la nuit}
|
|
24
|
-
texts << %q{The links between the attempted car bombings in Glasgow and London are becoming clearer}
|
|
25
|
-
texts << %q{En estado de máxima alertaen su nivel de crítico}
|
|
26
|
-
texts << %q{Returns the object in enum with the maximum value.}
|
|
27
|
-
texts << %q{Propose des données au sujet de la langue espagnole.}
|
|
28
|
-
texts << %q{La palabra "mezquita" se usa en español para referirse a todo tipo de edificios dedicados.}
|
|
29
|
-
texts << %q{اللغة التي هي هذه؟}
|
|
30
|
-
texts << %q{Mitä kieltä tämä on?}
|
|
31
|
-
texts << %q{Ποια γλώσσα είναι αυτή;}
|
|
32
|
-
texts << %q{באיזו שפה זה?}
|
|
33
|
-
texts << %q{Milyen nyelv ez?}
|
|
34
|
-
texts << %q{이 어떤 언어인가?}
|
|
35
|
-
texts << %q{Hvilket språk er dette?}
|
|
36
|
-
texts << %q{W jakim języku to jest?}
|
|
37
|
-
|
|
38
|
-
texts.each { |text| puts "#{text[0..18]}... is in #{text.language.to_s.capitalize}" }
|
|
39
|
-
```
|
|
28
|
+
## Usage
|
|
40
29
|
|
|
41
|
-
|
|
30
|
+
Return a full detection result:
|
|
42
31
|
|
|
43
32
|
```ruby
|
|
44
|
-
wl = WhatLanguage.new
|
|
33
|
+
wl = WhatLanguage.new
|
|
34
|
+
text = "Die Stadt plant neue Investitionen in den öffentlichen Verkehr"
|
|
35
|
+
result = wl.detect(text)
|
|
36
|
+
result.language # => :german
|
|
37
|
+
result.iso # => :de
|
|
38
|
+
result.score # => 79018
|
|
39
|
+
result.ranked # => [[:german, 79018], [:dutch, 77631], ... ]
|
|
45
40
|
```
|
|
46
41
|
|
|
47
|
-
Return
|
|
42
|
+
Return ranked scores, or the raw score hash:
|
|
48
43
|
|
|
49
44
|
```ruby
|
|
50
|
-
wl.
|
|
45
|
+
wl.ranked(text) # => [[:german, 79018], [:dutch, 77631], ... ]
|
|
46
|
+
wl.score_hash(text) # => { german: 79018, dutch: 77631, ... }
|
|
51
47
|
```
|
|
52
48
|
|
|
53
|
-
|
|
49
|
+
Restrict candidate languages:
|
|
54
50
|
|
|
55
51
|
```ruby
|
|
56
|
-
wl.
|
|
52
|
+
wl = WhatLanguage.new(only: [:english, :german, :french])
|
|
57
53
|
```
|
|
58
54
|
|
|
59
|
-
|
|
55
|
+
Short fragments in scripts shared by several languages (such as Latin) return `nil` by default because there is not enough signal to rank the candidate languages reliably. The threshold applies to the statistical trigram stage; scripts that identify a single supported language, such as Greek, Korean, or Thai, can still resolve from shorter text. The threshold can be adjusted:
|
|
60
56
|
|
|
61
57
|
```ruby
|
|
62
|
-
|
|
63
|
-
"This is a test".language_iso # => :en
|
|
58
|
+
wl = WhatLanguage.new(min_chars: 0)
|
|
64
59
|
```
|
|
65
60
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
```ruby
|
|
69
|
-
wl = WhatLanguage.new(:english, :german, :french)
|
|
70
|
-
```
|
|
71
|
-
|
|
72
|
-
## Requirements
|
|
73
|
-
|
|
74
|
-
None, minor libraries (BloominSimple and BitField) included with this release.
|
|
75
|
-
|
|
76
|
-
## Installation
|
|
77
|
-
|
|
78
|
-
gem install whatlanguage
|
|
79
|
-
|
|
80
|
-
To test, go into irb, then:
|
|
81
|
-
|
|
82
|
-
```ruby
|
|
83
|
-
require 'whatlanguage'
|
|
84
|
-
"Je suis un homme".language
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
## Credits
|
|
88
|
-
|
|
89
|
-
Contributions from Konrad Reiche, Salimane Adjao Moustapha, and others appreciated.
|
|
90
|
-
|
|
91
|
-
## License
|
|
92
|
-
|
|
93
|
-
MIT License
|
|
61
|
+
## Known limitations
|
|
94
62
|
|
|
95
|
-
|
|
63
|
+
- Short fragments are unreliable. For languages resolved by statistical comparison, text with fewer than 10 significant characters returns `nil` by default.
|
|
64
|
+
- Scores are relative ranking values, not probabilities. Use `#ranked` or `#detect.ranked` when close runners-up matter.
|
|
65
|
+
- Closely related written languages can be hard to separate, especially Norwegian Bokmål/Danish, Hebrew/Yiddish, and similar language pairs.
|
|
66
|
+
- Kanji-only Japanese text can classify as Chinese because Han characters alone do not identify the language.
|
|
67
|
+
- Romanized text is classified by Latin-script trigram profiles; it is not treated as native-script text.
|
|
96
68
|
|
|
97
|
-
|
|
98
|
-
a copy of this software and associated documentation files (the
|
|
99
|
-
'Software'), to deal in the Software without restriction, including
|
|
100
|
-
without limitation the rights to use, copy, modify, merge, publish,
|
|
101
|
-
distribute, sublicense, and/or sell copies of the Software, and to
|
|
102
|
-
permit persons to whom the Software is furnished to do so, subject to
|
|
103
|
-
the following conditions:
|
|
69
|
+
## Credits
|
|
104
70
|
|
|
105
|
-
|
|
106
|
-
included in all copies or substantial portions of the Software.
|
|
71
|
+
Contributions from Konrad Reiche, Salimane Adjao Moustapha, Andrew Cone, Lasse Skindstad Ebert, Henrik Nyh, Daniel Sandbecker, Michael Hartl, Pedro Lambert, Tobias Preuss, Pepijn Looije, and others appreciated.
|
|
107
72
|
|
|
108
|
-
|
|
109
|
-
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
110
|
-
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
111
|
-
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
112
|
-
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
113
|
-
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
114
|
-
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
73
|
+
The trigram language profiles in `lib/whatlanguage/trigrams.json` are taken from [whatlang](https://github.com/greyblake/whatlang-rs) (MIT, © Sergey Potapov), itself a derivative of [Franc](https://github.com/wooorm/franc) (MIT, © Titus Wormer). Those profiles are derived from the public-domain Universal Declaration of Human Rights translations.
|
data/Rakefile
CHANGED
|
@@ -1,6 +1,12 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'bundler/gem_tasks'
|
|
2
4
|
require 'rake/testtask'
|
|
3
5
|
|
|
4
|
-
Rake::TestTask.new
|
|
6
|
+
Rake::TestTask.new do |t|
|
|
7
|
+
t.libs << 'lib' << 'test'
|
|
8
|
+
t.pattern = 'test/**/*_test.rb'
|
|
9
|
+
t.warning = false
|
|
10
|
+
end
|
|
5
11
|
|
|
6
|
-
task :
|
|
12
|
+
task default: :test
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# AUTO-GENERATED from the ISO 639-3 registry + whatlang dataset codes.
|
|
4
|
+
# Maps ISO 639-3 code => [language name symbol, ISO 639-1 (or 639-3 fallback) symbol].
|
|
5
|
+
# Original gem languages keep their historical symbols and 2-letter codes.
|
|
6
|
+
class WhatLanguage
|
|
7
|
+
CODE_INFO = {
|
|
8
|
+
"ace" => [:achinese, :ace],
|
|
9
|
+
"ada" => [:adangme, :ada],
|
|
10
|
+
"afr" => [:afrikaans, :af],
|
|
11
|
+
"aka" => [:akan, :ak],
|
|
12
|
+
"amh" => [:amharic, :am],
|
|
13
|
+
"ara" => [:arabic, :ar],
|
|
14
|
+
"hye" => [:armenian, :hy],
|
|
15
|
+
"quy" => [:ayacucho_quechua, :quy],
|
|
16
|
+
"aze" => [:azerbaijani, :az],
|
|
17
|
+
"ban" => [:balinese, :ban],
|
|
18
|
+
"rmn" => [:balkan_romani, :rmn],
|
|
19
|
+
"bam" => [:bambara, :bm],
|
|
20
|
+
"bci" => [:baoule, :bci],
|
|
21
|
+
"bel" => [:belarusian, :be],
|
|
22
|
+
"bem" => [:bemba, :bem],
|
|
23
|
+
"ben" => [:bengali, :bn],
|
|
24
|
+
"bho" => [:bhojpuri, :bho],
|
|
25
|
+
"bin" => [:bini, :bin],
|
|
26
|
+
"bos" => [:bosnian, :bs],
|
|
27
|
+
"bug" => [:buginese, :bug],
|
|
28
|
+
"bul" => [:bulgarian, :bg],
|
|
29
|
+
"mya" => [:burmese, :my],
|
|
30
|
+
"cat" => [:catalan, :ca],
|
|
31
|
+
"ceb" => [:cebuano, :ceb],
|
|
32
|
+
"tzm" => [:central_atlas_tamazight, :tzm],
|
|
33
|
+
"ayr" => [:central_aymara, :ayr],
|
|
34
|
+
"bcl" => [:central_bikol, :bcl],
|
|
35
|
+
"knc" => [:central_kanuri, :knc],
|
|
36
|
+
"nhn" => [:central_nahuatl, :nhn],
|
|
37
|
+
"nya" => [:chichewa, :ny],
|
|
38
|
+
"qug" => [:chimborazo_highland_quichua, :qug],
|
|
39
|
+
"cmn" => [:chinese, :zh],
|
|
40
|
+
"cjk" => [:chokwe, :cjk],
|
|
41
|
+
"hrv" => [:croatian, :hr],
|
|
42
|
+
"quz" => [:cusco_quechua, :quz],
|
|
43
|
+
"ces" => [:czech, :cs],
|
|
44
|
+
"dan" => [:danish, :da],
|
|
45
|
+
"nld" => [:dutch, :nl],
|
|
46
|
+
"dyu" => [:dyula, :dyu],
|
|
47
|
+
"emk" => [:eastern_maninkakan, :emk],
|
|
48
|
+
"eng" => [:english, :en],
|
|
49
|
+
"epo" => [:esperanto, :eo],
|
|
50
|
+
"est" => [:estonian, :et],
|
|
51
|
+
"ewe" => [:ewe, :ee],
|
|
52
|
+
"pes" => [:farsi, :fa],
|
|
53
|
+
"fin" => [:finnish, :fi],
|
|
54
|
+
"fon" => [:fon, :fon],
|
|
55
|
+
"fra" => [:french, :fr],
|
|
56
|
+
"gaa" => [:ga, :gaa],
|
|
57
|
+
"glg" => [:galician, :gl],
|
|
58
|
+
"lug" => [:ganda, :lg],
|
|
59
|
+
"kat" => [:georgian, :ka],
|
|
60
|
+
"deu" => [:german, :de],
|
|
61
|
+
"ell" => [:greek, :el],
|
|
62
|
+
"guj" => [:gujarati, :gu],
|
|
63
|
+
"hat" => [:haitian, :ht],
|
|
64
|
+
"khk" => [:halh_mongolian, :khk],
|
|
65
|
+
"hau" => [:hausa, :ha],
|
|
66
|
+
"heb" => [:hebrew, :he],
|
|
67
|
+
"hil" => [:hiligaynon, :hil],
|
|
68
|
+
"hin" => [:hindi, :hi],
|
|
69
|
+
"hun" => [:hungarian, :hu],
|
|
70
|
+
"ibb" => [:ibibio, :ibb],
|
|
71
|
+
"ibo" => [:igbo, :ig],
|
|
72
|
+
"ilo" => [:iloko, :ilo],
|
|
73
|
+
"ind" => [:indonesian, :id],
|
|
74
|
+
"ita" => [:italian, :it],
|
|
75
|
+
"jpn" => [:japanese, :ja],
|
|
76
|
+
"jav" => [:javanese, :jv],
|
|
77
|
+
"kbp" => [:kabiye, :kbp],
|
|
78
|
+
"kan" => [:kannada, :kn],
|
|
79
|
+
"kaz" => [:kazakh, :kk],
|
|
80
|
+
"khm" => [:khmer, :km],
|
|
81
|
+
"kmb" => [:kimbundu, :kmb],
|
|
82
|
+
"kin" => [:kinyarwanda, :rw],
|
|
83
|
+
"kir" => [:kirghiz, :ky],
|
|
84
|
+
"koi" => [:komi_permyak, :koi],
|
|
85
|
+
"kng" => [:koongo, :kng],
|
|
86
|
+
"kor" => [:korean, :ko],
|
|
87
|
+
"kur" => [:kurdish, :ku],
|
|
88
|
+
"lat" => [:latin, :la],
|
|
89
|
+
"lav" => [:latvian, :lv],
|
|
90
|
+
"lin" => [:lingala, :ln],
|
|
91
|
+
"lit" => [:lithuanian, :lt],
|
|
92
|
+
"src" => [:logudorese_sardinian, :src],
|
|
93
|
+
"nds" => [:low_german, :nds],
|
|
94
|
+
"lua" => [:luba_lulua, :lua],
|
|
95
|
+
"lun" => [:lunda, :lun],
|
|
96
|
+
"mkd" => [:macedonian, :mk],
|
|
97
|
+
"mad" => [:madurese, :mad],
|
|
98
|
+
"mag" => [:magahi, :mag],
|
|
99
|
+
"mai" => [:maithili, :mai],
|
|
100
|
+
"vmw" => [:makhuwa, :vmw],
|
|
101
|
+
"kde" => [:makonde, :kde],
|
|
102
|
+
"mlg" => [:malagasy, :mg],
|
|
103
|
+
"mal" => [:malayalam, :ml],
|
|
104
|
+
"mar" => [:marathi, :mr],
|
|
105
|
+
"men" => [:mende, :men],
|
|
106
|
+
"min" => [:minangkabau, :min],
|
|
107
|
+
"mos" => [:mossi, :mos],
|
|
108
|
+
"ndo" => [:ndonga, :ng],
|
|
109
|
+
"nep" => [:nepali, :ne],
|
|
110
|
+
"dip" => [:northeastern_dinka, :dip],
|
|
111
|
+
"uzn" => [:northern_uzbek, :uzn],
|
|
112
|
+
"nob" => [:norwegian, :no],
|
|
113
|
+
"nno" => [:norwegian_nynorsk, :nn],
|
|
114
|
+
"nyn" => [:nyankole, :nyn],
|
|
115
|
+
"ori" => [:oriya, :or],
|
|
116
|
+
"orm" => [:oromo, :om],
|
|
117
|
+
"pam" => [:pampanga, :pam],
|
|
118
|
+
"pan" => [:panjabi, :pa],
|
|
119
|
+
"nso" => [:pedi, :nso],
|
|
120
|
+
"pol" => [:polish, :pl],
|
|
121
|
+
"por" => [:portuguese, :pt],
|
|
122
|
+
"ron" => [:romanian, :ro],
|
|
123
|
+
"run" => [:rundi, :rn],
|
|
124
|
+
"rus" => [:russian, :ru],
|
|
125
|
+
"sag" => [:sango, :sg],
|
|
126
|
+
"skr" => [:saraiki, :skr],
|
|
127
|
+
"srp" => [:serbian, :sr],
|
|
128
|
+
"sna" => [:shona, :sn],
|
|
129
|
+
"sin" => [:sinhala, :si],
|
|
130
|
+
"snn" => [:siona, :snn],
|
|
131
|
+
"slk" => [:slovak, :sk],
|
|
132
|
+
"slv" => [:slovenian, :sl],
|
|
133
|
+
"som" => [:somali, :so],
|
|
134
|
+
"snk" => [:soninke, :snk],
|
|
135
|
+
"hms" => [:southern_qiandong_miao, :hms],
|
|
136
|
+
"sot" => [:southern_sotho, :st],
|
|
137
|
+
"spa" => [:spanish, :es],
|
|
138
|
+
"suk" => [:sukuma, :suk],
|
|
139
|
+
"sun" => [:sundanese, :su],
|
|
140
|
+
"swh" => [:swahili, :swh],
|
|
141
|
+
"ssw" => [:swati, :ss],
|
|
142
|
+
"swe" => [:swedish, :sv],
|
|
143
|
+
"tgl" => [:tagalog, :tl],
|
|
144
|
+
"tgk" => [:tajik, :tg],
|
|
145
|
+
"tam" => [:tamil, :ta],
|
|
146
|
+
"tat" => [:tatar, :tt],
|
|
147
|
+
"tel" => [:telugu, :te],
|
|
148
|
+
"tha" => [:thai, :th],
|
|
149
|
+
"tir" => [:tigrinya, :ti],
|
|
150
|
+
"tem" => [:timne, :tem],
|
|
151
|
+
"tiv" => [:tiv, :tiv],
|
|
152
|
+
"tpi" => [:tok_pisin, :tpi],
|
|
153
|
+
"toi" => [:tonga, :toi],
|
|
154
|
+
"als" => [:tosk_albanian, :als],
|
|
155
|
+
"tso" => [:tsonga, :ts],
|
|
156
|
+
"tsn" => [:tswana, :tn],
|
|
157
|
+
"tur" => [:turkish, :tr],
|
|
158
|
+
"tuk" => [:turkmen, :tk],
|
|
159
|
+
"uig" => [:uighur, :ug],
|
|
160
|
+
"ukr" => [:ukrainian, :uk],
|
|
161
|
+
"umb" => [:umbundu, :umb],
|
|
162
|
+
"urd" => [:urdu, :ur],
|
|
163
|
+
"uzb" => [:uzbek, :uz],
|
|
164
|
+
"vie" => [:vietnamese, :vi],
|
|
165
|
+
"rmy" => [:vlax_romani, :rmy],
|
|
166
|
+
"war" => [:waray, :war],
|
|
167
|
+
"cym" => [:welsh, :cy],
|
|
168
|
+
"wol" => [:wolof, :wo],
|
|
169
|
+
"xho" => [:xhosa, :xh],
|
|
170
|
+
"yao" => [:yao, :yao],
|
|
171
|
+
"yid" => [:yiddish, :yi],
|
|
172
|
+
"yor" => [:yoruba, :yo],
|
|
173
|
+
"zul" => [:zulu, :zu],
|
|
174
|
+
}.freeze
|
|
175
|
+
|
|
176
|
+
# Convenience aliases for legacy selection symbols that no longer map 1:1.
|
|
177
|
+
NAME_ALIASES = { pinyin: :chinese, mandarin: :chinese }.freeze
|
|
178
|
+
|
|
179
|
+
private_constant :CODE_INFO, :NAME_ALIASES
|
|
180
|
+
end
|