human_languages 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.reuse/dep5 +12 -0
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +65 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +48 -0
- data/LICENSES/LicenseRef-ISO639-3ToU.txt +11 -0
- data/LICENSES/MIT.txt +9 -0
- data/README.adoc +133 -0
- data/Rakefile +16 -0
- data/data/iso-639-3.tsv +7911 -0
- data/lib/languages/constants.rb +6 -0
- data/lib/languages/language.rb +62 -0
- data/lib/languages/version.rb +5 -0
- data/lib/languages.rb +85 -0
- data/sig/languages.rbs +67 -0
- metadata +63 -0
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Languages
  # Language defined in ISO 639-3.
  #
  # Equality (+==+, +eql?+, +hash+) and ordering (+<=>+) are derived solely
  # from the ISO 639-3 identifier, so instances can be sorted, compared and
  # used as Hash keys.
  class Language
    include Comparable

    attr_reader :iso639_1, :iso639_2b, :iso639_2t, :iso639_3, :scope, :type, :name # , :comment

    # Builds a language from one row of the ISO 639-3 code table.
    #
    # @param csv_attributes [Hash{Symbol => String, nil}] row keyed by +:id+,
    #   +:part2b+, +:part2t+, +:part1+, +:scope+, +:language_type+ and +:ref_name+
    # @raise [KeyError] if any of these keys is missing
    def initialize(csv_attributes) # rubocop:disable Metrics/AbcSize
      @iso639_3 = csv_attributes.fetch(:id).to_sym
      # The remaining code columns are optional in the table, hence the safe navigation.
      @iso639_2b = csv_attributes.fetch(:part2b)&.to_sym
      @iso639_2t = csv_attributes.fetch(:part2t)&.to_sym
      @iso639_1 = csv_attributes.fetch(:part1)&.to_sym

      # The table stores scope and type as a single upper-case letter; map it to
      # the SCOPES / TYPES entry starting with that letter (nil if none matches).
      scope_code = csv_attributes.fetch(:scope)
      type_code = csv_attributes.fetch(:language_type)
      @scope = SCOPES.find { |s| s.chr.upcase == scope_code }
      @type = TYPES.find { |t| t.chr.upcase == type_code }

      @name = csv_attributes.fetch(:ref_name)
      # @comment = csv_attributes.fetch(:comment)
    end

    alias iso639_2 iso639_2t
    alias iso639_5 iso639_2

    alias alpha2 iso639_1
    alias alpha3 iso639_3
    alias alpha3_bibliographic iso639_2b
    alias alpha3_terminology iso639_2t

    # @return [String] the reference name; an empty String if the name is nil
    def to_s
      name.to_s # Enforce return of String, even if name is nil
    end

    # Defines one predicate per language type, e.g. +living?+ for the type "living".
    TYPES.each do |type|
      define_method "#{type}?" do
        self.type == type
      end
    end

    # Defines one predicate per scope. Every predicate name ends in +language?+
    # to prevent ambiguity of scope "special" and type "special".
    SCOPES.each do |scope|
      method_name = scope.end_with?('language') ? scope : "#{scope}_language"

      define_method "#{method_name}?" do
        self.scope == scope
      end
    end

    # @param other [Object]
    # @return [Boolean] true if +other+ has the same class and the same ISO 639-3 identifier
    def ==(other)
      other.class == self.class && other.iso639_3 == iso639_3
    end

    alias eql? ==

    # @return [Integer] hash value consistent with +eql?+
    def hash
      iso639_3.hash
    end

    # Orders languages ascending by their ISO 639-3 identifier.
    #
    # @param other [Object]
    # @return [Integer, nil] -1, 0 or 1; +nil+ if +other+ is not a Language
    def <=>(other)
      return nil unless other.is_a?(Language)

      iso639_3 <=> other.iso639_3
    end
  end
end
|
data/lib/languages.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
|
5
|
+
require_relative 'languages/version'
|
6
|
+
require_relative 'languages/constants'
|
7
|
+
require_relative 'languages/language'
|
8
|
+
|
9
|
+
# Provides living, extinct, ancient, historical, and constructed languages, specified in ISO 639-3
module Languages
  # Every row of the bundled ISO 639-3 table as a Language, parsed once when
  # this file is loaded. The array itself is frozen.
  @@data = CSV.read(File.join(File.dirname(__FILE__), '../data/iso-639-3.tsv'), headers: true, col_sep: "\t") # rubocop:disable Style/ClassVars
              .map { |row| row.to_h.transform_keys { |k| k.downcase.to_sym } }
              .map { |attributes| Language.new(attributes) }
              .freeze

  class << self
    # One reader per language type, named after the type, returning all
    # languages for which the matching predicate (e.g. +living?+) is true.
    TYPES.each do |type|
      define_method type do
        @@data.select { |l| l.public_send("#{type}?") }
      end
    end

    # One reader per scope, named "<scope>s" with the scope name ending in
    # "language", returning all languages of that scope.
    SCOPES.each do |scope|
      # prevent ambiguity of scope "special" and type "special"
      scope = "#{scope}_language" unless scope.end_with? 'language'

      define_method "#{scope}s" do
        @@data.select { |l| l.public_send("#{scope}?") }
      end
    end

    # Looks up a language by code or by reference name (case-insensitive).
    #
    # A key of length 3 is tried as ISO 639-3 / ISO 639-2 code and a key of
    # length 2 as ISO 639-1 code. If no code matches, or the key has any other
    # length, the key is compared with the reference names. Codes therefore
    # take precedence over equally spelled names.
    #
    # @param key [String, Symbol] code or reference name
    # @return [Language, nil] the matching language; otherwise +nil+
    def [](key)
      key = key.to_s.downcase
      by_code =
        case key.size
        when 3 then get_by_alpha3(key.to_sym)
        when 2 then get_by_alpha2(key.to_sym)
        end

      # Fall back to the name so that two- and three-letter reference names
      # stay reachable when they are not a code themselves.
      by_code || get_by_name(key)
    end

    # Returns all languages whose reference name matches the pattern.
    #
    # @param pattern [String, Regexp] pattern handed to String#match?
    # @return [Array<Language>]
    def search(pattern)
      @@data.select { |l| l.name.match? pattern }
    end

    # Returns all known human languages, specified in ISO 639-3
    #
    # @return [Array<Language>] the frozen list of all languages
    def all
      @@data
    end

    # @return [Array<String>] reference names of all languages
    def names
      @@data.map(&:name)
    end

    # @return [Array<Symbol>] ISO 639-1 identifiers of the languages that have one
    def alpha2_codes
      @@data.map(&:alpha2).compact
    end

    # @return [Array<Symbol>] ISO 639-3 identifiers of all languages
    def alpha3_codes
      @@data.map(&:alpha3)
    end

    private

    # Returns language associated with ISO 639-1 identifier
    # @param [Symbol] key ISO 639-1 identifier
    # @return [Language,NilClass] language associated with the identifier; otherwise +nil+
    def get_by_alpha2(key)
      @@data.find { |l| l.iso639_1 == key }
    end

    # Returns language associated with ISO 639-2 or ISO 639-3 identifier
    # @param [Symbol] key ISO 639-2 or ISO 639-3 identifier
    # @return [Language,NilClass] language associated with the identifier; otherwise +nil+
    def get_by_alpha3(key)
      @@data.find { |l| l.iso639_3 == key || l.iso639_2b == key || l.iso639_2t == key }
    end

    # Returns language associated with ISO 639-3 reference name (case-insensitive)
    # @param [String] name reference name (english)
    # @return [Language,NilClass] language associated with the name; otherwise +nil+
    def get_by_name(name)
      wanted = name.downcase # normalise once instead of once per row
      @@data.find { |l| l.name.downcase == wanted }
    end
  end
end
|
data/sig/languages.rbs
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
# Type signatures for the Languages module and its Language value class.
module Languages
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides

  TYPES: Array[String]
  SCOPES: Array[String]

  @@data: Array[Language]

  # The module-level API is defined on the singleton class only, so it is
  # declared with `self.` (singleton method) rather than `self?.`, which
  # would additionally declare a private instance method.

  # Lookup by ISO 639 code or reference name, and pattern search on names.
  def self.[]: (String | Symbol) -> Language?
  def self.search: (String | Regexp) -> Array[Language]

  def self.all: () -> Array[Language]
  def self.names: () -> Array[String]
  def self.alpha2_codes: () -> Array[Symbol]
  def self.alpha3_codes: () -> Array[Symbol]

  # Languages filtered by scope.
  def self.special_languages: () -> Array[Language]
  def self.macrolanguages: () -> Array[Language]
  def self.individual_languages: () -> Array[Language]

  # Languages filtered by type.
  def self.ancient: () -> Array[Language]
  def self.constructed: () -> Array[Language]
  def self.extinct: () -> Array[Language]
  def self.historical: () -> Array[Language]
  def self.living: () -> Array[Language]
  def self.special: () -> Array[Language]

  # Language defined in ISO 639-3.
  class Language
    attr_reader iso639_1: Symbol?
    attr_reader iso639_2b: Symbol?
    attr_reader iso639_2t: Symbol?
    attr_reader iso639_3: Symbol
    attr_reader scope: String
    attr_reader type: String
    attr_reader name: String

    alias iso639_2 iso639_2t
    alias iso639_5 iso639_2

    alias alpha2 iso639_1
    alias alpha3 iso639_3
    alias alpha3_bibliographic iso639_2b
    alias alpha3_terminology iso639_2t

    def initialize: ({id: String, part2b: String?, part2t: String?, part1: String?, scope: "I" | "M" | "S", language_type: "A" | "C" | "E" | "H" | "L" | "S", ref_name: String, comment: String?}) -> void

    def to_s: () -> String

    # Scope predicates.
    def special_language?: () -> bool
    def macrolanguage?: () -> bool
    def individual_language?: () -> bool

    # Type predicates.
    def ancient?: () -> bool
    def constructed?: () -> bool
    def extinct?: () -> bool
    def historical?: () -> bool
    def living?: () -> bool
    def special?: () -> bool

    # Identity and ordering are based on the ISO 639-3 identifier.
    def ==: (untyped) -> bool
    alias eql? ==
    def hash: () -> Integer
    def <=>: (Language) -> Integer
  end
end
|
metadata
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: human_languages
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Benno Bielmeier
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-06-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: ISO 639-3 is a set of codes that defines three-letter identifiers for
|
14
|
+
all known human languages.
|
15
|
+
email:
|
16
|
+
- git@bbenno.com
|
17
|
+
executables: []
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- ".reuse/dep5"
|
22
|
+
- ".rubocop.yml"
|
23
|
+
- CHANGELOG.md
|
24
|
+
- Gemfile
|
25
|
+
- Gemfile.lock
|
26
|
+
- LICENSES/LicenseRef-ISO639-3ToU.txt
|
27
|
+
- LICENSES/MIT.txt
|
28
|
+
- README.adoc
|
29
|
+
- Rakefile
|
30
|
+
- data/iso-639-3.tsv
|
31
|
+
- lib/languages.rb
|
32
|
+
- lib/languages/constants.rb
|
33
|
+
- lib/languages/language.rb
|
34
|
+
- lib/languages/version.rb
|
35
|
+
- sig/languages.rbs
|
36
|
+
homepage: https://github.com/bbenno/languages
|
37
|
+
licenses:
|
38
|
+
- MIT
|
39
|
+
metadata:
|
40
|
+
homepage_uri: https://github.com/bbenno/languages
|
41
|
+
source_code_uri: https://github.com/bbenno/languages
|
42
|
+
changelog_uri: https://github.com/bbenno/languages/blob/HEAD/CHANGELOG.md
|
43
|
+
rubygems_mfa_required: 'true'
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 2.7.0
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubygems_version: 3.3.7
|
60
|
+
signing_key:
|
61
|
+
specification_version: 4
|
62
|
+
summary: ISO 639-3 languages
|
63
|
+
test_files: []
|