iso-639 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -2
- data/Gemfile.lock +1 -7
- data/README.markdown +7 -2
- data/Rakefile +0 -14
- data/iso-639.gemspec +1 -5
- data/lib/iso-639.rb +32 -1
- data/test/test_ISO_639.rb +38 -0
- metadata +1 -21
- data/VERSION +0 -1
data/Gemfile
CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
|
|
6
6
|
# Add dependencies to develop your gem here.
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development, :test do
|
9
|
-
gem "jeweler", "~> 1.8.4"
|
10
9
|
gem "mocha", ">= 0"
|
11
10
|
gem "rdoc", "~> 3.12"
|
12
11
|
gem "shoulda", ">= 0"
|
13
|
-
|
12
|
+
gem "rake"
|
13
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -4,13 +4,7 @@ GEM
|
|
4
4
|
activesupport (3.2.8)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
|
-
git (1.2.5)
|
8
7
|
i18n (0.6.1)
|
9
|
-
jeweler (1.8.4)
|
10
|
-
bundler (~> 1.0)
|
11
|
-
git (>= 1.2.5)
|
12
|
-
rake
|
13
|
-
rdoc
|
14
8
|
json (1.7.5)
|
15
9
|
metaclass (0.0.1)
|
16
10
|
mocha (0.12.7)
|
@@ -30,7 +24,7 @@ PLATFORMS
|
|
30
24
|
ruby
|
31
25
|
|
32
26
|
DEPENDENCIES
|
33
|
-
jeweler (~> 1.8.4)
|
34
27
|
mocha
|
28
|
+
rake
|
35
29
|
rdoc (~> 3.12)
|
36
30
|
shoulda
|
data/README.markdown
CHANGED
@@ -10,8 +10,6 @@ The [ISO 639-1](http://en.wikipedia.org/wiki/ISO_639-1) specification uses a two
|
|
10
10
|
|
11
11
|
The [ISO 639-2](http://www.loc.gov/standards/iso639-2/) ([Wikipedia](http://en.wikipedia.org/wiki/ISO_639-2)) specification uses a three-letter code, is used primarily in bibliography and terminology and covers many more languages than the ISO 639-1 specification.
|
12
12
|
|
13
|
-
The other ISO 639 standards are considered beyond the scope of this library.
|
14
|
-
|
15
13
|
## Usage
|
16
14
|
|
17
15
|
require 'iso-639'
|
@@ -27,6 +25,13 @@ To find a language entry:
|
|
27
25
|
# by French name
|
28
26
|
ISO_639.find_by_french_name("français")
|
29
27
|
|
28
|
+
The `ISO_639.search` class method searches across all fields and will
|
29
|
+
match names in cases where a record has multiple names. This method
|
30
|
+
always returns an array of 0 or more results. For example:
|
31
|
+
|
32
|
+
ISO_639.search("spanish")
|
33
|
+
# => [["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]]
|
34
|
+
|
30
35
|
Entries are arrays with convenience methods for accessing fields:
|
31
36
|
|
32
37
|
@entry = ISO_639.find("slo")
|
data/Rakefile
CHANGED
@@ -11,20 +11,6 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
-
require 'jeweler'
|
15
|
-
Jeweler::Tasks.new do |gem|
|
16
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
-
gem.name = "iso-639"
|
18
|
-
gem.homepage = "http://github.com/alphabetum/pandoc-ruby"
|
19
|
-
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{ISO 639-1 and ISO 639-2 language code entries and convenience methods}
|
21
|
-
gem.description = %Q{ISO 639-1 and ISO 639-2 language code entries and convenience methods}
|
22
|
-
gem.email = "hi@williammelody.com"
|
23
|
-
gem.authors = ["William Melody"]
|
24
|
-
# dependencies defined in Gemfile
|
25
|
-
end
|
26
|
-
Jeweler::RubygemsDotOrgTasks.new
|
27
|
-
|
28
14
|
require 'rake/testtask'
|
29
15
|
Rake::TestTask.new(:test) do |test|
|
30
16
|
test.libs << 'lib' << 'test'
|
data/iso-639.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "iso-639"
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["William Melody"]
|
@@ -23,7 +23,6 @@ Gem::Specification.new do |s|
|
|
23
23
|
"LICENSE",
|
24
24
|
"README.markdown",
|
25
25
|
"Rakefile",
|
26
|
-
"VERSION",
|
27
26
|
"iso-639.gemspec",
|
28
27
|
"lib/iso-639.rb",
|
29
28
|
"test/helper.rb",
|
@@ -39,18 +38,15 @@ Gem::Specification.new do |s|
|
|
39
38
|
s.specification_version = 3
|
40
39
|
|
41
40
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
42
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
43
41
|
s.add_development_dependency(%q<mocha>, [">= 0"])
|
44
42
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
45
43
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
46
44
|
else
|
47
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
48
45
|
s.add_dependency(%q<mocha>, [">= 0"])
|
49
46
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
50
47
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
51
48
|
end
|
52
49
|
else
|
53
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
54
50
|
s.add_dependency(%q<mocha>, [">= 0"])
|
55
51
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
56
52
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
data/lib/iso-639.rb
CHANGED
@@ -497,6 +497,28 @@ class ISO_639 < Array
|
|
497
497
|
self["zxx", "", "", "No linguistic content; Not applicable", "pas de contenu linguistique; non applicable"],
|
498
498
|
self["zza", "", "", "Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki", "zaza; dimili; dimli; kirdki; kirmanjki; zazaki"]
|
499
499
|
]
|
500
|
+
|
501
|
+
# A reverse index generated from the ISO_639_2 data. Used for searching
|
502
|
+
# all words and codes in all fields.
|
503
|
+
REVERSE_INDEX = lambda {
|
504
|
+
index = {}
|
505
|
+
ISO_639_2.each_with_index do |record, i|
|
506
|
+
record.each do |field|
|
507
|
+
downcased = field.downcase
|
508
|
+
words = (
|
509
|
+
downcased.split(/[[:blank:]]|\(|\)|,|;/) +
|
510
|
+
downcased.split(/;/)
|
511
|
+
)
|
512
|
+
words.each do |word|
|
513
|
+
unless word.empty?
|
514
|
+
index[word] ||= []
|
515
|
+
index[word] << i
|
516
|
+
end
|
517
|
+
end
|
518
|
+
end
|
519
|
+
end
|
520
|
+
return index
|
521
|
+
}.call
|
500
522
|
|
501
523
|
# The ISO 639-1 dataset as an array of entries. Each entry is an array with
|
502
524
|
# the following format:
|
@@ -530,6 +552,15 @@ class ISO_639 < Array
|
|
530
552
|
def find_by_french_name(name)
|
531
553
|
ISO_639_2.detect { |entry| entry if entry.french_name == name }
|
532
554
|
end
|
555
|
+
|
556
|
+
# Returns an array of matches for the search term. The term can be a code
|
557
|
+
# of any kind, or it can be one of the words contained in the English or
|
558
|
+
# French name field.
|
559
|
+
def search(term)
|
560
|
+
normalized_term = term.downcase.strip
|
561
|
+
indexes = REVERSE_INDEX[normalized_term]
|
562
|
+
indexes ? ISO_639_2.values_at(*indexes).uniq : []
|
563
|
+
end
|
533
564
|
end
|
534
565
|
|
535
566
|
# The entry's alpha-3 bibliographic code.
|
@@ -544,4 +575,4 @@ class ISO_639 < Array
|
|
544
575
|
# The entry's french name.
|
545
576
|
def french_name; self[4] end
|
546
577
|
|
547
|
-
end
|
578
|
+
end
|
data/test/test_ISO_639.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'helper'
|
2
3
|
|
3
4
|
class TestISO639 < Test::Unit::TestCase
|
@@ -37,5 +38,42 @@ class TestISO639 < Test::Unit::TestCase
|
|
37
38
|
assert_equal ["eng", "eng", "", "en", "English", "anglais"][i], @entry.send(m)
|
38
39
|
end
|
39
40
|
end
|
41
|
+
|
42
|
+
should "return single record array by searching a unique code" do
|
43
|
+
assert_equal(
|
44
|
+
[["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]],
|
45
|
+
ISO_639.search("es")
|
46
|
+
)
|
47
|
+
end
|
48
|
+
|
49
|
+
should "return single record array by searching a unique term" do
|
50
|
+
assert_equal(
|
51
|
+
[["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]],
|
52
|
+
ISO_639.search("spanish")
|
53
|
+
)
|
54
|
+
end
|
55
|
+
|
56
|
+
should "return multiple record array by searching a common term" do
|
57
|
+
assert_equal(
|
58
|
+
[
|
59
|
+
["egy", "", "", "Egyptian (Ancient)", "égyptien"],
|
60
|
+
["grc", "", "", "Greek, Ancient (to 1453)", "grec ancien (jusqu'à 1453)"]
|
61
|
+
],
|
62
|
+
ISO_639.search("ancient")
|
63
|
+
)
|
64
|
+
end
|
65
|
+
|
66
|
+
should "return empty array when searching a non-existent term" do
|
67
|
+
assert_equal(
|
68
|
+
[], ISO_639.search("bad term")
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
should "return single record array by searching a unique multi-word term" do
|
73
|
+
assert_equal(
|
74
|
+
[["ypk", "", "", "Yupik languages", "yupik, langues"]],
|
75
|
+
ISO_639.search("yupik, langues")
|
76
|
+
)
|
77
|
+
end
|
40
78
|
|
41
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iso-639
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,22 +11,6 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-11-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: jeweler
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 1.8.4
|
22
|
-
type: :development
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 1.8.4
|
30
14
|
- !ruby/object:Gem::Dependency
|
31
15
|
name: mocha
|
32
16
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,7 +73,6 @@ files:
|
|
89
73
|
- LICENSE
|
90
74
|
- README.markdown
|
91
75
|
- Rakefile
|
92
|
-
- VERSION
|
93
76
|
- iso-639.gemspec
|
94
77
|
- lib/iso-639.rb
|
95
78
|
- test/helper.rb
|
@@ -107,9 +90,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
90
|
- - ! '>='
|
108
91
|
- !ruby/object:Gem::Version
|
109
92
|
version: '0'
|
110
|
-
segments:
|
111
|
-
- 0
|
112
|
-
hash: 2442761226781882380
|
113
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
94
|
none: false
|
115
95
|
requirements:
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.1.0
|