iso-639 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -2
- data/Gemfile.lock +1 -7
- data/README.markdown +7 -2
- data/Rakefile +0 -14
- data/iso-639.gemspec +1 -5
- data/lib/iso-639.rb +32 -1
- data/test/test_ISO_639.rb +38 -0
- metadata +1 -21
- data/VERSION +0 -1
data/Gemfile
CHANGED
@@ -6,8 +6,8 @@ source "http://rubygems.org"
|
|
6
6
|
# Add dependencies to develop your gem here.
|
7
7
|
# Include everything needed to run rake, tests, features, etc.
|
8
8
|
group :development, :test do
|
9
|
-
gem "jeweler", "~> 1.8.4"
|
10
9
|
gem "mocha", ">= 0"
|
11
10
|
gem "rdoc", "~> 3.12"
|
12
11
|
gem "shoulda", ">= 0"
|
13
|
-
|
12
|
+
gem "rake"
|
13
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -4,13 +4,7 @@ GEM
|
|
4
4
|
activesupport (3.2.8)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
|
-
git (1.2.5)
|
8
7
|
i18n (0.6.1)
|
9
|
-
jeweler (1.8.4)
|
10
|
-
bundler (~> 1.0)
|
11
|
-
git (>= 1.2.5)
|
12
|
-
rake
|
13
|
-
rdoc
|
14
8
|
json (1.7.5)
|
15
9
|
metaclass (0.0.1)
|
16
10
|
mocha (0.12.7)
|
@@ -30,7 +24,7 @@ PLATFORMS
|
|
30
24
|
ruby
|
31
25
|
|
32
26
|
DEPENDENCIES
|
33
|
-
jeweler (~> 1.8.4)
|
34
27
|
mocha
|
28
|
+
rake
|
35
29
|
rdoc (~> 3.12)
|
36
30
|
shoulda
|
data/README.markdown
CHANGED
@@ -10,8 +10,6 @@ The [ISO 639-1](http://en.wikipedia.org/wiki/ISO_639-1) specification uses a two
|
|
10
10
|
|
11
11
|
The [ISO 639-2](http://www.loc.gov/standards/iso639-2/) ([Wikipedia](http://en.wikipedia.org/wiki/ISO_639-2)) specification uses a three-letter code, is used primarily in bibliography and terminology and covers many more languages than the ISO 639-1 specification.
|
12
12
|
|
13
|
-
The other ISO 639 standards are considered beyond the scope of this library.
|
14
|
-
|
15
13
|
## Usage
|
16
14
|
|
17
15
|
require 'iso-639'
|
@@ -27,6 +25,13 @@ To find a language entry:
|
|
27
25
|
# by French name
|
28
26
|
ISO_639.find_by_french_name("français")
|
29
27
|
|
28
|
+
The `ISO_639.search` class method searches across all fields and will
|
29
|
+
match names in cases where a record has multiple names. This method
|
30
|
+
always returns an array of 0 or more results. For example:
|
31
|
+
|
32
|
+
ISO_639.search("spanish")
|
33
|
+
# => [["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]]
|
34
|
+
|
30
35
|
Entries are arrays with convenience methods for accessing fields:
|
31
36
|
|
32
37
|
@entry = ISO_639.find("slo")
|
data/Rakefile
CHANGED
@@ -11,20 +11,6 @@ rescue Bundler::BundlerError => e
|
|
11
11
|
end
|
12
12
|
require 'rake'
|
13
13
|
|
14
|
-
require 'jeweler'
|
15
|
-
Jeweler::Tasks.new do |gem|
|
16
|
-
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
-
gem.name = "iso-639"
|
18
|
-
gem.homepage = "http://github.com/alphabetum/pandoc-ruby"
|
19
|
-
gem.license = "MIT"
|
20
|
-
gem.summary = %Q{ISO 639-1 and ISO 639-2 language code entries and convenience methods}
|
21
|
-
gem.description = %Q{ISO 639-1 and ISO 639-2 language code entries and convenience methods}
|
22
|
-
gem.email = "hi@williammelody.com"
|
23
|
-
gem.authors = ["William Melody"]
|
24
|
-
# dependencies defined in Gemfile
|
25
|
-
end
|
26
|
-
Jeweler::RubygemsDotOrgTasks.new
|
27
|
-
|
28
14
|
require 'rake/testtask'
|
29
15
|
Rake::TestTask.new(:test) do |test|
|
30
16
|
test.libs << 'lib' << 'test'
|
data/iso-639.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "iso-639"
|
8
|
-
s.version = "0.1.0"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["William Melody"]
|
@@ -23,7 +23,6 @@ Gem::Specification.new do |s|
|
|
23
23
|
"LICENSE",
|
24
24
|
"README.markdown",
|
25
25
|
"Rakefile",
|
26
|
-
"VERSION",
|
27
26
|
"iso-639.gemspec",
|
28
27
|
"lib/iso-639.rb",
|
29
28
|
"test/helper.rb",
|
@@ -39,18 +38,15 @@ Gem::Specification.new do |s|
|
|
39
38
|
s.specification_version = 3
|
40
39
|
|
41
40
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
42
|
-
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
43
41
|
s.add_development_dependency(%q<mocha>, [">= 0"])
|
44
42
|
s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
|
45
43
|
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
46
44
|
else
|
47
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
48
45
|
s.add_dependency(%q<mocha>, [">= 0"])
|
49
46
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
50
47
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
51
48
|
end
|
52
49
|
else
|
53
|
-
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
54
50
|
s.add_dependency(%q<mocha>, [">= 0"])
|
55
51
|
s.add_dependency(%q<rdoc>, ["~> 3.12"])
|
56
52
|
s.add_dependency(%q<shoulda>, [">= 0"])
|
data/lib/iso-639.rb
CHANGED
@@ -497,6 +497,28 @@ class ISO_639 < Array
|
|
497
497
|
self["zxx", "", "", "No linguistic content; Not applicable", "pas de contenu linguistique; non applicable"],
|
498
498
|
self["zza", "", "", "Zaza; Dimili; Dimli; Kirdki; Kirmanjki; Zazaki", "zaza; dimili; dimli; kirdki; kirmanjki; zazaki"]
|
499
499
|
]
|
500
|
+
|
501
|
+
# A reverse index generated from the ISO_639_2 data. Used for searching
|
502
|
+
# all words and codes in all fields.
|
503
|
+
REVERSE_INDEX = lambda {
|
504
|
+
index = {}
|
505
|
+
ISO_639_2.each_with_index do |record, i|
|
506
|
+
record.each do |field|
|
507
|
+
downcased = field.downcase
|
508
|
+
words = (
|
509
|
+
downcased.split(/[[:blank:]]|\(|\)|,|;/) +
|
510
|
+
downcased.split(/;/)
|
511
|
+
)
|
512
|
+
words.each do |word|
|
513
|
+
unless word.empty?
|
514
|
+
index[word] ||= []
|
515
|
+
index[word] << i
|
516
|
+
end
|
517
|
+
end
|
518
|
+
end
|
519
|
+
end
|
520
|
+
return index
|
521
|
+
}.call
|
500
522
|
|
501
523
|
# The ISO 639-1 dataset as an array of entries. Each entry is an array with
|
502
524
|
# the following format:
|
@@ -530,6 +552,15 @@ class ISO_639 < Array
|
|
530
552
|
def find_by_french_name(name)
|
531
553
|
ISO_639_2.detect { |entry| entry if entry.french_name == name }
|
532
554
|
end
|
555
|
+
|
556
|
+
# Returns an array of matches for the search term. The term can be a code
|
557
|
+
# of any kind, or it can be one of the words contained in the English or
|
558
|
+
# French name field.
|
559
|
+
def search(term)
|
560
|
+
normalized_term = term.downcase.strip
|
561
|
+
indexes = REVERSE_INDEX[normalized_term]
|
562
|
+
indexes ? ISO_639_2.values_at(*indexes).uniq : []
|
563
|
+
end
|
533
564
|
end
|
534
565
|
|
535
566
|
# The entry's alpha-3 bibliographic code.
|
@@ -544,4 +575,4 @@ class ISO_639 < Array
|
|
544
575
|
# The entry's french name.
|
545
576
|
def french_name; self[4] end
|
546
577
|
|
547
|
-
end
|
578
|
+
end
|
data/test/test_ISO_639.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: UTF-8
|
1
2
|
require 'helper'
|
2
3
|
|
3
4
|
class TestISO639 < Test::Unit::TestCase
|
@@ -37,5 +38,42 @@ class TestISO639 < Test::Unit::TestCase
|
|
37
38
|
assert_equal ["eng", "eng", "", "en", "English", "anglais"][i], @entry.send(m)
|
38
39
|
end
|
39
40
|
end
|
41
|
+
|
42
|
+
should "return single record array by searching a unique code" do
|
43
|
+
assert_equal(
|
44
|
+
[["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]],
|
45
|
+
ISO_639.search("es")
|
46
|
+
)
|
47
|
+
end
|
48
|
+
|
49
|
+
should "return single record array by searching a unique term" do
|
50
|
+
assert_equal(
|
51
|
+
[["spa", "", "es", "Spanish; Castilian", "espagnol; castillan"]],
|
52
|
+
ISO_639.search("spanish")
|
53
|
+
)
|
54
|
+
end
|
55
|
+
|
56
|
+
should "return multiple record array by searching a common term" do
|
57
|
+
assert_equal(
|
58
|
+
[
|
59
|
+
["egy", "", "", "Egyptian (Ancient)", "égyptien"],
|
60
|
+
["grc", "", "", "Greek, Ancient (to 1453)", "grec ancien (jusqu'à 1453)"]
|
61
|
+
],
|
62
|
+
ISO_639.search("ancient")
|
63
|
+
)
|
64
|
+
end
|
65
|
+
|
66
|
+
should "return empty array when searching a non-existent term" do
|
67
|
+
assert_equal(
|
68
|
+
[], ISO_639.search("bad term")
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
should "return single record array by searching a unique multi-word term" do
|
73
|
+
assert_equal(
|
74
|
+
[["ypk", "", "", "Yupik languages", "yupik, langues"]],
|
75
|
+
ISO_639.search("yupik, langues")
|
76
|
+
)
|
77
|
+
end
|
40
78
|
|
41
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: iso-639
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,22 +11,6 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2012-11-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
|
-
- !ruby/object:Gem::Dependency
|
15
|
-
name: jeweler
|
16
|
-
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
|
-
requirements:
|
19
|
-
- - ~>
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 1.8.4
|
22
|
-
type: :development
|
23
|
-
prerelease: false
|
24
|
-
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
|
-
requirements:
|
27
|
-
- - ~>
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
version: 1.8.4
|
30
14
|
- !ruby/object:Gem::Dependency
|
31
15
|
name: mocha
|
32
16
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,7 +73,6 @@ files:
|
|
89
73
|
- LICENSE
|
90
74
|
- README.markdown
|
91
75
|
- Rakefile
|
92
|
-
- VERSION
|
93
76
|
- iso-639.gemspec
|
94
77
|
- lib/iso-639.rb
|
95
78
|
- test/helper.rb
|
@@ -107,9 +90,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
107
90
|
- - ! '>='
|
108
91
|
- !ruby/object:Gem::Version
|
109
92
|
version: '0'
|
110
|
-
segments:
|
111
|
-
- 0
|
112
|
-
hash: 2442761226781882380
|
113
93
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
114
94
|
none: false
|
115
95
|
requirements:
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.1.0
|