unclekryon 0.4.9.pre.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +34 -0
- data/Gemfile.lock +43 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +59 -0
- data/bin/unclekryon +30 -0
- data/iso/can_provs_terrs.yaml +54 -0
- data/iso/countries.yaml +3050 -0
- data/iso/iso.yaml +8 -0
- data/iso/languages.yaml +5641 -0
- data/iso/regions.yaml +42 -0
- data/iso/subregions.yaml +6 -0
- data/iso/usa_states.yaml +230 -0
- data/lib/unclekryon.rb +384 -0
- data/lib/unclekryon/data/album_data.rb +147 -0
- data/lib/unclekryon/data/artist_data.rb +109 -0
- data/lib/unclekryon/data/artist_data_data.rb +146 -0
- data/lib/unclekryon/data/aum_data.rb +75 -0
- data/lib/unclekryon/data/base_data.rb +79 -0
- data/lib/unclekryon/data/pic_data.rb +76 -0
- data/lib/unclekryon/data/release_data.rb +57 -0
- data/lib/unclekryon/data/social_data.rb +39 -0
- data/lib/unclekryon/data/timespan_data.rb +70 -0
- data/lib/unclekryon/dev_opts.rb +41 -0
- data/lib/unclekryon/hacker.rb +327 -0
- data/lib/unclekryon/iso.rb +341 -0
- data/lib/unclekryon/iso/base_iso.rb +196 -0
- data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
- data/lib/unclekryon/iso/country.rb +133 -0
- data/lib/unclekryon/iso/language.rb +241 -0
- data/lib/unclekryon/iso/region.rb +53 -0
- data/lib/unclekryon/iso/subregion.rb +53 -0
- data/lib/unclekryon/iso/usa_state.rb +106 -0
- data/lib/unclekryon/jsoner.rb +124 -0
- data/lib/unclekryon/log.rb +111 -0
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
- data/lib/unclekryon/server.rb +29 -0
- data/lib/unclekryon/trainer.rb +231 -0
- data/lib/unclekryon/uploader.rb +29 -0
- data/lib/unclekryon/util.rb +228 -0
- data/lib/unclekryon/version.rb +26 -0
- data/unclekryon.gemspec +67 -0
- metadata +189 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'bundler/setup'
|
|
25
|
+
|
|
26
|
+
require 'nokogiri'
|
|
27
|
+
require 'open-uri'
|
|
28
|
+
|
|
29
|
+
require 'unclekryon/iso/base_iso'
|
|
30
|
+
|
|
31
|
+
##
|
|
32
|
+
# @see https://en.wikipedia.org/wiki/ISO_3166-2:CA
|
|
33
|
+
# @see https://www.iso.org/obp/ui/#iso:code:3166:CA
|
|
34
|
+
##
|
|
35
|
+
module UncleKryon
|
|
36
|
+
# A single Canadian province or territory built from one parsed table row.
class CanProvTerr < BaseIso
  # @param row [Array<String>,nil] cells of one table row; index 2 is read as the name
  #   and index 1 as the code. Anything that is not an Array is ignored, so only the
  #   parent initializer runs.
  def initialize(row=nil)
    super()

    return unless row.is_a?(Array)

    iso_class = self.class

    @name = iso_class.simplify_name(row[2])
    @code = iso_class.simplify_code(row[1])
  end
end
|
|
46
|
+
|
|
47
|
+
# Collection of {CanProvTerr} entries, keyed by code, that can be loaded from a file
# or scraped from a saved HTML table and written back out.
class CanProvsTerrs < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/can_provs_terrs.yaml"

  def initialize()
    super()

    @id = 'CAN Provinces & Territories'
  end

  # Creates a new collection and delegates to the inherited instance-level +load_file+.
  #
  # @param filepath [String] file to load
  # @return the result of the instance-level +load_file+
  def self.load_file(filepath=DEFAULT_FILEPATH)
    return CanProvsTerrs.new().load_file(filepath)
  end

  # Parses the table rows in +parse_filepath+ and saves the resulting entries.
  #
  # Rows whose 5th cell is "fr" (case-insensitive) are skipped. Every complete group of
  # 7 cells in a row becomes one {CanProvTerr}. Rows with fewer than 7 +<td>+ cells
  # (for example, header rows that only contain +<th>+) are skipped.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if the same code is parsed twice
  # @see https://www.iso.org/obp/ui/#iso:code:3166:CA
  def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
    cells_per_row = 7

    # Block form so the handle is closed as soon as Nokogiri has read the document.
    # NOTE(review): Kernel#open accepts more than plain file paths (e.g. a leading '|'
    # spawns a command on Rubies that still support it); only pass trusted paths.
    doc = open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

    provs_terrs = CanProvsTerrs.new()

    doc.css('tr').each() do |tr|
      cells = tr.css('td').map() { |td| td.content.gsub(/[[:space:]]+/,' ').strip() }

      # Incomplete rows can never yield an entry; skipping them here also avoids
      # calling a method on the missing language cell.
      next if cells.length < cells_per_row

      # Skip French; we just want English
      next if cells[4].downcase() == 'fr'

      cells.each_slice(cells_per_row) do |row|
        next if row.length < cells_per_row # Trailing partial group

        prov_terr = CanProvTerr.new(row)
        raise "CAN prov/terr already exists: #{prov_terr.inspect()}" if provs_terrs.key?(prov_terr.code)

        # Same name under a different code is only reported, not rejected.
        provs_terrs.values.each_value() do |v|
          puts "Duplicate CAN prov/terr names: #{v.name}" if v.name == prov_terr.name
        end

        provs_terrs[prov_terr.code] = prov_terr
      end
    end

    provs_terrs.sort_keys!()
    provs_terrs.save_to_file(save_filepath)
  end
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# When run directly: with no arguments, load the default file and print it;
# otherwise parse ARGV[0] and save to ARGV[1] (or the default file if omitted).
if __FILE__ == $0
  if ARGV.empty?
    puts UncleKryon::CanProvsTerrs.load_file().to_s()
  else
    save_filepath = ARGV.fetch(1,UncleKryon::CanProvsTerrs::DEFAULT_FILEPATH)

    UncleKryon::CanProvsTerrs.parse_and_save_to_file(ARGV[0],save_filepath)
  end
end
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'bundler/setup'
|
|
25
|
+
|
|
26
|
+
require 'nokogiri'
|
|
27
|
+
require 'open-uri'
|
|
28
|
+
require 'yaml'
|
|
29
|
+
|
|
30
|
+
require 'unclekryon/iso/base_iso'
|
|
31
|
+
|
|
32
|
+
##
|
|
33
|
+
# @see https://en.wikipedia.org/wiki/ISO_3166
|
|
34
|
+
# @see https://en.wikipedia.org/wiki/ISO_3166-1
|
|
35
|
+
# @see https://en.wikipedia.org/wiki/ISO_3166-2:GB
|
|
36
|
+
# @see https://www.iso.org/obp/ui/#search/code/
|
|
37
|
+
##
|
|
38
|
+
module UncleKryon
|
|
39
|
+
# A single country with its alpha-2 and alpha-3 codes, built from one parsed table row.
class Country < BaseIso
  attr_reader :names
  attr_reader :codes
  attr_reader :alpha2_code
  attr_reader :alpha3_code
  attr_reader :region

  # @param row [Array<String>,nil] cells of one table row; index 0 is read as the name,
  #   index 2 as the alpha-2 code and index 3 as the alpha-3 code. Anything that is not
  #   an Array leaves the fields declared here as nil.
  def initialize(row=nil)
    super()

    @names = nil
    @codes = nil
    @alpha2_code = nil
    @alpha3_code = nil
    @region = nil

    if row.is_a?(Array)
      @name = self.class.simplify_name(row[0])
      @alpha2_code = row[2]
      @alpha3_code = row[3]

      # NOTE(review): @names mirrors @name as a single value here, not a list;
      # confirm against the saved YAML before changing its shape.
      @names = @name
      @code = @alpha3_code # The alpha-3 code is the primary code
      @codes = [@alpha3_code,@alpha2_code].compact().uniq()
    end
  end

  # @return [String] a bracketed, comma-separated summary of this country
  def to_s()
    # Array() lets this work whether @names/@codes hold nil (no row given),
    # a single value (as assigned in #initialize) or a list.
    names = Array(@names).join(';')
    codes = Array(@codes).join(';')

    s = '['.dup()
    s << %Q("#{@name}","#{names}")
    s << %Q(,#{@code},"#{codes}",#{@alpha2_code},#{@alpha3_code})
    s << %Q(,#{@region})
    s << ']'

    return s
  end
end
|
|
76
|
+
|
|
77
|
+
# Collection of {Country} entries, keyed by code, that can be loaded from a file
# or scraped from a saved HTML table and written back out.
class Countries < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/countries.yaml"

  def initialize()
    super()
  end

  # Creates a new collection and delegates to the inherited instance-level +load_file+.
  #
  # @param filepath [String] file to load
  # @return the result of the instance-level +load_file+
  def self.load_file(filepath=DEFAULT_FILEPATH)
    return Countries.new().load_file(filepath)
  end

  # Parses every +<td>+ in +parse_filepath+, treating each consecutive group of 5 cells
  # as one country, and saves the resulting entries. A trailing group with fewer than
  # 5 cells is ignored.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if the same code is parsed twice
  # @see https://www.iso.org/obp/ui/#search/code/
  def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
    cells_per_row = 5

    # Block form so the handle is closed as soon as Nokogiri has read the document.
    # NOTE(review): Kernel#open accepts more than plain file paths (e.g. a leading '|'
    # spawns a command on Rubies that still support it); only pass trusted paths.
    doc = open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

    cells = doc.css('td').map() { |td| td.content.gsub(/[[:space:]]+/,' ').strip() }
    countries = Countries.new()

    cells.each_slice(cells_per_row) do |row|
      next if row.length < cells_per_row # Trailing partial group

      country = Country.new(row)
      raise "Country already exists: #{country.inspect()}" if countries.key?(country.code)

      # Same name under a different code is only reported, not rejected.
      countries.values.each_value() do |v|
        puts "Duplicate country names: #{v.name}" if v.name == country.name
      end

      countries[country.code] = country
    end

    countries.sort_keys!()
    countries.save_to_file(save_filepath)
  end
end
|
|
124
|
+
end
|
|
125
|
+
|
|
126
|
+
# When run directly: with no arguments, load the default file and print it;
# otherwise parse ARGV[0] and save to ARGV[1] (or the default file if omitted).
if __FILE__ == $0
  if ARGV.empty?
    puts UncleKryon::Countries.load_file().to_s()
  else
    save_filepath = ARGV.fetch(1,UncleKryon::Countries::DEFAULT_FILEPATH)

    UncleKryon::Countries.parse_and_save_to_file(ARGV[0],save_filepath)
  end
end
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'bundler/setup'
|
|
25
|
+
|
|
26
|
+
require 'nokogiri'
|
|
27
|
+
require 'open-uri'
|
|
28
|
+
require 'yaml'
|
|
29
|
+
|
|
30
|
+
require 'unclekryon/dev_opts'
|
|
31
|
+
require 'unclekryon/log'
|
|
32
|
+
|
|
33
|
+
require 'unclekryon/iso/base_iso'
|
|
34
|
+
|
|
35
|
+
##
|
|
36
|
+
# @see https://en.wikipedia.org/wiki/ISO_639
|
|
37
|
+
# @see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
|
38
|
+
# @see http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
|
|
39
|
+
##
|
|
40
|
+
module UncleKryon
|
|
41
|
+
# A single language with its alpha-2 and alpha-3 codes, built from one parsed table row.
class Language < BaseIso
  attr_reader :names
  attr_reader :codes
  attr_reader :alpha2_code
  attr_reader :alpha3_code
  attr_reader :alpha3_code_b

  # @param row [Array<String>,nil] cells of one table row:
  #   index 0 holds the alpha-3 code, either alone or as tagged codes such as
  #   "xxx (B) yyy (T)"; index 1 holds the alpha-2 code (may be empty);
  #   index 2 holds the names separated by ';'.
  #   Anything that is not an Array leaves the fields declared here as nil.
  # @raise [RuntimeError] if the alpha-3 cell uses tags that are misplaced or repeated,
  #   or if it has no (T) code
  def initialize(row=nil)
    super()

    @names = nil
    @codes = nil
    @alpha2_code = nil
    @alpha3_code = nil
    @alpha3_code_b = nil

    if row.is_a?(Array)
      @names = row[2].split(';').compact().uniq().map(&self.class.method(:fix_name))
      @alpha2_code = row[1].empty?() ? nil : row[1]

      parse_alpha3_codes(row[0])

      @name = @names[0]
      @code = @alpha3_code # The alpha-3 code is the primary code
      @codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact().uniq()
    end
  end

  # @see Languages.parse_and_save_to_file(...)
  def ==(lang)
    return super(lang) &&
           @names == lang.names &&
           @codes == lang.codes &&
           @alpha2_code == lang.alpha2_code &&
           @alpha3_code == lang.alpha3_code &&
           @alpha3_code_b == lang.alpha3_code_b
  end

  # @return [String] a bracketed, comma-separated summary of this language
  def to_s()
    # Array() keeps this from raising when no row was given (nil fields).
    names = Array(@names).join(';')
    codes = Array(@codes).join(';')

    s = '['.dup()
    s << %Q("#{@name}","#{names}",)
    s << %Q(#{@code},"#{codes}",)
    s << %Q(#{@alpha2_code},#{@alpha3_code},#{@alpha3_code_b})
    s << ']'

    return s
  end

  private

  # Sets @alpha3_code (and @alpha3_code_b when present) from the alpha-3 cell.
  #
  # The cell is split on parentheses; a "B" or "T" part (case-insensitive) tags the
  # part right before it. The parts are kept in a local array so that the tags can
  # appear in either order.
  #
  # @param cell [String] raw text of the alpha-3 cell
  # @raise [RuntimeError] on a tag without a preceding code, a repeated tag, or a missing (T) code
  def parse_alpha3_codes(cell)
    parts = cell.split(/[[:space:]]*[\(\)][[:space:]]*/)

    # No tags at all: the cell itself is the code
    if parts.length <= 1
      @alpha3_code = cell
      return
    end

    parts.each(&:strip!)

    code_t = nil
    prev_was_tag = true # So that a tag in the very first position is rejected

    parts.each_with_index() do |part,i|
      tag = part.upcase()

      unless tag == 'B' || tag == 'T'
        prev_was_tag = false
        next
      end

      if prev_was_tag
        raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}"
      end

      if tag == 'B'
        raise "Multiple alpha3_code_b: #{parts}" unless @alpha3_code_b.nil?()
        @alpha3_code_b = parts[i - 1]
      else
        raise "Multiple alpha3_code (T): #{code_t}" unless code_t.nil?()
        code_t = parts[i - 1]
      end

      prev_was_tag = true
    end

    # Tagged cell without a (T) code
    if code_t.nil?()
      raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}"
    end

    @alpha3_code = code_t
  end
end
|
|
124
|
+
|
|
125
|
+
# Collection of {Language} entries, keyed by code, that can be loaded from a file
# or scraped from a saved HTML table and written back out.
class Languages < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"

  def initialize()
    super()
  end

  # Finds the codes of the languages named in +text+.
  #
  # The text is split with each separator regex in turn. If a token cannot be found and
  # another regex is left, the codes collected so far are discarded and the next regex
  # is tried. With the last regex, an unknown token raises in dev mode and is only
  # logged otherwise.
  #
  # @param text [String] language names, usually separated by '/' or '+', sometimes by whitespace
  # @param add_english [Boolean] if true, append the code found for 'eng' unless already present
  # @param options [Hash] accepted but not used here
  # @return [Array,nil] the found codes, or nil if there are none
  # @raise [RuntimeError] in dev mode, if a token is unknown with the last regex
  def find_by_kryon(text,add_english: false,**options)
    langs = []
    regexes = [
      /[[:space:]]*[\/\+][[:space:]]*/, # Multiple languages are usually separated by '/'
      /[[:space:]]+/                    # Sometimes separated by space/newline
    ]
    last_index = regexes.length() - 1

    regexes.each_with_index() do |regex,i|
      try_next_regex = false

      text.split(regex).each() do |token|
        # Fix misspellings and/or weird shortenings
        t = token.clone()
        t.gsub!(/\AFRENC\z/i,'French')
        t.gsub!(/[\+\*]+/,'') # Means more languages, but won't worry about it (since not listed)
        t.gsub!(/\ASPAN\z/i,'Spanish')
        t.gsub!(/\AENGLSH\z/i,'English')
        t.gsub!(/\AHUNGARY\z/i,'Hungarian')

        lang = find(t)

        if !lang.nil?()
          langs.push(lang.code)
        elsif i >= last_index
          msg = "No language found for: #{t}"

          raise msg if DevOpts.instance.dev?()
          log.warn(msg)
        else
          log.warn("Not a language; trying next regex: #{t}")

          # Abandon this regex entirely; a plain `next` here would only skip the
          # token and the outer loop would then stop after the first regex.
          langs.clear()
          try_next_regex = true
          break
        end
      end

      # No problem with this regex, so bail out
      break unless try_next_regex
    end

    if add_english
      eng_code = find_by_code('eng').code

      langs.push(eng_code) unless langs.include?(eng_code)
    end

    return langs.empty?() ? nil : langs
  end

  # Creates a new collection and delegates to the inherited instance-level +load_file+.
  #
  # @param filepath [String] file to load
  # @return the result of the instance-level +load_file+
  def self.load_file(filepath=DEFAULT_FILEPATH)
    return Languages.new().load_file(filepath)
  end

  # Parses every +<td>+ in +parse_filepath+, treating each consecutive group of 5 cells
  # as one language, and saves the resulting entries. A trailing group with fewer than
  # 5 cells is ignored. A repeated code is tolerated only if the repeated entry equals
  # the one already stored.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if a code is parsed twice with differing entries
  # @see http://www.loc.gov/standards/iso639-2/php/code_list.php
  def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
    cells_per_row = 5

    # Block form so the handle is closed as soon as Nokogiri has read the document.
    # NOTE(review): Kernel#open accepts more than plain file paths (e.g. a leading '|'
    # spawns a command on Rubies that still support it); only pass trusted paths.
    doc = open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

    cells = doc.css('td').map() { |td| td.content.gsub(/[[:space:]]+/,' ').strip() }
    langs = Languages.new()

    cells.each_slice(cells_per_row) do |row|
      next if row.length < cells_per_row # Trailing partial group

      lang = Language.new(row)

      if langs.key?(lang.code)
        # There were so many duplicates, so added comparison check
        raise "Language already exists: #{lang.inspect()}" if lang != langs[lang.code]
        next # Identical duplicate; keep the stored entry
      end

      # Same name under a different code is only reported, not rejected.
      langs.values.each_value() do |v|
        puts "Duplicate lang names: #{v.name}" if v.name == lang.name
      end

      langs[lang.code] = lang
    end

    langs.sort_keys!()
    langs.save_to_file(save_filepath)
  end
end
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# When run directly: with no arguments, load the default file and print it;
# otherwise parse ARGV[0] and save to ARGV[1] (or the default file if omitted).
if __FILE__ == $0
  if ARGV.empty?
    puts UncleKryon::Languages.load_file().to_s()
  else
    save_filepath = ARGV.fetch(1,UncleKryon::Languages::DEFAULT_FILEPATH)

    UncleKryon::Languages.parse_and_save_to_file(ARGV[0],save_filepath)
  end
end
|