unclekryon 0.4.10 → 0.4.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +6 -19
- data/Gemfile.lock +19 -23
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon.rb +165 -165
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +119 -133
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +36 -49
- data/lib/unclekryon/iso/language.rb +86 -96
- data/lib/unclekryon/iso/region.rb +11 -25
- data/lib/unclekryon/iso/subregion.rb +11 -25
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +69 -83
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +44 -42
- metadata +59 -16
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
|
@@ -36,76 +22,77 @@ module UncleKryon
|
|
36
22
|
class CanProvTerr < BaseIso
|
37
23
|
def initialize(row=nil)
|
38
24
|
super()
|
39
|
-
|
25
|
+
|
40
26
|
if row.is_a?(Array)
|
41
27
|
@name = self.class.simplify_name(row[2])
|
42
28
|
@code = self.class.simplify_code(row[1])
|
43
29
|
end
|
44
30
|
end
|
45
31
|
end
|
46
|
-
|
32
|
+
|
47
33
|
class CanProvsTerrs < BaseIsos
|
48
34
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/can_provs_terrs.yaml"
|
49
|
-
|
50
|
-
def initialize
|
35
|
+
|
36
|
+
def initialize
|
51
37
|
super()
|
52
|
-
|
38
|
+
|
53
39
|
@id = 'CAN Provinces & Territories'
|
54
40
|
end
|
55
|
-
|
41
|
+
|
56
42
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
57
|
-
return CanProvsTerrs.new
|
43
|
+
return CanProvsTerrs.new.load_file(filepath)
|
58
44
|
end
|
59
|
-
|
60
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
45
|
+
|
46
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
47
|
+
# into local file
|
61
48
|
# @param save_filepath [String] local file to save YAML to
|
62
49
|
# @see https://www.iso.org/obp/ui/#iso:code:3166:CA
|
63
50
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
64
|
-
doc = Nokogiri::HTML(
|
51
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
65
52
|
trs = doc.css('tr')
|
66
|
-
|
67
|
-
provs_terrs = CanProvsTerrs.new
|
68
|
-
|
69
|
-
trs.each
|
53
|
+
|
54
|
+
provs_terrs = CanProvsTerrs.new
|
55
|
+
|
56
|
+
trs.each do |tr|
|
70
57
|
tds = tr.css('td')
|
71
|
-
|
58
|
+
|
72
59
|
# Skip French; we just want English
|
73
|
-
next if tds[4].content.gsub(/[[:space:]]+/,' ').strip
|
74
|
-
|
60
|
+
next if tds[4].content.gsub(/[[:space:]]+/,' ').strip.downcase == 'fr'
|
61
|
+
|
75
62
|
i = 0
|
76
63
|
tr = []
|
77
|
-
|
78
|
-
tds.each
|
64
|
+
|
65
|
+
tds.each do |td|
|
79
66
|
c = td.content
|
80
67
|
c.gsub!(/[[:space:]]+/,' ')
|
81
|
-
c.strip!
|
68
|
+
c.strip!
|
82
69
|
tr.push(c)
|
83
|
-
|
70
|
+
|
84
71
|
if (i += 1) >= 7
|
85
72
|
#puts tr.inspect()
|
86
73
|
prov_terr = CanProvTerr.new(tr)
|
87
|
-
raise "CAN prov/terr already exists: #{prov_terr.inspect
|
88
|
-
|
89
|
-
provs_terrs.values.each_value
|
74
|
+
raise "CAN prov/terr already exists: #{prov_terr.inspect}" if provs_terrs.key?(prov_terr.code)
|
75
|
+
|
76
|
+
provs_terrs.values.each_value do |v|
|
90
77
|
puts "Duplicate CAN prov/terr names: #{v.name}" if v.name == prov_terr.name
|
91
78
|
end
|
92
|
-
|
79
|
+
|
93
80
|
provs_terrs[prov_terr.code] = prov_terr
|
94
|
-
tr.clear
|
81
|
+
tr.clear
|
95
82
|
i = 0
|
96
83
|
end
|
97
84
|
end
|
98
85
|
end
|
99
|
-
|
100
|
-
provs_terrs.sort_keys!
|
86
|
+
|
87
|
+
provs_terrs.sort_keys!
|
101
88
|
provs_terrs.save_to_file(save_filepath)
|
102
89
|
end
|
103
90
|
end
|
104
91
|
end
|
105
92
|
|
106
|
-
if $
|
93
|
+
if $PROGRAM_NAME == __FILE__
|
107
94
|
if ARGV.length < 1
|
108
|
-
puts UncleKryon::CanProvsTerrs.load_file
|
95
|
+
puts UncleKryon::CanProvsTerrs.load_file.to_s
|
109
96
|
else
|
110
97
|
UncleKryon::CanProvsTerrs.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
111
98
|
UncleKryon::CanProvsTerrs::DEFAULT_FILEPATH)
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -42,90 +28,91 @@ module UncleKryon
|
|
42
28
|
attr_reader :alpha2_code
|
43
29
|
attr_reader :alpha3_code
|
44
30
|
attr_reader :region
|
45
|
-
|
31
|
+
|
46
32
|
def initialize(row=nil)
|
47
33
|
super()
|
48
|
-
|
34
|
+
|
49
35
|
@names = nil
|
50
36
|
@codes = nil
|
51
37
|
@alpha2_code = nil
|
52
38
|
@alpha3_code = nil
|
53
39
|
@region = nil
|
54
|
-
|
40
|
+
|
55
41
|
if row.is_a?(Array)
|
56
42
|
@name = self.class.simplify_name(row[0])
|
57
43
|
@alpha2_code = row[2]
|
58
44
|
@alpha3_code = row[3]
|
59
|
-
|
45
|
+
|
60
46
|
@names = @name
|
61
47
|
@code = @alpha3_code
|
62
|
-
@codes = [@alpha3_code,@alpha2_code].compact
|
48
|
+
@codes = [@alpha3_code,@alpha2_code].compact.uniq
|
63
49
|
end
|
64
50
|
end
|
65
|
-
|
66
|
-
def to_s
|
67
|
-
s = '['.dup
|
51
|
+
|
52
|
+
def to_s
|
53
|
+
s = '['.dup
|
68
54
|
s << %Q("#{@name}","#{@names.join(';')}")
|
69
55
|
s << %Q(,#{@code},"#{@codes.join(';')}",#{@alpha2_code},#{@alpha3_code})
|
70
|
-
s <<
|
56
|
+
s << ",#{@region}"
|
71
57
|
s << ']'
|
72
|
-
|
58
|
+
|
73
59
|
return s
|
74
60
|
end
|
75
61
|
end
|
76
|
-
|
62
|
+
|
77
63
|
class Countries < BaseIsos
|
78
64
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/countries.yaml"
|
79
|
-
|
80
|
-
def initialize
|
65
|
+
|
66
|
+
def initialize
|
81
67
|
super()
|
82
68
|
end
|
83
|
-
|
69
|
+
|
84
70
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
85
|
-
return Countries.new
|
71
|
+
return Countries.new.load_file(filepath)
|
86
72
|
end
|
87
|
-
|
88
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
73
|
+
|
74
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
75
|
+
# into local file
|
89
76
|
# @param save_filepath [String] local file to save YAML to
|
90
77
|
# @see https://www.iso.org/obp/ui/#search/code/
|
91
78
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
92
|
-
doc = Nokogiri::HTML(
|
79
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
93
80
|
tds = doc.css('td')
|
94
|
-
|
95
|
-
countries = Countries.new
|
81
|
+
|
82
|
+
countries = Countries.new
|
96
83
|
i = 0
|
97
84
|
tr = []
|
98
|
-
|
85
|
+
|
99
86
|
tds.each do |td|
|
100
87
|
c = td.content
|
101
88
|
c.gsub!(/[[:space:]]+/,' ')
|
102
|
-
c.strip!
|
89
|
+
c.strip!
|
103
90
|
tr.push(c)
|
104
|
-
|
91
|
+
|
105
92
|
if (i += 1) >= 5
|
106
93
|
#puts tr.inspect()
|
107
94
|
country = Country.new(tr)
|
108
|
-
raise "Country already exists: #{country.inspect
|
109
|
-
|
110
|
-
countries.values.each_value
|
95
|
+
raise "Country already exists: #{country.inspect}" if countries.key?(country.code)
|
96
|
+
|
97
|
+
countries.values.each_value do |v|
|
111
98
|
puts "Duplicate country names: #{v.name}" if v.name == country.name
|
112
99
|
end
|
113
|
-
|
100
|
+
|
114
101
|
countries[country.code] = country
|
115
|
-
tr.clear
|
102
|
+
tr.clear
|
116
103
|
i = 0
|
117
104
|
end
|
118
105
|
end
|
119
|
-
|
120
|
-
countries.sort_keys!
|
106
|
+
|
107
|
+
countries.sort_keys!
|
121
108
|
countries.save_to_file(save_filepath)
|
122
109
|
end
|
123
110
|
end
|
124
111
|
end
|
125
112
|
|
126
|
-
if $
|
113
|
+
if $PROGRAM_NAME == __FILE__
|
127
114
|
if ARGV.length < 1
|
128
|
-
puts UncleKryon::Countries.load_file
|
115
|
+
puts UncleKryon::Countries.load_file.to_s
|
129
116
|
else
|
130
117
|
UncleKryon::Countries.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
131
118
|
UncleKryon::Countries::DEFAULT_FILEPATH)
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -44,196 +30,200 @@ module UncleKryon
|
|
44
30
|
attr_reader :alpha2_code
|
45
31
|
attr_reader :alpha3_code
|
46
32
|
attr_reader :alpha3_code_b
|
47
|
-
|
33
|
+
|
48
34
|
def initialize(row=nil)
|
49
35
|
super()
|
50
|
-
|
36
|
+
|
51
37
|
@names = nil
|
52
38
|
@codes = nil
|
53
39
|
@alpha2_code = nil
|
54
40
|
@alpha3_code = nil
|
55
41
|
@alpha3_code_b = nil
|
56
|
-
|
42
|
+
|
57
43
|
if row.is_a?(Array)
|
58
|
-
@names = row[2].split(';').compact
|
59
|
-
@alpha2_code = row[1].empty?
|
44
|
+
@names = row[2].split(';').compact.uniq.map(&self.class.method(:fix_name))
|
45
|
+
@alpha2_code = row[1].empty? ? nil : row[1]
|
60
46
|
@alpha3_code = row[0].split(/[[:space:]]*[\(\)][[:space:]]*/)
|
61
|
-
|
47
|
+
|
62
48
|
if @alpha3_code.length <= 1
|
63
49
|
@alpha3_code = row[0]
|
64
50
|
else
|
65
51
|
prev_was_tag = true
|
66
|
-
|
67
|
-
@alpha3_code.each_with_index
|
68
|
-
c.strip!
|
69
|
-
c_up = c.upcase
|
70
|
-
|
52
|
+
|
53
|
+
@alpha3_code.each_with_index do |c,i|
|
54
|
+
c.strip!
|
55
|
+
c_up = c.upcase
|
56
|
+
|
71
57
|
if c_up == 'B' || c_up == 'T'
|
72
58
|
if prev_was_tag
|
73
59
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
74
60
|
end
|
75
|
-
|
61
|
+
|
76
62
|
case c_up
|
77
63
|
when 'B'
|
78
|
-
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
64
|
+
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
79
65
|
@alpha3_code_b = @alpha3_code[i - 1]
|
80
66
|
when 'T'
|
81
67
|
raise "Multiple alpha3_code (T): #{@alpha3_code}" unless @alpha3_code.is_a?(Array)
|
82
68
|
@alpha3_code = @alpha3_code[i - 1]
|
83
69
|
end
|
84
|
-
|
70
|
+
|
85
71
|
prev_was_tag = true
|
86
72
|
else
|
87
73
|
prev_was_tag = false
|
88
74
|
end
|
89
75
|
end
|
90
|
-
|
76
|
+
|
91
77
|
# Wasn't set in the above loop?
|
92
78
|
if @alpha3_code.is_a?(Array)
|
93
79
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
94
80
|
end
|
95
81
|
end
|
96
|
-
|
82
|
+
|
97
83
|
@name = @names[0]
|
98
|
-
|
84
|
+
#@names = @names
|
99
85
|
@code = @alpha3_code
|
100
|
-
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact
|
86
|
+
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact.uniq
|
101
87
|
end
|
102
88
|
end
|
103
|
-
|
89
|
+
|
104
90
|
# @see Languages.parse_and_save_to_file(...)
|
105
|
-
def ==(
|
106
|
-
return super(
|
107
|
-
@names ==
|
108
|
-
@codes ==
|
109
|
-
@alpha2_code ==
|
110
|
-
@alpha3_code ==
|
111
|
-
@alpha3_code_b ==
|
91
|
+
def ==(other)
|
92
|
+
return super(other) &&
|
93
|
+
@names == other.names &&
|
94
|
+
@codes == other.codes &&
|
95
|
+
@alpha2_code == other.alpha2_code &&
|
96
|
+
@alpha3_code == other.alpha3_code &&
|
97
|
+
@alpha3_code_b == other.alpha3_code_b
|
112
98
|
end
|
113
|
-
|
114
|
-
def to_s
|
115
|
-
s = '['.dup
|
99
|
+
|
100
|
+
def to_s
|
101
|
+
s = '['.dup
|
116
102
|
s << %Q("#{@name}","#{@names.join(';')}",)
|
117
103
|
s << %Q(#{@code},"#{@codes.join(';')}",)
|
118
|
-
s <<
|
104
|
+
s << "#{@alpha2_code},#{@alpha3_code},#{@alpha3_code_b}"
|
119
105
|
s << ']'
|
120
|
-
|
106
|
+
|
121
107
|
return s
|
122
108
|
end
|
123
109
|
end
|
124
|
-
|
110
|
+
|
125
111
|
class Languages < BaseIsos
|
126
112
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"
|
127
|
-
|
128
|
-
def initialize
|
113
|
+
|
114
|
+
def initialize
|
129
115
|
super()
|
130
116
|
end
|
131
|
-
|
117
|
+
|
132
118
|
def find_by_kryon(text,add_english: false,**options)
|
133
119
|
langs = []
|
134
120
|
regexes = [
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
regexes.each_with_index
|
140
|
-
|
121
|
+
%r{[[:space:]]*[/\+][[:space:]]*}, # Multiple languages are usually separated by '/'
|
122
|
+
/[[:space:]]+/ # Sometimes separated by space/newline
|
123
|
+
]
|
124
|
+
|
125
|
+
regexes.each_with_index do |regex,i|
|
126
|
+
try_next_regex = false
|
127
|
+
|
128
|
+
text.split(regex).each do |t|
|
141
129
|
# Fix misspellings and/or weird shortenings
|
142
|
-
t = t.clone
|
130
|
+
t = t.clone
|
143
131
|
t.gsub!(/\AFRENC\z/i,'French')
|
144
132
|
t.gsub!(/[\+\*]+/,'') # Means more languages, but won't worry about it (since not listed)
|
145
133
|
t.gsub!(/\ASPAN\z/i,'Spanish')
|
146
134
|
t.gsub!(/\AENGLSH\z/i,'English')
|
147
135
|
t.gsub!(/\AHUNGARY\z/i,'Hungarian')
|
148
|
-
|
136
|
+
|
149
137
|
lang = find(t)
|
150
|
-
|
151
|
-
if lang.nil?
|
152
|
-
if i >= (regexes.length
|
138
|
+
|
139
|
+
if lang.nil?
|
140
|
+
if i >= (regexes.length - 1)
|
153
141
|
msg = "No language found for: #{t}"
|
154
|
-
|
155
|
-
if DevOpts.instance.dev?
|
142
|
+
|
143
|
+
if DevOpts.instance.dev?
|
156
144
|
raise msg
|
157
145
|
else
|
158
146
|
log.warn(msg)
|
159
147
|
end
|
160
148
|
else
|
161
149
|
log.warn("Not a language; trying next regex: #{t}")
|
162
|
-
|
163
|
-
# Try next regex
|
164
|
-
langs.clear
|
165
|
-
|
150
|
+
|
151
|
+
# Try next regex.
|
152
|
+
langs.clear
|
153
|
+
try_next_regex = true
|
154
|
+
break
|
166
155
|
end
|
167
156
|
else
|
168
157
|
langs.push(lang.code)
|
169
158
|
end
|
170
159
|
end
|
171
|
-
|
172
|
-
# No problem with this regex, so bail out
|
173
|
-
break
|
160
|
+
|
161
|
+
# No problem with this regex, so bail out.
|
162
|
+
break unless try_next_regex
|
174
163
|
end
|
175
|
-
|
164
|
+
|
176
165
|
eng_code = find_by_code('eng').code
|
177
|
-
|
166
|
+
|
178
167
|
if add_english && !langs.include?(eng_code)
|
179
168
|
langs.push(eng_code)
|
180
169
|
end
|
181
|
-
|
182
|
-
return langs.empty?
|
170
|
+
|
171
|
+
return langs.empty? ? nil : langs
|
183
172
|
end
|
184
|
-
|
173
|
+
|
185
174
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
186
|
-
return Languages.new
|
175
|
+
return Languages.new.load_file(filepath)
|
187
176
|
end
|
188
|
-
|
189
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
177
|
+
|
178
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
179
|
+
# into local file
|
190
180
|
# @param save_filepath [String] local file to save YAML to
|
191
181
|
# @see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
192
182
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
193
|
-
doc = Nokogiri::HTML(
|
183
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
194
184
|
tds = doc.css('td')
|
195
|
-
|
196
|
-
langs = Languages.new
|
185
|
+
|
186
|
+
langs = Languages.new
|
197
187
|
i = 0
|
198
188
|
tr = []
|
199
|
-
|
189
|
+
|
200
190
|
tds.each do |td|
|
201
191
|
c = td.content
|
202
192
|
c.gsub!(/[[:space:]]+/,' ')
|
203
|
-
c.strip!
|
193
|
+
c.strip!
|
204
194
|
tr.push(c)
|
205
|
-
|
195
|
+
|
206
196
|
if (i += 1) >= 5
|
207
197
|
#puts tr.inspect()
|
208
|
-
|
198
|
+
|
209
199
|
add_it = true
|
210
200
|
lang = Language.new(tr)
|
211
|
-
|
201
|
+
|
212
202
|
if langs.key?(lang.code)
|
213
203
|
# There were so many duplicates, so added comparison check
|
214
|
-
raise "Language already exists: #{lang.inspect
|
204
|
+
raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]
|
215
205
|
add_it = false
|
216
206
|
else
|
217
|
-
langs.values.each_value
|
207
|
+
langs.values.each_value do |v|
|
218
208
|
puts "Duplicate lang names: #{v.name}" if v.name == lang.name
|
219
209
|
end
|
220
210
|
end
|
221
|
-
|
211
|
+
|
222
212
|
langs[lang.code] = lang if add_it
|
223
|
-
tr.clear
|
213
|
+
tr.clear
|
224
214
|
i = 0
|
225
215
|
end
|
226
216
|
end
|
227
|
-
|
228
|
-
langs.sort_keys!
|
217
|
+
|
218
|
+
langs.sort_keys!
|
229
219
|
langs.save_to_file(save_filepath)
|
230
220
|
end
|
231
221
|
end
|
232
222
|
end
|
233
223
|
|
234
|
-
if $
|
224
|
+
if $PROGRAM_NAME == __FILE__
|
235
225
|
if ARGV.length < 1
|
236
|
-
puts UncleKryon::Languages.load_file
|
226
|
+
puts UncleKryon::Languages.load_file.to_s
|
237
227
|
else
|
238
228
|
UncleKryon::Languages.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
239
229
|
UncleKryon::Languages::DEFAULT_FILEPATH)
|