unclekryon 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -19
- data/Gemfile.lock +19 -23
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon.rb +165 -165
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +119 -133
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +36 -49
- data/lib/unclekryon/iso/language.rb +86 -96
- data/lib/unclekryon/iso/region.rb +11 -25
- data/lib/unclekryon/iso/subregion.rb +11 -25
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +69 -83
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +44 -42
- metadata +59 -16
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
|
@@ -36,76 +22,77 @@ module UncleKryon
|
|
36
22
|
class CanProvTerr < BaseIso
|
37
23
|
def initialize(row=nil)
|
38
24
|
super()
|
39
|
-
|
25
|
+
|
40
26
|
if row.is_a?(Array)
|
41
27
|
@name = self.class.simplify_name(row[2])
|
42
28
|
@code = self.class.simplify_code(row[1])
|
43
29
|
end
|
44
30
|
end
|
45
31
|
end
|
46
|
-
|
32
|
+
|
47
33
|
class CanProvsTerrs < BaseIsos
|
48
34
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/can_provs_terrs.yaml"
|
49
|
-
|
50
|
-
def initialize
|
35
|
+
|
36
|
+
def initialize
|
51
37
|
super()
|
52
|
-
|
38
|
+
|
53
39
|
@id = 'CAN Provinces & Territories'
|
54
40
|
end
|
55
|
-
|
41
|
+
|
56
42
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
57
|
-
return CanProvsTerrs.new
|
43
|
+
return CanProvsTerrs.new.load_file(filepath)
|
58
44
|
end
|
59
|
-
|
60
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
45
|
+
|
46
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
47
|
+
# into local file
|
61
48
|
# @param save_filepath [String] local file to save YAML to
|
62
49
|
# @see https://www.iso.org/obp/ui/#iso:code:3166:CA
|
63
50
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
64
|
-
doc = Nokogiri::HTML(
|
51
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
65
52
|
trs = doc.css('tr')
|
66
|
-
|
67
|
-
provs_terrs = CanProvsTerrs.new
|
68
|
-
|
69
|
-
trs.each
|
53
|
+
|
54
|
+
provs_terrs = CanProvsTerrs.new
|
55
|
+
|
56
|
+
trs.each do |tr|
|
70
57
|
tds = tr.css('td')
|
71
|
-
|
58
|
+
|
72
59
|
# Skip French; we just want English
|
73
|
-
next if tds[4].content.gsub(/[[:space:]]+/,' ').strip
|
74
|
-
|
60
|
+
next if tds[4].content.gsub(/[[:space:]]+/,' ').strip.downcase == 'fr'
|
61
|
+
|
75
62
|
i = 0
|
76
63
|
tr = []
|
77
|
-
|
78
|
-
tds.each
|
64
|
+
|
65
|
+
tds.each do |td|
|
79
66
|
c = td.content
|
80
67
|
c.gsub!(/[[:space:]]+/,' ')
|
81
|
-
c.strip!
|
68
|
+
c.strip!
|
82
69
|
tr.push(c)
|
83
|
-
|
70
|
+
|
84
71
|
if (i += 1) >= 7
|
85
72
|
#puts tr.inspect()
|
86
73
|
prov_terr = CanProvTerr.new(tr)
|
87
|
-
raise "CAN prov/terr already exists: #{prov_terr.inspect
|
88
|
-
|
89
|
-
provs_terrs.values.each_value
|
74
|
+
raise "CAN prov/terr already exists: #{prov_terr.inspect}" if provs_terrs.key?(prov_terr.code)
|
75
|
+
|
76
|
+
provs_terrs.values.each_value do |v|
|
90
77
|
puts "Duplicate CAN prov/terr names: #{v.name}" if v.name == prov_terr.name
|
91
78
|
end
|
92
|
-
|
79
|
+
|
93
80
|
provs_terrs[prov_terr.code] = prov_terr
|
94
|
-
tr.clear
|
81
|
+
tr.clear
|
95
82
|
i = 0
|
96
83
|
end
|
97
84
|
end
|
98
85
|
end
|
99
|
-
|
100
|
-
provs_terrs.sort_keys!
|
86
|
+
|
87
|
+
provs_terrs.sort_keys!
|
101
88
|
provs_terrs.save_to_file(save_filepath)
|
102
89
|
end
|
103
90
|
end
|
104
91
|
end
|
105
92
|
|
106
|
-
if $
|
93
|
+
if $PROGRAM_NAME == __FILE__
|
107
94
|
if ARGV.length < 1
|
108
|
-
puts UncleKryon::CanProvsTerrs.load_file
|
95
|
+
puts UncleKryon::CanProvsTerrs.load_file.to_s
|
109
96
|
else
|
110
97
|
UncleKryon::CanProvsTerrs.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
111
98
|
UncleKryon::CanProvsTerrs::DEFAULT_FILEPATH)
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -42,90 +28,91 @@ module UncleKryon
|
|
42
28
|
attr_reader :alpha2_code
|
43
29
|
attr_reader :alpha3_code
|
44
30
|
attr_reader :region
|
45
|
-
|
31
|
+
|
46
32
|
def initialize(row=nil)
|
47
33
|
super()
|
48
|
-
|
34
|
+
|
49
35
|
@names = nil
|
50
36
|
@codes = nil
|
51
37
|
@alpha2_code = nil
|
52
38
|
@alpha3_code = nil
|
53
39
|
@region = nil
|
54
|
-
|
40
|
+
|
55
41
|
if row.is_a?(Array)
|
56
42
|
@name = self.class.simplify_name(row[0])
|
57
43
|
@alpha2_code = row[2]
|
58
44
|
@alpha3_code = row[3]
|
59
|
-
|
45
|
+
|
60
46
|
@names = @name
|
61
47
|
@code = @alpha3_code
|
62
|
-
@codes = [@alpha3_code,@alpha2_code].compact
|
48
|
+
@codes = [@alpha3_code,@alpha2_code].compact.uniq
|
63
49
|
end
|
64
50
|
end
|
65
|
-
|
66
|
-
def to_s
|
67
|
-
s = '['.dup
|
51
|
+
|
52
|
+
def to_s
|
53
|
+
s = '['.dup
|
68
54
|
s << %Q("#{@name}","#{@names.join(';')}")
|
69
55
|
s << %Q(,#{@code},"#{@codes.join(';')}",#{@alpha2_code},#{@alpha3_code})
|
70
|
-
s <<
|
56
|
+
s << ",#{@region}"
|
71
57
|
s << ']'
|
72
|
-
|
58
|
+
|
73
59
|
return s
|
74
60
|
end
|
75
61
|
end
|
76
|
-
|
62
|
+
|
77
63
|
class Countries < BaseIsos
|
78
64
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/countries.yaml"
|
79
|
-
|
80
|
-
def initialize
|
65
|
+
|
66
|
+
def initialize
|
81
67
|
super()
|
82
68
|
end
|
83
|
-
|
69
|
+
|
84
70
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
85
|
-
return Countries.new
|
71
|
+
return Countries.new.load_file(filepath)
|
86
72
|
end
|
87
|
-
|
88
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
73
|
+
|
74
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
75
|
+
# into local file
|
89
76
|
# @param save_filepath [String] local file to save YAML to
|
90
77
|
# @see https://www.iso.org/obp/ui/#search/code/
|
91
78
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
92
|
-
doc = Nokogiri::HTML(
|
79
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
93
80
|
tds = doc.css('td')
|
94
|
-
|
95
|
-
countries = Countries.new
|
81
|
+
|
82
|
+
countries = Countries.new
|
96
83
|
i = 0
|
97
84
|
tr = []
|
98
|
-
|
85
|
+
|
99
86
|
tds.each do |td|
|
100
87
|
c = td.content
|
101
88
|
c.gsub!(/[[:space:]]+/,' ')
|
102
|
-
c.strip!
|
89
|
+
c.strip!
|
103
90
|
tr.push(c)
|
104
|
-
|
91
|
+
|
105
92
|
if (i += 1) >= 5
|
106
93
|
#puts tr.inspect()
|
107
94
|
country = Country.new(tr)
|
108
|
-
raise "Country already exists: #{country.inspect
|
109
|
-
|
110
|
-
countries.values.each_value
|
95
|
+
raise "Country already exists: #{country.inspect}" if countries.key?(country.code)
|
96
|
+
|
97
|
+
countries.values.each_value do |v|
|
111
98
|
puts "Duplicate country names: #{v.name}" if v.name == country.name
|
112
99
|
end
|
113
|
-
|
100
|
+
|
114
101
|
countries[country.code] = country
|
115
|
-
tr.clear
|
102
|
+
tr.clear
|
116
103
|
i = 0
|
117
104
|
end
|
118
105
|
end
|
119
|
-
|
120
|
-
countries.sort_keys!
|
106
|
+
|
107
|
+
countries.sort_keys!
|
121
108
|
countries.save_to_file(save_filepath)
|
122
109
|
end
|
123
110
|
end
|
124
111
|
end
|
125
112
|
|
126
|
-
if $
|
113
|
+
if $PROGRAM_NAME == __FILE__
|
127
114
|
if ARGV.length < 1
|
128
|
-
puts UncleKryon::Countries.load_file
|
115
|
+
puts UncleKryon::Countries.load_file.to_s
|
129
116
|
else
|
130
117
|
UncleKryon::Countries.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
131
118
|
UncleKryon::Countries::DEFAULT_FILEPATH)
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -44,196 +30,200 @@ module UncleKryon
|
|
44
30
|
attr_reader :alpha2_code
|
45
31
|
attr_reader :alpha3_code
|
46
32
|
attr_reader :alpha3_code_b
|
47
|
-
|
33
|
+
|
48
34
|
def initialize(row=nil)
|
49
35
|
super()
|
50
|
-
|
36
|
+
|
51
37
|
@names = nil
|
52
38
|
@codes = nil
|
53
39
|
@alpha2_code = nil
|
54
40
|
@alpha3_code = nil
|
55
41
|
@alpha3_code_b = nil
|
56
|
-
|
42
|
+
|
57
43
|
if row.is_a?(Array)
|
58
|
-
@names = row[2].split(';').compact
|
59
|
-
@alpha2_code = row[1].empty?
|
44
|
+
@names = row[2].split(';').compact.uniq.map(&self.class.method(:fix_name))
|
45
|
+
@alpha2_code = row[1].empty? ? nil : row[1]
|
60
46
|
@alpha3_code = row[0].split(/[[:space:]]*[\(\)][[:space:]]*/)
|
61
|
-
|
47
|
+
|
62
48
|
if @alpha3_code.length <= 1
|
63
49
|
@alpha3_code = row[0]
|
64
50
|
else
|
65
51
|
prev_was_tag = true
|
66
|
-
|
67
|
-
@alpha3_code.each_with_index
|
68
|
-
c.strip!
|
69
|
-
c_up = c.upcase
|
70
|
-
|
52
|
+
|
53
|
+
@alpha3_code.each_with_index do |c,i|
|
54
|
+
c.strip!
|
55
|
+
c_up = c.upcase
|
56
|
+
|
71
57
|
if c_up == 'B' || c_up == 'T'
|
72
58
|
if prev_was_tag
|
73
59
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
74
60
|
end
|
75
|
-
|
61
|
+
|
76
62
|
case c_up
|
77
63
|
when 'B'
|
78
|
-
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
64
|
+
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
79
65
|
@alpha3_code_b = @alpha3_code[i - 1]
|
80
66
|
when 'T'
|
81
67
|
raise "Multiple alpha3_code (T): #{@alpha3_code}" unless @alpha3_code.is_a?(Array)
|
82
68
|
@alpha3_code = @alpha3_code[i - 1]
|
83
69
|
end
|
84
|
-
|
70
|
+
|
85
71
|
prev_was_tag = true
|
86
72
|
else
|
87
73
|
prev_was_tag = false
|
88
74
|
end
|
89
75
|
end
|
90
|
-
|
76
|
+
|
91
77
|
# Wasn't set in the above loop?
|
92
78
|
if @alpha3_code.is_a?(Array)
|
93
79
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
94
80
|
end
|
95
81
|
end
|
96
|
-
|
82
|
+
|
97
83
|
@name = @names[0]
|
98
|
-
|
84
|
+
#@names = @names
|
99
85
|
@code = @alpha3_code
|
100
|
-
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact
|
86
|
+
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact.uniq
|
101
87
|
end
|
102
88
|
end
|
103
|
-
|
89
|
+
|
104
90
|
# @see Languages.parse_and_save_to_file(...)
|
105
|
-
def ==(
|
106
|
-
return super(
|
107
|
-
@names ==
|
108
|
-
@codes ==
|
109
|
-
@alpha2_code ==
|
110
|
-
@alpha3_code ==
|
111
|
-
@alpha3_code_b ==
|
91
|
+
def ==(other)
|
92
|
+
return super(other) &&
|
93
|
+
@names == other.names &&
|
94
|
+
@codes == other.codes &&
|
95
|
+
@alpha2_code == other.alpha2_code &&
|
96
|
+
@alpha3_code == other.alpha3_code &&
|
97
|
+
@alpha3_code_b == other.alpha3_code_b
|
112
98
|
end
|
113
|
-
|
114
|
-
def to_s
|
115
|
-
s = '['.dup
|
99
|
+
|
100
|
+
def to_s
|
101
|
+
s = '['.dup
|
116
102
|
s << %Q("#{@name}","#{@names.join(';')}",)
|
117
103
|
s << %Q(#{@code},"#{@codes.join(';')}",)
|
118
|
-
s <<
|
104
|
+
s << "#{@alpha2_code},#{@alpha3_code},#{@alpha3_code_b}"
|
119
105
|
s << ']'
|
120
|
-
|
106
|
+
|
121
107
|
return s
|
122
108
|
end
|
123
109
|
end
|
124
|
-
|
110
|
+
|
125
111
|
class Languages < BaseIsos
|
126
112
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"
|
127
|
-
|
128
|
-
def initialize
|
113
|
+
|
114
|
+
def initialize
|
129
115
|
super()
|
130
116
|
end
|
131
|
-
|
117
|
+
|
132
118
|
def find_by_kryon(text,add_english: false,**options)
|
133
119
|
langs = []
|
134
120
|
regexes = [
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
regexes.each_with_index
|
140
|
-
|
121
|
+
%r{[[:space:]]*[/\+][[:space:]]*}, # Multiple languages are usually separated by '/'
|
122
|
+
/[[:space:]]+/ # Sometimes separated by space/newline
|
123
|
+
]
|
124
|
+
|
125
|
+
regexes.each_with_index do |regex,i|
|
126
|
+
try_next_regex = false
|
127
|
+
|
128
|
+
text.split(regex).each do |t|
|
141
129
|
# Fix misspellings and/or weird shortenings
|
142
|
-
t = t.clone
|
130
|
+
t = t.clone
|
143
131
|
t.gsub!(/\AFRENC\z/i,'French')
|
144
132
|
t.gsub!(/[\+\*]+/,'') # Means more languages, but won't worry about it (since not listed)
|
145
133
|
t.gsub!(/\ASPAN\z/i,'Spanish')
|
146
134
|
t.gsub!(/\AENGLSH\z/i,'English')
|
147
135
|
t.gsub!(/\AHUNGARY\z/i,'Hungarian')
|
148
|
-
|
136
|
+
|
149
137
|
lang = find(t)
|
150
|
-
|
151
|
-
if lang.nil?
|
152
|
-
if i >= (regexes.length
|
138
|
+
|
139
|
+
if lang.nil?
|
140
|
+
if i >= (regexes.length - 1)
|
153
141
|
msg = "No language found for: #{t}"
|
154
|
-
|
155
|
-
if DevOpts.instance.dev?
|
142
|
+
|
143
|
+
if DevOpts.instance.dev?
|
156
144
|
raise msg
|
157
145
|
else
|
158
146
|
log.warn(msg)
|
159
147
|
end
|
160
148
|
else
|
161
149
|
log.warn("Not a language; trying next regex: #{t}")
|
162
|
-
|
163
|
-
# Try next regex
|
164
|
-
langs.clear
|
165
|
-
|
150
|
+
|
151
|
+
# Try next regex.
|
152
|
+
langs.clear
|
153
|
+
try_next_regex = true
|
154
|
+
break
|
166
155
|
end
|
167
156
|
else
|
168
157
|
langs.push(lang.code)
|
169
158
|
end
|
170
159
|
end
|
171
|
-
|
172
|
-
# No problem with this regex, so bail out
|
173
|
-
break
|
160
|
+
|
161
|
+
# No problem with this regex, so bail out.
|
162
|
+
break unless try_next_regex
|
174
163
|
end
|
175
|
-
|
164
|
+
|
176
165
|
eng_code = find_by_code('eng').code
|
177
|
-
|
166
|
+
|
178
167
|
if add_english && !langs.include?(eng_code)
|
179
168
|
langs.push(eng_code)
|
180
169
|
end
|
181
|
-
|
182
|
-
return langs.empty?
|
170
|
+
|
171
|
+
return langs.empty? ? nil : langs
|
183
172
|
end
|
184
|
-
|
173
|
+
|
185
174
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
186
|
-
return Languages.new
|
175
|
+
return Languages.new.load_file(filepath)
|
187
176
|
end
|
188
|
-
|
189
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
177
|
+
|
178
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
179
|
+
# into local file
|
190
180
|
# @param save_filepath [String] local file to save YAML to
|
191
181
|
# @see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
192
182
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
193
|
-
doc = Nokogiri::HTML(
|
183
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
194
184
|
tds = doc.css('td')
|
195
|
-
|
196
|
-
langs = Languages.new
|
185
|
+
|
186
|
+
langs = Languages.new
|
197
187
|
i = 0
|
198
188
|
tr = []
|
199
|
-
|
189
|
+
|
200
190
|
tds.each do |td|
|
201
191
|
c = td.content
|
202
192
|
c.gsub!(/[[:space:]]+/,' ')
|
203
|
-
c.strip!
|
193
|
+
c.strip!
|
204
194
|
tr.push(c)
|
205
|
-
|
195
|
+
|
206
196
|
if (i += 1) >= 5
|
207
197
|
#puts tr.inspect()
|
208
|
-
|
198
|
+
|
209
199
|
add_it = true
|
210
200
|
lang = Language.new(tr)
|
211
|
-
|
201
|
+
|
212
202
|
if langs.key?(lang.code)
|
213
203
|
# There were so many duplicates, so added comparison check
|
214
|
-
raise "Language already exists: #{lang.inspect
|
204
|
+
raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]
|
215
205
|
add_it = false
|
216
206
|
else
|
217
|
-
langs.values.each_value
|
207
|
+
langs.values.each_value do |v|
|
218
208
|
puts "Duplicate lang names: #{v.name}" if v.name == lang.name
|
219
209
|
end
|
220
210
|
end
|
221
|
-
|
211
|
+
|
222
212
|
langs[lang.code] = lang if add_it
|
223
|
-
tr.clear
|
213
|
+
tr.clear
|
224
214
|
i = 0
|
225
215
|
end
|
226
216
|
end
|
227
|
-
|
228
|
-
langs.sort_keys!
|
217
|
+
|
218
|
+
langs.sort_keys!
|
229
219
|
langs.save_to_file(save_filepath)
|
230
220
|
end
|
231
221
|
end
|
232
222
|
end
|
233
223
|
|
234
|
-
if $
|
224
|
+
if $PROGRAM_NAME == __FILE__
|
235
225
|
if ARGV.length < 1
|
236
|
-
puts UncleKryon::Languages.load_file
|
226
|
+
puts UncleKryon::Languages.load_file.to_s
|
237
227
|
else
|
238
228
|
UncleKryon::Languages.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
239
229
|
UncleKryon::Languages::DEFAULT_FILEPATH)
|