unclekryon 0.4.9.pre.alpha → 0.4.12.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +10 -26
- data/Gemfile.lock +20 -24
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +121 -135
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +34 -51
- data/lib/unclekryon/iso/language.rb +84 -98
- data/lib/unclekryon/iso/region.rb +8 -29
- data/lib/unclekryon/iso/subregion.rb +8 -29
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +68 -85
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/lib/unclekryon.rb +166 -166
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +49 -49
- metadata +50 -22
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2022 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -44,196 +30,196 @@ module UncleKryon
|
|
44
30
|
attr_reader :alpha2_code
|
45
31
|
attr_reader :alpha3_code
|
46
32
|
attr_reader :alpha3_code_b
|
47
|
-
|
33
|
+
|
48
34
|
def initialize(row=nil)
|
49
35
|
super()
|
50
|
-
|
36
|
+
|
51
37
|
@names = nil
|
52
38
|
@codes = nil
|
53
39
|
@alpha2_code = nil
|
54
40
|
@alpha3_code = nil
|
55
41
|
@alpha3_code_b = nil
|
56
|
-
|
42
|
+
|
57
43
|
if row.is_a?(Array)
|
58
|
-
@names = row[2].split(';').compact
|
59
|
-
@alpha2_code = row[1].empty?
|
44
|
+
@names = row[2].split(';').compact.uniq.map(&self.class.method(:fix_name))
|
45
|
+
@alpha2_code = row[1].empty? ? nil : row[1]
|
60
46
|
@alpha3_code = row[0].split(/[[:space:]]*[\(\)][[:space:]]*/)
|
61
|
-
|
47
|
+
|
62
48
|
if @alpha3_code.length <= 1
|
63
49
|
@alpha3_code = row[0]
|
64
50
|
else
|
65
51
|
prev_was_tag = true
|
66
|
-
|
67
|
-
@alpha3_code.each_with_index
|
68
|
-
c.strip!
|
69
|
-
c_up = c.upcase
|
70
|
-
|
52
|
+
|
53
|
+
@alpha3_code.each_with_index do |c,i|
|
54
|
+
c.strip!
|
55
|
+
c_up = c.upcase
|
56
|
+
|
71
57
|
if c_up == 'B' || c_up == 'T'
|
72
58
|
if prev_was_tag
|
73
59
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
74
60
|
end
|
75
|
-
|
61
|
+
|
76
62
|
case c_up
|
77
63
|
when 'B'
|
78
|
-
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
64
|
+
raise "Multiple alpha3_code_b: #{@alpha3_code}" unless @alpha3_code_b.nil?
|
79
65
|
@alpha3_code_b = @alpha3_code[i - 1]
|
80
66
|
when 'T'
|
81
67
|
raise "Multiple alpha3_code (T): #{@alpha3_code}" unless @alpha3_code.is_a?(Array)
|
82
68
|
@alpha3_code = @alpha3_code[i - 1]
|
83
69
|
end
|
84
|
-
|
70
|
+
|
85
71
|
prev_was_tag = true
|
86
72
|
else
|
87
73
|
prev_was_tag = false
|
88
74
|
end
|
89
75
|
end
|
90
|
-
|
76
|
+
|
91
77
|
# Wasn't set in the above loop?
|
92
78
|
if @alpha3_code.is_a?(Array)
|
93
79
|
raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{@alpha3_code}"
|
94
80
|
end
|
95
81
|
end
|
96
|
-
|
82
|
+
|
97
83
|
@name = @names[0]
|
98
|
-
|
84
|
+
#@names = @names
|
99
85
|
@code = @alpha3_code
|
100
|
-
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact
|
86
|
+
@codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact.uniq
|
101
87
|
end
|
102
88
|
end
|
103
|
-
|
89
|
+
|
104
90
|
# @see Languages.parse_and_save_to_file(...)
|
105
|
-
def ==(
|
106
|
-
return super(
|
107
|
-
@names ==
|
108
|
-
@codes ==
|
109
|
-
@alpha2_code ==
|
110
|
-
@alpha3_code ==
|
111
|
-
@alpha3_code_b ==
|
91
|
+
def ==(other)
|
92
|
+
return super(other) &&
|
93
|
+
@names == other.names &&
|
94
|
+
@codes == other.codes &&
|
95
|
+
@alpha2_code == other.alpha2_code &&
|
96
|
+
@alpha3_code == other.alpha3_code &&
|
97
|
+
@alpha3_code_b == other.alpha3_code_b
|
112
98
|
end
|
113
|
-
|
114
|
-
def to_s
|
115
|
-
s = '['.dup
|
99
|
+
|
100
|
+
def to_s
|
101
|
+
s = '['.dup
|
116
102
|
s << %Q("#{@name}","#{@names.join(';')}",)
|
117
103
|
s << %Q(#{@code},"#{@codes.join(';')}",)
|
118
|
-
s <<
|
104
|
+
s << "#{@alpha2_code},#{@alpha3_code},#{@alpha3_code_b}"
|
119
105
|
s << ']'
|
120
|
-
|
106
|
+
|
121
107
|
return s
|
122
108
|
end
|
123
109
|
end
|
124
|
-
|
110
|
+
|
125
111
|
class Languages < BaseIsos
|
126
112
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"
|
127
|
-
|
128
|
-
def initialize()
|
129
|
-
super()
|
130
|
-
end
|
131
|
-
|
113
|
+
|
132
114
|
def find_by_kryon(text,add_english: false,**options)
|
133
115
|
langs = []
|
134
116
|
regexes = [
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
regexes.each_with_index
|
140
|
-
|
117
|
+
%r{[[:space:]]*[/\+][[:space:]]*}, # Multiple languages are usually separated by '/'
|
118
|
+
/[[:space:]]+/ # Sometimes separated by space/newline
|
119
|
+
]
|
120
|
+
|
121
|
+
regexes.each_with_index do |regex,i|
|
122
|
+
try_next_regex = false
|
123
|
+
|
124
|
+
text.split(regex).each do |t|
|
141
125
|
# Fix misspellings and/or weird shortenings
|
142
|
-
t = t.clone
|
126
|
+
t = t.clone
|
143
127
|
t.gsub!(/\AFRENC\z/i,'French')
|
144
128
|
t.gsub!(/[\+\*]+/,'') # Means more languages, but won't worry about it (since not listed)
|
145
129
|
t.gsub!(/\ASPAN\z/i,'Spanish')
|
146
130
|
t.gsub!(/\AENGLSH\z/i,'English')
|
147
131
|
t.gsub!(/\AHUNGARY\z/i,'Hungarian')
|
148
|
-
|
132
|
+
|
149
133
|
lang = find(t)
|
150
|
-
|
151
|
-
if lang.nil?
|
152
|
-
if i >= (regexes.length
|
134
|
+
|
135
|
+
if lang.nil?
|
136
|
+
if i >= (regexes.length - 1)
|
153
137
|
msg = "No language found for: #{t}"
|
154
|
-
|
155
|
-
if DevOpts.instance.dev?
|
138
|
+
|
139
|
+
if DevOpts.instance.dev?
|
156
140
|
raise msg
|
157
141
|
else
|
158
142
|
log.warn(msg)
|
159
143
|
end
|
160
144
|
else
|
161
145
|
log.warn("Not a language; trying next regex: #{t}")
|
162
|
-
|
163
|
-
# Try next regex
|
164
|
-
langs.clear
|
165
|
-
|
146
|
+
|
147
|
+
# Try next regex.
|
148
|
+
langs.clear
|
149
|
+
try_next_regex = true
|
150
|
+
break
|
166
151
|
end
|
167
152
|
else
|
168
153
|
langs.push(lang.code)
|
169
154
|
end
|
170
155
|
end
|
171
|
-
|
172
|
-
# No problem with this regex, so bail out
|
173
|
-
break
|
156
|
+
|
157
|
+
# No problem with this regex, so bail out.
|
158
|
+
break unless try_next_regex
|
174
159
|
end
|
175
|
-
|
160
|
+
|
176
161
|
eng_code = find_by_code('eng').code
|
177
|
-
|
162
|
+
|
178
163
|
if add_english && !langs.include?(eng_code)
|
179
164
|
langs.push(eng_code)
|
180
165
|
end
|
181
|
-
|
182
|
-
return langs.empty?
|
166
|
+
|
167
|
+
return langs.empty? ? nil : langs
|
183
168
|
end
|
184
|
-
|
169
|
+
|
185
170
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
186
|
-
return Languages.new
|
171
|
+
return Languages.new.load_file(filepath)
|
187
172
|
end
|
188
|
-
|
189
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
173
|
+
|
174
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
175
|
+
# into local file
|
190
176
|
# @param save_filepath [String] local file to save YAML to
|
191
177
|
# @see http://www.loc.gov/standards/iso639-2/php/code_list.php
|
192
178
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
193
|
-
doc = Nokogiri::HTML(
|
179
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
194
180
|
tds = doc.css('td')
|
195
|
-
|
196
|
-
langs = Languages.new
|
181
|
+
|
182
|
+
langs = Languages.new
|
197
183
|
i = 0
|
198
184
|
tr = []
|
199
|
-
|
185
|
+
|
200
186
|
tds.each do |td|
|
201
187
|
c = td.content
|
202
188
|
c.gsub!(/[[:space:]]+/,' ')
|
203
|
-
c.strip!
|
189
|
+
c.strip!
|
204
190
|
tr.push(c)
|
205
|
-
|
191
|
+
|
206
192
|
if (i += 1) >= 5
|
207
193
|
#puts tr.inspect()
|
208
|
-
|
194
|
+
|
209
195
|
add_it = true
|
210
196
|
lang = Language.new(tr)
|
211
|
-
|
197
|
+
|
212
198
|
if langs.key?(lang.code)
|
213
199
|
# There were so many duplicates, so added comparison check
|
214
|
-
raise "Language already exists: #{lang.inspect
|
200
|
+
raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]
|
215
201
|
add_it = false
|
216
202
|
else
|
217
|
-
langs.values.each_value
|
203
|
+
langs.values.each_value do |v|
|
218
204
|
puts "Duplicate lang names: #{v.name}" if v.name == lang.name
|
219
205
|
end
|
220
206
|
end
|
221
|
-
|
207
|
+
|
222
208
|
langs[lang.code] = lang if add_it
|
223
|
-
tr.clear
|
209
|
+
tr.clear
|
224
210
|
i = 0
|
225
211
|
end
|
226
212
|
end
|
227
|
-
|
228
|
-
langs.sort_keys!
|
213
|
+
|
214
|
+
langs.sort_keys!
|
229
215
|
langs.save_to_file(save_filepath)
|
230
216
|
end
|
231
217
|
end
|
232
218
|
end
|
233
219
|
|
234
|
-
if $
|
220
|
+
if $PROGRAM_NAME == __FILE__
|
235
221
|
if ARGV.length < 1
|
236
|
-
puts UncleKryon::Languages.load_file
|
222
|
+
puts UncleKryon::Languages.load_file.to_s
|
237
223
|
else
|
238
224
|
UncleKryon::Languages.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
239
225
|
UncleKryon::Languages::DEFAULT_FILEPATH)
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2022 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'unclekryon/iso/base_iso'
|
27
13
|
|
28
14
|
##
|
@@ -30,24 +16,17 @@ require 'unclekryon/iso/base_iso'
|
|
30
16
|
##
|
31
17
|
module UncleKryon
|
32
18
|
class Region < BaseIso
|
33
|
-
def initialize()
|
34
|
-
super()
|
35
|
-
end
|
36
19
|
end
|
37
|
-
|
20
|
+
|
38
21
|
class Regions < BaseIsos
|
39
22
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/regions.yaml"
|
40
|
-
|
41
|
-
def initialize()
|
42
|
-
super()
|
43
|
-
end
|
44
|
-
|
23
|
+
|
45
24
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
46
|
-
return Regions.new
|
25
|
+
return Regions.new.load_file(filepath)
|
47
26
|
end
|
48
27
|
end
|
49
28
|
end
|
50
29
|
|
51
|
-
if $
|
52
|
-
puts UncleKryon::Regions.load_file
|
30
|
+
if $PROGRAM_NAME == __FILE__
|
31
|
+
puts UncleKryon::Regions.load_file.to_s
|
53
32
|
end
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2019 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2019-2022 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'unclekryon/iso/base_iso'
|
27
13
|
|
28
14
|
##
|
@@ -30,24 +16,17 @@ require 'unclekryon/iso/base_iso'
|
|
30
16
|
##
|
31
17
|
module UncleKryon
|
32
18
|
class Subregion < BaseIso
|
33
|
-
def initialize()
|
34
|
-
super()
|
35
|
-
end
|
36
19
|
end
|
37
|
-
|
20
|
+
|
38
21
|
class Subregions < BaseIsos
|
39
22
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/subregions.yaml"
|
40
|
-
|
41
|
-
def initialize()
|
42
|
-
super()
|
43
|
-
end
|
44
|
-
|
23
|
+
|
45
24
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
46
|
-
return Subregions.new
|
25
|
+
return Subregions.new.load_file(filepath)
|
47
26
|
end
|
48
27
|
end
|
49
28
|
end
|
50
29
|
|
51
|
-
if $
|
52
|
-
puts UncleKryon::Subregions.load_file
|
30
|
+
if $PROGRAM_NAME == __FILE__
|
31
|
+
puts UncleKryon::Subregions.load_file.to_s
|
53
32
|
end
|
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2018-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2018-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nokogiri'
|
27
13
|
require 'open-uri'
|
28
14
|
require 'yaml'
|
@@ -37,68 +23,69 @@ module UncleKryon
|
|
37
23
|
class UsaState < BaseIso
|
38
24
|
def initialize(row=nil)
|
39
25
|
super()
|
40
|
-
|
26
|
+
|
41
27
|
if row.is_a?(Array)
|
42
28
|
@name = self.class.simplify_name(row[2])
|
43
29
|
@code = self.class.simplify_code(row[1])
|
44
30
|
end
|
45
31
|
end
|
46
32
|
end
|
47
|
-
|
33
|
+
|
48
34
|
class UsaStates < BaseIsos
|
49
35
|
DEFAULT_FILEPATH = "#{DEFAULT_DIR}/usa_states.yaml"
|
50
|
-
|
51
|
-
def initialize
|
36
|
+
|
37
|
+
def initialize
|
52
38
|
super()
|
53
|
-
|
39
|
+
|
54
40
|
@id = 'USA States'
|
55
41
|
end
|
56
|
-
|
42
|
+
|
57
43
|
def self.load_file(filepath=DEFAULT_FILEPATH)
|
58
|
-
return UsaStates.new
|
44
|
+
return UsaStates.new.load_file(filepath)
|
59
45
|
end
|
60
|
-
|
61
|
-
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
46
|
+
|
47
|
+
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML
|
48
|
+
# into local file
|
62
49
|
# @param save_filepath [String] local file to save YAML to
|
63
50
|
# @see https://www.iso.org/obp/ui/#iso:code:3166:US
|
64
51
|
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
|
65
|
-
doc = Nokogiri::HTML(
|
52
|
+
doc = Nokogiri::HTML(URI(parse_filepath).open,nil,'utf-8')
|
66
53
|
tds = doc.css('td')
|
67
|
-
|
68
|
-
states = UsaStates.new
|
54
|
+
|
55
|
+
states = UsaStates.new
|
69
56
|
i = 0
|
70
57
|
tr = []
|
71
|
-
|
58
|
+
|
72
59
|
tds.each do |td|
|
73
60
|
c = td.content
|
74
61
|
c.gsub!(/[[:space:]]+/,' ')
|
75
|
-
c.strip!
|
62
|
+
c.strip!
|
76
63
|
tr.push(c)
|
77
|
-
|
64
|
+
|
78
65
|
if (i += 1) >= 7
|
79
66
|
#puts tr.inspect()
|
80
67
|
state = UsaState.new(tr)
|
81
|
-
raise "USA state already exists: #{state.inspect
|
82
|
-
|
83
|
-
states.values.each_value
|
68
|
+
raise "USA state already exists: #{state.inspect}" if states.key?(state.code)
|
69
|
+
|
70
|
+
states.values.each_value do |v|
|
84
71
|
puts "Duplicate USA state names: #{v.name}" if v.name == state.name
|
85
72
|
end
|
86
|
-
|
73
|
+
|
87
74
|
states[state.code] = state
|
88
|
-
tr.clear
|
75
|
+
tr.clear
|
89
76
|
i = 0
|
90
77
|
end
|
91
78
|
end
|
92
|
-
|
93
|
-
states.sort_keys!
|
79
|
+
|
80
|
+
states.sort_keys!
|
94
81
|
states.save_to_file(save_filepath)
|
95
82
|
end
|
96
83
|
end
|
97
84
|
end
|
98
85
|
|
99
|
-
if $
|
86
|
+
if $PROGRAM_NAME == __FILE__
|
100
87
|
if ARGV.length < 1
|
101
|
-
puts UncleKryon::UsaStates.load_file
|
88
|
+
puts UncleKryon::UsaStates.load_file.to_s
|
102
89
|
else
|
103
90
|
UncleKryon::UsaStates.parse_and_save_to_file(ARGV[0],(ARGV.length >= 2) ? ARGV[1] :
|
104
91
|
UncleKryon::UsaStates::DEFAULT_FILEPATH)
|