unclekryon 0.4.9.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +34 -0
  3. data/Gemfile.lock +43 -0
  4. data/LICENSE +674 -0
  5. data/README.md +55 -0
  6. data/Rakefile +59 -0
  7. data/bin/unclekryon +30 -0
  8. data/iso/can_provs_terrs.yaml +54 -0
  9. data/iso/countries.yaml +3050 -0
  10. data/iso/iso.yaml +8 -0
  11. data/iso/languages.yaml +5641 -0
  12. data/iso/regions.yaml +42 -0
  13. data/iso/subregions.yaml +6 -0
  14. data/iso/usa_states.yaml +230 -0
  15. data/lib/unclekryon.rb +384 -0
  16. data/lib/unclekryon/data/album_data.rb +147 -0
  17. data/lib/unclekryon/data/artist_data.rb +109 -0
  18. data/lib/unclekryon/data/artist_data_data.rb +146 -0
  19. data/lib/unclekryon/data/aum_data.rb +75 -0
  20. data/lib/unclekryon/data/base_data.rb +79 -0
  21. data/lib/unclekryon/data/pic_data.rb +76 -0
  22. data/lib/unclekryon/data/release_data.rb +57 -0
  23. data/lib/unclekryon/data/social_data.rb +39 -0
  24. data/lib/unclekryon/data/timespan_data.rb +70 -0
  25. data/lib/unclekryon/dev_opts.rb +41 -0
  26. data/lib/unclekryon/hacker.rb +327 -0
  27. data/lib/unclekryon/iso.rb +341 -0
  28. data/lib/unclekryon/iso/base_iso.rb +196 -0
  29. data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
  30. data/lib/unclekryon/iso/country.rb +133 -0
  31. data/lib/unclekryon/iso/language.rb +241 -0
  32. data/lib/unclekryon/iso/region.rb +53 -0
  33. data/lib/unclekryon/iso/subregion.rb +53 -0
  34. data/lib/unclekryon/iso/usa_state.rb +106 -0
  35. data/lib/unclekryon/jsoner.rb +124 -0
  36. data/lib/unclekryon/log.rb +111 -0
  37. data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
  38. data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
  39. data/lib/unclekryon/server.rb +29 -0
  40. data/lib/unclekryon/trainer.rb +231 -0
  41. data/lib/unclekryon/uploader.rb +29 -0
  42. data/lib/unclekryon/util.rb +228 -0
  43. data/lib/unclekryon/version.rb +26 -0
  44. data/unclekryon.gemspec +67 -0
  45. metadata +189 -0
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+
29
+ require 'unclekryon/iso/base_iso'
30
+
31
+ ##
32
+ # @see https://en.wikipedia.org/wiki/ISO_3166-2:CA
33
+ # @see https://www.iso.org/obp/ui/#iso:code:3166:CA
34
+ ##
35
+ module UncleKryon
36
+ class CanProvTerr < BaseIso
37
# Creates a province/territory entry, optionally filled from one parsed table row.
#
# @param row [Array<String>,nil] cell texts of one table row; the name is taken
#   from index 2 and the code from index 1 (each passed through the class-level
#   simplify_* helpers). Anything that is not an Array leaves the entry exactly
#   as the superclass initialized it.
def initialize(row=nil)
  super()

  return unless row.is_a?(Array)

  iso_class = self.class

  @name = iso_class.simplify_name(row[2])
  @code = iso_class.simplify_code(row[1])
end
45
+ end
46
+
47
+ class CanProvsTerrs < BaseIsos
48
+ DEFAULT_FILEPATH = "#{DEFAULT_DIR}/can_provs_terrs.yaml"
49
+
50
# Creates an empty collection (via the superclass) and labels it through @id.
def initialize
  super()

  @id = 'CAN Provinces & Territories'
end
55
+
56
# Builds a new CanProvsTerrs and hands the path to its instance-level
# load_file (defined outside this file).
#
# @param filepath [String] file to load; defaults to DEFAULT_FILEPATH
# @return whatever the instance-level load_file returns
def self.load_file(filepath=DEFAULT_FILEPATH)
  provs_terrs = CanProvsTerrs.new
  provs_terrs.load_file(filepath)
end
59
+
60
# Parses a locally saved copy of the ISO 3166-2:CA table and writes the
# resulting entries out through save_to_file.
#
# Cells are read per table row in groups of 7. Rows whose 5th cell (index 4)
# is 'fr' are skipped, so each code is stored once with its English name.
# Rows with fewer than 7 cells (e.g. header rows made of <th>) are skipped.
#
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
# @param save_filepath [String] local file to save YAML to
# @return whatever save_to_file returns
# @raise [RuntimeError] if the same code is parsed twice
# @see https://www.iso.org/obp/ui/#iso:code:3166:CA
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
  # The block form closes the handle; File.open (unlike Kernel#open) never
  # treats a path starting with '|' as a command to run.
  doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

  provs_terrs = CanProvsTerrs.new

  doc.css('tr').each do |tr|
    cells = tr.css('td').map { |td| td.content.gsub(/[[:space:]]+/,' ').strip }

    # A row this short cannot produce an entry; skipping it also avoids
    # calling methods on a missing language cell.
    next if cells.length < 7

    # Skip French; we just want English
    next if cells[4].downcase == 'fr'

    cells.each_slice(7) do |row|
      next if row.length < 7 # Incomplete trailing group

      prov_terr = CanProvTerr.new(row)
      raise "CAN prov/terr already exists: #{prov_terr.inspect()}" if provs_terrs.key?(prov_terr.code)

      # Same name under a different code is only reported, not rejected
      provs_terrs.values.each_value do |v|
        puts "Duplicate CAN prov/terr names: #{v.name}" if v.name == prov_terr.name
      end

      provs_terrs[prov_terr.code] = prov_terr
    end
  end

  provs_terrs.sort_keys!
  provs_terrs.save_to_file(save_filepath)
end
103
+ end
104
+ end
105
+
106
# Script mode: with no arguments, print the saved data; otherwise parse the
# file named by ARGV[0] and save to ARGV[1] (or to the default file).
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::CanProvsTerrs.load_file.to_s
  else
    parse_filepath = ARGV[0]
    save_filepath = ARGV.fetch(1,UncleKryon::CanProvsTerrs::DEFAULT_FILEPATH)

    UncleKryon::CanProvsTerrs.parse_and_save_to_file(parse_filepath,save_filepath)
  end
end
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+ require 'yaml'
29
+
30
+ require 'unclekryon/iso/base_iso'
31
+
32
+ ##
33
+ # @see https://en.wikipedia.org/wiki/ISO_3166
34
+ # @see https://en.wikipedia.org/wiki/ISO_3166-1
35
+ # @see https://en.wikipedia.org/wiki/ISO_3166-2:GB
36
+ # @see https://www.iso.org/obp/ui/#search/code/
37
+ ##
38
+ module UncleKryon
39
+ class Country < BaseIso
40
+ attr_reader :names
41
+ attr_reader :codes
42
+ attr_reader :alpha2_code
43
+ attr_reader :alpha3_code
44
+ attr_reader :region
45
+
46
# Creates a country entry, optionally filled from one parsed table row.
#
# @param row [Array<String>,nil] cell texts of one table row: index 0 is the
#   name (passed through the class-level simplify_name), index 2 the alpha-2
#   code and index 3 the alpha-3 code. Anything that is not an Array leaves
#   names/codes/region as nil.
def initialize(row=nil)
  super()

  @names = nil
  @codes = nil
  @alpha2_code = nil
  @alpha3_code = nil
  @region = nil

  return unless row.is_a?(Array)

  @name = self.class.simplify_name(row[0])
  @alpha2_code = row[2]
  @alpha3_code = row[3]

  # Must be an Array: to_s joins @names with ';'
  @names = [@name].compact
  @code = @alpha3_code # The alpha-3 code is the primary code
  @codes = [@alpha3_code,@alpha2_code].compact.uniq
end
65
+
66
# Formats the entry as one bracketed, comma-separated line:
# ["name","names",code,"codes",alpha2_code,alpha3_code,region]
# where names and codes are each joined with ';'.
#
# Array() lets this also work for an entry created without a row
# (@names/@codes are nil) instead of raising NoMethodError.
#
# @return [String] a new, unfrozen string
def to_s
  fields = [
    %Q("#{@name}"),
    %Q("#{Array(@names).join(';')}"),
    @code,
    %Q("#{Array(@codes).join(';')}"),
    @alpha2_code,
    @alpha3_code,
    @region,
  ]

  # Unary + guarantees a mutable result even where interpolated strings are frozen
  return +"[#{fields.join(',')}]"
end
75
+ end
76
+
77
+ class Countries < BaseIsos
78
+ DEFAULT_FILEPATH = "#{DEFAULT_DIR}/countries.yaml"
79
+
80
# Creates an empty collection; all setup is delegated to the superclass.
def initialize
  super()
end
83
+
84
# Builds a new Countries and hands the path to its instance-level load_file
# (defined outside this file).
#
# @param filepath [String] file to load; defaults to DEFAULT_FILEPATH
# @return whatever the instance-level load_file returns
def self.load_file(filepath=DEFAULT_FILEPATH)
  countries = Countries.new
  countries.load_file(filepath)
end
87
+
88
# Parses a locally saved copy of the ISO country-code table and writes the
# resulting entries out through save_to_file.
#
# All <td> cells of the document are read in order and grouped 5 per country;
# a trailing group with fewer than 5 cells is ignored.
#
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
# @param save_filepath [String] local file to save YAML to
# @return whatever save_to_file returns
# @raise [RuntimeError] if the same country code is parsed twice
# @see https://www.iso.org/obp/ui/#search/code/
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
  # The block form closes the handle; File.open (unlike Kernel#open) never
  # treats a path starting with '|' as a command to run.
  doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

  countries = Countries.new
  cells = doc.css('td').map { |td| td.content.gsub(/[[:space:]]+/,' ').strip }

  cells.each_slice(5) do |row|
    next if row.length < 5 # Incomplete trailing group

    country = Country.new(row)
    raise "Country already exists: #{country.inspect()}" if countries.key?(country.code)

    # Same name under a different code is only reported, not rejected
    countries.values.each_value do |v|
      puts "Duplicate country names: #{v.name}" if v.name == country.name
    end

    countries[country.code] = country
  end

  countries.sort_keys!
  countries.save_to_file(save_filepath)
end
123
+ end
124
+ end
125
+
126
# Script mode: with no arguments, print the saved data; otherwise parse the
# file named by ARGV[0] and save to ARGV[1] (or to the default file).
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::Countries.load_file.to_s
  else
    parse_filepath = ARGV[0]
    save_filepath = ARGV.fetch(1,UncleKryon::Countries::DEFAULT_FILEPATH)

    UncleKryon::Countries.parse_and_save_to_file(parse_filepath,save_filepath)
  end
end
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+ require 'yaml'
29
+
30
+ require 'unclekryon/dev_opts'
31
+ require 'unclekryon/log'
32
+
33
+ require 'unclekryon/iso/base_iso'
34
+
35
+ ##
36
+ # @see https://en.wikipedia.org/wiki/ISO_639
37
+ # @see http://www.loc.gov/standards/iso639-2/php/code_list.php
38
+ # @see http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
39
+ ##
40
+ module UncleKryon
41
+ class Language < BaseIso
42
+ attr_reader :names
43
+ attr_reader :codes
44
+ attr_reader :alpha2_code
45
+ attr_reader :alpha3_code
46
+ attr_reader :alpha3_code_b
47
+
48
# Creates a language entry, optionally filled from one parsed table row.
#
# @param row [Array<String>,nil] cell texts of one table row:
#   - index 0: alpha-3 code, either a single code or two codes each followed
#     by a "(B)" or "(T)" tag, e.g. "tib (B) bod (T)"
#   - index 1: alpha-2 code (empty string if none)
#   - index 2: names separated by ';' (each passed through the class-level fix_name)
#   Anything that is not an Array leaves names/codes as nil.
# @raise [RuntimeError] if the tagged alpha-3 cell has a tag without a code
#   before it, repeats a tag, or has no "(T)" code
def initialize(row=nil)
  super()

  @names = nil
  @codes = nil
  @alpha2_code = nil
  @alpha3_code = nil
  @alpha3_code_b = nil

  return unless row.is_a?(Array)

  @names = row[2].split(';').compact.uniq.map(&self.class.method(:fix_name))
  @alpha2_code = row[1].empty? ? nil : row[1]

  # "tib (B) bod (T)" => ["tib","B","bod","T"]
  # Kept in a local so assigning @alpha3_code never changes what is being
  # indexed; the codes may then appear in either order.
  parts = row[0].split(/[[:space:]]*[\(\)][[:space:]]*/).map(&:strip)

  if parts.length <= 1
    @alpha3_code = row[0]
  else
    # Starts true so that a tag in the very first position is rejected
    prev_was_tag = true

    parts.each_with_index do |part,i|
      tag = part.upcase

      unless tag == 'B' || tag == 'T'
        prev_was_tag = false
        next
      end

      # A tag must directly follow the code it describes
      if prev_was_tag
        raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}"
      end

      tagged_code = parts[i - 1]

      if tag == 'B'
        raise "Multiple alpha3_code_b: #{parts}" unless @alpha3_code_b.nil?
        @alpha3_code_b = tagged_code
      else
        raise "Multiple alpha3_code (T): #{parts}" unless @alpha3_code.nil?
        @alpha3_code = tagged_code
      end

      prev_was_tag = true
    end

    # Wasn't set in the above loop?
    if @alpha3_code.nil?
      raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}"
    end
  end

  @name = @names.first
  @code = @alpha3_code
  @codes = [@alpha3_code,@alpha3_code_b,@alpha2_code].compact.uniq
end
103
+
104
# Two languages are equal when the superclass comparison succeeds and all
# language-specific fields (names, codes, alpha-2, alpha-3, alpha-3 B) match.
#
# @param lang [Language] the entry to compare against
# @return the falsy result of the superclass comparison, or true/false for
#   the field comparison
# @see Languages.parse_and_save_to_file(...)
def ==(lang)
  base_equal = super(lang)
  return base_equal unless base_equal

  own_fields = [@names,@codes,@alpha2_code,@alpha3_code,@alpha3_code_b]
  other_fields = [lang.names,lang.codes,lang.alpha2_code,lang.alpha3_code,lang.alpha3_code_b]

  return own_fields == other_fields
end
113
+
114
# Formats the entry as one bracketed, comma-separated line:
# ["name","names",code,"codes",alpha2_code,alpha3_code,alpha3_code_b]
# where names and codes are each joined with ';'.
#
# Array() lets this also work for an entry created without a row
# (@names/@codes are nil) instead of raising NoMethodError.
#
# @return [String] a new, unfrozen string
def to_s
  fields = [
    %Q("#{@name}"),
    %Q("#{Array(@names).join(';')}"),
    @code,
    %Q("#{Array(@codes).join(';')}"),
    @alpha2_code,
    @alpha3_code,
    @alpha3_code_b,
  ]

  # Unary + guarantees a mutable result even where interpolated strings are frozen
  return +"[#{fields.join(',')}]"
end
123
+ end
124
+
125
+ class Languages < BaseIsos
126
+ DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"
127
+
128
# Creates an empty collection; all setup is delegated to the superclass.
def initialize
  super()
end
131
+
132
# Finds the codes of all languages named in a free-form text.
#
# The text is first split on '/' or '+'. If any resulting token is not a
# known language, the partial result is discarded and the text is split on
# whitespace instead. With the last splitter, an unknown token raises in dev
# mode and is otherwise logged and skipped.
#
# @param text [String] language names, e.g. "English/Spanish"
# @param add_english [Boolean] append the code of 'eng' if not already present
# @param options [Hash] accepted for compatibility; not used here
# @return [Array,nil] codes of the found languages, or nil if there are none
# @raise [RuntimeError] in dev mode, if a token matches no language
def find_by_kryon(text,add_english: false,**options)
  regexes = [
    %r{[[:space:]]*[/+][[:space:]]*}, # Multiple languages are usually separated by '/'
    /[[:space:]]+/,                   # Sometimes separated by space/newline
  ]
  # Fix misspellings and/or weird shortenings
  fixes = {
    /\AFRENC\z/i   => 'French',
    /\ASPAN\z/i    => 'Spanish',
    /\AENGLSH\z/i  => 'English',
    /\AHUNGARY\z/i => 'Hungarian',
  }
  last_regex = regexes.length - 1
  langs = []

  regexes.each_with_index do |regex,i|
    langs = [] # Discard partial results of a failed splitter
    try_next_regex = false

    text.split(regex).each do |token|
      # '+'/'*' mean more languages, but won't worry about it (since not listed).
      # Stripped first so that e.g. "FRENC*" is still fixed below.
      t = token.gsub(/[\+\*]+/,'')
      fixes.each { |misspelling,fix| t = t.sub(misspelling,fix) }

      next if t.empty? # E.g. between two adjacent separators

      lang = find(t)

      if lang
        langs.push(lang.code)
      elsif i < last_regex
        log.warn("Not a language; trying next regex: #{t}")

        # Leave the token loop; a bare `next` here would only skip this token
        try_next_regex = true
        break
      else
        msg = "No language found for: #{t}"

        raise msg if DevOpts.instance.dev?
        log.warn(msg)
      end
    end

    # No problem with this regex, so bail out
    break unless try_next_regex
  end

  if add_english
    eng_code = find_by_code('eng').code
    langs.push(eng_code) unless langs.include?(eng_code)
  end

  return langs.empty? ? nil : langs
end
184
+
185
# Builds a new Languages and hands the path to its instance-level load_file
# (defined outside this file).
#
# @param filepath [String] file to load; defaults to DEFAULT_FILEPATH
# @return whatever the instance-level load_file returns
def self.load_file(filepath=DEFAULT_FILEPATH)
  langs = Languages.new
  langs.load_file(filepath)
end
188
+
189
# Parses a locally saved copy of the ISO 639-2 code list and writes the
# resulting entries out through save_to_file.
#
# All <td> cells of the document are read in order and grouped 5 per
# language; a trailing group with fewer than 5 cells is ignored. A repeated
# code is tolerated only if the repeated entry equals the stored one
# (see Language#==).
#
# @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
# @param save_filepath [String] local file to save YAML to
# @return whatever save_to_file returns
# @raise [RuntimeError] if a code is repeated with different data
# @see http://www.loc.gov/standards/iso639-2/php/code_list.php
def self.parse_and_save_to_file(parse_filepath,save_filepath=DEFAULT_FILEPATH)
  # The block form closes the handle; File.open (unlike Kernel#open) never
  # treats a path starting with '|' as a command to run.
  doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file,nil,'utf-8') }

  langs = Languages.new
  cells = doc.css('td').map { |td| td.content.gsub(/[[:space:]]+/,' ').strip }

  cells.each_slice(5) do |row|
    next if row.length < 5 # Incomplete trailing group

    lang = Language.new(row)

    if langs.key?(lang.code)
      # There were so many duplicates, so added comparison check
      raise "Language already exists: #{lang.inspect()}" if lang != langs[lang.code]
      next # Identical entry is already stored
    end

    # Same name under a different code is only reported, not rejected
    langs.values.each_value do |v|
      puts "Duplicate lang names: #{v.name}" if v.name == lang.name
    end

    langs[lang.code] = lang
  end

  langs.sort_keys!
  langs.save_to_file(save_filepath)
end
231
+ end
232
+ end
233
+
234
# Script mode: with no arguments, print the saved data; otherwise parse the
# file named by ARGV[0] and save to ARGV[1] (or to the default file).
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::Languages.load_file.to_s
  else
    parse_filepath = ARGV[0]
    save_filepath = ARGV.fetch(1,UncleKryon::Languages::DEFAULT_FILEPATH)

    UncleKryon::Languages.parse_and_save_to_file(parse_filepath,save_filepath)
  end
end