unclekryon 0.4.9.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +34 -0
  3. data/Gemfile.lock +43 -0
  4. data/LICENSE +674 -0
  5. data/README.md +55 -0
  6. data/Rakefile +59 -0
  7. data/bin/unclekryon +30 -0
  8. data/iso/can_provs_terrs.yaml +54 -0
  9. data/iso/countries.yaml +3050 -0
  10. data/iso/iso.yaml +8 -0
  11. data/iso/languages.yaml +5641 -0
  12. data/iso/regions.yaml +42 -0
  13. data/iso/subregions.yaml +6 -0
  14. data/iso/usa_states.yaml +230 -0
  15. data/lib/unclekryon.rb +384 -0
  16. data/lib/unclekryon/data/album_data.rb +147 -0
  17. data/lib/unclekryon/data/artist_data.rb +109 -0
  18. data/lib/unclekryon/data/artist_data_data.rb +146 -0
  19. data/lib/unclekryon/data/aum_data.rb +75 -0
  20. data/lib/unclekryon/data/base_data.rb +79 -0
  21. data/lib/unclekryon/data/pic_data.rb +76 -0
  22. data/lib/unclekryon/data/release_data.rb +57 -0
  23. data/lib/unclekryon/data/social_data.rb +39 -0
  24. data/lib/unclekryon/data/timespan_data.rb +70 -0
  25. data/lib/unclekryon/dev_opts.rb +41 -0
  26. data/lib/unclekryon/hacker.rb +327 -0
  27. data/lib/unclekryon/iso.rb +341 -0
  28. data/lib/unclekryon/iso/base_iso.rb +196 -0
  29. data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
  30. data/lib/unclekryon/iso/country.rb +133 -0
  31. data/lib/unclekryon/iso/language.rb +241 -0
  32. data/lib/unclekryon/iso/region.rb +53 -0
  33. data/lib/unclekryon/iso/subregion.rb +53 -0
  34. data/lib/unclekryon/iso/usa_state.rb +106 -0
  35. data/lib/unclekryon/jsoner.rb +124 -0
  36. data/lib/unclekryon/log.rb +111 -0
  37. data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
  38. data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
  39. data/lib/unclekryon/server.rb +29 -0
  40. data/lib/unclekryon/trainer.rb +231 -0
  41. data/lib/unclekryon/uploader.rb +29 -0
  42. data/lib/unclekryon/util.rb +228 -0
  43. data/lib/unclekryon/version.rb +26 -0
  44. data/unclekryon.gemspec +67 -0
  45. metadata +189 -0
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+
29
+ require 'unclekryon/iso/base_iso'
30
+
31
+ ##
32
+ # @see https://en.wikipedia.org/wiki/ISO_3166-2:CA
33
+ # @see https://www.iso.org/obp/ui/#iso:code:3166:CA
34
+ ##
35
+ module UncleKryon
36
##
# A single Canadian province or territory.
#
# Only the name and the code are filled in by this class; both pass through
# the class-level +simplify_name+ / +simplify_code+ helpers, which are not
# defined here (presumably inherited from BaseIso).
##
class CanProvTerr < BaseIso
  # @param row [Array<String>,nil] cell text of one table row, where index 1
  #   holds the code and index 2 holds the name; any non-Array value leaves
  #   the instance exactly as +super+ initialized it
  def initialize(row = nil)
    super()

    return unless row.is_a?(Array)

    raw_code, raw_name = row.values_at(1, 2)

    @name = self.class.simplify_name(raw_name)
    @code = self.class.simplify_code(raw_code)
  end
end
46
+
47
##
# Collection of Canadian provinces & territories, stored by code.
#
# Can be loaded from a YAML file or rebuilt from a locally saved copy of the
# ISO table HTML (see {parse_and_save_to_file}).
##
class CanProvsTerrs < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/can_provs_terrs.yaml"

  # Number of <td> cells that make up one entry in the parsed table.
  CELLS_PER_ROW = 7

  # Index of the cell that is compared against 'fr' to skip French rows.
  LANG_CELL_INDEX = 4

  def initialize
    super()

    @id = 'CAN Provinces & Territories'
  end

  # @param filepath [String] YAML file to load
  # @return whatever the instance-level +load_file+ returns
  def self.load_file(filepath = DEFAULT_FILEPATH)
    CanProvsTerrs.new.load_file(filepath)
  end

  # Parses a saved HTML table into entries and writes them out as YAML.
  #
  # Rows with fewer than {CELLS_PER_ROW} cells (e.g. a header row made only of
  # <th> cells) are skipped instead of raising NoMethodError.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if the same code appears twice
  # @see https://www.iso.org/obp/ui/#iso:code:3166:CA
  def self.parse_and_save_to_file(parse_filepath, save_filepath = DEFAULT_FILEPATH)
    # File.open with a block closes the handle once parsing is done, and
    # (unlike Kernel#open) never interprets a leading '|' as a command.
    doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file, nil, 'utf-8') }
    provs_terrs = CanProvsTerrs.new

    doc.css('tr').each do |tr|
      cells = tr.css('td').map { |td| td.content.gsub(/[[:space:]]+/, ' ').strip }

      # Too few cells to form an entry (header or malformed row).
      next if cells.length < CELLS_PER_ROW

      # Skip French; we just want English
      next if cells[LANG_CELL_INDEX].downcase == 'fr'

      cells.each_slice(CELLS_PER_ROW) do |row|
        # A trailing partial group never formed an entry; keep ignoring it.
        next if row.length < CELLS_PER_ROW

        prov_terr = CanProvTerr.new(row)
        raise "CAN prov/terr already exists: #{prov_terr.inspect}" if provs_terrs.key?(prov_terr.code)

        # Same name under a different code is only reported, not rejected.
        provs_terrs.values.each_value do |existing|
          puts "Duplicate CAN prov/terr names: #{existing.name}" if existing.name == prov_terr.name
        end

        provs_terrs[prov_terr.code] = prov_terr
      end
    end

    provs_terrs.sort_keys!
    provs_terrs.save_to_file(save_filepath)
  end
end
104
+ end
105
+
106
# Script entry point (only when this file is run directly):
#   no args        => load the default YAML and print it
#   <html> [yaml]  => parse the HTML file and save YAML (default path if omitted)
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::CanProvsTerrs.load_file.to_s
  else
    html_path = ARGV[0]
    yaml_path = ARGV.fetch(1, UncleKryon::CanProvsTerrs::DEFAULT_FILEPATH)

    UncleKryon::CanProvsTerrs.parse_and_save_to_file(html_path, yaml_path)
  end
end
@@ -0,0 +1,133 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+ require 'yaml'
29
+
30
+ require 'unclekryon/iso/base_iso'
31
+
32
+ ##
33
+ # @see https://en.wikipedia.org/wiki/ISO_3166
34
+ # @see https://en.wikipedia.org/wiki/ISO_3166-1
35
+ # @see https://en.wikipedia.org/wiki/ISO_3166-2:GB
36
+ # @see https://www.iso.org/obp/ui/#search/code/
37
+ ##
38
+ module UncleKryon
39
##
# A single country with its alpha-2 / alpha-3 codes.
#
# +names+ and +codes+ are Arrays so that {#to_s} can join them; +code+ is the
# alpha-3 code.
##
class Country < BaseIso
  attr_reader :names
  attr_reader :codes
  attr_reader :alpha2_code
  attr_reader :alpha3_code
  attr_reader :region

  # @param row [Array<String>,nil] cell text of one table row, where index 0
  #   is the name, index 2 the alpha-2 code and index 3 the alpha-3 code; any
  #   non-Array value leaves all fields of this class nil
  def initialize(row = nil)
    super()

    @names = nil
    @codes = nil
    @alpha2_code = nil
    @alpha3_code = nil
    @region = nil

    return unless row.is_a?(Array)

    @name = self.class.simplify_name(row[0])
    @alpha2_code = row[2]
    @alpha3_code = row[3]

    # Must be an Array: #to_s joins it, and a bare String has no #join.
    @names = [@name]
    @code = @alpha3_code
    @codes = [@alpha3_code, @alpha2_code].compact.uniq
  end

  # @return [String] +["name","names",code,"codes",alpha2,alpha3,region]+ with
  #   multiple names/codes separated by ';'
  def to_s
    # Array() tolerates nil (no row given) and a plain String (e.g. data that
    # was stored with a single name instead of a list).
    names = Array(@names).join(';')
    codes = Array(@codes).join(';')

    s = '['.dup
    s << %Q("#{@name}","#{names}")
    s << %Q(,#{@code},"#{codes}",#{@alpha2_code},#{@alpha3_code})
    s << %Q(,#{@region})
    s << ']'

    s
  end
end
76
+
77
##
# Collection of countries, stored by code.
#
# Can be loaded from a YAML file or rebuilt from a locally saved copy of the
# ISO table HTML (see {parse_and_save_to_file}).
##
class Countries < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/countries.yaml"

  # Number of consecutive <td> cells that make up one country.
  CELLS_PER_ROW = 5

  def initialize
    super()
  end

  # @param filepath [String] YAML file to load
  # @return whatever the instance-level +load_file+ returns
  def self.load_file(filepath = DEFAULT_FILEPATH)
    Countries.new.load_file(filepath)
  end

  # Parses a saved HTML table into countries and writes them out as YAML.
  #
  # All <td> cells of the document are read in order and grouped by
  # {CELLS_PER_ROW}; <tr> boundaries are not consulted.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if the same code appears twice
  # @see https://www.iso.org/obp/ui/#search/code/
  def self.parse_and_save_to_file(parse_filepath, save_filepath = DEFAULT_FILEPATH)
    # File.open with a block closes the handle once parsing is done, and
    # (unlike Kernel#open) never interprets a leading '|' as a command.
    doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file, nil, 'utf-8') }
    cells = doc.css('td').map { |td| td.content.gsub(/[[:space:]]+/, ' ').strip }

    countries = Countries.new

    cells.each_slice(CELLS_PER_ROW) do |row|
      # A trailing partial group never formed a country; keep ignoring it.
      next if row.length < CELLS_PER_ROW

      country = Country.new(row)
      raise "Country already exists: #{country.inspect}" if countries.key?(country.code)

      # Same name under a different code is only reported, not rejected.
      countries.values.each_value do |existing|
        puts "Duplicate country names: #{existing.name}" if existing.name == country.name
      end

      countries[country.code] = country
    end

    countries.sort_keys!
    countries.save_to_file(save_filepath)
  end
end
124
+ end
125
+
126
# Script entry point (only when this file is run directly):
#   no args        => load the default YAML and print it
#   <html> [yaml]  => parse the HTML file and save YAML (default path if omitted)
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::Countries.load_file.to_s
  else
    html_path = ARGV[0]
    yaml_path = ARGV.fetch(1, UncleKryon::Countries::DEFAULT_FILEPATH)

    UncleKryon::Countries.parse_and_save_to_file(html_path, yaml_path)
  end
end
@@ -0,0 +1,241 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2018-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nokogiri'
27
+ require 'open-uri'
28
+ require 'yaml'
29
+
30
+ require 'unclekryon/dev_opts'
31
+ require 'unclekryon/log'
32
+
33
+ require 'unclekryon/iso/base_iso'
34
+
35
+ ##
36
+ # @see https://en.wikipedia.org/wiki/ISO_639
37
+ # @see http://www.loc.gov/standards/iso639-2/php/code_list.php
38
+ # @see http://www.iana.org/assignments/language-subtag-registry/language-subtag-registry
39
+ ##
40
+ module UncleKryon
41
##
# A single language with its alpha-2 code and alpha-3 code(s).
#
# +code+ is the alpha-3 code; when the source lists two alpha-3 codes tagged
# "(B)" and "(T)", the "(T)" one becomes +alpha3_code+ and the "(B)" one
# becomes +alpha3_code_b+.
##
class Language < BaseIso
  attr_reader :names
  attr_reader :codes
  attr_reader :alpha2_code
  attr_reader :alpha3_code
  attr_reader :alpha3_code_b

  # @param row [Array<String>,nil] cell text of one table row, where index 0
  #   is the alpha-3 code(s), index 1 the alpha-2 code (may be empty) and
  #   index 2 the ';'-separated names; any non-Array value leaves all fields
  #   of this class nil
  # @raise [RuntimeError] if the alpha-3 cell has malformed "(B)"/"(T)" tags
  def initialize(row = nil)
    super()

    @names = nil
    @codes = nil
    @alpha2_code = nil
    @alpha3_code = nil
    @alpha3_code_b = nil

    return unless row.is_a?(Array)

    @names = row[2].split(';').compact.uniq.map(&self.class.method(:fix_name))
    @alpha2_code = row[1].empty? ? nil : row[1]

    parse_alpha3_codes(row[0])

    @name = @names[0]
    @code = @alpha3_code
    @codes = [@alpha3_code, @alpha3_code_b, @alpha2_code].compact.uniq
  end

  # @see Languages.parse_and_save_to_file(...)
  def ==(lang)
    super(lang) &&
      @names == lang.names &&
      @codes == lang.codes &&
      @alpha2_code == lang.alpha2_code &&
      @alpha3_code == lang.alpha3_code &&
      @alpha3_code_b == lang.alpha3_code_b
  end

  # @return [String] +["name","names",code,"codes",alpha2,alpha3,alpha3_b]+
  #   with multiple names/codes separated by ';'
  def to_s
    # Array() keeps this usable on an instance built without a row (nil).
    names = Array(@names).join(';')
    codes = Array(@codes).join(';')

    s = '['.dup
    s << %Q("#{@name}","#{names}",)
    s << %Q(#{@code},"#{codes}",)
    s << %Q(#{@alpha2_code},#{@alpha3_code},#{@alpha3_code_b})
    s << ']'

    s
  end

  private

  # Sets @alpha3_code (and @alpha3_code_b) from the alpha-3 cell text.
  #
  # The cell is either a plain code, or codes each followed by a
  # parenthesized tag, e.g. "alb (B) sqi (T)". The split tokens are kept in a
  # local so that assigning @alpha3_code on "(T)" cannot corrupt a later
  # "(B)" lookup, whatever order the tags come in.
  #
  # @param text [String] alpha-3 cell text
  # @raise [RuntimeError] on a tag without a preceding code, a repeated tag,
  #   or tagged codes without any "(T)" code
  def parse_alpha3_codes(text)
    parts = text.split(/[[:space:]]*[\(\)][[:space:]]*/)

    if parts.length <= 1
      @alpha3_code = text
      return
    end

    # Starts true so that a tag in the very first position is rejected.
    prev_was_tag = true

    parts.each_with_index do |part, i|
      part.strip!
      tag = part.upcase

      unless tag == 'B' || tag == 'T'
        prev_was_tag = false
        next
      end

      raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}" if prev_was_tag

      if tag == 'B'
        raise "Multiple alpha3_code_b: #{parts}" unless @alpha3_code_b.nil?
        @alpha3_code_b = parts[i - 1]
      else
        raise "Multiple alpha3_code (T): #{parts}" unless @alpha3_code.nil?
        @alpha3_code = parts[i - 1]
      end

      prev_was_tag = true
    end

    # Tagged codes were present, but none of them was the "(T)" one.
    return unless @alpha3_code.nil?

    raise "Invalid alpha-3 code for: #{@names},#{@alpha2_code},#{parts}"
  end
end
124
+
125
##
# Collection of languages, stored by code.
#
# Can be loaded from a YAML file or rebuilt from a locally saved copy of the
# code-list HTML table (see {parse_and_save_to_file}).
##
class Languages < BaseIsos
  DEFAULT_FILEPATH = "#{DEFAULT_DIR}/languages.yaml"

  # Number of consecutive <td> cells that make up one language.
  CELLS_PER_ROW = 5

  # Separators tried in order by {#find_by_kryon}.
  KRYON_SEPARATORS = [
    /[[:space:]]*[\/\+][[:space:]]*/, # Multiple languages are usually separated by '/'
    /[[:space:]]+/                    # Sometimes separated by space/newline
  ].freeze

  # Whole-token misspellings and/or weird shortenings => proper name.
  KRYON_MISSPELLINGS = [
    [/\AFRENC\z/i, 'French'],
    [/\ASPAN\z/i, 'Spanish'],
    [/\AENGLSH\z/i, 'English'],
    [/\AHUNGARY\z/i, 'Hungarian']
  ].freeze

  def initialize
    super()
  end

  # Resolves free-form language text into a list of language codes.
  #
  # The text is split with each of {KRYON_SEPARATORS} in turn. If a token of
  # an earlier separator is not a known language, the partial result is
  # discarded and the next separator is tried. On the last separator, an
  # unknown token raises in dev mode and is otherwise logged and skipped.
  #
  # @param text [String] language text, e.g. several names separated by '/'
  # @param add_english [Boolean] append the code of 'eng' if not already found
  # @param options [Hash] accepted but not used by this method
  # @return [Array,nil] found codes in order of appearance, or nil if none
  # @raise [RuntimeError] in dev mode, if no separator yields only known languages
  def find_by_kryon(text, add_english: false, **options)
    langs = []
    last_index = KRYON_SEPARATORS.length - 1

    KRYON_SEPARATORS.each_with_index do |separator, i|
      langs.clear
      retry_with_next = false

      text.split(separator).each do |token|
        token = fix_kryon_token(token)
        lang = find(token)

        unless lang.nil?
          langs.push(lang.code)
          next
        end

        if i >= last_index
          msg = "No language found for: #{token}"

          raise msg if DevOpts.instance.dev?

          log.warn(msg)
        else
          log.warn("Not a language; trying next regex: #{token}")

          # Leave the token loop so the outer loop can move to the next
          # separator (a bare `next` here would only skip this token).
          retry_with_next = true
          break
        end
      end

      # No problem with this regex, so bail out
      break unless retry_with_next
    end

    if add_english
      eng_code = find_by_code('eng').code
      langs.push(eng_code) unless langs.include?(eng_code)
    end

    langs.empty? ? nil : langs
  end

  # @param filepath [String] YAML file to load
  # @return whatever the instance-level +load_file+ returns
  def self.load_file(filepath = DEFAULT_FILEPATH)
    Languages.new.load_file(filepath)
  end

  # Parses a saved HTML table into languages and writes them out as YAML.
  #
  # All <td> cells of the document are read in order and grouped by
  # {CELLS_PER_ROW}; <tr> boundaries are not consulted. An entry whose code
  # was already seen is dropped if it equals the stored one (per
  # Language#==), and is an error otherwise.
  #
  # @param parse_filepath [String] use web browser's developer tools to copy & paste table HTML into local file
  # @param save_filepath [String] local file to save YAML to
  # @raise [RuntimeError] if a code repeats with different data
  # @see http://www.loc.gov/standards/iso639-2/php/code_list.php
  def self.parse_and_save_to_file(parse_filepath, save_filepath = DEFAULT_FILEPATH)
    # File.open with a block closes the handle once parsing is done, and
    # (unlike Kernel#open) never interprets a leading '|' as a command.
    doc = File.open(parse_filepath) { |file| Nokogiri::HTML(file, nil, 'utf-8') }
    cells = doc.css('td').map { |td| td.content.gsub(/[[:space:]]+/, ' ').strip }

    langs = Languages.new

    cells.each_slice(CELLS_PER_ROW) do |row|
      # A trailing partial group never formed a language; keep ignoring it.
      next if row.length < CELLS_PER_ROW

      lang = Language.new(row)

      if langs.key?(lang.code)
        # There were so many duplicates, so added comparison check
        raise "Language already exists: #{lang.inspect}" if lang != langs[lang.code]

        next
      end

      # Same name under a different code is only reported, not rejected.
      langs.values.each_value do |existing|
        puts "Duplicate lang names: #{existing.name}" if existing.name == lang.name
      end

      langs[lang.code] = lang
    end

    langs.sort_keys!
    langs.save_to_file(save_filepath)
  end

  private

  # Returns a cleaned-up copy of one language token from {#find_by_kryon}.
  #
  # @param token [String] raw token; not modified
  # @return [String] token without '+'/'*' markers and with known misspellings fixed
  def fix_kryon_token(token)
    # '+'/'*' means more languages, but won't worry about it (since not
    # listed). Removed first so that e.g. "FRENC*" is still recognized.
    fixed = token.gsub(/[\+\*]+/, '')

    KRYON_MISSPELLINGS.each do |pattern, name|
      fixed = fixed.gsub(pattern, name)
    end

    fixed
  end
end
232
+ end
233
+
234
# Script entry point (only when this file is run directly):
#   no args        => load the default YAML and print it
#   <html> [yaml]  => parse the HTML file and save YAML (default path if omitted)
if $PROGRAM_NAME == __FILE__
  if ARGV.empty?
    puts UncleKryon::Languages.load_file.to_s
  else
    html_path = ARGV[0]
    yaml_path = ARGV.fetch(1, UncleKryon::Languages::DEFAULT_FILEPATH)

    UncleKryon::Languages.parse_and_save_to_file(html_path, yaml_path)
  end
end