unclekryon 0.4.9.pre.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +34 -0
- data/Gemfile.lock +43 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +59 -0
- data/bin/unclekryon +30 -0
- data/iso/can_provs_terrs.yaml +54 -0
- data/iso/countries.yaml +3050 -0
- data/iso/iso.yaml +8 -0
- data/iso/languages.yaml +5641 -0
- data/iso/regions.yaml +42 -0
- data/iso/subregions.yaml +6 -0
- data/iso/usa_states.yaml +230 -0
- data/lib/unclekryon.rb +384 -0
- data/lib/unclekryon/data/album_data.rb +147 -0
- data/lib/unclekryon/data/artist_data.rb +109 -0
- data/lib/unclekryon/data/artist_data_data.rb +146 -0
- data/lib/unclekryon/data/aum_data.rb +75 -0
- data/lib/unclekryon/data/base_data.rb +79 -0
- data/lib/unclekryon/data/pic_data.rb +76 -0
- data/lib/unclekryon/data/release_data.rb +57 -0
- data/lib/unclekryon/data/social_data.rb +39 -0
- data/lib/unclekryon/data/timespan_data.rb +70 -0
- data/lib/unclekryon/dev_opts.rb +41 -0
- data/lib/unclekryon/hacker.rb +327 -0
- data/lib/unclekryon/iso.rb +341 -0
- data/lib/unclekryon/iso/base_iso.rb +196 -0
- data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
- data/lib/unclekryon/iso/country.rb +133 -0
- data/lib/unclekryon/iso/language.rb +241 -0
- data/lib/unclekryon/iso/region.rb +53 -0
- data/lib/unclekryon/iso/subregion.rb +53 -0
- data/lib/unclekryon/iso/usa_state.rb +106 -0
- data/lib/unclekryon/jsoner.rb +124 -0
- data/lib/unclekryon/log.rb +111 -0
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
- data/lib/unclekryon/server.rb +29 -0
- data/lib/unclekryon/trainer.rb +231 -0
- data/lib/unclekryon/uploader.rb +29 -0
- data/lib/unclekryon/util.rb +228 -0
- data/lib/unclekryon/version.rb +26 -0
- data/unclekryon.gemspec +67 -0
- metadata +189 -0
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'date'
|
|
25
|
+
require 'nokogiri'
|
|
26
|
+
require 'open-uri'
|
|
27
|
+
|
|
28
|
+
require 'unclekryon/iso'
|
|
29
|
+
require 'unclekryon/log'
|
|
30
|
+
require 'unclekryon/trainer'
|
|
31
|
+
require 'unclekryon/util'
|
|
32
|
+
|
|
33
|
+
require 'unclekryon/data/album_data'
|
|
34
|
+
require 'unclekryon/data/artist_data_data'
|
|
35
|
+
require 'unclekryon/data/release_data'
|
|
36
|
+
|
|
37
|
+
module UncleKryon
|
|
38
|
+
class KryonAumYearParser
|
|
39
|
+
include Logging
|
|
40
|
+
|
|
41
|
+
attr_accessor :artist
|
|
42
|
+
attr_accessor :exclude_album
|
|
43
|
+
attr_accessor :release
|
|
44
|
+
attr_accessor :title
|
|
45
|
+
attr_accessor :trainers
|
|
46
|
+
attr_accessor :training
|
|
47
|
+
attr_reader :updated_on
|
|
48
|
+
attr_accessor :url
|
|
49
|
+
|
|
50
|
+
alias_method :training?,:training
|
|
51
|
+
|
|
52
|
+
# Creates a parser for one release (year) page of the Kryon free-audio site.
#
# title::          Release title; also used to derive the default URL.
# url::            Page URL. When empty (per Util.empty_s?), the URL built by
#                  get_kryon_year_url(title) is used instead.
# artist::         Data object stored in @artist (defaults to a new ArtistDataData).
# training::       Flag exposed through #training / #training?.
# train_filepath:: Path handed to Trainers.new.
# updated_on::     Timestamp stored in @updated_on. When empty, the current time
#                  formatted by Util.format_datetime is used.
# options::        Extra keyword arguments are accepted and ignored.
def initialize(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil,
               updated_on: nil,**options)
  @artist = artist
  @exclude_album = false
  @title = title
  @trainers = Trainers.new(train_filepath)
  @training = training

  # Previously a caller-supplied updated_on was silently dropped (leaving
  # @updated_on nil) because the assignment only ran when it was empty.
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now()) : updated_on
  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(title) : url
end
|
|
62
|
+
|
|
63
|
+
# Parses one of the free-form date texts found on the Kryon pages into a
# begin/end pair.
#
# date:: The date text, e.g. "SEPT 29 - OCT 9, 2017" or "1/7/2012".
# year:: Optional year appended to formats that carry no year of their own,
#        e.g. "OCT 25 - NOV 3". Skipped when nil.
#
# Returns a 2-element Array [begin, end]. Each element is the result of
# Util.format_date, or '' when that end of the range is absent.
#
# Raises ArgumentError (logged through Log.instance.fatal, then re-raised) when
# Date.strptime rejects the text.
#
# This file enables frozen_string_literal, so format literals are never mutated
# with <<, and the non-mutating gsub is used because clone() keeps a frozen
# argument frozen.
def self.parse_kryon_date(date,year=nil)
  # Don't modify args and clean them up so can use /\s/ instead of /[[:space:]]/
  date = Util.clean_data(date.clone())
  # NOTE(review): Util.clean_data's handling of nil is not visible here, so a
  #               missing year is simply left as nil.
  year = Util.clean_data(year.clone()) unless year.nil?()

  # Fix misspellings and/or weird shortenings
  date = date.gsub(/Feburary/i,'February') # "Feburary 2-13, 2017"
  date = date.gsub(/SEPT(\s+|\-)/i,'Sep\1') # "SEPT 29 - OCT 9, 2017", "Sept-Oct 2015"
  date = date.gsub(/Septembe\s+/i,'September ') # "Septembe 4, 2016"
  date = date.gsub(/Ocotber/i,'October') # "Ocotber 10, 2015"

  comma = date.include?(',') ? ',' : '' # "May 6 2017"
  r = Array.new(2)

  begin
    if date.include?('-')
      # "Sept-Oct 2015"
      if date =~ /\A[[:alpha:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%b-%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "4/28/12 - 4/29/12"
      elsif date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\-/
        # Kept in a separate local so the error log below still shows the text.
        parts = date.split(/\s*-\s*/)

        r[0] = Date.strptime(parts[0],'%m/%d/%y')
        r[1] = Date.strptime(parts[1],'%m/%d/%y')
      # "10-17 to 11-18, 2012"
      elsif date =~ /\A[[:digit:]]+\s*\-\s*[[:digit:]]+\s+to\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/i
        parts = date.split(/\s*to\s*/i)

        r[1] = Date.strptime(parts[1],'%m-%d, %Y')
        r[0] = Date.strptime(parts[0],'%m-%d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      else
        # "SEPT 29 - OCT 9, 2017", "May 31-June 1, 2014"
        if date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+[\,\s]+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = "%B %d-%B %d#{comma} %Y"
        # "OCT 25 - NOV 3" (2014)
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d'

          if !year.nil?()
            date = "#{date}, #{year}"
            r1f = "#{r1f}, %Y" # New string; the literal above is frozen
          end
        # "December 12-13"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')

          # "September 16 - 2018"
          if date =~ /-[[:digit:]]{4}\z/
            r1f = '%B %d-%Y'
          else
            r1f = '%B %d-%d'

            if !year.nil?()
              date = "#{date}, #{year}"
              r1f = "#{r1f}, %Y"
            end
          end
        # "June 30-July 1-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d-%Y'
        # "September 7 & 9-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s+\&\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d & %d-%Y'
        else
          # "OCT 27 - 28 - 29, 2017"; remove spaces around dashes
          date = date.gsub(/\s+\-\s+/,'-')

          # "June 7-9-16-17" & "June 9-10-11-12"
          if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\z/
            r1f = '%B %d-%d-%d-%d'

            if !year.nil?()
              date = "#{date}, #{year}"
              r1f = "#{r1f}, %Y" # New string; the literal above is frozen
            end
          else
            # "MAY 15-16-17, 2017" and "January 7-8, 2017"
            r1f = (date =~ /\-.*\-/) ? "%B %d-%d-%d#{comma} %Y" : "%B %d-%d#{comma} %Y"
          end
        end

        # The full format yields the end date (and the year); the leading
        # "%B %d" yields the begin month/day, which then borrows that year.
        r[1] = Date.strptime(date,r1f)
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    elsif date.include?('/')
      # "1/7/2012"
      if date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\z/
        date = date.gsub(/\s+/,'')

        r[0] = Date.strptime(date,'%m/%d/%Y')
        r[1] = nil
      else
        # "JULY/AUG 2017"
        r[1] = Date.strptime(date,'%b/%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    else
      # "April 11, 12, 2015"
      if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\s*,\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%B %d, %d, %Y')
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "March, 2014"
      elsif date =~ /\A[[:alpha:]]+\s*,\s*[[:digit:]]+\z/
        r[0] = Date.strptime(date,'%B, %Y')
        r[1] = nil
      else
        r[0] = Date.strptime(date,"%B %d#{comma} %Y")
        r[1] = nil
      end
    end
  rescue ArgumentError => e
    Log.instance.fatal("Invalid Date: '#{date}'",error: e)
    raise
  end

  r[0] = (!r[0].nil?) ? Util.format_date(r[0]) : ''
  r[1] = (!r[1].nil?) ? Util.format_date(r[1]) : ''

  return r
end
|
|
194
|
+
|
|
195
|
+
# Downloads the release (year) page and parses every album row on it.
#
# title::  Overrides @title when not nil.
# url::    Page URL; when empty (per Util.empty_s?), the URL is rebuilt from
#          @title with get_kryon_year_url.
# artist:: Overrides @artist when not nil.
#
# Parsed albums are stored in @artist.albums (keyed by URL) and their URLs are
# appended to the release's album list. The release is looked up in
# @artist.releases by title and created there if missing.
#
# Returns the release object (@release).
# Raises ArgumentError when the artist is nil or the title/URL is empty.
def parse_site(title=nil,url=nil,artist=nil)
  @artist = artist unless artist.nil?()
  @title = title unless title.nil?()

  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(@title) : url

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?()
  raise ArgumentError,"Title cannot be empty" if @title.nil?() || (@title = @title.strip()).empty?()
  raise ArgumentError,"URL cannot be empty" if @url.nil?() || (@url = @url.strip()).empty?()

  @release = @artist.releases[@title]
  @trainers.load_file()

  if @release.nil?
    @release = ReleaseData.new
    @release.mirrors = self.class.get_kryon_year_mirrors(@title)
    @release.title = @title
    @release.updated_on = @updated_on
    @release.url = @url

    @artist.releases[@title] = @release
  end

  # URI.open (from open-uri) replaces the bare Kernel#open, which no longer
  # opens URLs on Ruby 3.0+. The block form closes the handle after parsing.
  doc = URI.open(@release.url) do |io|
    Nokogiri::HTML(io,nil,'utf-8') # Force utf-8 encoding
  end
  row_pos = 1
  rows = doc.css('table tr tr')

  rows.each() do |row|
    next if row.nil?
    next if (cells = row.css('td')).nil?

    album = AlbumData.new
    album.updated_on = @updated_on
    @exclude_album = false

    # There is always a date cell
    has_date_cell = parse_date_cell(cells,album)

    # Sometimes there is not a topic, location, or language cell, but not all 3!
    # - Put || last because of short-circuit ||! (every parser must run)
    has_other_cell = parse_topic_cell(cells,album)
    has_other_cell = parse_location_cell(cells,album) || has_other_cell
    has_other_cell = parse_language_cell(cells,album) || has_other_cell

    if !has_date_cell || !has_other_cell || @exclude_album
      # - If it doesn't have any cells, it is probably javascript or something else, so don't log it
      # - If @exclude_album, then it has already been logged, so don't log it
      if (!has_date_cell && has_other_cell) || (has_date_cell && !@exclude_album)
        log.warn("Excluding album: #{row_pos},#{album.date_begin},#{album.date_end},#{album.title}," +
          "#{album.locations},#{album.languages}")
        row_pos += 1
      end

      next
    end

    # Is it actually old or new? An unchanged album keeps its old timestamp.
    # NOTE(review): this lookup uses album.url before Util.fix_link is applied,
    #               while albums are stored under the fixed link - confirm the
    #               two keys always agree.
    if @artist.albums.key?(album.url) && album == @artist.albums[album.url]
      album.updated_on = @artist.albums[album.url].updated_on
    end

    album.url = Util.fix_link(album.url)

    @artist.albums[album.url] = album

    if !@release.albums.include?(album.url)
      @release.albums.push(album.url)
      @release.updated_on = @updated_on
    end

    row_pos += 1
  end

  return @release
end
|
|
271
|
+
|
|
272
|
+
# Reads the date cell (second cell of a row) into the album.
#
# The album URL is also taken from this cell's link because a row sometimes has
# no topic. The first link with non-empty text and an href is used (2014 pages
# have several links in the cell).
#
# Sets album.date_begin, album.date_end and album.url.
# Returns true only when both a begin date and a URL were found.
def parse_date_cell(cells,album)
  return false if cells.length <= 1

  date_td = cells[1]
  return false if date_td.nil?

  links = date_td.css('a')
  return false if links.nil?
  return false if links.length < 1

  link = nil

  links.each do |a|
    next if a.nil?() || Util.empty_s?(a.content) || a['href'].nil?()

    link = a
    break
  end

  return false if link.nil?()

  begin_end = self.class.parse_kryon_date(Util.clean_data(link.content),@title)

  album.date_begin = begin_end[0]
  album.date_end = begin_end[1]
  album.url = Util.clean_link(@release.url,link['href'])

  if album.date_begin.empty? || album.url.empty?
    false
  else
    true
  end
end
|
|
301
|
+
|
|
302
|
+
# Reads the language cell (fifth cell of a row) into album.languages.
#
# The cleaned cell text is resolved through Iso.languages.find_by_kryon with
# add_english: true, because the official site always has English.
#
# Returns true when at least one language was found.
def parse_language_cell(cells,album)
  return false if cells.length <= 4

  language_td = cells[4]
  return false if language_td.nil?

  text = language_td.content
  return false if text.nil?

  # For the official site, they always have English, so add it if not present
  album.languages = Iso.languages.find_by_kryon(Util.clean_data(text),add_english: true)

  if album.languages.nil?() || album.languages.empty?()
    false
  else
    true
  end
end
|
|
314
|
+
|
|
315
|
+
# Reads the location cell (fourth cell of a row) into album.locations.
#
# Cells containing "RADIO SHOW" (seen on the 2014 page) and cells that are
# empty after cleaning are treated as having no location.
#
# Returns true when Iso.find_kryon_locations found at least one location.
def parse_location_cell(cells,album)
  return false if cells.length <= 3

  location_td = cells[3]
  return false if location_td.nil?

  text = location_td.content
  return false if text.nil?
  return false if text =~ /[[:space:]]*RADIO[[:space:]]+SHOW[[:space:]]*/ # 2014

  text = Util.clean_data(text)
  return false if text.empty?()

  album.locations = Iso.find_kryon_locations(text)

  if album.locations.nil?() || album.locations.empty?()
    false
  else
    true
  end
end
|
|
328
|
+
|
|
329
|
+
# Reads the topic cell (third cell of a row) into album.title and, when
# needed, replaces album.url with the topic link's href.
#
# The first link with non-empty text is used (2017 "San Jose, California (3)"
# has several). Topics matching "GROUP PHOTO" or "PLEASE READ" set
# @exclude_album and are rejected.
#
# Returns true when a non-empty title (and a usable URL) was found.
# Raises RuntimeError in dev mode (DevOpts.instance.dev?) when neither the date
# cell nor the topic cell supplies a URL.
def parse_topic_cell(cells,album)
  return false if cells.length <= 2

  topic_td = cells[2]
  return false if topic_td.nil?

  links = topic_td.css('a')
  return false if links.nil?
  return false if links.length < 1

  # For 2017 "San Jose, California (3)"
  link = nil

  links.each do |a|
    next if a.nil?() || Util.empty_s?(a.content)

    link = a
    break
  end

  return false if link.nil?()

  album.title = Util.fix_shortwith_text(Util.clean_data(link.content))

  exclude_topics = /
    GROUP[[:space:]]+PHOTO|
    PLEASE[[:space:]]+READ
  /ix

  if album.title =~ exclude_topics
    log.warn("Excluding album: Topic[#{album.title}]")
    @exclude_album = true

    return false
  end

  # Sometimes, the date cell's href is an image (See 2016 'Las Vegas, NV - "Numerology" - (3)')
  good_urls = /
    \.html?[[:space:]]*\z
  /ix

  date_url = album.url
  topic_url = Util.clean_link(@release.url,link['href'])

  # Sometimes, the date cell's href is wrong (See 2016 '"Five Concepts for the New Human" (2)')
  prefer_topic_url = (album.url !~ good_urls) || (!Util.empty_s?(topic_url) && date_url != topic_url)

  if prefer_topic_url
    album.url = topic_url
    log.warn("Using topic cell's href for URL: #{File.basename(date_url)}=>#{File.basename(album.url)}")

    if Util.empty_s?(album.url)
      msg = "Date and topic cells' hrefs are empty: Topic[#{album.title}]"

      raise msg if DevOpts.instance.dev?()

      log.warn(msg)

      return false
    end
  end

  album.title.empty? ? false : true
end
|
|
390
|
+
|
|
391
|
+
# Maps a release title to the name used in the site's file names.
#
# Only '2002-2005' is rewritten (to '2002_05'); any other value is returned
# unchanged.
def self.fix_kryon_year_title(year)
  year == '2002-2005' ? '2002_05' : year
end
|
|
396
|
+
|
|
397
|
+
# Builds the mirror list for a release (year) page.
#
# Returns a Hash with the single key 'original', pointing at the desktop
# free-audio page for the year (after fix_kryon_year_title).
def self.get_kryon_year_mirrors(year)
  page = fix_kryon_year_title(year)

  { 'original' => "https://www.kryon.com/freeAudio_folder/#{page}_freeAudio.html" }
end
|
|
406
|
+
|
|
407
|
+
# Builds the URL of the mobile free-audio page for a release (year).
#
# year::        Release title; passed through fix_kryon_year_title first.
# url_version:: Accepted but not used by this method.
def self.get_kryon_year_url(year,url_version=2)
  page = fix_kryon_year_title(year)

  "https://www.kryon.com/freeAudio_folder/mobile_pages/#{page}_freeAudio_m.html"
end
|
|
412
|
+
end
|
|
413
|
+
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'bundler/setup'
|
|
25
|
+
|
|
26
|
+
module UncleKryon
|
|
27
|
+
# Placeholder class: it declares no methods or state in this file.
class Server
|
|
28
|
+
end
|
|
29
|
+
end
|
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'bundler/setup'
|
|
25
|
+
|
|
26
|
+
require 'nbayes'
|
|
27
|
+
|
|
28
|
+
require 'unclekryon/dev_opts'
|
|
29
|
+
require 'unclekryon/log'
|
|
30
|
+
require 'unclekryon/util'
|
|
31
|
+
|
|
32
|
+
module UncleKryon
|
|
33
|
+
# Interactive tagger backed by an NBayes::Base classifier.
#
# tags maps a short ID typed by the user (e.g. 'b') to the tag it stands for
# (e.g. 'black'). #train asks the user which tag a text belongs to and feeds
# the answer to the classifier; #tag returns the classifier's best guess.
class Trainer
  attr_accessor :max_tag_id_length
  attr_accessor :max_tag_length
  attr_accessor :tags
  attr_accessor :trainer

  # Splits text on whitespace, strips punctuation and control characters from
  # each piece, and drops pieces that end up empty.
  #
  # Returns an Array of Strings.
  def self.to_tokens(text)
    text.split(/[[:space:]]+/)
        .map { |piece| piece.gsub(/[[:punct:][:cntrl:]]+/,'') }
        .reject(&:empty?)
  end

  # tags:: Hash of tag ID => tag name.
  def initialize(tags={})
    # Assigned up front (although init_lengths resets them) so the instance
    # variables keep their original creation order.
    @max_tag_id_length = 0
    @max_tag_length = 0
    @tags = tags
    @trainer = NBayes::Base.new()

    init_lengths()
  end

  # Recomputes the column widths used when printing the tag menu in #train.
  def init_lengths()
    id_width = 0
    tag_width = 0

    @tags.each do |id,tag|
      id_width = id.length if id.length > id_width
      tag_width = tag.length if tag.length > tag_width
    end

    id_width += 2 # Indention
    id_width = 7 if id_width < 7 # For "<Enter>" option

    @max_tag_id_length = id_width
    @max_tag_length = -tag_width # Negative width = left justify
  end

  # Prints the tag menu and the text, reads a tag ID from STDIN and trains the
  # classifier with the chosen tag.
  #
  # An empty answer accepts the classifier's current guess; end of input is
  # treated as an empty answer. In test mode (DevOpts.instance.test?) a random
  # tag ID is picked instead of reading STDIN.
  #
  # Returns the tag that was trained.
  # Raises RuntimeError when the guess or the entered ID is not a known tag.
  def train(text)
    guess_tag = self.tag(text) # Try and guess
    tokens = self.class.to_tokens(text)

    puts '#################'
    puts '# Training Tags #'
    puts '#################'

    # Builds a format such as "%7s = %-10s" from the computed widths
    tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]
    @tags.each do |id,tag|
      puts tf % [id,tag]
    end
    puts "<Enter> = Guess: #{guess_tag}"

    puts '-----------------'
    puts text
    puts '-----------------'
    print 'What is it? '

    # Use -t/--test option
    if DevOpts.instance.test?()
      tag_id = @tags.keys.sample() # For testing purposes
      puts tag_id
    else
      # STDIN because app accepts args; gets() returns nil at end of input,
      # which previously crashed on chomp()
      tag_id = STDIN.gets().to_s().strip()
    end
    puts

    if tag_id.empty?()
      raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
      tag = guess_tag
    else
      raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
      tag = @tags[tag_id]
    end

    @trainer.train(tokens,tag)

    return tag
  end

  # Returns the classifier's most likely tag (max_class) for the text.
  def tag(text)
    return @trainer.classify(self.class.to_tokens(text)).max_class
  end

  # Returns the classifier's YAML followed by its category stats.
  #
  # Built by interpolation: this file enables frozen_string_literal, so
  # appending to a '' literal with << raised FrozenError.
  def to_s()
    return "#{@trainer.to_yaml()}\n#{@trainer.data.category_stats()}"
  end
end
|
|
126
|
+
|
|
127
|
+
# A named collection of Trainer objects that is stored in one YAML file.
class Trainers
  attr_accessor :filepath
  attr_accessor :trainers

  # filepath:: Path of the YAML file used by #load_file and #save_to_file.
  def initialize(filepath=nil)
    @filepath = filepath
    @trainers = {}
  end

  # Merges the trainers stored in the YAML file into this collection.
  #
  # A trainer whose ID is not present yet is added as-is. For an existing ID
  # the tags are merged (tags already in memory win) and the stored classifier
  # replaces the one in memory. A missing or empty file is ignored.
  #
  # SECURITY: the file is deserialized into arbitrary Ruby objects, so it must
  # only ever be a file written by #save_to_file, never untrusted input.
  #
  # Raises ArgumentError when the filepath is nil or blank.
  def load_file()
    check_filepath()

    return unless File.exist?(@filepath)

    # Psych 4+ makes load_file a safe load, which rejects the serialized
    # trainer objects; unsafe_load_file restores the old behavior there.
    y = if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(@filepath)
    else
      YAML.load_file(@filepath)
    end

    return unless y # An empty file yields false/nil

    y.each() do |id,trainer|
      if !@trainers.key?(id)
        @trainers[id] = trainer
      else
        @trainers[id].tags = trainer.tags.merge(@trainers[id].tags)
        @trainers[id].trainer = trainer.trainer
      end

      @trainers[id].trainer.reset_after_import()
      @trainers[id].init_lengths()
    end
  end

  # Writes the collection as YAML to the filepath, creating its directories
  # first via Util.mk_dirs_from_filepath.
  #
  # Raises ArgumentError when the filepath is nil or blank.
  def save_to_file()
    check_filepath()

    Util.mk_dirs_from_filepath(@filepath)

    File.open(@filepath,'w') do |f|
      f.write(to_s())
    end
  end

  # Returns the trainer stored under id, or nil.
  def [](id)
    @trainers[id]
  end

  # Stores trainer under id.
  def []=(id,trainer)
    @trainers[id] = trainer
  end

  # Returns the whole collection dumped as YAML.
  def to_s()
    return YAML.dump(@trainers)
  end

  private

  # Strips @filepath in place and raises ArgumentError when it is nil or blank.
  def check_filepath()
    if @filepath.nil?() || (@filepath = @filepath.strip()).empty?()
      raise ArgumentError,'Training filepath cannot be empty'
    end
  end
end
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
# Manual demo, run only when this file is executed directly.
#
# First run (no test.yaml): trains a 'coffee' and a 'tea' trainer
# interactively on the sample texts and saves them to test.yaml.
# Later runs: loads test.yaml, prints it, tags the sample texts and then tags
# one line read from STDIN with both trainers.
if $0 == __FILE__
  demo_path = 'test.yaml'
  demo = UncleKryon::Trainers.new(demo_path)

  coffee_texts = [
    'dark black bitter',
    'double espresso steamed milk foam',
    'espresso steamed milk'
  ]
  tea_texts = [
    'no withering and oxidation',
    'broom-like, South Africa',
    'young, minimal'
  ]

  if !File.exist?(demo_path)
    demo['coffee'] = UncleKryon::Trainer.new(
      {'b'=>'black','c'=>'cappuccino','l'=>'latte'})
    demo['tea'] = UncleKryon::Trainer.new(
      {'g'=>'green','r'=>'red','w'=>'white'})

    coffee_texts.each { |v| demo['coffee'].train(v) }
    tea_texts.each { |v| demo['tea'].train(v) }

    demo.save_to_file()
  else
    demo.load_file()
    puts demo
    puts

    # Prints a heading, then each sample text with the tag guessed for it
    show_guesses = lambda do |heading,id,texts|
      puts heading
      texts.each do |v|
        puts "'#{v}' => #{demo[id].tag(v)}"
      end
      puts
    end

    show_guesses.call('[Coffee]','coffee',coffee_texts)
    show_guesses.call('[Tea]','tea',tea_texts)

    puts 'What kind of drink would you like?'
    answer = STDIN.gets().chomp().strip()
    puts "coffee => #{demo['coffee'].tag(answer)}"
    puts "tea => #{demo['tea'].tag(answer)}"
  end
end
|