unclekryon 0.4.9.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +34 -0
- data/Gemfile.lock +43 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +59 -0
- data/bin/unclekryon +30 -0
- data/iso/can_provs_terrs.yaml +54 -0
- data/iso/countries.yaml +3050 -0
- data/iso/iso.yaml +8 -0
- data/iso/languages.yaml +5641 -0
- data/iso/regions.yaml +42 -0
- data/iso/subregions.yaml +6 -0
- data/iso/usa_states.yaml +230 -0
- data/lib/unclekryon.rb +384 -0
- data/lib/unclekryon/data/album_data.rb +147 -0
- data/lib/unclekryon/data/artist_data.rb +109 -0
- data/lib/unclekryon/data/artist_data_data.rb +146 -0
- data/lib/unclekryon/data/aum_data.rb +75 -0
- data/lib/unclekryon/data/base_data.rb +79 -0
- data/lib/unclekryon/data/pic_data.rb +76 -0
- data/lib/unclekryon/data/release_data.rb +57 -0
- data/lib/unclekryon/data/social_data.rb +39 -0
- data/lib/unclekryon/data/timespan_data.rb +70 -0
- data/lib/unclekryon/dev_opts.rb +41 -0
- data/lib/unclekryon/hacker.rb +327 -0
- data/lib/unclekryon/iso.rb +341 -0
- data/lib/unclekryon/iso/base_iso.rb +196 -0
- data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
- data/lib/unclekryon/iso/country.rb +133 -0
- data/lib/unclekryon/iso/language.rb +241 -0
- data/lib/unclekryon/iso/region.rb +53 -0
- data/lib/unclekryon/iso/subregion.rb +53 -0
- data/lib/unclekryon/iso/usa_state.rb +106 -0
- data/lib/unclekryon/jsoner.rb +124 -0
- data/lib/unclekryon/log.rb +111 -0
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
- data/lib/unclekryon/server.rb +29 -0
- data/lib/unclekryon/trainer.rb +231 -0
- data/lib/unclekryon/uploader.rb +29 -0
- data/lib/unclekryon/util.rb +228 -0
- data/lib/unclekryon/version.rb +26 -0
- data/unclekryon.gemspec +67 -0
- metadata +189 -0
@@ -0,0 +1,413 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of UncleKryon-server.
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'date'
|
25
|
+
require 'nokogiri'
|
26
|
+
require 'open-uri'
|
27
|
+
|
28
|
+
require 'unclekryon/iso'
|
29
|
+
require 'unclekryon/log'
|
30
|
+
require 'unclekryon/trainer'
|
31
|
+
require 'unclekryon/util'
|
32
|
+
|
33
|
+
require 'unclekryon/data/album_data'
|
34
|
+
require 'unclekryon/data/artist_data_data'
|
35
|
+
require 'unclekryon/data/release_data'
|
36
|
+
|
37
|
+
module UncleKryon
|
38
|
+
class KryonAumYearParser
|
39
|
+
include Logging
|
40
|
+
|
41
|
+
attr_accessor :artist
|
42
|
+
attr_accessor :exclude_album
|
43
|
+
attr_accessor :release
|
44
|
+
attr_accessor :title
|
45
|
+
attr_accessor :trainers
|
46
|
+
attr_accessor :training
|
47
|
+
attr_reader :updated_on
|
48
|
+
attr_accessor :url
|
49
|
+
|
50
|
+
alias_method :training?,:training
|
51
|
+
|
52
|
+
# Creates a parser for one Kryon "free audio" year page.
#
# @param title [String,nil] title (year) of the release; also used to build the default URL
# @param url [String,nil] page URL; when empty, it is derived from +title+
# @param artist [ArtistDataData] object whose releases/albums are filled in by parse_site()
# @param training [Boolean] stored as-is and exposed through training?()
# @param train_filepath [String,nil] handed to Trainers.new()
# @param updated_on [String,nil] timestamp to stamp on parsed data; defaults to now when empty
# @param options [Hash] accepted for call compatibility; not used here
def initialize(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil,
               updated_on: nil,**options)
  @artist = artist
  @exclude_album = false
  @title = title
  @trainers = Trainers.new(train_filepath)
  @training = training
  # Honor a caller-supplied timestamp; it used to be dropped, which left @updated_on as nil
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now()) : updated_on
  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(title) : url
end
|
62
|
+
|
63
|
+
# Parses one of the many date spellings used on the Kryon pages into a begin/end pair.
#
# @param date [String] raw date text, e.g. "SEPT 29 - OCT 9, 2017" or "1/7/2012"
# @param year [String,nil] year appended to spellings that omit it, e.g. "December 12-13"
# @return [Array<String>] [begin,end], each run through Util.format_date(); an element is ''
#                         when that date is absent (e.g. no end date for a single day)
# @raise [ArgumentError] if the text cannot be parsed (logged as fatal before re-raising)
def self.parse_kryon_date(date,year=nil)
  # Don't modify args and clean them up so can use /\s/ instead of /[[:space:]]/
  date = Util.clean_data(date.clone())
  # year is optional; leave nil untouched so the "if !year.nil?()" checks below stay meaningful
  year = Util.clean_data(year.clone()) unless year.nil?()

  # Fix misspellings and/or weird shortenings
  date.gsub!(/Feburary/i,'February') # "Feburary 2-13, 2017"
  date.gsub!(/SEPT(\s+|\-)/i,'Sep\1') # "SEPT 29 - OCT 9, 2017", "Sept-Oct 2015"
  date.gsub!(/Septembe\s+/i,'September ') # "Septembe 4, 2016"
  date.gsub!(/Ocotber/i,'October') # "Ocotber 10, 2015"

  comma = date.include?(',') ? ',' : '' # "May 6 2017"
  r = Array.new(2)

  begin
    if date.include?('-')
      # "Sept-Oct 2015"
      if date =~ /\A[[:alpha:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%b-%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "4/28/12 - 4/29/12"
      elsif date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\-/
        date = date.split(/\s*-\s*/)

        r[0] = Date.strptime(date[0],'%m/%d/%y')
        r[1] = Date.strptime(date[1],'%m/%d/%y')
      # "10-17 to 11-18, 2012"
      elsif date =~ /\A[[:digit:]]+\s*\-\s*[[:digit:]]+\s+to\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/i
        date = date.split(/\s*to\s*/i)

        r[1] = Date.strptime(date[1],'%m-%d, %Y')
        r[0] = Date.strptime(date[0],'%m-%d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      else
        # r1f is the strptime format for the END date. Wherever it may be appended to below, it
        # must be a mutable copy, because this file uses "frozen_string_literal: true" and
        # appending to a frozen literal raises FrozenError.

        # "SEPT 29 - OCT 9, 2017", "May 31-June 1, 2014"
        if date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+[\,\s]+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = "%B %d-%B %d#{comma} %Y"
        # "OCT 25 - NOV 3" (2014)
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d'.dup()

          if !year.nil?()
            date << ", #{year}"
            r1f << ", %Y"
          end
        # "December 12-13"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')

          # "September 16 - 2018"
          if date =~ /-[[:digit:]]{4}\z/
            r1f = '%B %d-%Y'
          else
            r1f = '%B %d-%d'.dup()

            if !year.nil?()
              date << ", #{year}"
              r1f << ', %Y'
            end
          end
        # "June 30-July 1-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d-%Y'
        # "September 7 & 9-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s+\&\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d & %d-%Y'
        else
          # "OCT 27 - 28 - 29, 2017"; remove spaces around dashes
          date.gsub!(/\s+\-\s+/,'-')

          # "June 7-9-16-17" & "June 9-10-11-12"
          if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\z/
            r1f = '%B %d-%d-%d-%d'.dup()

            if !year.nil?()
              date << ", #{year}"
              r1f << ", %Y"
            end
          else
            # "MAY 15-16-17, 2017" and "January 7-8, 2017"
            r1f = (date =~ /\-.*\-/) ? "%B %d-%d-%d#{comma} %Y" : "%B %d-%d#{comma} %Y"
          end
        end

        # The end date comes from the full format (the last month/day directive wins); the begin
        # date comes from the leading "Month day" and borrows the year of the end date
        r[1] = Date.strptime(date,r1f)
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    elsif date.include?('/')
      # "1/7/2012"
      if date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\z/
        date = date.gsub(/\s+/,'')

        r[0] = Date.strptime(date,'%m/%d/%Y')
        r[1] = nil
      else
        # "JULY/AUG 2017"
        r[1] = Date.strptime(date,'%b/%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    else
      # "April 11, 12, 2015"
      if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\s*,\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%B %d, %d, %Y')
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "March, 2014"
      elsif date =~ /\A[[:alpha:]]+\s*,\s*[[:digit:]]+\z/
        r[0] = Date.strptime(date,'%B, %Y')
        r[1] = nil
      else
        r[0] = Date.strptime(date,"%B %d#{comma} %Y")
        r[1] = nil
      end
    end
  rescue ArgumentError => e
    Log.instance.fatal("Invalid Date: '#{date}'",error: e)
    raise
  end

  r[0] = (!r[0].nil?) ? Util.format_date(r[0]) : ''
  r[1] = (!r[1].nil?) ? Util.format_date(r[1]) : ''

  return r
end
|
194
|
+
|
195
|
+
# Downloads a year page and records its release and albums on @artist.
#
# @param title [String,nil] replaces @title when given
# @param url [String,nil] page URL; when empty, it is derived from @title
# @param artist [Object,nil] replaces @artist when given; must respond to releases() and albums()
# @return [Object] the release stored under @title (a new ReleaseData if none existed yet)
# @raise [ArgumentError] if the artist is nil, or if the title or URL is empty
def parse_site(title=nil,url=nil,artist=nil)
  @artist = artist unless artist.nil?()
  @title = title unless title.nil?()

  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(@title) : url

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?()
  raise ArgumentError,"Title cannot be empty" if @title.nil?() || (@title = @title.strip()).empty?()
  raise ArgumentError,"URL cannot be empty" if @url.nil?() || (@url = @url.strip()).empty?()

  @release = @artist.releases[@title]
  @trainers.load_file()

  if @release.nil?
    @release = ReleaseData.new
    @release.mirrors = self.class.get_kryon_year_mirrors(@title)
    @release.title = @title
    @release.updated_on = @updated_on
    @release.url = @url

    @artist.releases[@title] = @release
  end

  # Kernel#open() no longer accepts URLs on Ruby 3.0+, so use URI.open() when open-uri provides
  # it (Ruby 2.5+). The block form also closes the downloaded stream once Nokogiri has read it.
  parse_html = ->(io) { Nokogiri::HTML(io,nil,'utf-8') } # Force utf-8 encoding
  doc = URI.respond_to?(:open) ? URI.open(@release.url,&parse_html) : open(@release.url,&parse_html)

  row_pos = 1 # Only used in the "Excluding album" log message
  rows = doc.css('table tr tr')

  rows.each() do |row|
    next if row.nil?
    next if (cells = row.css('td')).nil?

    album = AlbumData.new
    album.updated_on = @updated_on
    @exclude_album = false

    # There is always a date cell
    has_date_cell = parse_date_cell(cells,album)

    # Sometimes there is not a topic, location, or language cell, but not all 3!
    # - Every parser must run, because each one also fills in fields of the album.
    # - So put the accumulated flag last: both || and "or" short-circuit, and would skip the
    #   call on their right-hand side once the flag is already true.
    has_other_cell = parse_topic_cell(cells,album)
    has_other_cell = parse_location_cell(cells,album) || has_other_cell
    has_other_cell = parse_language_cell(cells,album) || has_other_cell

    if !has_date_cell || !has_other_cell || @exclude_album
      # - If it doesn't have any cells, it is probably javascript or something else, so don't log it
      # - If @exclude_album, then it has already been logged, so don't log it
      if (!has_date_cell && has_other_cell) || (has_date_cell && !@exclude_album)
        log.warn("Excluding album: #{row_pos},#{album.date_begin},#{album.date_end},#{album.title}," +
          "#{album.locations},#{album.languages}")
        row_pos += 1
      end

      next
    end

    # Is it actually old or new?
    if @artist.albums.key?(album.url) && album == @artist.albums[album.url]
      # Unchanged album, so keep the timestamp of the stored one
      album.updated_on = @artist.albums[album.url].updated_on
    end

    album.url = Util.fix_link(album.url)

    @artist.albums[album.url] = album

    if !@release.albums.include?(album.url)
      @release.albums.push(album.url)
      @release.updated_on = @updated_on
    end

    row_pos += 1
  end

  return @release
end
|
271
|
+
|
272
|
+
# Reads the date cell (2nd cell of a row): stores the begin/end dates and the link's URL on album.
# The URL is taken from the date because sometimes there is not a topic.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives date_begin, date_end, and url
# @return [Boolean] true only if both a begin date and a URL were found
def parse_date_cell(cells,album)
  return false if cells.length <= 1

  date_cell = cells[1]
  return false if date_cell.nil?

  links = date_cell.css('a')
  return false if links.nil? || links.length < 1

  # For 2014 albums: use the first link that has both text and an href
  link = links.find() { |a| !a.nil?() && !Util.empty_s?(a.content) && !a['href'].nil?() }
  return false if link.nil?()

  date_begin,date_end = self.class.parse_kryon_date(Util.clean_data(link.content),@title)

  album.date_begin = date_begin
  album.date_end = date_end
  album.url = Util.clean_link(@release.url,link['href'])

  return !(album.date_begin.empty? || album.url.empty?)
end
|
301
|
+
|
302
|
+
# Reads the language cell (5th cell of a row) and stores the matched languages on album.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives languages
# @return [Boolean] true if at least one language was found
def parse_language_cell(cells,album)
  return false if cells.length <= 4

  language_cell = cells[4]
  return false if language_cell.nil?

  text = language_cell.content
  return false if text.nil?

  # For the official site, they always have English, so add it if not present
  album.languages = Iso.languages.find_by_kryon(Util.clean_data(text),add_english: true)

  found = album.languages
  return !(found.nil?() || found.empty?())
end
|
314
|
+
|
315
|
+
# Reads the location cell (4th cell of a row) and stores the matched locations on album.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives locations
# @return [Boolean] true if at least one location was found; "RADIO SHOW" cells never count
def parse_location_cell(cells,album)
  return false if cells.length <= 3

  location_cell = cells[3]
  return false if location_cell.nil?

  text = location_cell.content
  return false if text.nil?
  return false if text =~ /[[:space:]]*RADIO[[:space:]]+SHOW[[:space:]]*/ # 2014

  text = Util.clean_data(text)
  return false if text.empty?()

  album.locations = Iso.find_kryon_locations(text)

  found = album.locations
  return !(found.nil?() || found.empty?())
end
|
328
|
+
|
329
|
+
# Reads the topic cell (3rd cell of a row): stores the album's title and, when the date cell's
# link looks wrong, replaces the album's URL with the topic link's URL.
#
# Sets @exclude_album (and logs a warning) for rows that are not albums, such as group photos.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives title and possibly url
# @return [Boolean] true if a usable, non-empty title was found
# @raise [RuntimeError] in dev mode only, if neither the date nor the topic cell has a URL
def parse_topic_cell(cells,album)
  return false if cells.length <= 2
  return false if (cell = cells[2]).nil?
  return false if (cell = cell.css('a')).nil?
  return false if cell.length < 1

  # For 2017 "San Jose, California (3)"
  cells = cell
  cell = nil

  cells.each do |c|
    if !c.nil?() && !Util.empty_s?(c.content)
      cell = c
      break
    end
  end

  return false if cell.nil?()

  album.title = Util.fix_shortwith_text(Util.clean_data(cell.content))

  exclude_topics = /
    GROUP[[:space:]]+PHOTO|
    PLEASE[[:space:]]+READ
  /ix

  if album.title =~ exclude_topics
    log.warn("Excluding album: Topic[#{album.title}]")
    @exclude_album = true
    return false
  end

  # Sometimes, the date cell's href is an image (See 2016 'Las Vegas, NV - "Numerology" - (3)')
  good_urls = /
    \.html?[[:space:]]*\z
  /ix

  date_url = album.url
  topic_url = Util.clean_link(@release.url,cell['href'])

  # Sometimes, the date cell's href is wrong (See 2016 '"Five Concepts for the New Human" (2)')
  if album.url !~ good_urls || (!Util.empty_s?(topic_url) && date_url != topic_url)
    album.url = topic_url

    # Check for a missing URL BEFORE logging the switch: File.basename() raises TypeError on nil,
    # which would hide the more helpful message below
    if Util.empty_s?(album.url)
      msg = "Date and topic cells' hrefs are empty: Topic[#{album.title}]"

      raise msg if DevOpts.instance.dev?()

      log.warn(msg)

      return false
    end

    # to_s(): date_url may be unset if the date cell had no usable link -- TODO confirm AlbumData's default
    log.warn("Using topic cell's href for URL: #{File.basename(date_url.to_s())}=>#{File.basename(album.url)}")
  end

  return false if album.title.empty?
  return true
end
|
390
|
+
|
391
|
+
# Maps a release title to the name used in the site's page URLs.
#
# @param year [String] release title, e.g. "2002-2005"
# @return [String] '2002_05' for "2002-2005"; otherwise +year+ unchanged
def self.fix_kryon_year_title(year)
  return (year == '2002-2005') ? '2002_05' : year
end
|
396
|
+
|
397
|
+
# Builds the mirror URLs of a year page.
#
# @param year [String] release title; normalized with fix_kryon_year_title()
# @return [Hash<String,String>] mirror name => URL; only 'original' is defined
def self.get_kryon_year_mirrors(year)
  page = fix_kryon_year_title(year)

  return {'original' => "https://www.kryon.com/freeAudio_folder/#{page}_freeAudio.html"}
end
|
406
|
+
|
407
|
+
# Builds the URL of the mobile version of a year page.
#
# @param year [String] release title; normalized with fix_kryon_year_title()
# @param url_version [Integer] accepted but not used by this method
# @return [String] the page URL
def self.get_kryon_year_url(year,url_version=2)
  page = fix_kryon_year_title(year)

  return "https://www.kryon.com/freeAudio_folder/mobile_pages/#{page}_freeAudio_m.html"
end
|
412
|
+
end
|
413
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of UncleKryon-server.
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'bundler/setup'
|
25
|
+
|
26
|
+
module UncleKryon
  # Empty placeholder class; no behavior is defined here.
  class Server; end
end
|
@@ -0,0 +1,231 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of UncleKryon-server.
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'bundler/setup'
|
25
|
+
|
26
|
+
require 'nbayes'
|
27
|
+
|
28
|
+
require 'unclekryon/dev_opts'
|
29
|
+
require 'unclekryon/log'
|
30
|
+
require 'unclekryon/util'
|
31
|
+
|
32
|
+
module UncleKryon
|
33
|
+
# Pairs an NBayes::Base classifier with the tags it may assign. +tags+ maps a short ID, which
# the user types while training, to the tag name that is stored in the classifier.
class Trainer
  attr_accessor :max_tag_id_length
  attr_accessor :max_tag_length
  attr_accessor :tags
  attr_accessor :trainer

  # Splits text on whitespace and removes punctuation/control characters from every piece.
  #
  # @param text [String] text to tokenize
  # @return [Array<String>] the non-empty tokens, in their original order
  def self.to_tokens(text)
    return text.split(/[[:space:]]+/)
               .map() { |t| t.gsub(/[[:punct:][:cntrl:]]+/,'') }
               .reject() { |t| t.empty?() }
  end

  # @param tags [Hash<String,String>] tag ID => tag name
  def initialize(tags={})
    @tags = tags
    @trainer = NBayes::Base.new()

    init_lengths() # Sets @max_tag_id_length & @max_tag_length
  end

  # Recomputes the column widths used by train() to print the tag table.
  # Call this again after changing +tags+.
  def init_lengths()
    longest_id = @tags.keys.map() { |id| id.length }.max() || 0
    longest_tag = @tags.values.map() { |tag| tag.length }.max() || 0

    @max_tag_id_length = longest_id + 2 # Indention
    @max_tag_id_length = 7 if @max_tag_id_length < 7 # For "<Enter>" option
    @max_tag_length = -longest_tag # Left justify
  end

  # Interactively asks (on stdout/STDIN) which tag +text+ belongs to and trains the classifier
  # with the answer. An empty answer accepts the classifier's current guess.
  #
  # @param text [String] text to learn
  # @return [String] the tag the text was trained as
  # @raise [RuntimeError] if the tag ID is unknown, or if the guess is accepted but is not a known tag
  def train(text)
    guess_tag = self.tag(text) # Try and guess
    tokens = self.class.to_tokens(text)

    puts '#################'
    puts '# Training Tags #'
    puts '#################'

    # Builds a format like "%7s = %-10s" from the column widths
    tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]
    @tags.each do |id,tag|
      puts tf % [id,tag]
    end
    puts "<Enter> = Guess: #{guess_tag}"

    puts '-----------------'
    puts text
    puts '-----------------'
    print 'What is it? '

    # Use -t/--test option
    if DevOpts.instance.test?()
      tag_id = @tags.keys.sample().to_s() # For testing purposes; '' if there are no tags
      puts tag_id
    else
      # STDIN because app accepts args.
      # gets() returns nil at end-of-input; to_s() makes that behave like pressing <Enter>
      tag_id = STDIN.gets().to_s().strip()
    end
    puts

    if tag_id.empty?()
      raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
      tag = guess_tag
    else
      raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
      tag = @tags[tag_id]
    end

    @trainer.train(tokens,tag)

    return tag
  end

  # @param text [String] text to classify
  # @return [Object] the classifier's best guess (max_class of its result) for +text+
  def tag(text)
    return @trainer.classify(self.class.to_tokens(text)).max_class
  end

  # @return [String] the classifier as YAML, followed by its category stats on a new line
  def to_s()
    # Built with interpolation: this file uses "frozen_string_literal: true", so appending to a
    # '' literal with << raises FrozenError
    return "#{@trainer.to_yaml()}\n#{@trainer.data.category_stats()}"
  end
end
|
126
|
+
|
127
|
+
# A collection of trainers keyed by ID that can be loaded from and saved to one YAML file.
class Trainers
  attr_accessor :filepath
  attr_accessor :trainers

  # @param filepath [String,nil] YAML file used by load_file() and save_to_file()
  def initialize(filepath=nil)
    @filepath = filepath
    @trainers = {}
  end

  # Merges the trainers stored in the file into this collection. A missing or empty file is
  # not an error; nothing is loaded then.
  #
  # For an ID that already exists here, the tags are merged (the tags already in memory win)
  # and the classifier is replaced by the one from the file.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def load_file()
    check_filepath()

    return unless File.exist?(@filepath)

    # NOTE(review): the file holds serialized objects; newer Psych versions (4+) refuse custom
    #               classes in load_file() unless explicitly permitted -- confirm before upgrading.
    #               Only load training files that you trust.
    y = YAML.load_file(@filepath)

    # An empty file yields nil/false instead of a Hash, and calling each() on that would crash
    return unless y.respond_to?(:each_pair)

    y.each_pair() do |id,trainer|
      if !@trainers.key?(id)
        @trainers[id] = trainer
      else
        @trainers[id].tags = trainer.tags.merge(@trainers[id].tags)
        @trainers[id].trainer = trainer.trainer
      end

      @trainers[id].trainer.reset_after_import()
      @trainers[id].init_lengths()
    end
  end

  # Writes all of the trainers to the file as YAML, creating its directories first.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def save_to_file()
    check_filepath()

    Util.mk_dirs_from_filepath(@filepath)

    File.open(@filepath,'w') do |f|
      f.write(to_s())
    end
  end

  # @return [Object,nil] the trainer stored under +id+
  def [](id)
    @trainers[id]
  end

  # Stores +trainer+ under +id+.
  def []=(id,trainer)
    @trainers[id] = trainer
  end

  # @return [String] all of the trainers as YAML
  def to_s()
    return YAML.dump(@trainers)
  end

  private

  # Strips @filepath in place and raises if there is nothing left.
  def check_filepath()
    if @filepath.nil?() || (@filepath = @filepath.strip()).empty?()
      raise ArgumentError,'Training filepath cannot be empty'
    end
  end
end
|
182
|
+
end
|
183
|
+
|
184
|
+
# Demo when this file is run directly:
# - 1st run (no test.yaml): interactively trains a 'coffee' and a 'tea' trainer, then saves them.
# - Later runs: loads test.yaml, prints it, and tags the samples and a line read from STDIN.
if $0 == __FILE__
  train_file = 'test.yaml'
  trainers = UncleKryon::Trainers.new(train_file)

  coffee_texts = [
    'dark black bitter',
    'double espresso steamed milk foam',
    'espresso steamed milk'
  ]
  tea_texts = [
    'no withering and oxidation',
    'broom-like, South Africa',
    'young, minimal'
  ]

  if File.exist?(train_file)
    trainers.load_file()
    puts trainers
    puts

    puts '[Coffee]'
    coffee_texts.each() { |text| puts "'#{text}' => #{trainers['coffee'].tag(text)}" }
    puts

    puts '[Tea]'
    tea_texts.each() { |text| puts "'#{text}' => #{trainers['tea'].tag(text)}" }
    puts

    puts 'What kind of drink would you like?'
    answer = STDIN.gets().chomp().strip()
    puts "coffee => #{trainers['coffee'].tag(answer)}"
    puts "tea    => #{trainers['tea'].tag(answer)}"
  else
    trainers['coffee'] = UncleKryon::Trainer.new({'b'=>'black','c'=>'cappuccino','l'=>'latte'})
    trainers['tea'] = UncleKryon::Trainer.new({'g'=>'green','r'=>'red','w'=>'white'})

    coffee_texts.each() { |text| trainers['coffee'].train(text) }
    tea_texts.each() { |text| trainers['tea'].train(text) }

    trainers.save_to_file()
  end
end
|