unclekryon 0.4.9.pre.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +34 -0
  3. data/Gemfile.lock +43 -0
  4. data/LICENSE +674 -0
  5. data/README.md +55 -0
  6. data/Rakefile +59 -0
  7. data/bin/unclekryon +30 -0
  8. data/iso/can_provs_terrs.yaml +54 -0
  9. data/iso/countries.yaml +3050 -0
  10. data/iso/iso.yaml +8 -0
  11. data/iso/languages.yaml +5641 -0
  12. data/iso/regions.yaml +42 -0
  13. data/iso/subregions.yaml +6 -0
  14. data/iso/usa_states.yaml +230 -0
  15. data/lib/unclekryon.rb +384 -0
  16. data/lib/unclekryon/data/album_data.rb +147 -0
  17. data/lib/unclekryon/data/artist_data.rb +109 -0
  18. data/lib/unclekryon/data/artist_data_data.rb +146 -0
  19. data/lib/unclekryon/data/aum_data.rb +75 -0
  20. data/lib/unclekryon/data/base_data.rb +79 -0
  21. data/lib/unclekryon/data/pic_data.rb +76 -0
  22. data/lib/unclekryon/data/release_data.rb +57 -0
  23. data/lib/unclekryon/data/social_data.rb +39 -0
  24. data/lib/unclekryon/data/timespan_data.rb +70 -0
  25. data/lib/unclekryon/dev_opts.rb +41 -0
  26. data/lib/unclekryon/hacker.rb +327 -0
  27. data/lib/unclekryon/iso.rb +341 -0
  28. data/lib/unclekryon/iso/base_iso.rb +196 -0
  29. data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
  30. data/lib/unclekryon/iso/country.rb +133 -0
  31. data/lib/unclekryon/iso/language.rb +241 -0
  32. data/lib/unclekryon/iso/region.rb +53 -0
  33. data/lib/unclekryon/iso/subregion.rb +53 -0
  34. data/lib/unclekryon/iso/usa_state.rb +106 -0
  35. data/lib/unclekryon/jsoner.rb +124 -0
  36. data/lib/unclekryon/log.rb +111 -0
  37. data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
  38. data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
  39. data/lib/unclekryon/server.rb +29 -0
  40. data/lib/unclekryon/trainer.rb +231 -0
  41. data/lib/unclekryon/uploader.rb +29 -0
  42. data/lib/unclekryon/util.rb +228 -0
  43. data/lib/unclekryon/version.rb +26 -0
  44. data/unclekryon.gemspec +67 -0
  45. metadata +189 -0
@@ -0,0 +1,413 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'date'
25
+ require 'nokogiri'
26
+ require 'open-uri'
27
+
28
+ require 'unclekryon/iso'
29
+ require 'unclekryon/log'
30
+ require 'unclekryon/trainer'
31
+ require 'unclekryon/util'
32
+
33
+ require 'unclekryon/data/album_data'
34
+ require 'unclekryon/data/artist_data_data'
35
+ require 'unclekryon/data/release_data'
36
+
37
+ module UncleKryon
38
+ class KryonAumYearParser
39
+ include Logging
40
+
41
+ attr_accessor :artist
42
+ attr_accessor :exclude_album
43
+ attr_accessor :release
44
+ attr_accessor :title
45
+ attr_accessor :trainers
46
+ attr_accessor :training
47
+ attr_reader :updated_on
48
+ attr_accessor :url
49
+
50
+ alias_method :training?,:training
51
+
52
# Set up a parser for one year page of the Kryon free-audio site.
#
# @param title [String,nil] release title (a year such as "2017"); used to derive the URL
#   when +url+ is empty
# @param url [String,nil] page to parse; when empty it is built from +title+
# @param artist [Object] holder of the +releases+ and +albums+ filled in by #parse_site
# @param training [Boolean] stored as-is and exposed through #training?
# @param train_filepath [String,nil] path passed straight to Trainers.new
# @param updated_on [String,nil] timestamp to stamp onto parsed data; when empty, the
#   current time formatted by Util.format_datetime is used
# @param options [Hash] any extra keywords are accepted and ignored
def initialize(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil,
               updated_on: nil,**options)
  @artist = artist
  @exclude_album = false
  @title = title
  @trainers = Trainers.new(train_filepath)
  @training = training
  # Honor a caller-supplied timestamp; it used to be dropped, leaving @updated_on nil.
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now()) : updated_on
  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(title) : url
end
62
+
63
# Parse one of the many free-form date spellings found on the site into a begin/end pair.
#
# The input is matched against a series of known shapes (each branch below quotes a real
# example). For ranges, the end date is parsed with a full format and the begin date is
# then rebuilt from the first month/day using the end date's year.
#
# @param date [String] raw date text, e.g. "SEPT 29 - OCT 9, 2017" or "1/7/2012"
# @param year [String,nil] year to append when the text itself carries none
# @return [Array<String>] two elements: begin and end, each formatted by Util.format_date,
#   or '' when that side is absent
# @raise [ArgumentError] re-raised (after logging) when Date.strptime rejects the text
def self.parse_kryon_date(date,year=nil)
  # Don't modify args and clean them up so can use /\s/ instead of /[[:space:]]/
  date = Util.clean_data(date.clone())
  year = Util.clean_data(year.clone())

  # Fix misspellings and/or weird shortenings
  date.gsub!(/Feburary/i,'February') # "Feburary 2-13, 2017"
  date.gsub!(/SEPT(\s+|\-)/i,'Sep\1') # "SEPT 29 - OCT 9, 2017", "Sept-Oct 2015"
  date.gsub!(/Septembe\s+/i,'September ') # "Septembe 4, 2016"
  date.gsub!(/Ocotber/i,'October') # "Ocotber 10, 2015"

  comma = date.include?(',') ? ',' : '' # "May 6 2017"
  r = Array.new(2)

  # Returns [text,format] with ", <year>" / ", %Y" appended when a year was given.
  # Builds new strings instead of using <<, because the format literals are frozen under
  # this file's frozen_string_literal pragma and << raised FrozenError on them.
  with_year = lambda do |text,format|
    year.nil?() ? [text,format] : ["#{text}, #{year}","#{format}, %Y"]
  end

  begin
    if date.include?('-')
      # "Sept-Oct 2015"
      if date =~ /\A[[:alpha:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%b-%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "4/28/12 - 4/29/12"
      elsif date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\-/
        date = date.split(/\s*-\s*/)

        r[0] = Date.strptime(date[0],'%m/%d/%y')
        r[1] = Date.strptime(date[1],'%m/%d/%y')
      # "10-17 to 11-18, 2012"
      elsif date =~ /\A[[:digit:]]+\s*\-\s*[[:digit:]]+\s+to\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/i
        date = date.split(/\s*to\s*/i)

        r[1] = Date.strptime(date[1],'%m-%d, %Y')
        r[0] = Date.strptime(date[0],'%m-%d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      else
        # "SEPT 29 - OCT 9, 2017", "May 31-June 1, 2014"
        if date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+[\,\s]+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = "%B %d-%B %d#{comma} %Y"
        # "OCT 25 - NOV 3" (2014)
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
          date,r1f = with_year.call(date.gsub(/\s*\-\s*/,'-'),'%B %d-%B %d')
        # "December 12-13"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')

          # "September 16 - 2018"
          if date =~ /-[[:digit:]]{4}\z/
            r1f = '%B %d-%Y'
          else
            date,r1f = with_year.call(date,'%B %d-%d')
          end
        # "June 30-July 1-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d-%Y'
        # "September 7 & 9-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s+\&\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d & %d-%Y'
        else
          # "OCT 27 - 28 - 29, 2017"; remove spaces around dashes
          date.gsub!(/\s+\-\s+/,'-')

          # "June 7-9-16-17" & "June 9-10-11-12"
          if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\z/
            date,r1f = with_year.call(date,'%B %d-%d-%d-%d')
          else
            # "MAY 15-16-17, 2017" and "January 7-8, 2017"
            r1f = (date =~ /\-.*\-/) ? "%B %d-%d-%d#{comma} %Y" : "%B %d-%d#{comma} %Y"
          end
        end

        # End date from the full format; begin date from the leading "<month> <day>".
        r[1] = Date.strptime(date,r1f)
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    elsif date.include?('/')
      # "1/7/2012"
      if date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\z/
        date = date.gsub(/\s+/,'')

        r[0] = Date.strptime(date,'%m/%d/%Y')
        r[1] = nil
      else
        # "JULY/AUG 2017"
        r[1] = Date.strptime(date,'%b/%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    else
      # "April 11, 12, 2015"
      if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\s*,\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%B %d, %d, %Y')
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "March, 2014"
      elsif date =~ /\A[[:alpha:]]+\s*,\s*[[:digit:]]+\z/
        r[0] = Date.strptime(date,'%B, %Y')
        r[1] = nil
      else
        r[0] = Date.strptime(date,"%B %d#{comma} %Y")
        r[1] = nil
      end
    end
  rescue ArgumentError => e
    Log.instance.fatal("Invalid Date: '#{date}'",error: e)
    raise
  end

  r[0] = (!r[0].nil?) ? Util.format_date(r[0]) : ''
  r[1] = (!r[1].nil?) ? Util.format_date(r[1]) : ''

  return r
end
194
+
195
# Download the year page and merge every album row it lists into the artist.
#
# Non-nil +title+/+artist+ replace the stored values; the URL is +url+ when given, else it
# is re-derived from the title. Rows that lack a usable date cell, lack all of
# topic/location/language, or were flagged through @exclude_album are skipped.
#
# @param title [String,nil] release title to parse (defaults to the stored one)
# @param url [String,nil] page URL (defaults to the one derived from the title)
# @param artist [Object,nil] artist data to fill (defaults to the stored one)
# @return [Object] the release stored under the title in the artist's releases
# @raise [ArgumentError] if the artist is nil, or the title or URL is empty
def parse_site(title=nil,url=nil,artist=nil)
  @artist = artist unless artist.nil?()
  @title = title unless title.nil?()

  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(@title) : url

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?()
  raise ArgumentError,"Title cannot be empty" if @title.nil?() || (@title = @title.strip()).empty?()
  raise ArgumentError,"URL cannot be empty" if @url.nil?() || (@url = @url.strip()).empty?()

  @release = @artist.releases[@title]
  @trainers.load_file()

  if @release.nil?
    @release = ReleaseData.new
    @release.mirrors = self.class.get_kryon_year_mirrors(@title)
    @release.title = @title
    @release.updated_on = @updated_on
    @release.url = @url

    @artist.releases[@title] = @release
  end

  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open in Ruby 3.0.
  # The block form also closes the handle as soon as the document has been parsed.
  doc = URI.open(@release.url) do |io|
    Nokogiri::HTML(io,nil,'utf-8') # Force utf-8 encoding
  end
  row_pos = 1
  rows = doc.css('table tr tr')

  rows.each() do |row|
    next if row.nil?
    next if (cells = row.css('td')).nil?

    album = AlbumData.new
    album.updated_on = @updated_on
    @exclude_album = false

    # There is always a date cell
    has_date_cell = parse_date_cell(cells,album)

    # Sometimes there is not a topic, location, or language cell, but not all 3!
    # - Every parser must run for its side effects on album, so each call goes on the
    #   LEFT of ||; both || and "or" short-circuit and would skip a call on the right.
    has_other_cell = parse_topic_cell(cells,album)
    has_other_cell = parse_location_cell(cells,album) || has_other_cell
    has_other_cell = parse_language_cell(cells,album) || has_other_cell

    if !has_date_cell || !has_other_cell || @exclude_album
      # - If it doesn't have any cells, it is probably javascript or something else, so don't log it
      # - If @exclude_album, then it has already been logged, so don't log it
      if (!has_date_cell && has_other_cell) || (has_date_cell && !@exclude_album)
        log.warn("Excluding album: #{row_pos},#{album.date_begin},#{album.date_end},#{album.title}," +
                 "#{album.locations},#{album.languages}")
        row_pos += 1
      end

      next
    end

    # Is it actually old or new?
    # NOTE(review): this lookup uses the URL before Util.fix_link, but albums are stored
    # under the fixed URL below -- confirm whether fix_link should run first.
    if @artist.albums.key?(album.url) && album == @artist.albums[album.url]
      album.updated_on = @artist.albums[album.url].updated_on
    end

    album.url = Util.fix_link(album.url)

    @artist.albums[album.url] = album

    if !@release.albums.include?(album.url)
      @release.albums.push(album.url)
      @release.updated_on = @updated_on
    end

    row_pos += 1
  end

  return @release
end
271
+
272
# Fill the album's begin/end dates and URL from the second cell of a row.
#
# The URL is taken from the date cell because a row does not always have a topic.
# The first anchor that has both text and an href is used (2014 pages hold several).
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives date_begin, date_end and url
# @return [Boolean] true only if a begin date and a URL were both found
def parse_date_cell(cells,album)
  date_td = cells.length > 1 ? cells[1] : nil
  return false if date_td.nil?

  anchors = date_td.css('a')
  return false if anchors.nil? || anchors.length < 1

  # For 2014 albums
  link = anchors.find do |a|
    !a.nil?() && !Util.empty_s?(a.content) && !a['href'].nil?()
  end
  return false if link.nil?()

  dates = self.class.parse_kryon_date(Util.clean_data(link.content),@title)
  album.date_begin = dates[0]
  album.date_end = dates[1]
  album.url = Util.clean_link(@release.url,link['href'])

  !(album.date_begin.empty? || album.url.empty?)
end
301
+
302
# Fill the album's languages from the fifth cell of a row.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives languages
# @return [Boolean] true only if the lookup produced a non-empty result
def parse_language_cell(cells,album)
  return false unless cells.length > 4

  lang_td = cells[4]
  return false if lang_td.nil?

  text = lang_td.content
  return false if text.nil?

  # For the official site, they always have English, so add it if not present
  album.languages = Iso.languages.find_by_kryon(Util.clean_data(text),add_english: true)

  found = album.languages
  !(found.nil?() || found.empty?())
end
314
+
315
# Fill the album's locations from the fourth cell of a row.
#
# Cells containing "RADIO SHOW" (seen on the 2014 page) are treated as having no location.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives locations
# @return [Boolean] true only if the lookup produced a non-empty result
def parse_location_cell(cells,album)
  return false unless cells.length > 3

  loc_td = cells[3]
  return false if loc_td.nil?

  raw = loc_td.content
  return false if raw.nil?
  return false if raw =~ /[[:space:]]*RADIO[[:space:]]+SHOW[[:space:]]*/ # 2014

  text = Util.clean_data(raw)
  return false if text.empty?()

  album.locations = Iso.find_kryon_locations(text)

  found = album.locations
  !(found.nil?() || found.empty?())
end
328
+
329
# Fill the album's title from the third cell of a row and, where needed, replace the
# album's URL with the topic link's href.
#
# Titles matching "GROUP PHOTO" or "PLEASE READ" set @exclude_album and are rejected.
# The topic href replaces the URL when the current URL does not end in .htm/.html, or
# when the topic href is non-empty and differs from it.
#
# @param cells [#length,#[]] the row's cells
# @param album [Object] receives title and possibly url
# @return [Boolean] true only if a non-empty, non-excluded title (and a URL) was found
# @raise [RuntimeError] in dev mode, when neither cell supplies a URL
def parse_topic_cell(cells,album)
  topic_td = cells.length > 2 ? cells[2] : nil
  return false if topic_td.nil?

  anchors = topic_td.css('a')
  return false if anchors.nil? || anchors.length < 1

  # For 2017 "San Jose, California (3)"
  link = anchors.find { |a| !a.nil?() && !Util.empty_s?(a.content) }
  return false if link.nil?()

  album.title = Util.fix_shortwith_text(Util.clean_data(link.content))

  exclude_topics = /
    GROUP[[:space:]]+PHOTO|
    PLEASE[[:space:]]+READ
  /ix

  if album.title =~ exclude_topics
    log.warn("Excluding album: Topic[#{album.title}]")
    @exclude_album = true
    return false
  end

  # Sometimes, the date cell's href is an image (See 2016 'Las Vegas, NV - "Numerology" - (3)')
  good_urls = /
    \.html?[[:space:]]*\z
  /ix

  date_url = album.url
  topic_url = Util.clean_link(@release.url,link['href'])

  # Sometimes, the date cell's href is wrong (See 2016 '"Five Concepts for the New Human" (2)')
  if album.url !~ good_urls || (!Util.empty_s?(topic_url) && date_url != topic_url)
    album.url = topic_url
    log.warn("Using topic cell's href for URL: #{File.basename(date_url)}=>#{File.basename(album.url)}")

    if Util.empty_s?(album.url)
      msg = "Date and topic cells' hrefs are empty: Topic[#{album.title}]"

      raise msg if DevOpts.instance.dev?()

      log.warn(msg)
      return false
    end
  end

  !album.title.empty?
end
390
+
391
# Map a release title to the form used in the site's file names.
#
# Only "2002-2005" is rewritten (to "2002_05"); every other value is returned untouched.
#
# @param year [String] release title
# @return [String] the file-name form of the title
def self.fix_kryon_year_title(year)
  year == '2002-2005' ? '2002_05' : year
end
396
+
397
# Build the mirror table for a release.
#
# @param year [String] release title
# @return [Hash{String=>String}] a single 'original' entry pointing at the year's page
def self.get_kryon_year_mirrors(year)
  page = fix_kryon_year_title(year)

  {'original' => "https://www.kryon.com/freeAudio_folder/#{page}_freeAudio.html"}
end
406
+
407
# Build the URL of the mobile page that lists a year's albums.
#
# @param year [String] release title
# @param url_version [Integer] accepted but not used by this method
# @return [String] the page URL
def self.get_kryon_year_url(year,url_version=2)
  page = fix_kryon_year_title(year)

  "https://www.kryon.com/freeAudio_folder/mobile_pages/#{page}_freeAudio_m.html"
end
412
+ end
413
+ end
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
module UncleKryon
  # Placeholder: the class is declared but defines no behavior of its own.
  class Server; end
end
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nbayes'
27
+
28
+ require 'unclekryon/dev_opts'
29
+ require 'unclekryon/log'
30
+ require 'unclekryon/util'
31
+
32
+ module UncleKryon
33
# Pairs a set of tags (id => tag name) with an NBayes classifier and lets a person
# confirm or correct the classifier's guess from the console.
class Trainer
  attr_accessor :max_tag_id_length
  attr_accessor :max_tag_length
  attr_accessor :tags
  attr_accessor :trainer

  # Split text on whitespace, strip punctuation/control characters from each piece and
  # drop pieces that end up empty.
  #
  # @param text [String]
  # @return [Array<String>]
  def self.to_tokens(text)
    text.split(/[[:space:]]+/)
        .map { |piece| piece.gsub(/[[:punct:][:cntrl:]]+/,'') }
        .reject(&:empty?)
  end

  # @param tags [Hash{String=>String}] tag id (what the user types) => tag name
  def initialize(tags={})
    @max_tag_id_length = 0
    @max_tag_length = 0
    @tags = tags
    @trainer = NBayes::Base.new()

    init_lengths()
  end

  # Recompute the column widths used by #train when listing the tags.
  def init_lengths()
    longest_id = @tags.each_key.map(&:length).max.to_i
    longest_tag = @tags.each_value.map(&:length).max.to_i

    # +2 for indention; at least 7 so the "<Enter>" option lines up
    @max_tag_id_length = [longest_id + 2,7].max
    @max_tag_length = -longest_tag # Negative width = left justify
  end

  # Show the tags and the classifier's guess for +text+, read the chosen tag id from
  # STDIN (empty input accepts the guess) and train the classifier with the result.
  #
  # @param text [String] text to classify and learn from
  # @return [String] the tag the text was trained as
  # @raise [RuntimeError] if the typed id is unknown, or the accepted guess is not a tag
  def train(text)
    guess_tag = self.tag(text) # Try and guess
    tokens = self.class.to_tokens(text)

    puts '#################'
    puts '# Training Tags #'
    puts '#################'

    tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]
    @tags.each do |id,tag|
      puts tf % [id,tag]
    end
    puts "<Enter> = Guess: #{guess_tag}"

    puts '-----------------'
    puts text
    puts '-----------------'
    print 'What is it? '

    # Use -t/--test option
    if DevOpts.instance.test?()
      puts (tag_id = @tags.keys.sample()) # For testing purposes
    else
      # STDIN because app accepts args; gets returns nil at end of input, which is
      # treated like an empty line instead of crashing on nil.chomp
      tag_id = STDIN.gets().to_s().strip()
    end
    puts

    if tag_id.empty?()
      raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
      tag = guess_tag
    else
      raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
      tag = @tags[tag_id]
    end

    @trainer.train(tokens,tag)

    return tag
  end

  # @param text [String]
  # @return [Object] the classifier's top class for the tokens of +text+
  def tag(text)
    return @trainer.classify(self.class.to_tokens(text)).max_class
  end

  # @return [String] the classifier as YAML, a newline, then its category stats
  def to_s()
    # Interpolate rather than appending to '' -- that literal is frozen under this
    # file's frozen_string_literal pragma, so << raised FrozenError.
    return "#{@trainer.to_yaml()}\n#{@trainer.data.category_stats()}"
  end
end
126
+
127
# A set of Trainer objects keyed by id, persisted together in one YAML file.
class Trainers
  attr_accessor :filepath
  attr_accessor :trainers

  # @param filepath [String,nil] YAML file used by #load_file and #save_to_file
  def initialize(filepath=nil)
    @filepath = filepath
    @trainers = {}
  end

  # Merge the trainers stored in the file into this set; does nothing if the file is
  # missing or empty. For an id already present, the stored tags are merged underneath
  # the in-memory ones and the stored classifier replaces the in-memory one.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def load_file()
    require_filepath()

    return unless File.exist?(@filepath)

    # The file holds serialized Trainer objects, which Psych 4's safe-by-default
    # load_file rejects; use the unsafe loader where it exists.
    # NOTE: this instantiates arbitrary classes -- only load files written by #save_to_file.
    y = if YAML.respond_to?(:unsafe_load_file)
          YAML.unsafe_load_file(@filepath)
        else
          YAML.load_file(@filepath)
        end

    return unless y # An empty file loads as false/nil

    y.each() do |id,trainer|
      if !@trainers.key?(id)
        @trainers[id] = trainer
      else
        @trainers[id].tags = trainer.tags.merge(@trainers[id].tags)
        @trainers[id].trainer = trainer.trainer
      end

      @trainers[id].trainer.reset_after_import()
      @trainers[id].init_lengths()
    end
  end

  # Write all trainers to the file as YAML, creating parent directories first.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def save_to_file()
    require_filepath()

    Util.mk_dirs_from_filepath(@filepath)

    File.open(@filepath,'w') do |f|
      f.write(to_s())
    end
  end

  def [](id)
    @trainers[id]
  end

  def []=(id,trainer)
    @trainers[id] = trainer
  end

  # @return [String] the trainers hash dumped as YAML
  def to_s()
    return YAML.dump(@trainers)
  end

  private

  # Strip @filepath in place and reject a nil or blank path.
  def require_filepath()
    if @filepath.nil?() || (@filepath = @filepath.strip()).empty?()
      raise ArgumentError,'Training filepath cannot be empty'
    end
  end
end
182
+ end
183
+
184
# Demo, run only when this file is executed directly. The first run trains a coffee and
# a tea trainer interactively and saves them to test.yaml; later runs load that file,
# print the classification of the sample texts and of one line typed by the user.
if $PROGRAM_NAME == __FILE__
  yaml_path = 'test.yaml'
  trainers = UncleKryon::Trainers.new(yaml_path)

  coffee_texts = [
    'dark black bitter',
    'double espresso steamed milk foam',
    'espresso steamed milk'
  ]
  tea_texts = [
    'no withering and oxidation',
    'broom-like, South Africa',
    'young, minimal'
  ]

  if File.exist?(yaml_path)
    trainers.load_file()
    puts trainers
    puts

    [['[Coffee]','coffee',coffee_texts],['[Tea]','tea',tea_texts]].each do |heading,id,texts|
      puts heading
      texts.each { |text| puts "'#{text}' => #{trainers[id].tag(text)}" }
      puts
    end

    puts 'What kind of drink would you like?'
    answer = STDIN.gets().chomp().strip()
    puts "coffee => #{trainers['coffee'].tag(answer)}"
    puts "tea => #{trainers['tea'].tag(answer)}"
  else
    trainers['coffee'] = UncleKryon::Trainer.new(
      {'b'=>'black','c'=>'cappuccino','l'=>'latte'})
    trainers['tea'] = UncleKryon::Trainer.new(
      {'g'=>'green','r'=>'red','w'=>'white'})

    coffee_texts.each { |text| trainers['coffee'].train(text) }
    tea_texts.each { |text| trainers['tea'].train(text) }

    trainers.save_to_file()
  end
end