unclekryon 0.4.9.pre.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +34 -0
  3. data/Gemfile.lock +43 -0
  4. data/LICENSE +674 -0
  5. data/README.md +55 -0
  6. data/Rakefile +59 -0
  7. data/bin/unclekryon +30 -0
  8. data/iso/can_provs_terrs.yaml +54 -0
  9. data/iso/countries.yaml +3050 -0
  10. data/iso/iso.yaml +8 -0
  11. data/iso/languages.yaml +5641 -0
  12. data/iso/regions.yaml +42 -0
  13. data/iso/subregions.yaml +6 -0
  14. data/iso/usa_states.yaml +230 -0
  15. data/lib/unclekryon.rb +384 -0
  16. data/lib/unclekryon/data/album_data.rb +147 -0
  17. data/lib/unclekryon/data/artist_data.rb +109 -0
  18. data/lib/unclekryon/data/artist_data_data.rb +146 -0
  19. data/lib/unclekryon/data/aum_data.rb +75 -0
  20. data/lib/unclekryon/data/base_data.rb +79 -0
  21. data/lib/unclekryon/data/pic_data.rb +76 -0
  22. data/lib/unclekryon/data/release_data.rb +57 -0
  23. data/lib/unclekryon/data/social_data.rb +39 -0
  24. data/lib/unclekryon/data/timespan_data.rb +70 -0
  25. data/lib/unclekryon/dev_opts.rb +41 -0
  26. data/lib/unclekryon/hacker.rb +327 -0
  27. data/lib/unclekryon/iso.rb +341 -0
  28. data/lib/unclekryon/iso/base_iso.rb +196 -0
  29. data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
  30. data/lib/unclekryon/iso/country.rb +133 -0
  31. data/lib/unclekryon/iso/language.rb +241 -0
  32. data/lib/unclekryon/iso/region.rb +53 -0
  33. data/lib/unclekryon/iso/subregion.rb +53 -0
  34. data/lib/unclekryon/iso/usa_state.rb +106 -0
  35. data/lib/unclekryon/jsoner.rb +124 -0
  36. data/lib/unclekryon/log.rb +111 -0
  37. data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
  38. data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
  39. data/lib/unclekryon/server.rb +29 -0
  40. data/lib/unclekryon/trainer.rb +231 -0
  41. data/lib/unclekryon/uploader.rb +29 -0
  42. data/lib/unclekryon/util.rb +228 -0
  43. data/lib/unclekryon/version.rb +26 -0
  44. data/unclekryon.gemspec +67 -0
  45. metadata +189 -0
@@ -0,0 +1,413 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'date'
25
+ require 'nokogiri'
26
+ require 'open-uri'
27
+
28
+ require 'unclekryon/iso'
29
+ require 'unclekryon/log'
30
+ require 'unclekryon/trainer'
31
+ require 'unclekryon/util'
32
+
33
+ require 'unclekryon/data/album_data'
34
+ require 'unclekryon/data/artist_data_data'
35
+ require 'unclekryon/data/release_data'
36
+
37
+ module UncleKryon
38
+ class KryonAumYearParser
39
+ include Logging
40
+
41
+ attr_accessor :artist
42
+ attr_accessor :exclude_album
43
+ attr_accessor :release
44
+ attr_accessor :title
45
+ attr_accessor :trainers
46
+ attr_accessor :training
47
+ attr_reader :updated_on
48
+ attr_accessor :url
49
+
50
+ alias_method :training?,:training
51
+
52
# Set up a parser for one "year" page of the Kryon free-audio site.
#
# @param title [String,nil] release title (the year); also used to derive the default URL
# @param url [String,nil] page URL; when empty it is derived from +title+
# @param artist [ArtistDataData] container whose releases/albums get filled in by #parse_site
# @param training [Boolean] stored as-is and exposed via #training?
# @param train_filepath [String,nil] path handed to Trainers.new
# @param updated_on [String,nil] timestamp to stamp on new data; defaults to "now"
# @param options [Hash] accepted for call-site compatibility; not used here
def initialize(title=nil,url=nil,artist=ArtistDataData.new(),training: false,train_filepath: nil,
               updated_on: nil,**options)
  @artist = artist
  @exclude_album = false
  @release = nil # Set by #parse_site
  @title = title
  @trainers = Trainers.new(train_filepath)
  @training = training

  # Keep a caller-supplied timestamp; previously it was silently dropped and
  # @updated_on stayed nil whenever updated_on: was given.
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now) : updated_on
  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(title) : url
end
62
+
63
# Parse one of the free-form date (or date-range) strings used on the site.
#
# The string is matched against a series of known shapes (see the examples
# on each branch) and turned into a begin date and, for ranges, an end date.
# Begin dates of ranges are parsed without a year and then re-dated with the
# end date's year.
#
# @param date [String] raw date text, e.g. "SEPT 29 - OCT 9, 2017" or "1/7/2012"
# @param year [String,nil] year appended to shapes that carry none (e.g. "OCT 25 - NOV 3")
# @return [Array<String>] +[begin, end]+, each run through Util.format_date, or '' when absent
# @raise [ArgumentError] re-raised after logging when Date cannot parse the text
def self.parse_kryon_date(date,year=nil)
  # Don't modify args and clean them up so can use /\s/ instead of /[[:space:]]/
  date = Util.clean_data(date.clone)
  year = Util.clean_data(year.clone) unless year.nil? # nil means "no fallback year"

  # Fix misspellings and/or weird shortenings
  date = date.gsub(/Feburary/i,'February')     # "Feburary 2-13, 2017"
  date = date.gsub(/SEPT(\s+|\-)/i,'Sep\1')    # "SEPT 29 - OCT 9, 2017", "Sept-Oct 2015"
  date = date.gsub(/Septembe\s+/i,'September ') # "Septembe 4, 2016"
  date = date.gsub(/Ocotber/i,'October')       # "Ocotber 10, 2015"

  comma = date.include?(',') ? ',' : '' # "May 6 2017"
  r = Array.new(2)

  begin
    if date.include?('-')
      # "Sept-Oct 2015"
      if date =~ /\A[[:alpha:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%b-%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "4/28/12 - 4/29/12"
      elsif date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\-/
        # Keep +date+ a String so the rescue below logs the original text
        parts = date.split(/\s*-\s*/)

        r[0] = Date.strptime(parts[0],'%m/%d/%y')
        r[1] = Date.strptime(parts[1],'%m/%d/%y')
      # "10-17 to 11-18, 2012"
      elsif date =~ /\A[[:digit:]]+\s*\-\s*[[:digit:]]+\s+to\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/i
        parts = date.split(/\s*to\s*/i)

        r[1] = Date.strptime(parts[1],'%m-%d, %Y')
        r[0] = Date.strptime(parts[0],'%m-%d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      else
        # NOTE: the format (r1f) and the text are rebuilt with interpolation
        # instead of <<, because string literals in this file are frozen and
        # appending to them raised FrozenError.

        # "SEPT 29 - OCT 9, 2017", "May 31-June 1, 2014"
        if date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+[\,\s]+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = "%B %d-%B %d#{comma} %Y"
        # "OCT 25 - NOV 3" (2014)
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d'

          unless year.nil?
            date = "#{date}, #{year}"
            r1f = "#{r1f}, %Y"
          end
        # "December 12-13"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')

          # "September 16 - 2018"
          if date =~ /-[[:digit:]]{4}\z/
            r1f = '%B %d-%Y'
          else
            r1f = '%B %d-%d'

            unless year.nil?
              date = "#{date}, #{year}"
              r1f = "#{r1f}, %Y"
            end
          end
        # "June 30-July 1-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:alpha:]]+\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d-%B %d-%Y'
        # "September 7 & 9-2018"
        elsif date =~ /\A[[:alpha:]]+\s+[[:digit:]]+\s+\&\s+[[:digit:]]+\s*\-\s*[[:digit:]]+\z/
          date = date.gsub(/\s*\-\s*/,'-')
          r1f = '%B %d & %d-%Y'
        else
          # "OCT 27 - 28 - 29, 2017"; remove spaces around dashes
          date = date.gsub(/\s+\-\s+/,'-')

          # "June 7-9-16-17" & "June 9-10-11-12"
          if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\-[[:digit:]]+\z/
            r1f = '%B %d-%d-%d-%d'

            unless year.nil?
              date = "#{date}, #{year}"
              r1f = "#{r1f}, %Y"
            end
          else
            # "MAY 15-16-17, 2017" and "January 7-8, 2017"
            r1f = (date =~ /\-.*\-/) ? "%B %d-%d-%d#{comma} %Y" : "%B %d-%d#{comma} %Y"
          end
        end

        # Later fields in the format override earlier ones, so the full
        # format yields the end date and '%B %d' alone yields the begin date.
        r[1] = Date.strptime(date,r1f)
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    elsif date.include?('/')
      # "1/7/2012"
      if date =~ /\A[[:digit:]]+\s*\/\s*[[:digit:]]+\s*\/\s*[[:digit:]]+\z/
        date = date.gsub(/\s+/,'')

        r[0] = Date.strptime(date,'%m/%d/%Y')
        r[1] = nil
      else
        # "JULY/AUG 2017"
        r[1] = Date.strptime(date,'%b/%b %Y')
        r[0] = Date.strptime(date,'%b')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      end
    else
      # "April 11, 12, 2015"
      if date =~ /\A[[:alpha:]]+\s*[[:digit:]]+\s*,\s*[[:digit:]]+\s*,\s*[[:digit:]]+\z/
        r[1] = Date.strptime(date,'%B %d, %d, %Y')
        r[0] = Date.strptime(date,'%B %d')
        r[0] = Date.new(r[1].year,r[0].month,r[0].day)
      # "March, 2014"
      elsif date =~ /\A[[:alpha:]]+\s*,\s*[[:digit:]]+\z/
        r[0] = Date.strptime(date,'%B, %Y')
        r[1] = nil
      else
        r[0] = Date.strptime(date,"%B %d#{comma} %Y")
        r[1] = nil
      end
    end
  rescue ArgumentError => e
    Log.instance.fatal("Invalid Date: '#{date}'",error: e)
    raise
  end

  r[0] = r[0].nil? ? '' : Util.format_date(r[0])
  r[1] = r[1].nil? ? '' : Util.format_date(r[1])

  return r
end
194
+
195
# Download the year page and record every album row found on it.
#
# Each table row is run through the date/topic/location/language cell
# parsers; rows that parse are stored in @artist.albums (keyed by URL) and
# referenced from the release for @title, which is created if missing.
#
# @param title [String,nil] overrides @title when given
# @param url [String,nil] page URL; when empty it is derived from @title
# @param artist [Object,nil] overrides @artist when given
# @return [ReleaseData] the release that was created or updated
# @raise [ArgumentError] if the artist is nil or the title/URL is empty
def parse_site(title=nil,url=nil,artist=nil)
  @artist = artist unless artist.nil?
  @title = title unless title.nil?

  @url = Util.empty_s?(url) ? self.class.get_kryon_year_url(@title) : url

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?
  raise ArgumentError,"Title cannot be empty" if @title.nil? || (@title = @title.strip).empty?
  raise ArgumentError,"URL cannot be empty" if @url.nil? || (@url = @url.strip).empty?

  @release = @artist.releases[@title]
  @trainers.load_file

  if @release.nil?
    @release = ReleaseData.new
    @release.mirrors = self.class.get_kryon_year_mirrors(@title)
    @release.title = @title
    @release.updated_on = @updated_on
    @release.url = @url

    @artist.releases[@title] = @release
  end

  # Kernel#open no longer accepts URLs on Ruby 3.0+; prefer URI.open and only
  # fall back to open-uri's Kernel.open on Rubies that predate URI.open.
  page = URI.respond_to?(:open) ? URI.open(@release.url) : Kernel.open(@release.url)

  begin
    doc = Nokogiri::HTML(page,nil,'utf-8') # Force utf-8 encoding
  ensure
    page.close if page.respond_to?(:close) # Don't leak the downloaded stream
  end

  row_pos = 1
  rows = doc.css('table tr tr')

  rows.each do |row|
    next if row.nil?
    next if (cells = row.css('td')).nil?

    album = AlbumData.new
    album.updated_on = @updated_on
    @exclude_album = false

    # There is always a date cell
    has_date_cell = parse_date_cell(cells,album)

    # Sometimes there is not a topic, location, or language cell, but not all 3!
    # - Every parser must run because each one fills in part of the album, so
    #   the parser call is put first and the accumulated flag last: both "||"
    #   and "or" short-circuit and would skip the call if the flag came first.
    has_other_cell = parse_topic_cell(cells,album)
    has_other_cell = parse_location_cell(cells,album) || has_other_cell
    has_other_cell = parse_language_cell(cells,album) || has_other_cell

    if !has_date_cell || !has_other_cell || @exclude_album
      # - If it doesn't have any cells, it is probably javascript or something else, so don't log it
      # - If @exclude_album, then it has already been logged, so don't log it
      if (!has_date_cell && has_other_cell) || (has_date_cell && !@exclude_album)
        log.warn("Excluding album: #{row_pos},#{album.date_begin},#{album.date_end},#{album.title}," +
                 "#{album.locations},#{album.languages}")
        row_pos += 1
      end

      next
    end

    # Is it actually old or new?
    # NOTE(review): this lookup uses the URL before Util.fix_link, while albums
    # are stored under the fixed URL below — confirm both keys always agree.
    if @artist.albums.key?(album.url) && album == @artist.albums[album.url]
      album.updated_on = @artist.albums[album.url].updated_on
    end

    album.url = Util.fix_link(album.url)

    @artist.albums[album.url] = album

    if !@release.albums.include?(album.url)
      @release.albums.push(album.url)
      @release.updated_on = @updated_on
    end

    row_pos += 1
  end

  return @release
end
271
+
272
# Fill in the album's begin/end dates and URL from the date cell (cells[1]).
#
# The URL is taken from the date link because a row sometimes has no topic.
# A cell may hold several links (2014 albums); the first one with both text
# and an href is used.
#
# @param cells the row's <td> nodes
# @param album the album being built; date_begin, date_end and url are set
# @return [Boolean] true only if a begin date and a URL were both found
def parse_date_cell(cells,album)
  return false if cells.length <= 1

  date_cell = cells[1]
  return false if date_cell.nil?

  anchors = date_cell.css('a')
  return false if anchors.nil? || anchors.length < 1

  # For 2014 albums
  link = anchors.find do |anchor|
    !anchor.nil? && !Util.empty_s?(anchor.content) && !anchor['href'].nil?
  end

  return false if link.nil?

  parsed = self.class.parse_kryon_date(Util.clean_data(link.content),@title)

  album.date_begin = parsed[0]
  album.date_end = parsed[1]
  album.url = Util.clean_link(@release.url,link['href'])

  return !(album.date_begin.empty? || album.url.empty?)
end
301
+
302
# Fill in the album's languages from the language cell (cells[4]).
#
# @param cells the row's <td> nodes
# @param album the album being built; languages is set
# @return [Boolean] true if at least one language was found
def parse_language_cell(cells,album)
  return false if cells.length <= 4

  language_cell = cells[4]
  return false if language_cell.nil?

  text = language_cell.content
  return false if text.nil?

  text = Util.clean_data(text)

  # For the official site, they always have English, so add it if not present
  album.languages = Iso.languages.find_by_kryon(text,add_english: true)

  found = album.languages
  return !(found.nil? || found.empty?)
end
314
+
315
# Fill in the album's locations from the location cell (cells[3]).
#
# @param cells the row's <td> nodes
# @param album the album being built; locations is set
# @return [Boolean] true if at least one location was found
def parse_location_cell(cells,album)
  return false if cells.length <= 3

  location_cell = cells[3]
  return false if location_cell.nil?

  text = location_cell.content
  return false if text.nil?

  # 2014: a radio show is not a location
  return false if text =~ /[[:space:]]*RADIO[[:space:]]+SHOW[[:space:]]*/

  text = Util.clean_data(text)
  return false if text.empty?

  album.locations = Iso.find_kryon_locations(text)

  found = album.locations
  return !(found.nil? || found.empty?)
end
328
+
329
# Fill in the album's title from the topic cell (cells[2]) and, when needed,
# replace the album URL with the topic link's href.
#
# Sets @exclude_album (and logs) when the topic is a known non-album entry.
#
# @param cells the row's <td> nodes
# @param album the album being built; title (and possibly url) is set
# @return [Boolean] true if a usable, non-excluded topic was found
def parse_topic_cell(cells,album)
  return false if cells.length <= 2

  topic_cell = cells[2]
  return false if topic_cell.nil?

  anchors = topic_cell.css('a')
  return false if anchors.nil? || anchors.length < 1

  # For 2017 "San Jose, California (3)": take the first link that has text
  link = anchors.find { |anchor| !anchor.nil? && !Util.empty_s?(anchor.content) }

  return false if link.nil?

  album.title = Util.fix_shortwith_text(Util.clean_data(link.content))

  exclude_topics = /
    GROUP[[:space:]]+PHOTO|
    PLEASE[[:space:]]+READ
  /ix

  if album.title =~ exclude_topics
    log.warn("Excluding album: Topic[#{album.title}]")
    @exclude_album = true

    return false
  end

  # Sometimes, the date cell's href is an image (See 2016 'Las Vegas, NV - "Numerology" - (3)')
  good_urls = /
    \.html?[[:space:]]*\z
  /ix

  date_url = album.url
  topic_url = Util.clean_link(@release.url,link['href'])

  # Sometimes, the date cell's href is wrong (See 2016 '"Five Concepts for the New Human" (2)')
  topic_url_differs = !Util.empty_s?(topic_url) && date_url != topic_url

  if album.url !~ good_urls || topic_url_differs
    album.url = topic_url
    log.warn("Using topic cell's href for URL: #{File.basename(date_url)}=>#{File.basename(album.url)}")

    if Util.empty_s?(album.url)
      msg = "Date and topic cells' hrefs are empty: Topic[#{album.title}]"

      # Fail loudly while developing; only warn otherwise
      raise msg if DevOpts.instance.dev?

      log.warn(msg)

      return false
    end
  end

  return !album.title.empty?
end
390
+
391
# Map a release title to the form used in the site's file names.
# Only '2002-2005' differs (it becomes '2002_05'); anything else is returned as-is.
def self.fix_kryon_year_title(year)
  year == '2002-2005' ? '2002_05' : year
end
396
+
397
# Build the mirrors hash for a year page.
#
# @param year [String] release title; normalized with fix_kryon_year_title
# @return [Hash{String=>String}] currently a single 'original' entry
def self.get_kryon_year_mirrors(year)
  page = fix_kryon_year_title(year)

  { 'original' => "https://www.kryon.com/freeAudio_folder/#{page}_freeAudio.html" }
end
406
+
407
# Build the URL of the mobile year page.
#
# @param year [String] release title; normalized with fix_kryon_year_title
# @param url_version [Integer] accepted but not used by this method
# @return [String] the page URL
def self.get_kryon_year_url(year,url_version=2)
  page = fix_kryon_year_title(year)

  "https://www.kryon.com/freeAudio_folder/mobile_pages/#{page}_freeAudio_m.html"
end
412
+ end
413
+ end
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ module UncleKryon
27
# Placeholder: the class is declared but defines no behavior yet.
class Server; end
29
+ end
@@ -0,0 +1,231 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+ # frozen_string_literal: true
4
+
5
+ #--
6
+ # This file is part of UncleKryon-server.
7
+ # Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
8
+ #
9
+ # UncleKryon-server is free software: you can redistribute it and/or modify
10
+ # it under the terms of the GNU General Public License as published by
11
+ # the Free Software Foundation, either version 3 of the License, or
12
+ # (at your option) any later version.
13
+ #
14
+ # UncleKryon-server is distributed in the hope that it will be useful,
15
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
+ # GNU General Public License for more details.
18
+ #
19
+ # You should have received a copy of the GNU General Public License
20
+ # along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
21
+ #++
22
+
23
+
24
+ require 'bundler/setup'
25
+
26
+ require 'nbayes'
27
+
28
+ require 'unclekryon/dev_opts'
29
+ require 'unclekryon/log'
30
+ require 'unclekryon/util'
31
+
32
+ module UncleKryon
33
# Interactive tagger: wraps an NBayes classifier together with the set of
# tags (id => name) a user can choose from while training.
class Trainer
  attr_accessor :max_tag_id_length
  attr_accessor :max_tag_length
  attr_accessor :tags
  attr_accessor :trainer

  # Split text on whitespace and strip punctuation/control characters.
  #
  # @param text [String]
  # @return [Array<String>] the non-empty tokens, in order
  def self.to_tokens(text)
    text.split(/[[:space:]]+/)
        .map { |word| word.gsub(/[[:punct:][:cntrl:]]+/,'') }
        .reject(&:empty?)
  end

  # @param tags [Hash] tag id => tag name
  def initialize(tags={})
    @max_tag_id_length = 0
    @max_tag_length = 0
    @tags = tags
    @trainer = NBayes::Base.new

    init_lengths
  end

  # Recompute the column widths used when printing the tag menu in #train.
  def init_lengths
    @max_tag_id_length = 0
    @max_tag_length = 0

    @tags.each do |id,tag|
      @max_tag_id_length = id.length if id.length > @max_tag_id_length
      @max_tag_length = tag.length if tag.length > @max_tag_length
    end

    @max_tag_id_length += 2 # Indention
    @max_tag_id_length = 7 if @max_tag_id_length < 7 # For "<Enter>" option
    @max_tag_length = -@max_tag_length # Left justify
  end

  # Show the tag menu for +text+, read the user's choice and train on it.
  # An empty answer accepts the classifier's current guess.
  #
  # @param text [String] the text to classify
  # @return the tag that was trained
  # @raise [RuntimeError] if the chosen id (or the guess) is not a known tag
  def train(text)
    guess_tag = tag(text) # Try and guess
    tokens = self.class.to_tokens(text)

    puts '#################'
    puts '# Training Tags #'
    puts '#################'

    # Builds e.g. "%7s = %-10s" from the computed widths
    tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]

    @tags.each do |id,tag_name|
      puts tf % [id,tag_name]
    end
    puts "<Enter> = Guess: #{guess_tag}"

    puts '-----------------'
    puts text
    puts '-----------------'
    print 'What is it? '

    # Use -t/--test option
    if DevOpts.instance.test?
      tag_id = @tags.keys.sample # For testing purposes
      puts tag_id
    else
      # STDIN because app accepts args; gets is nil at end of input, which is
      # treated like pressing <Enter> instead of crashing on nil.chomp
      tag_id = STDIN.gets.to_s.strip
    end
    puts

    if tag_id.empty?
      raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
      chosen = guess_tag
    else
      raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
      chosen = @tags[tag_id]
    end

    @trainer.train(tokens,chosen)

    return chosen
  end

  # @param text [String]
  # @return the classifier's best tag for +text+
  def tag(text)
    return @trainer.classify(self.class.to_tokens(text)).max_class
  end

  # @return [String] the classifier as YAML, a newline, then its category stats
  def to_s
    # Interpolate instead of appending: '' is a frozen literal in this file,
    # so the previous `s = ''; s << ...` raised FrozenError.
    "#{@trainer.to_yaml}\n#{@trainer.data.category_stats}"
  end
end
126
+
127
# A named collection of Trainer objects that is persisted to one YAML file.
class Trainers
  attr_accessor :filepath
  attr_accessor :trainers

  # @param filepath [String,nil] YAML file used by #load_file and #save_to_file
  def initialize(filepath=nil)
    @filepath = filepath
    @trainers = {}
  end

  # Merge the trainers stored in the file into this collection.
  #
  # For ids already present, tags are merged (in-memory tags win) and the
  # classifier is replaced by the stored one. A missing file is not an error.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def load_file
    ensure_filepath!

    return unless File.exist?(@filepath)

    stored = load_yaml(@filepath)

    # An empty file does not load as a Hash; there is nothing to merge then
    return unless stored.respond_to?(:each_pair)

    stored.each do |id,trainer|
      if @trainers.key?(id)
        @trainers[id].tags = trainer.tags.merge(@trainers[id].tags)
        @trainers[id].trainer = trainer.trainer
      else
        @trainers[id] = trainer
      end

      @trainers[id].trainer.reset_after_import
      @trainers[id].init_lengths
    end
  end

  # Write the collection to the file as YAML, creating parent dirs first.
  #
  # @raise [ArgumentError] if the filepath is nil or blank
  def save_to_file
    ensure_filepath!

    Util.mk_dirs_from_filepath(@filepath)

    File.open(@filepath,'w') do |file|
      file.write(to_s)
    end
  end

  def [](id)
    @trainers[id]
  end

  def []=(id,trainer)
    @trainers[id] = trainer
  end

  # @return [String] the collection dumped as YAML
  def to_s
    return YAML.dump(@trainers)
  end

  private

  # Strip @filepath in place and reject nil/blank values.
  def ensure_filepath!
    if @filepath.nil? || (@filepath = @filepath.strip).empty?
      raise ArgumentError,'Training filepath cannot be empty'
    end
  end

  # Load YAML that contains this app's own objects.
  #
  # The file stores full objects, which the safe loader used by
  # YAML.load_file on newer Psych versions refuses, so the explicit unsafe
  # loader is used when it exists.
  # SECURITY: this deserializes arbitrary objects; only load files written
  # by #save_to_file, never untrusted input.
  def load_yaml(path)
    if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(path)
    else
      YAML.load_file(path)
    end
  end
end
182
+ end
183
+
184
# Manual demo, run only when this file is executed directly.
# First run (no test.yaml): interactively train a coffee and a tea trainer
# and save them. Later runs: load them, print them, and tag sample texts
# plus one line read from STDIN.
if $PROGRAM_NAME == __FILE__
  yaml_path = 'test.yaml'
  trainers = UncleKryon::Trainers.new(yaml_path)

  coffee_texts = [
    'dark black bitter',
    'double espresso steamed milk foam',
    'espresso steamed milk'
  ]
  tea_texts = [
    'no withering and oxidation',
    'broom-like, South Africa',
    'young, minimal'
  ]

  if File.exist?(yaml_path)
    trainers.load_file
    puts trainers
    puts

    puts '[Coffee]'
    coffee_texts.each { |text| puts "'#{text}' => #{trainers['coffee'].tag(text)}" }
    puts

    puts '[Tea]'
    tea_texts.each { |text| puts "'#{text}' => #{trainers['tea'].tag(text)}" }
    puts

    puts 'What kind of drink would you like?'
    answer = STDIN.gets.chomp.strip
    puts "coffee => #{trainers['coffee'].tag(answer)}"
    puts "tea => #{trainers['tea'].tag(answer)}"
  else
    trainers['coffee'] = UncleKryon::Trainer.new(
      { 'b' => 'black','c' => 'cappuccino','l' => 'latte' }
    )
    trainers['tea'] = UncleKryon::Trainer.new(
      { 'g' => 'green','r' => 'red','w' => 'white' }
    )

    coffee_texts.each { |text| trainers['coffee'].train(text) }
    tea_texts.each { |text| trainers['tea'].train(text) }

    trainers.save_to_file
  end
end