unclekryon 0.4.9.pre.alpha → 0.4.12.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +10 -26
- data/Gemfile.lock +20 -24
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +121 -135
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +34 -51
- data/lib/unclekryon/iso/language.rb +84 -98
- data/lib/unclekryon/iso/region.rb +8 -29
- data/lib/unclekryon/iso/subregion.rb +8 -29
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +68 -85
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/lib/unclekryon.rb +166 -166
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +49 -49
- metadata +50 -22
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -38,7 +26,7 @@ require 'unclekryon/data/timespan_data'
|
|
38
26
|
module UncleKryon
|
39
27
|
class KryonAumYearAlbumParser
|
40
28
|
include Logging
|
41
|
-
|
29
|
+
|
42
30
|
attr_accessor :album
|
43
31
|
attr_accessor :artist
|
44
32
|
attr_accessor :options
|
@@ -46,215 +34,215 @@ module UncleKryon
|
|
46
34
|
attr_accessor :training
|
47
35
|
attr_accessor :updated_on
|
48
36
|
attr_accessor :url
|
49
|
-
|
37
|
+
|
50
38
|
alias_method :training?,:training
|
51
|
-
|
39
|
+
|
52
40
|
def initialize(artist=nil,url=nil,album: nil,training: false,train_filepath: nil,updated_on: nil,
|
53
41
|
**options)
|
54
42
|
@album = album
|
55
43
|
@artist = artist
|
56
44
|
@options = options
|
57
|
-
@updated_on = Util.format_datetime(DateTime.now
|
45
|
+
@updated_on = Util.format_datetime(DateTime.now) if Util.empty_s?(updated_on)
|
58
46
|
@url = url
|
59
|
-
|
47
|
+
|
60
48
|
@trainers = Trainers.new(train_filepath)
|
61
49
|
@training = training
|
62
|
-
|
50
|
+
|
63
51
|
@trainers['aum_year_album'] = Trainer.new({
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
52
|
+
'alds' => 'album_dates',
|
53
|
+
'altt' => 'album_title',
|
54
|
+
'allo' => 'album_locations',
|
55
|
+
'almi' => 'album_mini_desc',
|
56
|
+
'alma' => 'album_main_desc',
|
57
|
+
'aust' => 'aum_subtitle',
|
58
|
+
'aulg' => 'aum_languages', # See 2018 "Montreal QB w/Robert Coxon (3)" aums' subtitles "FRENCH"
|
59
|
+
'autt' => 'aum_title',
|
60
|
+
'autm' => 'aum_timespan',
|
61
|
+
'ausz' => 'aum_filesize',
|
62
|
+
'aufn' => 'aum_filename',
|
63
|
+
'audu' => 'dump',
|
64
|
+
'i' => 'ignore',
|
65
|
+
})
|
78
66
|
@trainers['aum_year_album_mini_desc'] = Trainer.new({
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
67
|
+
'd' => 'date',
|
68
|
+
'l' => 'location',
|
69
|
+
's' => 'desc',
|
70
|
+
'i' => 'ignore',
|
71
|
+
})
|
84
72
|
end
|
85
|
-
|
73
|
+
|
86
74
|
def parse_site(artist=nil,url=nil)
|
87
|
-
@artist = artist unless artist.nil?
|
88
|
-
@url = url unless url.nil?
|
89
|
-
|
75
|
+
@artist = artist unless artist.nil?
|
76
|
+
@url = url unless url.nil?
|
77
|
+
|
90
78
|
# URLs that return 404 or are empty; fix by hand
|
91
|
-
exclude_urls =
|
92
|
-
awakeningzone\.com
|
93
|
-
www\.talkshoe\.com
|
94
|
-
|
95
|
-
|
79
|
+
exclude_urls = %r{
|
80
|
+
awakeningzone\.com/Episode\.aspx\?EpisodeID\=|
|
81
|
+
www\.talkshoe\.com/talkshoe/web/audioPop\.jsp\?episodeId\=
|
82
|
+
}ix
|
83
|
+
|
96
84
|
if @url =~ exclude_urls
|
97
85
|
log.warn("Excluding Album URL #{@url}")
|
98
86
|
return
|
99
87
|
end
|
100
|
-
|
101
|
-
@trainers.load_file
|
102
|
-
|
103
|
-
raise ArgumentError,
|
104
|
-
raise ArgumentError,
|
105
|
-
|
88
|
+
|
89
|
+
@trainers.load_file
|
90
|
+
|
91
|
+
raise ArgumentError,'Artist cannot be nil' if @artist.nil?
|
92
|
+
raise ArgumentError,'URL cannot be empty' if @url.nil? || (@url = @url.strip).empty?
|
93
|
+
|
106
94
|
# Album data (flags are okay) should never go in this, only for aums, pics, etc.
|
107
95
|
@local_dump = {
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
96
|
+
album_dates: false,
|
97
|
+
album_title: false,
|
98
|
+
album_locations: false,
|
99
|
+
album_mini_desc: false,
|
100
|
+
album_main_desc: false,
|
101
|
+
aums: 0,
|
102
|
+
aum_subtitle: [],
|
103
|
+
aum_languages: [],
|
104
|
+
aum_title: [],
|
105
|
+
aum_timespan: [],
|
106
|
+
aum_filesize: [],
|
107
|
+
aum_filename: [],
|
108
|
+
}
|
109
|
+
|
122
110
|
# Force 'utf-8'
|
123
111
|
# - See charset "X-MAC-ROMAN" in 2017 "The Discovery Series", 2016 "Kryon in Budapest (5)"
|
124
|
-
doc = Nokogiri::HTML(
|
125
|
-
|
112
|
+
doc = Nokogiri::HTML(URI(@url).open,nil,'utf-8')
|
113
|
+
|
126
114
|
old_album = @artist.albums[@url]
|
127
|
-
|
128
|
-
@album = old_album.clone
|
115
|
+
|
116
|
+
@album = old_album.clone
|
129
117
|
@album.updated_on = @updated_on
|
130
118
|
@album.url = @url
|
131
|
-
|
132
|
-
if old_album.nil?
|
119
|
+
|
120
|
+
if old_album.nil?
|
133
121
|
@artist.albums[@url] = @album
|
134
122
|
end
|
135
|
-
|
123
|
+
|
136
124
|
parse_dump(doc,@album) # Must be first because other methods rely on @local_dump
|
137
|
-
|
125
|
+
|
138
126
|
return @album if @training # Currently, no other training occurs
|
139
|
-
|
127
|
+
|
140
128
|
parse_pics(doc,@album)
|
141
129
|
parse_aums(doc,@album)
|
142
|
-
|
130
|
+
|
143
131
|
if @album == old_album
|
144
132
|
@album.updated_on = old_album.updated_on
|
145
133
|
end
|
146
|
-
|
134
|
+
|
147
135
|
@artist.albums[@url] = @album
|
148
|
-
|
136
|
+
|
149
137
|
return @album
|
150
138
|
end
|
151
|
-
|
139
|
+
|
152
140
|
def parse_aums(doc,album)
|
153
141
|
links = doc.css('a')
|
154
|
-
|
142
|
+
|
155
143
|
return if links.nil?
|
156
|
-
|
144
|
+
|
157
145
|
i = 0 # Don't do #each_with_index() because sometimes we next
|
158
|
-
|
146
|
+
|
159
147
|
links.each do |link|
|
160
148
|
next if link.nil?
|
161
|
-
|
149
|
+
|
162
150
|
audio_file_regex = /\.mp3/i
|
163
151
|
href = link['href']
|
164
|
-
exclude_links =
|
165
|
-
files\.kryonespanol\.com
|
166
|
-
|
167
|
-
|
152
|
+
exclude_links = %r{
|
153
|
+
files\.kryonespanol\.com/audio/
|
154
|
+
}ix
|
155
|
+
|
168
156
|
next if href.nil? || href.empty?
|
169
157
|
next if href !~ audio_file_regex
|
170
158
|
next if href =~ exclude_links
|
171
|
-
|
159
|
+
|
172
160
|
aum = AumData.new
|
173
161
|
aum.url = Util.clean_data(href)
|
174
162
|
aum.filename = Util.parse_url_filename(aum.url)
|
175
163
|
aum.updated_on = @updated_on
|
176
|
-
|
177
|
-
if aum.url =~
|
164
|
+
|
165
|
+
if aum.url =~ %r{\A\.\.?/}
|
178
166
|
aum.url = Util.clean_link(@url,aum.url)
|
179
167
|
end
|
180
|
-
|
168
|
+
|
181
169
|
# Filesize
|
182
|
-
if !DevOpts.instance.test?
|
170
|
+
if !DevOpts.instance.test?
|
183
171
|
# Getting header data is slow, so only do it when not testing
|
184
172
|
begin
|
185
173
|
r = Util.get_url_header_data(aum.url)
|
186
174
|
aum.filesize = r['content-length']
|
187
175
|
aum.filesize = aum.filesize[0] if aum.filesize.is_a?(Array)
|
188
|
-
rescue => e
|
176
|
+
rescue StandardError => e
|
189
177
|
raise e.exception("#{e.message}; couldn't get header data for #{aum.url}")
|
190
178
|
end
|
191
179
|
end
|
192
|
-
|
180
|
+
|
193
181
|
# Subtitle
|
194
182
|
if i < @local_dump[:aum_subtitle].length
|
195
183
|
aum.subtitle = @local_dump[:aum_subtitle][i]
|
196
184
|
else
|
197
185
|
log.warn("No subtitle for: #{aum.filename},#{aum.url}")
|
198
186
|
end
|
199
|
-
|
187
|
+
|
200
188
|
# Languages
|
201
189
|
aum.languages = @local_dump[:aum_languages][i] if i < @local_dump[:aum_languages].length
|
202
|
-
|
190
|
+
|
203
191
|
# Title
|
204
192
|
if i < @local_dump[:aum_title].length
|
205
193
|
aum.title = @local_dump[:aum_title][i]
|
206
194
|
else
|
207
195
|
# Set title to something at least
|
208
|
-
if !(afn = aum.filename).nil?
|
196
|
+
if !(afn = aum.filename).nil? && !afn.strip.empty?
|
209
197
|
# More descriptive than subtitle
|
210
|
-
aum.title = afn.gsub(audio_file_regex,'').strip
|
198
|
+
aum.title = afn.gsub(audio_file_regex,'').strip
|
211
199
|
log.warn("Using filename as title: #{aum.title}")
|
212
200
|
else
|
213
201
|
aum.title = aum.subtitle
|
214
202
|
log.warn("Using subtitle as title: #{aum.title}")
|
215
203
|
end
|
216
204
|
end
|
217
|
-
|
205
|
+
|
218
206
|
# Timespan
|
219
207
|
if i < @local_dump[:aum_timespan].length
|
220
208
|
aum.timespan = @local_dump[:aum_timespan][i]
|
221
209
|
else
|
222
210
|
msg = "No timespan for: #{aum.title},#{aum.subtitle},#{aum.filename},#{aum.url}"
|
223
|
-
|
211
|
+
|
224
212
|
log.warn(msg)
|
225
|
-
|
213
|
+
|
226
214
|
#if DevOpts.instance.dev?()
|
227
215
|
# raise "#{msg}:\n#{@local_dump}\n#{album.dump}"
|
228
216
|
#else
|
229
217
|
# log.warn(msg)
|
230
218
|
#end
|
231
219
|
end
|
232
|
-
|
220
|
+
|
233
221
|
# Filesize, if not set
|
234
|
-
if (aum.filesize.nil?
|
222
|
+
if (aum.filesize.nil? || aum.filesize.strip.empty?) && i < @local_dump[:aum_filesize].length
|
235
223
|
aum.filesize = @local_dump[:aum_filesize][i]
|
236
224
|
log.warn("Using local dump filesize: #{aum.filesize}")
|
237
225
|
end
|
238
|
-
|
226
|
+
|
239
227
|
i += 1
|
240
|
-
|
228
|
+
|
241
229
|
# Is it old?
|
242
230
|
if album.aums.key?(aum.url) && aum == album.aums[aum.url]
|
243
231
|
aum.updated_on = album.aums[aum.url].updated_on
|
244
232
|
else # New
|
245
233
|
album.updated_on = @updated_on
|
246
234
|
end
|
247
|
-
|
235
|
+
|
248
236
|
album.aums[aum.url] = aum
|
249
237
|
end
|
250
238
|
end
|
251
|
-
|
239
|
+
|
252
240
|
def parse_dump(doc,album)
|
253
241
|
album.dump = []
|
254
242
|
tds = doc.css('td')
|
255
|
-
|
243
|
+
|
256
244
|
return if tds.nil?
|
257
|
-
|
245
|
+
|
258
246
|
filename_regex = /\.mp3[[:space:]]*\z/i
|
259
247
|
# 2017 "Petra, Jordan (5)" has a ":" in the megabytes cell
|
260
248
|
size_regex = /\A[[:space:]]*[[:digit:]]+(\.|\:|[[:digit:]]|[[:space:]])*megabytes[[:space:]]*\z/i
|
@@ -267,28 +255,32 @@ module UncleKryon
|
|
267
255
|
# 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" doesn't have the word "megabytes"
|
268
256
|
time_or_size_regex = /\A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*\z/i
|
269
257
|
# 2015 ones have a lot of "13:12 Min - 15.9 megs"
|
270
|
-
time_and_size_regex = /\A
|
271
|
-
|
258
|
+
time_and_size_regex = /\A
|
259
|
+
[[:space:]]*[[:digit:]]+[\:\.][[:digit:]]+
|
260
|
+
[[:space:]]+Min[[:space:]]+\-
|
261
|
+
[[:space:]]+[[:digit:]]+\.?[[:digit:]]*[[:space:]]*megs
|
262
|
+
/xi
|
263
|
+
|
272
264
|
size_count = 0
|
273
265
|
time_count = 0
|
274
|
-
|
266
|
+
|
275
267
|
tds.each do |td|
|
276
268
|
next if td.nil?
|
277
269
|
next if td.content.nil?
|
278
|
-
|
270
|
+
|
279
271
|
orig_c = Util.clean_charset(td.content)
|
280
272
|
c = Util.clean_data(orig_c)
|
281
|
-
|
273
|
+
|
282
274
|
next if c.empty?
|
283
275
|
#if c =~ exclude_content_regex
|
284
276
|
# log.warn("Excluding content: #{c}")
|
285
277
|
# next
|
286
278
|
#end
|
287
|
-
|
279
|
+
|
288
280
|
add_to_dump = true
|
289
|
-
|
281
|
+
|
290
282
|
if c =~ time_regex
|
291
|
-
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s
|
283
|
+
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s)
|
292
284
|
add_to_dump = false
|
293
285
|
time_count += 1
|
294
286
|
elsif c =~ size_regex
|
@@ -298,22 +290,22 @@ module UncleKryon
|
|
298
290
|
elsif c =~ time_or_size_regex
|
299
291
|
# Time is usually before size
|
300
292
|
if time_count == size_count
|
301
|
-
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s
|
293
|
+
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s)
|
302
294
|
time_count += 1
|
303
295
|
else
|
304
296
|
@local_dump[:aum_filesize].push(c)
|
305
297
|
size_count += 1
|
306
298
|
end
|
307
|
-
|
299
|
+
|
308
300
|
add_to_dump = false
|
309
301
|
elsif c =~ time_and_size_regex
|
310
302
|
time_and_size = c.split(/[[:space:]]*\-[[:space:]]*/) # Split on '-'
|
311
|
-
|
312
|
-
@local_dump[:aum_timespan].push(TimespanData.new(time_and_size[0]).to_s
|
303
|
+
|
304
|
+
@local_dump[:aum_timespan].push(TimespanData.new(time_and_size[0]).to_s)
|
313
305
|
time_count += 1
|
314
306
|
@local_dump[:aum_filesize].push(time_and_size[1])
|
315
307
|
size_count += 1
|
316
|
-
|
308
|
+
|
317
309
|
add_to_dump = false
|
318
310
|
elsif c =~ filename_regex
|
319
311
|
@local_dump[:aums] += 1
|
@@ -322,31 +314,35 @@ module UncleKryon
|
|
322
314
|
# Paragraphs
|
323
315
|
pars = orig_c.gsub(/\A[[:space:]]+/,'').gsub(/[[:space:]]+\z/,'')
|
324
316
|
pars = pars.split(/[\r\n\p{Zl}\p{Zp}]{2,}/)
|
325
|
-
|
326
|
-
pars.each
|
327
|
-
par = par.gsub(/[[:blank:]]+/,' ').strip
|
317
|
+
|
318
|
+
pars.each do |par|
|
319
|
+
par = par.gsub(/[[:blank:]]+/,' ').strip
|
328
320
|
par = Util.fix_shortwith_text(par)
|
329
|
-
|
330
|
-
next if par.empty?
|
331
|
-
|
321
|
+
|
322
|
+
next if par.empty?
|
323
|
+
|
332
324
|
if @training
|
333
325
|
if @trainers['aum_year_album'].train(par) == 'album_mini_desc'
|
334
|
-
par.split(/\n+/).each
|
326
|
+
par.split(/\n+/).each do |p|
|
335
327
|
@trainers['aum_year_album_mini_desc'].train(p)
|
336
328
|
end
|
337
329
|
end
|
338
330
|
else
|
339
331
|
#has_header = @local_dump[:album_title] || @local_dump[:album_dates] ||
|
340
|
-
# @local_dump[:album_locations] || @local_dump[:album_mini_desc] ||
|
332
|
+
# @local_dump[:album_locations] || @local_dump[:album_mini_desc] ||
|
333
|
+
# @local_dump[:album_main_desc]
|
341
334
|
has_header = true
|
342
335
|
tag = @trainers['aum_year_album'].tag(par)
|
343
|
-
|
336
|
+
|
344
337
|
# For 2017 "RETURN TO LEMURIA (7)"
|
345
|
-
if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-
|
338
|
+
if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-
|
339
|
+
[[:space:]]+Kalei[[:space:]]+-
|
340
|
+
[[:space:]]+John[[:space:]]+-
|
341
|
+
[[:space:]]+Amber[[:space:]]*\z/xi
|
346
342
|
tag = 'aum_title'
|
347
343
|
log.warn("Changing tag to aum_title: #{Util.clean_data(par)}")
|
348
344
|
end
|
349
|
-
|
345
|
+
|
350
346
|
case tag
|
351
347
|
when 'album_title'
|
352
348
|
if !@local_dump[:album_title]
|
@@ -361,17 +357,17 @@ module UncleKryon
|
|
361
357
|
@local_dump[:album_locations] = true
|
362
358
|
end
|
363
359
|
when 'album_mini_desc'
|
364
|
-
par.split(/\n+/).each
|
360
|
+
par.split(/\n+/).each do |p|
|
365
361
|
p = Util.clean_data(p)
|
366
|
-
|
367
|
-
if !p.empty?
|
362
|
+
|
363
|
+
if !p.empty?
|
368
364
|
case @trainers['aum_year_album_mini_desc'].tag(p)
|
369
365
|
when 'desc'
|
370
366
|
if !@local_dump[:album_mini_desc]
|
371
367
|
@local_dump[:album_mini_desc] = true
|
372
368
|
album.mini_desc = p
|
373
369
|
else
|
374
|
-
album.mini_desc << ' | ' if !album.mini_desc.strip
|
370
|
+
album.mini_desc << ' | ' if !album.mini_desc.strip.empty?
|
375
371
|
album.mini_desc << p
|
376
372
|
end
|
377
373
|
when 'ignore'
|
@@ -379,21 +375,21 @@ module UncleKryon
|
|
379
375
|
end
|
380
376
|
end
|
381
377
|
end
|
382
|
-
|
378
|
+
|
383
379
|
add_to_dump = false
|
384
380
|
when 'album_main_desc'
|
385
381
|
if !@local_dump[:album_main_desc]
|
386
382
|
@local_dump[:album_main_desc] = true
|
387
|
-
album.main_desc = ''.dup
|
383
|
+
album.main_desc = ''.dup
|
388
384
|
else
|
389
|
-
album.main_desc << "\n\n" if !album.main_desc.strip
|
385
|
+
album.main_desc << "\n\n" if !album.main_desc.strip.empty?
|
390
386
|
end
|
391
|
-
|
392
|
-
par.split(/\n+/).each
|
387
|
+
|
388
|
+
par.split(/\n+/).each do |p|
|
393
389
|
album.main_desc << Util.clean_data(p) << "\n"
|
394
390
|
end
|
395
|
-
|
396
|
-
album.main_desc = album.main_desc.strip
|
391
|
+
|
392
|
+
album.main_desc = album.main_desc.strip # Remove last newline
|
397
393
|
add_to_dump = false
|
398
394
|
when 'ignore'
|
399
395
|
log.warn("Excluding content: #{Util.clean_data(par)}")
|
@@ -413,19 +409,19 @@ module UncleKryon
|
|
413
409
|
add_to_dump = false
|
414
410
|
when 'aum_title'
|
415
411
|
@local_dump[:aum_title].push(Util.clean_data(par))
|
416
|
-
|
412
|
+
|
417
413
|
# Special case for 2017 "LISBON, PORTUGAL (Fatima Tour) (3)"
|
418
414
|
if par =~ /\A[[:space:]]*Lisbon[[:space:]]+Channeling[[:space:]]+1[[:space:]]*\z/i
|
419
|
-
@local_dump[:aum_title].push('Lisbon Channeling 2')
|
420
|
-
@local_dump[:aum_title].push('Lisbon Channeling 3')
|
415
|
+
@local_dump[:aum_title].push('Lisbon Channeling 2')
|
416
|
+
@local_dump[:aum_title].push('Lisbon Channeling 3')
|
421
417
|
log.warn("Adding aum_titles for: #{Util.clean_data(par)}")
|
422
418
|
end
|
423
419
|
# For 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" & "KRYON INDIA-NEPAL TOUR PART 2 (8)"
|
424
420
|
if par =~ /\A[[:space:]]*PAGE[[:space:]]*(ONE|TWO)[[:space:]]*\z/i
|
425
|
-
p = @local_dump[:aum_title].pop
|
421
|
+
p = @local_dump[:aum_title].pop
|
426
422
|
log.warn("Ignoring aum title: #{p}")
|
427
423
|
end
|
428
|
-
|
424
|
+
|
429
425
|
add_to_dump = false
|
430
426
|
when 'aum_filename'
|
431
427
|
add_to_dump = false
|
@@ -435,21 +431,21 @@ module UncleKryon
|
|
435
431
|
end
|
436
432
|
end
|
437
433
|
end
|
438
|
-
|
434
|
+
|
439
435
|
if add_to_dump
|
440
436
|
album.dump.push(c)
|
441
|
-
|
437
|
+
|
442
438
|
# For now, don't do this; if the font size is big, it's bad for mobile anyway
|
443
439
|
#album.dump.push(Util.clean_data(td.to_s())) # For bold, etc. html
|
444
440
|
end
|
445
441
|
end
|
446
442
|
end
|
447
|
-
|
443
|
+
|
448
444
|
def parse_pics(doc,album)
|
449
445
|
imgs = doc.css('img')
|
450
|
-
|
446
|
+
|
451
447
|
return if imgs.nil?
|
452
|
-
|
448
|
+
|
453
449
|
exclude_imgs = /
|
454
450
|
buttonMP3\.png|
|
455
451
|
freedownloadtype\.gif|
|
@@ -461,37 +457,37 @@ module UncleKryon
|
|
461
457
|
NavMenu\_master\.png|
|
462
458
|
testimonials\.png
|
463
459
|
/ix
|
464
|
-
|
460
|
+
|
465
461
|
imgs.each do |img|
|
466
462
|
next if img.nil?
|
467
|
-
|
463
|
+
|
468
464
|
src = img['src']
|
469
|
-
|
465
|
+
|
470
466
|
next if src.nil? || src.empty?
|
471
467
|
if src =~ exclude_imgs
|
472
468
|
log.warn("Excluding image: #{src}")
|
473
469
|
next
|
474
470
|
end
|
475
|
-
|
476
|
-
pic = PicData.new
|
477
|
-
|
471
|
+
|
472
|
+
pic = PicData.new
|
473
|
+
|
478
474
|
pic.url = Util.clean_link(url,src)
|
479
475
|
pic.filename = Util.parse_url_filename(pic.url)
|
480
|
-
|
476
|
+
|
481
477
|
pic.alt = img['alt']
|
482
478
|
pic.alt = '' if Util.empty_s?(pic.alt)
|
483
479
|
pic.caption = ''
|
484
|
-
|
480
|
+
|
485
481
|
pic.name = Util.empty_s?(pic.alt) ? File.basename(pic.filename,File.extname(pic.filename)) : pic.alt
|
486
482
|
pic.updated_on = @updated_on
|
487
|
-
|
483
|
+
|
488
484
|
# Is it old?
|
489
485
|
if album.pics.key?(pic.url) && pic == album.pics[pic.url]
|
490
486
|
pic.updated_on = album.pics[pic.url].updated_on
|
491
487
|
else # New
|
492
488
|
album.updated_on = @updated_on
|
493
489
|
end
|
494
|
-
|
490
|
+
|
495
491
|
album.pics[pic.url] = pic
|
496
492
|
end
|
497
493
|
end
|