unclekryon 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -19
- data/Gemfile.lock +19 -23
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon.rb +165 -165
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +119 -133
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +36 -49
- data/lib/unclekryon/iso/language.rb +86 -96
- data/lib/unclekryon/iso/region.rb +11 -25
- data/lib/unclekryon/iso/subregion.rb +11 -25
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +69 -83
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +44 -42
- metadata +59 -16
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -38,7 +26,7 @@ require 'unclekryon/data/timespan_data'
|
|
38
26
|
module UncleKryon
|
39
27
|
class KryonAumYearAlbumParser
|
40
28
|
include Logging
|
41
|
-
|
29
|
+
|
42
30
|
attr_accessor :album
|
43
31
|
attr_accessor :artist
|
44
32
|
attr_accessor :options
|
@@ -46,215 +34,215 @@ module UncleKryon
|
|
46
34
|
attr_accessor :training
|
47
35
|
attr_accessor :updated_on
|
48
36
|
attr_accessor :url
|
49
|
-
|
37
|
+
|
50
38
|
alias_method :training?,:training
|
51
|
-
|
39
|
+
|
52
40
|
def initialize(artist=nil,url=nil,album: nil,training: false,train_filepath: nil,updated_on: nil,
|
53
41
|
**options)
|
54
42
|
@album = album
|
55
43
|
@artist = artist
|
56
44
|
@options = options
|
57
|
-
@updated_on = Util.format_datetime(DateTime.now
|
45
|
+
@updated_on = Util.format_datetime(DateTime.now) if Util.empty_s?(updated_on)
|
58
46
|
@url = url
|
59
|
-
|
47
|
+
|
60
48
|
@trainers = Trainers.new(train_filepath)
|
61
49
|
@training = training
|
62
|
-
|
50
|
+
|
63
51
|
@trainers['aum_year_album'] = Trainer.new({
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
52
|
+
'alds' => 'album_dates',
|
53
|
+
'altt' => 'album_title',
|
54
|
+
'allo' => 'album_locations',
|
55
|
+
'almi' => 'album_mini_desc',
|
56
|
+
'alma' => 'album_main_desc',
|
57
|
+
'aust' => 'aum_subtitle',
|
58
|
+
'aulg' => 'aum_languages', # See 2018 "Montreal QB w/Robert Coxon (3)" aums' subtitles "FRENCH"
|
59
|
+
'autt' => 'aum_title',
|
60
|
+
'autm' => 'aum_timespan',
|
61
|
+
'ausz' => 'aum_filesize',
|
62
|
+
'aufn' => 'aum_filename',
|
63
|
+
'audu' => 'dump',
|
64
|
+
'i' => 'ignore',
|
65
|
+
})
|
78
66
|
@trainers['aum_year_album_mini_desc'] = Trainer.new({
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
67
|
+
'd' => 'date',
|
68
|
+
'l' => 'location',
|
69
|
+
's' => 'desc',
|
70
|
+
'i' => 'ignore',
|
71
|
+
})
|
84
72
|
end
|
85
|
-
|
73
|
+
|
86
74
|
def parse_site(artist=nil,url=nil)
|
87
|
-
@artist = artist unless artist.nil?
|
88
|
-
@url = url unless url.nil?
|
89
|
-
|
75
|
+
@artist = artist unless artist.nil?
|
76
|
+
@url = url unless url.nil?
|
77
|
+
|
90
78
|
# URLs that return 404 or are empty; fix by hand
|
91
|
-
exclude_urls =
|
92
|
-
awakeningzone\.com
|
93
|
-
www\.talkshoe\.com
|
94
|
-
|
95
|
-
|
79
|
+
exclude_urls = %r{
|
80
|
+
awakeningzone\.com/Episode\.aspx\?EpisodeID\=|
|
81
|
+
www\.talkshoe\.com/talkshoe/web/audioPop\.jsp\?episodeId\=
|
82
|
+
}ix
|
83
|
+
|
96
84
|
if @url =~ exclude_urls
|
97
85
|
log.warn("Excluding Album URL #{@url}")
|
98
86
|
return
|
99
87
|
end
|
100
|
-
|
101
|
-
@trainers.load_file
|
102
|
-
|
103
|
-
raise ArgumentError,
|
104
|
-
raise ArgumentError,
|
105
|
-
|
88
|
+
|
89
|
+
@trainers.load_file
|
90
|
+
|
91
|
+
raise ArgumentError,'Artist cannot be nil' if @artist.nil?
|
92
|
+
raise ArgumentError,'URL cannot be empty' if @url.nil? || (@url = @url.strip).empty?
|
93
|
+
|
106
94
|
# Album data (flags are okay) should never go in this, only for aums, pics, etc.
|
107
95
|
@local_dump = {
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
96
|
+
album_dates: false,
|
97
|
+
album_title: false,
|
98
|
+
album_locations: false,
|
99
|
+
album_mini_desc: false,
|
100
|
+
album_main_desc: false,
|
101
|
+
aums: 0,
|
102
|
+
aum_subtitle: [],
|
103
|
+
aum_languages: [],
|
104
|
+
aum_title: [],
|
105
|
+
aum_timespan: [],
|
106
|
+
aum_filesize: [],
|
107
|
+
aum_filename: [],
|
108
|
+
}
|
109
|
+
|
122
110
|
# Force 'utf-8'
|
123
111
|
# - See charset "X-MAC-ROMAN" in 2017 "The Discovery Series", 2016 "Kryon in Budapest (5)"
|
124
|
-
doc = Nokogiri::HTML(
|
125
|
-
|
112
|
+
doc = Nokogiri::HTML(URI(@url).open,nil,'utf-8')
|
113
|
+
|
126
114
|
old_album = @artist.albums[@url]
|
127
|
-
|
128
|
-
@album = old_album.clone
|
115
|
+
|
116
|
+
@album = old_album.clone
|
129
117
|
@album.updated_on = @updated_on
|
130
118
|
@album.url = @url
|
131
|
-
|
132
|
-
if old_album.nil?
|
119
|
+
|
120
|
+
if old_album.nil?
|
133
121
|
@artist.albums[@url] = @album
|
134
122
|
end
|
135
|
-
|
123
|
+
|
136
124
|
parse_dump(doc,@album) # Must be first because other methods rely on @local_dump
|
137
|
-
|
125
|
+
|
138
126
|
return @album if @training # Currently, no other training occurs
|
139
|
-
|
127
|
+
|
140
128
|
parse_pics(doc,@album)
|
141
129
|
parse_aums(doc,@album)
|
142
|
-
|
130
|
+
|
143
131
|
if @album == old_album
|
144
132
|
@album.updated_on = old_album.updated_on
|
145
133
|
end
|
146
|
-
|
134
|
+
|
147
135
|
@artist.albums[@url] = @album
|
148
|
-
|
136
|
+
|
149
137
|
return @album
|
150
138
|
end
|
151
|
-
|
139
|
+
|
152
140
|
def parse_aums(doc,album)
|
153
141
|
links = doc.css('a')
|
154
|
-
|
142
|
+
|
155
143
|
return if links.nil?
|
156
|
-
|
144
|
+
|
157
145
|
i = 0 # Don't do #each_with_index() because sometimes we next
|
158
|
-
|
146
|
+
|
159
147
|
links.each do |link|
|
160
148
|
next if link.nil?
|
161
|
-
|
149
|
+
|
162
150
|
audio_file_regex = /\.mp3/i
|
163
151
|
href = link['href']
|
164
|
-
exclude_links =
|
165
|
-
files\.kryonespanol\.com
|
166
|
-
|
167
|
-
|
152
|
+
exclude_links = %r{
|
153
|
+
files\.kryonespanol\.com/audio/
|
154
|
+
}ix
|
155
|
+
|
168
156
|
next if href.nil? || href.empty?
|
169
157
|
next if href !~ audio_file_regex
|
170
158
|
next if href =~ exclude_links
|
171
|
-
|
159
|
+
|
172
160
|
aum = AumData.new
|
173
161
|
aum.url = Util.clean_data(href)
|
174
162
|
aum.filename = Util.parse_url_filename(aum.url)
|
175
163
|
aum.updated_on = @updated_on
|
176
|
-
|
177
|
-
if aum.url =~
|
164
|
+
|
165
|
+
if aum.url =~ %r{\A\.\.?/}
|
178
166
|
aum.url = Util.clean_link(@url,aum.url)
|
179
167
|
end
|
180
|
-
|
168
|
+
|
181
169
|
# Filesize
|
182
|
-
if !DevOpts.instance.test?
|
170
|
+
if !DevOpts.instance.test?
|
183
171
|
# Getting header data is slow, so only do it when not testing
|
184
172
|
begin
|
185
173
|
r = Util.get_url_header_data(aum.url)
|
186
174
|
aum.filesize = r['content-length']
|
187
175
|
aum.filesize = aum.filesize[0] if aum.filesize.is_a?(Array)
|
188
|
-
rescue => e
|
176
|
+
rescue StandardError => e
|
189
177
|
raise e.exception("#{e.message}; couldn't get header data for #{aum.url}")
|
190
178
|
end
|
191
179
|
end
|
192
|
-
|
180
|
+
|
193
181
|
# Subtitle
|
194
182
|
if i < @local_dump[:aum_subtitle].length
|
195
183
|
aum.subtitle = @local_dump[:aum_subtitle][i]
|
196
184
|
else
|
197
185
|
log.warn("No subtitle for: #{aum.filename},#{aum.url}")
|
198
186
|
end
|
199
|
-
|
187
|
+
|
200
188
|
# Languages
|
201
189
|
aum.languages = @local_dump[:aum_languages][i] if i < @local_dump[:aum_languages].length
|
202
|
-
|
190
|
+
|
203
191
|
# Title
|
204
192
|
if i < @local_dump[:aum_title].length
|
205
193
|
aum.title = @local_dump[:aum_title][i]
|
206
194
|
else
|
207
195
|
# Set title to something at least
|
208
|
-
if !(afn = aum.filename).nil?
|
196
|
+
if !(afn = aum.filename).nil? && !afn.strip.empty?
|
209
197
|
# More descriptive than subtitle
|
210
|
-
aum.title = afn.gsub(audio_file_regex,'').strip
|
198
|
+
aum.title = afn.gsub(audio_file_regex,'').strip
|
211
199
|
log.warn("Using filename as title: #{aum.title}")
|
212
200
|
else
|
213
201
|
aum.title = aum.subtitle
|
214
202
|
log.warn("Using subtitle as title: #{aum.title}")
|
215
203
|
end
|
216
204
|
end
|
217
|
-
|
205
|
+
|
218
206
|
# Timespan
|
219
207
|
if i < @local_dump[:aum_timespan].length
|
220
208
|
aum.timespan = @local_dump[:aum_timespan][i]
|
221
209
|
else
|
222
210
|
msg = "No timespan for: #{aum.title},#{aum.subtitle},#{aum.filename},#{aum.url}"
|
223
|
-
|
211
|
+
|
224
212
|
log.warn(msg)
|
225
|
-
|
213
|
+
|
226
214
|
#if DevOpts.instance.dev?()
|
227
215
|
# raise "#{msg}:\n#{@local_dump}\n#{album.dump}"
|
228
216
|
#else
|
229
217
|
# log.warn(msg)
|
230
218
|
#end
|
231
219
|
end
|
232
|
-
|
220
|
+
|
233
221
|
# Filesize, if not set
|
234
|
-
if (aum.filesize.nil?
|
222
|
+
if (aum.filesize.nil? || aum.filesize.strip.empty?) && i < @local_dump[:aum_filesize].length
|
235
223
|
aum.filesize = @local_dump[:aum_filesize][i]
|
236
224
|
log.warn("Using local dump filesize: #{aum.filesize}")
|
237
225
|
end
|
238
|
-
|
226
|
+
|
239
227
|
i += 1
|
240
|
-
|
228
|
+
|
241
229
|
# Is it old?
|
242
230
|
if album.aums.key?(aum.url) && aum == album.aums[aum.url]
|
243
231
|
aum.updated_on = album.aums[aum.url].updated_on
|
244
232
|
else # New
|
245
233
|
album.updated_on = @updated_on
|
246
234
|
end
|
247
|
-
|
235
|
+
|
248
236
|
album.aums[aum.url] = aum
|
249
237
|
end
|
250
238
|
end
|
251
|
-
|
239
|
+
|
252
240
|
def parse_dump(doc,album)
|
253
241
|
album.dump = []
|
254
242
|
tds = doc.css('td')
|
255
|
-
|
243
|
+
|
256
244
|
return if tds.nil?
|
257
|
-
|
245
|
+
|
258
246
|
filename_regex = /\.mp3[[:space:]]*\z/i
|
259
247
|
# 2017 "Petra, Jordan (5)" has a ":" in the megabytes cell
|
260
248
|
size_regex = /\A[[:space:]]*[[:digit:]]+(\.|\:|[[:digit:]]|[[:space:]])*megabytes[[:space:]]*\z/i
|
@@ -267,28 +255,32 @@ module UncleKryon
|
|
267
255
|
# 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" doesn't have the word "megabytes"
|
268
256
|
time_or_size_regex = /\A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*\z/i
|
269
257
|
# 2015 ones have a lot of "13:12 Min - 15.9 megs"
|
270
|
-
time_and_size_regex = /\A
|
271
|
-
|
258
|
+
time_and_size_regex = /\A
|
259
|
+
[[:space:]]*[[:digit:]]+[\:\.][[:digit:]]+
|
260
|
+
[[:space:]]+Min[[:space:]]+\-
|
261
|
+
[[:space:]]+[[:digit:]]+\.?[[:digit:]]*[[:space:]]*megs
|
262
|
+
/xi
|
263
|
+
|
272
264
|
size_count = 0
|
273
265
|
time_count = 0
|
274
|
-
|
266
|
+
|
275
267
|
tds.each do |td|
|
276
268
|
next if td.nil?
|
277
269
|
next if td.content.nil?
|
278
|
-
|
270
|
+
|
279
271
|
orig_c = Util.clean_charset(td.content)
|
280
272
|
c = Util.clean_data(orig_c)
|
281
|
-
|
273
|
+
|
282
274
|
next if c.empty?
|
283
275
|
#if c =~ exclude_content_regex
|
284
276
|
# log.warn("Excluding content: #{c}")
|
285
277
|
# next
|
286
278
|
#end
|
287
|
-
|
279
|
+
|
288
280
|
add_to_dump = true
|
289
|
-
|
281
|
+
|
290
282
|
if c =~ time_regex
|
291
|
-
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s
|
283
|
+
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s)
|
292
284
|
add_to_dump = false
|
293
285
|
time_count += 1
|
294
286
|
elsif c =~ size_regex
|
@@ -298,22 +290,22 @@ module UncleKryon
|
|
298
290
|
elsif c =~ time_or_size_regex
|
299
291
|
# Time is usually before size
|
300
292
|
if time_count == size_count
|
301
|
-
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s
|
293
|
+
@local_dump[:aum_timespan].push(TimespanData.new(c).to_s)
|
302
294
|
time_count += 1
|
303
295
|
else
|
304
296
|
@local_dump[:aum_filesize].push(c)
|
305
297
|
size_count += 1
|
306
298
|
end
|
307
|
-
|
299
|
+
|
308
300
|
add_to_dump = false
|
309
301
|
elsif c =~ time_and_size_regex
|
310
302
|
time_and_size = c.split(/[[:space:]]*\-[[:space:]]*/) # Split on '-'
|
311
|
-
|
312
|
-
@local_dump[:aum_timespan].push(TimespanData.new(time_and_size[0]).to_s
|
303
|
+
|
304
|
+
@local_dump[:aum_timespan].push(TimespanData.new(time_and_size[0]).to_s)
|
313
305
|
time_count += 1
|
314
306
|
@local_dump[:aum_filesize].push(time_and_size[1])
|
315
307
|
size_count += 1
|
316
|
-
|
308
|
+
|
317
309
|
add_to_dump = false
|
318
310
|
elsif c =~ filename_regex
|
319
311
|
@local_dump[:aums] += 1
|
@@ -322,31 +314,35 @@ module UncleKryon
|
|
322
314
|
# Paragraphs
|
323
315
|
pars = orig_c.gsub(/\A[[:space:]]+/,'').gsub(/[[:space:]]+\z/,'')
|
324
316
|
pars = pars.split(/[\r\n\p{Zl}\p{Zp}]{2,}/)
|
325
|
-
|
326
|
-
pars.each
|
327
|
-
par = par.gsub(/[[:blank:]]+/,' ').strip
|
317
|
+
|
318
|
+
pars.each do |par|
|
319
|
+
par = par.gsub(/[[:blank:]]+/,' ').strip
|
328
320
|
par = Util.fix_shortwith_text(par)
|
329
|
-
|
330
|
-
next if par.empty?
|
331
|
-
|
321
|
+
|
322
|
+
next if par.empty?
|
323
|
+
|
332
324
|
if @training
|
333
325
|
if @trainers['aum_year_album'].train(par) == 'album_mini_desc'
|
334
|
-
par.split(/\n+/).each
|
326
|
+
par.split(/\n+/).each do |p|
|
335
327
|
@trainers['aum_year_album_mini_desc'].train(p)
|
336
328
|
end
|
337
329
|
end
|
338
330
|
else
|
339
331
|
#has_header = @local_dump[:album_title] || @local_dump[:album_dates] ||
|
340
|
-
# @local_dump[:album_locations] || @local_dump[:album_mini_desc] ||
|
332
|
+
# @local_dump[:album_locations] || @local_dump[:album_mini_desc] ||
|
333
|
+
# @local_dump[:album_main_desc]
|
341
334
|
has_header = true
|
342
335
|
tag = @trainers['aum_year_album'].tag(par)
|
343
|
-
|
336
|
+
|
344
337
|
# For 2017 "RETURN TO LEMURIA (7)"
|
345
|
-
if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-
|
338
|
+
if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-
|
339
|
+
[[:space:]]+Kalei[[:space:]]+-
|
340
|
+
[[:space:]]+John[[:space:]]+-
|
341
|
+
[[:space:]]+Amber[[:space:]]*\z/xi
|
346
342
|
tag = 'aum_title'
|
347
343
|
log.warn("Changing tag to aum_title: #{Util.clean_data(par)}")
|
348
344
|
end
|
349
|
-
|
345
|
+
|
350
346
|
case tag
|
351
347
|
when 'album_title'
|
352
348
|
if !@local_dump[:album_title]
|
@@ -361,17 +357,17 @@ module UncleKryon
|
|
361
357
|
@local_dump[:album_locations] = true
|
362
358
|
end
|
363
359
|
when 'album_mini_desc'
|
364
|
-
par.split(/\n+/).each
|
360
|
+
par.split(/\n+/).each do |p|
|
365
361
|
p = Util.clean_data(p)
|
366
|
-
|
367
|
-
if !p.empty?
|
362
|
+
|
363
|
+
if !p.empty?
|
368
364
|
case @trainers['aum_year_album_mini_desc'].tag(p)
|
369
365
|
when 'desc'
|
370
366
|
if !@local_dump[:album_mini_desc]
|
371
367
|
@local_dump[:album_mini_desc] = true
|
372
368
|
album.mini_desc = p
|
373
369
|
else
|
374
|
-
album.mini_desc << ' | ' if !album.mini_desc.strip
|
370
|
+
album.mini_desc << ' | ' if !album.mini_desc.strip.empty?
|
375
371
|
album.mini_desc << p
|
376
372
|
end
|
377
373
|
when 'ignore'
|
@@ -379,21 +375,21 @@ module UncleKryon
|
|
379
375
|
end
|
380
376
|
end
|
381
377
|
end
|
382
|
-
|
378
|
+
|
383
379
|
add_to_dump = false
|
384
380
|
when 'album_main_desc'
|
385
381
|
if !@local_dump[:album_main_desc]
|
386
382
|
@local_dump[:album_main_desc] = true
|
387
|
-
album.main_desc = ''.dup
|
383
|
+
album.main_desc = ''.dup
|
388
384
|
else
|
389
|
-
album.main_desc << "\n\n" if !album.main_desc.strip
|
385
|
+
album.main_desc << "\n\n" if !album.main_desc.strip.empty?
|
390
386
|
end
|
391
|
-
|
392
|
-
par.split(/\n+/).each
|
387
|
+
|
388
|
+
par.split(/\n+/).each do |p|
|
393
389
|
album.main_desc << Util.clean_data(p) << "\n"
|
394
390
|
end
|
395
|
-
|
396
|
-
album.main_desc = album.main_desc.strip
|
391
|
+
|
392
|
+
album.main_desc = album.main_desc.strip # Remove last newline
|
397
393
|
add_to_dump = false
|
398
394
|
when 'ignore'
|
399
395
|
log.warn("Excluding content: #{Util.clean_data(par)}")
|
@@ -413,19 +409,19 @@ module UncleKryon
|
|
413
409
|
add_to_dump = false
|
414
410
|
when 'aum_title'
|
415
411
|
@local_dump[:aum_title].push(Util.clean_data(par))
|
416
|
-
|
412
|
+
|
417
413
|
# Special case for 2017 "LISBON, PORTUGAL (Fatima Tour) (3)"
|
418
414
|
if par =~ /\A[[:space:]]*Lisbon[[:space:]]+Channeling[[:space:]]+1[[:space:]]*\z/i
|
419
|
-
@local_dump[:aum_title].push('Lisbon Channeling 2')
|
420
|
-
@local_dump[:aum_title].push('Lisbon Channeling 3')
|
415
|
+
@local_dump[:aum_title].push('Lisbon Channeling 2')
|
416
|
+
@local_dump[:aum_title].push('Lisbon Channeling 3')
|
421
417
|
log.warn("Adding aum_titles for: #{Util.clean_data(par)}")
|
422
418
|
end
|
423
419
|
# For 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" & "KRYON INDIA-NEPAL TOUR PART 2 (8)"
|
424
420
|
if par =~ /\A[[:space:]]*PAGE[[:space:]]*(ONE|TWO)[[:space:]]*\z/i
|
425
|
-
p = @local_dump[:aum_title].pop
|
421
|
+
p = @local_dump[:aum_title].pop
|
426
422
|
log.warn("Ignoring aum title: #{p}")
|
427
423
|
end
|
428
|
-
|
424
|
+
|
429
425
|
add_to_dump = false
|
430
426
|
when 'aum_filename'
|
431
427
|
add_to_dump = false
|
@@ -435,21 +431,21 @@ module UncleKryon
|
|
435
431
|
end
|
436
432
|
end
|
437
433
|
end
|
438
|
-
|
434
|
+
|
439
435
|
if add_to_dump
|
440
436
|
album.dump.push(c)
|
441
|
-
|
437
|
+
|
442
438
|
# For now, don't do this; if the font size is big, it's bad for mobile anyway
|
443
439
|
#album.dump.push(Util.clean_data(td.to_s())) # For bold, etc. html
|
444
440
|
end
|
445
441
|
end
|
446
442
|
end
|
447
|
-
|
443
|
+
|
448
444
|
def parse_pics(doc,album)
|
449
445
|
imgs = doc.css('img')
|
450
|
-
|
446
|
+
|
451
447
|
return if imgs.nil?
|
452
|
-
|
448
|
+
|
453
449
|
exclude_imgs = /
|
454
450
|
buttonMP3\.png|
|
455
451
|
freedownloadtype\.gif|
|
@@ -461,37 +457,37 @@ module UncleKryon
|
|
461
457
|
NavMenu\_master\.png|
|
462
458
|
testimonials\.png
|
463
459
|
/ix
|
464
|
-
|
460
|
+
|
465
461
|
imgs.each do |img|
|
466
462
|
next if img.nil?
|
467
|
-
|
463
|
+
|
468
464
|
src = img['src']
|
469
|
-
|
465
|
+
|
470
466
|
next if src.nil? || src.empty?
|
471
467
|
if src =~ exclude_imgs
|
472
468
|
log.warn("Excluding image: #{src}")
|
473
469
|
next
|
474
470
|
end
|
475
|
-
|
476
|
-
pic = PicData.new
|
477
|
-
|
471
|
+
|
472
|
+
pic = PicData.new
|
473
|
+
|
478
474
|
pic.url = Util.clean_link(url,src)
|
479
475
|
pic.filename = Util.parse_url_filename(pic.url)
|
480
|
-
|
476
|
+
|
481
477
|
pic.alt = img['alt']
|
482
478
|
pic.alt = '' if Util.empty_s?(pic.alt)
|
483
479
|
pic.caption = ''
|
484
|
-
|
480
|
+
|
485
481
|
pic.name = Util.empty_s?(pic.alt) ? File.basename(pic.filename,File.extname(pic.filename)) : pic.alt
|
486
482
|
pic.updated_on = @updated_on
|
487
|
-
|
483
|
+
|
488
484
|
# Is it old?
|
489
485
|
if album.pics.key?(pic.url) && pic == album.pics[pic.url]
|
490
486
|
pic.updated_on = album.pics[pic.url].updated_on
|
491
487
|
else # New
|
492
488
|
album.updated_on = @updated_on
|
493
489
|
end
|
494
|
-
|
490
|
+
|
495
491
|
album.pics[pic.url] = pic
|
496
492
|
end
|
497
493
|
end
|