unclekryon 0.4.9.pre.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +34 -0
- data/Gemfile.lock +43 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +59 -0
- data/bin/unclekryon +30 -0
- data/iso/can_provs_terrs.yaml +54 -0
- data/iso/countries.yaml +3050 -0
- data/iso/iso.yaml +8 -0
- data/iso/languages.yaml +5641 -0
- data/iso/regions.yaml +42 -0
- data/iso/subregions.yaml +6 -0
- data/iso/usa_states.yaml +230 -0
- data/lib/unclekryon.rb +384 -0
- data/lib/unclekryon/data/album_data.rb +147 -0
- data/lib/unclekryon/data/artist_data.rb +109 -0
- data/lib/unclekryon/data/artist_data_data.rb +146 -0
- data/lib/unclekryon/data/aum_data.rb +75 -0
- data/lib/unclekryon/data/base_data.rb +79 -0
- data/lib/unclekryon/data/pic_data.rb +76 -0
- data/lib/unclekryon/data/release_data.rb +57 -0
- data/lib/unclekryon/data/social_data.rb +39 -0
- data/lib/unclekryon/data/timespan_data.rb +70 -0
- data/lib/unclekryon/dev_opts.rb +41 -0
- data/lib/unclekryon/hacker.rb +327 -0
- data/lib/unclekryon/iso.rb +341 -0
- data/lib/unclekryon/iso/base_iso.rb +196 -0
- data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
- data/lib/unclekryon/iso/country.rb +133 -0
- data/lib/unclekryon/iso/language.rb +241 -0
- data/lib/unclekryon/iso/region.rb +53 -0
- data/lib/unclekryon/iso/subregion.rb +53 -0
- data/lib/unclekryon/iso/usa_state.rb +106 -0
- data/lib/unclekryon/jsoner.rb +124 -0
- data/lib/unclekryon/log.rb +111 -0
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
- data/lib/unclekryon/server.rb +29 -0
- data/lib/unclekryon/trainer.rb +231 -0
- data/lib/unclekryon/uploader.rb +29 -0
- data/lib/unclekryon/util.rb +228 -0
- data/lib/unclekryon/version.rb +26 -0
- data/unclekryon.gemspec +67 -0
- metadata +189 -0
|
@@ -0,0 +1,499 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# encoding: UTF-8
|
|
3
|
+
# frozen_string_literal: true
|
|
4
|
+
|
|
5
|
+
#--
|
|
6
|
+
# This file is part of UncleKryon-server.
|
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
|
8
|
+
#
|
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU General Public License
|
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
|
21
|
+
#++
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
require 'nokogiri'
|
|
25
|
+
require 'open-uri'
|
|
26
|
+
|
|
27
|
+
require 'unclekryon/dev_opts'
|
|
28
|
+
require 'unclekryon/iso'
|
|
29
|
+
require 'unclekryon/log'
|
|
30
|
+
require 'unclekryon/trainer'
|
|
31
|
+
require 'unclekryon/util'
|
|
32
|
+
|
|
33
|
+
require 'unclekryon/data/album_data'
|
|
34
|
+
require 'unclekryon/data/aum_data'
|
|
35
|
+
require 'unclekryon/data/pic_data'
|
|
36
|
+
require 'unclekryon/data/timespan_data'
|
|
37
|
+
|
|
38
|
+
module UncleKryon
|
|
39
|
+
class KryonAumYearAlbumParser
|
|
40
|
+
include Logging
|
|
41
|
+
|
|
42
|
+
attr_accessor :album
|
|
43
|
+
attr_accessor :artist
|
|
44
|
+
attr_accessor :options
|
|
45
|
+
attr_accessor :trainers
|
|
46
|
+
attr_accessor :training
|
|
47
|
+
attr_accessor :updated_on
|
|
48
|
+
attr_accessor :url
|
|
49
|
+
|
|
50
|
+
alias_method :training?,:training
|
|
51
|
+
|
|
52
|
+
# Builds a parser for a single album page and registers the two trainers it uses.
#
# Params:
# - artist:         object whose #albums is read and updated by #parse_site (may be given later)
# - url:            album page URL (may be given later to #parse_site)
# - album:          initial value for #album (#parse_site replaces it)
# - training:       when true, #parse_dump trains the trainers instead of tagging with them
# - train_filepath: passed straight to Trainers.new
# - updated_on:     timestamp stamped on parsed data; when empty, the current time is used
# - options:        any extra keyword arguments, stored untouched in #options
def initialize(artist=nil,url=nil,album: nil,training: false,train_filepath: nil,updated_on: nil,
               **options)
  @album = album
  @artist = artist
  @options = options
  # Keep a caller-supplied timestamp; previously it was dropped and @updated_on stayed nil
  # whenever a non-empty value was passed in
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now()) : updated_on
  @url = url

  @trainers = Trainers.new(train_filepath)
  @training = training

  # The values are the tag names that #parse_dump compares Trainer#train/#tag results against;
  # the keys are presumably the short IDs entered while training (confirm in Trainer)
  @trainers['aum_year_album'] = Trainer.new({
      'alds'=>'album_dates',
      'altt'=>'album_title',
      'allo'=>'album_locations',
      'almi'=>'album_mini_desc',
      'alma'=>'album_main_desc',
      'aust'=>'aum_subtitle',
      'aulg'=>'aum_languages', # See 2018 "Montreal QB w/Robert Coxon (3)" aums' subtitles "FRENCH"
      'autt'=>'aum_title',
      'autm'=>'aum_timespan',
      'ausz'=>'aum_filesize',
      'aufn'=>'aum_filename',
      'audu'=>'dump',
      'i'   =>'ignore'
    })
  # Applied to each line of a paragraph tagged 'album_mini_desc'
  @trainers['aum_year_album_mini_desc'] = Trainer.new({
      'd'=>'date',
      'l'=>'location',
      's'=>'desc',
      'i'=>'ignore'
    })
end
|
|
85
|
+
|
|
86
|
+
# Downloads and parses one album page, storing the result in artist.albums[url].
#
# Params:
# - artist: replaces #artist when not nil
# - url:    replaces #url when not nil
#
# Returns the parsed album, or nil when the URL is on the exclusion list.
# When #training? is true, only #parse_dump runs and the album is returned early.
#
# Raises ArgumentError if there is no artist or the URL is nil/blank.
def parse_site(artist=nil,url=nil)
  @artist = artist unless artist.nil?()
  @url = url unless url.nil?()

  # URLs that return 404 or are empty; fix by hand
  exclude_urls = /
    awakeningzone\.com\/Episode\.aspx\?EpisodeID\=|
    www\.talkshoe\.com\/talkshoe\/web\/audioPop\.jsp\?episodeId\=
  /ix

  if @url =~ exclude_urls
    log.warn("Excluding Album URL #{@url}")
    return
  end

  @trainers.load_file()

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?()
  raise ArgumentError,"URL cannot be empty" if @url.nil?() || (@url = @url.strip()).empty?()

  # Album data (flags are okay) should never go in this, only for aums, pics, etc.
  @local_dump = {
    :album_dates=>false,
    :album_title=>false,
    :album_locations=>false,
    :album_mini_desc=>false,
    :album_main_desc=>false,
    :aums=>0,
    :aum_subtitle=>[],
    :aum_languages=>[],
    :aum_title=>[],
    :aum_timespan=>[],
    :aum_filesize=>[],
    :aum_filename=>[]
  }

  # Force 'utf-8'
  # - See charset "X-MAC-ROMAN" in 2017 "The Discovery Series", 2016 "Kryon in Budapest (5)"
  #
  # URI#open (from open-uri) is what Kernel#open delegated to for URL strings; calling it
  # directly also works on Ruby 3.0+, where Kernel#open no longer opens URLs.  The block form
  # closes the download as soon as the document has been parsed.
  # NOTE(review): assumes @url is always a remote (http/https/ftp) URL, never a local path - confirm
  doc = URI.parse(@url).open() { |io| Nokogiri::HTML(io,nil,'utf-8') }

  old_album = @artist.albums[@url]

  # nil.clone() is nil, so a URL that is not in artist.albums yet needs a fresh album
  # NOTE(review): assumes AlbumData.new takes no arguments, like AumData/PicData in this file - confirm
  @album = old_album.nil?() ? AlbumData.new() : old_album.clone()
  @album.updated_on = @updated_on
  @album.url = @url

  if old_album.nil?()
    @artist.albums[@url] = @album
  end

  parse_dump(doc,@album) # Must be first because other methods rely on @local_dump

  return @album if @training # Currently, no other training occurs

  parse_pics(doc,@album)
  parse_aums(doc,@album)

  # Nothing changed, so keep the previous timestamp
  if @album == old_album
    @album.updated_on = old_album.updated_on
  end

  @artist.albums[@url] = @album

  return @album
end
|
|
151
|
+
|
|
152
|
+
# Turns every MP3 link of the album page into an AumData stored in album.aums (keyed by URL).
#
# The subtitle, languages, title, timespan and (fallback) filesize of the Nth accepted link
# come from the Nth entry of the matching @local_dump array, so #parse_dump must run first.
#
# Params:
# - doc:   parsed page; only doc.css('a') is used
# - album: receives the aums; its updated_on is bumped when an aum is new or changed
def parse_aums(doc,album)
  anchors = doc.css('a')

  return if anchors.nil?

  mp3_pattern = /\.mp3/i
  skipped_links = /
    files\.kryonespanol\.com\/audio\/
  /ix

  # Index into the @local_dump arrays; counted by hand (instead of #each_with_index())
  # because rejected links must not advance it
  pos = 0

  anchors.each do |anchor|
    next if anchor.nil?

    target = anchor['href']

    next if target.nil? || target.empty?
    next unless target =~ mp3_pattern
    next if target =~ skipped_links

    aum = AumData.new
    aum.url = Util.clean_data(target)
    aum.filename = Util.parse_url_filename(aum.url)
    aum.updated_on = @updated_on

    # Only "./" and "../" links are resolved against the album URL
    aum.url = Util.clean_link(@url,aum.url) if aum.url =~ /\A\.\.?\//

    # Filesize
    # - Getting header data is slow, so only do it when not testing
    unless DevOpts.instance.test?
      begin
        header = Util.get_url_header_data(aum.url)
        aum.filesize = header['content-length']
        aum.filesize = aum.filesize[0] if aum.filesize.is_a?(Array)
      rescue => e
        raise e.exception("#{e.message}; couldn't get header data for #{aum.url}")
      end
    end

    # Subtitle
    subtitles = @local_dump[:aum_subtitle]

    if pos < subtitles.length
      aum.subtitle = subtitles[pos]
    else
      log.warn("No subtitle for: #{aum.filename},#{aum.url}")
    end

    # Languages
    languages = @local_dump[:aum_languages]
    aum.languages = languages[pos] if pos < languages.length

    # Title
    titles = @local_dump[:aum_title]

    if pos < titles.length
      aum.title = titles[pos]
    else
      # Set title to something at least
      name = aum.filename

      if name.nil? || name.strip.empty?
        aum.title = aum.subtitle
        log.warn("Using subtitle as title: #{aum.title}")
      else
        # More descriptive than subtitle
        aum.title = name.gsub(mp3_pattern,'').strip
        log.warn("Using filename as title: #{aum.title}")
      end
    end

    # Timespan
    timespans = @local_dump[:aum_timespan]

    if pos < timespans.length
      aum.timespan = timespans[pos]
    else
      log.warn("No timespan for: #{aum.title},#{aum.subtitle},#{aum.filename},#{aum.url}")
    end

    # Filesize, if not set
    filesizes = @local_dump[:aum_filesize]

    if (aum.filesize.nil? || aum.filesize.strip.empty?) && pos < filesizes.length
      aum.filesize = filesizes[pos]
      log.warn("Using local dump filesize: #{aum.filesize}")
    end

    pos += 1

    # An unchanged aum keeps its old timestamp; anything else marks the album as updated
    unchanged = album.aums.key?(aum.url) && aum == album.aums[aum.url]

    if unchanged
      aum.updated_on = album.aums[aum.url].updated_on
    else
      album.updated_on = @updated_on
    end

    album.aums[aum.url] = aum
  end
end
|
|
251
|
+
|
|
252
|
+
# Reads the text of every <td> on the album page and sorts it into @local_dump / the album.
#
# - Cells that look like a time, a size, both, or an MP3 filename feed the aum arrays/counters
#   of @local_dump.
# - Any other cell is split into paragraphs; each paragraph is tagged by the 'aum_year_album'
#   trainer and routed by that tag (or, when #training? is true, used to train it).
# - A cell for which nothing cleared the flag is appended to album.dump.
#
# @local_dump must already be set up (done by #parse_site).
#
# Params:
# - doc:   parsed page; only doc.css('td') is used
# - album: its dump is reset; mini_desc and main_desc are rebuilt when such paragraphs are found
def parse_dump(doc,album)
  album.dump = []
  cells = doc.css('td')

  return if cells.nil?

  mp3_name_regex = /\.mp3[[:space:]]*\z/i
  # 2017 "Petra, Jordan (5)" has a ":" in the megabytes cell
  megabytes_regex = /\A[[:space:]]*[[:digit:]]+(\.|\:|[[:digit:]]|[[:space:]])*megabytes[[:space:]]*\z/i
  # 2017 "Monument Valley Tour (11)" has a "." in the minutes cell
  # 2017 "SUMMER LIGHT CONFERENCE PANEL (1)" is a special case ("One hour 6 minutes - (66 minutes)")
  minutes_regex = /
    \A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*(minutes|Min)[[:space:]]*\z|
    \([[:space:]]*[[:digit:]]+[[:space:]]+minutes[[:space:]]*\)[[:space:]]*\z
  /ix
  # 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" doesn't have the word "megabytes"
  bare_number_regex = /\A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*\z/i
  # 2015 ones have a lot of "13:12 Min - 15.9 megs"
  minutes_and_megs_regex = /\A[[:space:]]*[[:digit:]]+[\:\.][[:digit:]]+[[:space:]]+Min[[:space:]]+\-[[:space:]]+[[:digit:]]+\.?[[:digit:]]*[[:space:]]*megs/i

  sizes_seen = 0
  times_seen = 0

  cells.each do |cell|
    next if cell.nil?
    next if cell.content.nil?

    raw_text = Util.clean_charset(cell.content)
    text = Util.clean_data(raw_text)

    next if text.empty?

    # Cleared by every branch that consumes the cell
    keep_in_dump = true

    case text
    when minutes_regex
      @local_dump[:aum_timespan].push(TimespanData.new(text).to_s)
      times_seen += 1
      keep_in_dump = false
    when megabytes_regex
      @local_dump[:aum_filesize].push(text)
      sizes_seen += 1
      keep_in_dump = false
    when bare_number_regex
      # Time is usually before size, so equal counts mean this number is a time
      if times_seen == sizes_seen
        @local_dump[:aum_timespan].push(TimespanData.new(text).to_s)
        times_seen += 1
      else
        @local_dump[:aum_filesize].push(text)
        sizes_seen += 1
      end

      keep_in_dump = false
    when minutes_and_megs_regex
      time_part,size_part = text.split(/[[:space:]]*\-[[:space:]]*/) # Split on '-'

      @local_dump[:aum_timespan].push(TimespanData.new(time_part).to_s)
      times_seen += 1
      @local_dump[:aum_filesize].push(size_part)
      sizes_seen += 1

      keep_in_dump = false
    when mp3_name_regex
      @local_dump[:aums] += 1
      keep_in_dump = false
    else
      # Paragraphs: trim the cell, then break it on runs of 2+ line separators
      trimmed = raw_text.sub(/\A[[:space:]]+/,'').sub(/[[:space:]]+\z/,'')

      trimmed.split(/[\r\n\p{Zl}\p{Zp}]{2,}/).each do |par|
        par = Util.fix_shortwith_text(par.gsub(/[[:blank:]]+/,' ').strip)

        next if par.empty?

        if @training
          if @trainers['aum_year_album'].train(par) == 'album_mini_desc'
            par.split(/\n+/).each { |line| @trainers['aum_year_album_mini_desc'].train(line) }
          end

          next
        end

        tag = @trainers['aum_year_album'].tag(par)

        # For 2017 "RETURN TO LEMURIA (7)"
        if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-[[:space:]]+Kalei[[:space:]]+-[[:space:]]+John[[:space:]]+-[[:space:]]+Amber[[:space:]]*\z/i
          tag = 'aum_title'
          log.warn("Changing tag to aum_title: #{Util.clean_data(par)}")
        end

        # The aum_* tags used to sit behind a "has a header been seen yet?" check, but that
        # check is hard-wired to true, so every tag is handled in this single case
        case tag
        when 'album_title'
          @local_dump[:album_title] ||= true
        when 'album_dates'
          @local_dump[:album_dates] ||= true
        when 'album_locations'
          @local_dump[:album_locations] ||= true
        when 'album_mini_desc'
          par.split(/\n+/).each do |line|
            line = Util.clean_data(line)

            next if line.empty?

            case @trainers['aum_year_album_mini_desc'].tag(line)
            when 'desc'
              if @local_dump[:album_mini_desc]
                # Later desc lines are joined onto the first one
                album.mini_desc << ' | ' unless album.mini_desc.strip.empty?
                album.mini_desc << line
              else
                @local_dump[:album_mini_desc] = true
                album.mini_desc = line
              end
            when 'ignore'
              log.warn("Excluding mini desc content: #{line}")
            end
          end

          keep_in_dump = false
        when 'album_main_desc'
          if @local_dump[:album_main_desc]
            album.main_desc << "\n\n" unless album.main_desc.strip.empty?
          else
            @local_dump[:album_main_desc] = true
            album.main_desc = ''.dup
          end

          par.split(/\n+/).each do |line|
            album.main_desc << Util.clean_data(line) << "\n"
          end

          album.main_desc = album.main_desc.strip # Remove last newline
          keep_in_dump = false
        when 'ignore'
          log.warn("Excluding content: #{Util.clean_data(par)}")
          keep_in_dump = false
        when 'aum_subtitle'
          @local_dump[:aum_subtitle].push(Util.clean_data(par))
          keep_in_dump = false
        when 'aum_languages'
          # The language text is also kept as the subtitle
          cleaned = Util.clean_data(par)
          @local_dump[:aum_languages].push(Iso.languages.find_by_kryon(cleaned))
          @local_dump[:aum_subtitle].push(cleaned)
          keep_in_dump = false
        when 'aum_title'
          @local_dump[:aum_title].push(Util.clean_data(par))

          # Special case for 2017 "LISBON, PORTUGAL (Fatima Tour) (3)"
          if par =~ /\A[[:space:]]*Lisbon[[:space:]]+Channeling[[:space:]]+1[[:space:]]*\z/i
            @local_dump[:aum_title].push('Lisbon Channeling 2')
            @local_dump[:aum_title].push('Lisbon Channeling 3')
            log.warn("Adding aum_titles for: #{Util.clean_data(par)}")
          end
          # For 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" & "KRYON INDIA-NEPAL TOUR PART 2 (8)"
          if par =~ /\A[[:space:]]*PAGE[[:space:]]*(ONE|TWO)[[:space:]]*\z/i
            dropped = @local_dump[:aum_title].pop
            log.warn("Ignoring aum title: #{dropped}")
          end

          keep_in_dump = false
        when 'aum_filename'
          keep_in_dump = false
        end
      end
    end

    # For now, only the cleaned text is dumped (not the cell's html); if the font size is big,
    # it's bad for mobile anyway
    album.dump.push(text) if keep_in_dump
  end
end
|
|
447
|
+
|
|
448
|
+
# Turns every <img> of the album page (except known site graphics) into a PicData stored in
# album.pics, keyed by the picture's URL.
#
# Params:
# - doc:   parsed page; only doc.css('img') is used
# - album: receives the pics; its updated_on is bumped when a pic is new or changed
def parse_pics(doc,album)
  images = doc.css('img')

  return if images.nil?

  # Buttons, menus and other site graphics that are not album pictures
  site_graphics = /
    buttonMP3\.png|
    freedownloadtype\.gif|
    handani\.gif|
    Kryonglobe\.jpg|
    MP3\-download\.jpg|
    MP3\-graphic\(SM\)\.jpg|
    NavMenu\_AUDIOmaster\.png|
    NavMenu\_master\.png|
    testimonials\.png
  /ix

  images.each do |image|
    next if image.nil?

    source = image['src']

    next if source.nil? || source.empty?

    if source =~ site_graphics
      log.warn("Excluding image: #{source}")
      next
    end

    pic = PicData.new

    pic.url = Util.clean_link(url,source)
    pic.filename = Util.parse_url_filename(pic.url)

    pic.alt = image['alt']
    pic.alt = '' if Util.empty_s?(pic.alt)
    pic.caption = ''

    # Without alt text, name the pic after its file (minus the extension)
    if Util.empty_s?(pic.alt)
      pic.name = File.basename(pic.filename,File.extname(pic.filename))
    else
      pic.name = pic.alt
    end

    pic.updated_on = @updated_on

    # An unchanged pic keeps its old timestamp; anything else marks the album as updated
    unchanged = album.pics.key?(pic.url) && pic == album.pics[pic.url]

    if unchanged
      pic.updated_on = album.pics[pic.url].updated_on
    else
      album.updated_on = @updated_on
    end

    album.pics[pic.url] = pic
  end
end
|
|
498
|
+
end
|
|
499
|
+
end
|