unclekryon 0.4.9.pre.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +34 -0
- data/Gemfile.lock +43 -0
- data/LICENSE +674 -0
- data/README.md +55 -0
- data/Rakefile +59 -0
- data/bin/unclekryon +30 -0
- data/iso/can_provs_terrs.yaml +54 -0
- data/iso/countries.yaml +3050 -0
- data/iso/iso.yaml +8 -0
- data/iso/languages.yaml +5641 -0
- data/iso/regions.yaml +42 -0
- data/iso/subregions.yaml +6 -0
- data/iso/usa_states.yaml +230 -0
- data/lib/unclekryon.rb +384 -0
- data/lib/unclekryon/data/album_data.rb +147 -0
- data/lib/unclekryon/data/artist_data.rb +109 -0
- data/lib/unclekryon/data/artist_data_data.rb +146 -0
- data/lib/unclekryon/data/aum_data.rb +75 -0
- data/lib/unclekryon/data/base_data.rb +79 -0
- data/lib/unclekryon/data/pic_data.rb +76 -0
- data/lib/unclekryon/data/release_data.rb +57 -0
- data/lib/unclekryon/data/social_data.rb +39 -0
- data/lib/unclekryon/data/timespan_data.rb +70 -0
- data/lib/unclekryon/dev_opts.rb +41 -0
- data/lib/unclekryon/hacker.rb +327 -0
- data/lib/unclekryon/iso.rb +341 -0
- data/lib/unclekryon/iso/base_iso.rb +196 -0
- data/lib/unclekryon/iso/can_prov_terr.rb +113 -0
- data/lib/unclekryon/iso/country.rb +133 -0
- data/lib/unclekryon/iso/language.rb +241 -0
- data/lib/unclekryon/iso/region.rb +53 -0
- data/lib/unclekryon/iso/subregion.rb +53 -0
- data/lib/unclekryon/iso/usa_state.rb +106 -0
- data/lib/unclekryon/jsoner.rb +124 -0
- data/lib/unclekryon/log.rb +111 -0
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +499 -0
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +413 -0
- data/lib/unclekryon/server.rb +29 -0
- data/lib/unclekryon/trainer.rb +231 -0
- data/lib/unclekryon/uploader.rb +29 -0
- data/lib/unclekryon/util.rb +228 -0
- data/lib/unclekryon/version.rb +26 -0
- data/unclekryon.gemspec +67 -0
- metadata +189 -0
@@ -0,0 +1,499 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
#--
|
6
|
+
# This file is part of UncleKryon-server.
|
7
|
+
# Copyright (c) 2017-2019 Jonathan Bradley Whited (@esotericpig)
|
8
|
+
#
|
9
|
+
# UncleKryon-server is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
21
|
+
#++
|
22
|
+
|
23
|
+
|
24
|
+
require 'nokogiri'
|
25
|
+
require 'open-uri'
|
26
|
+
|
27
|
+
require 'unclekryon/dev_opts'
|
28
|
+
require 'unclekryon/iso'
|
29
|
+
require 'unclekryon/log'
|
30
|
+
require 'unclekryon/trainer'
|
31
|
+
require 'unclekryon/util'
|
32
|
+
|
33
|
+
require 'unclekryon/data/album_data'
|
34
|
+
require 'unclekryon/data/aum_data'
|
35
|
+
require 'unclekryon/data/pic_data'
|
36
|
+
require 'unclekryon/data/timespan_data'
|
37
|
+
|
38
|
+
module UncleKryon
|
39
|
+
class KryonAumYearAlbumParser
|
40
|
+
include Logging
|
41
|
+
|
42
|
+
attr_accessor :album
|
43
|
+
attr_accessor :artist
|
44
|
+
attr_accessor :options
|
45
|
+
attr_accessor :trainers
|
46
|
+
attr_accessor :training
|
47
|
+
attr_accessor :updated_on
|
48
|
+
attr_accessor :url
|
49
|
+
|
50
|
+
alias_method :training?,:training
|
51
|
+
|
52
|
+
# Builds a parser for one album page and registers the two trainers it uses.
#
# @param artist         stored in @artist; #parse_site reads and writes its +albums+
# @param url            stored in @url; the album page address used by #parse_site
# @param album          stored in @album as the initial album (may be nil)
# @param training       stored in @training; exposed through +training?+
# @param train_filepath handed unchanged to +Trainers.new+
# @param updated_on     timestamp stamped onto parsed data; when it is empty
#                       (per +Util.empty_s?+), the current time formatted by
#                       +Util.format_datetime+ is used instead
# @param options        any other keyword arguments, stored untouched in @options
def initialize(artist=nil,url=nil,album: nil,training: false,train_filepath: nil,updated_on: nil,
               **options)
  @album = album
  @artist = artist
  @options = options
  @url = url

  # Keep a caller-supplied timestamp. The previous modifier-if form only assigned
  # when +updated_on+ was empty, so an explicit value was dropped and
  # @updated_on was left nil.
  @updated_on = Util.empty_s?(updated_on) ? Util.format_datetime(DateTime.now()) : updated_on

  @trainers = Trainers.new(train_filepath)
  @training = training

  # Short keys are what is typed while training; values are the tag names that
  # #parse_dump switches on.
  @trainers['aum_year_album'] = Trainer.new({
      'alds'=>'album_dates',
      'altt'=>'album_title',
      'allo'=>'album_locations',
      'almi'=>'album_mini_desc',
      'alma'=>'album_main_desc',
      'aust'=>'aum_subtitle',
      'aulg'=>'aum_languages', # See 2018 "Montreal QB w/Robert Coxon (3)" aums' subtitles "FRENCH"
      'autt'=>'aum_title',
      'autm'=>'aum_timespan',
      'ausz'=>'aum_filesize',
      'aufn'=>'aum_filename',
      'audu'=>'dump',
      'i'   =>'ignore'
    })
  # Second trainer: classifies the individual lines of an album's mini description.
  @trainers['aum_year_album_mini_desc'] = Trainer.new({
      'd'=>'date',
      'l'=>'location',
      's'=>'desc',
      'i'=>'ignore'
    })
end
|
85
|
+
|
86
|
+
# Downloads the album page at @url, parses it, and stores the resulting album
# in +@artist.albums[@url]+.
#
# @param artist replaces @artist when not nil
# @param url    replaces @url when not nil
# @return the parsed album (@album), or nil when the URL matches the
#         hand-maintained exclusion list
# @raise [ArgumentError] if no artist is set, or the URL is nil/blank
def parse_site(artist=nil,url=nil)
  @artist = artist unless artist.nil?()
  @url = url unless url.nil?()

  # URLs that return 404 or are empty; fix by hand
  exclude_urls = /
    awakeningzone\.com\/Episode\.aspx\?EpisodeID\=|
    www\.talkshoe\.com\/talkshoe\/web\/audioPop\.jsp\?episodeId\=
  /ix

  if @url =~ exclude_urls
    log.warn("Excluding Album URL #{@url}")
    return
  end

  @trainers.load_file()

  raise ArgumentError,"Artist cannot be nil" if @artist.nil?()
  raise ArgumentError,"URL cannot be empty" if @url.nil?() || (@url = @url.strip()).empty?()

  # Album data (flags are okay) should never go in this, only for aums, pics, etc.
  @local_dump = {
    :album_dates=>false,
    :album_title=>false,
    :album_locations=>false,
    :album_mini_desc=>false,
    :album_main_desc=>false,
    :aums=>0,
    :aum_subtitle=>[],
    :aum_languages=>[],
    :aum_title=>[],
    :aum_timespan=>[],
    :aum_filesize=>[]  ,
    :aum_filename=>[]
  }

  # Force 'utf-8'
  # - See charset "X-MAC-ROMAN" in 2017 "The Discovery Series", 2016 "Kryon in Budapest (5)"
  #
  # Prefer URI.open: open-uri's patching of Kernel#open is deprecated since
  # Ruby 2.7 and removed in 3.0, and Kernel#open runs a command for strings
  # starting with "|". Older Rubies without a public URI.open fall back to the
  # original call. The block form closes the handle once the page is parsed.
  read_doc = ->(io) { Nokogiri::HTML(io,nil,'utf-8') }
  doc = URI.respond_to?(:open) ? URI.open(@url,&read_doc) : open(@url,&read_doc)

  old_album = @artist.albums[@url]

  # A URL with no album yet used to fail here: nil.clone() is nil, so the
  # assignments below raised NoMethodError.
  # NOTE(review): assumes AlbumData has a no-argument constructor, like the
  # AumData/PicData calls in this file - confirm.
  @album = old_album.nil?() ? AlbumData.new() : old_album.clone()
  @album.updated_on = @updated_on
  @album.url = @url

  # Register a brand-new album right away so it is kept even when training returns early
  if old_album.nil?()
    @artist.albums[@url] = @album
  end

  parse_dump(doc,@album) # Must be first because other methods rely on @local_dump

  return @album if @training # Currently, no other training occurs

  parse_pics(doc,@album)
  parse_aums(doc,@album)

  # Nothing changed, so keep the previous timestamp
  if @album == old_album
    @album.updated_on = old_album.updated_on
  end

  @artist.albums[@url] = @album

  return @album
end
|
151
|
+
|
152
|
+
# Turns every MP3 link in +doc+ into an AumData entry stored in
# +album.aums+, keyed by the aum's URL.
#
# Subtitle, languages, title, timespan and (fallback) filesize are taken by
# position from the arrays that #parse_dump collected in @local_dump, so
# #parse_dump must have run first.
#
# @param doc   object answering +css('a')+ with link nodes that answer +['href']+
# @param album album whose +aums+ and +updated_on+ are updated
def parse_aums(doc,album)
  anchors = doc.css('a')

  return if anchors.nil?

  mp3_regex = /\.mp3/i
  skipped_links = /
    files\.kryonespanol\.com\/audio\/
  /ix

  # Position of the current aum inside the @local_dump arrays. It only advances
  # for links that are kept, which is why #each_with_index() is not used.
  aum_index = 0

  anchors.each do |anchor|
    next if anchor.nil?

    href = anchor['href']

    next if href.nil? || href.empty?
    next unless href =~ mp3_regex
    next if href =~ skipped_links

    aum = AumData.new
    aum.url = Util.clean_data(href)
    aum.filename = Util.parse_url_filename(aum.url)
    aum.updated_on = @updated_on

    # Only "./" and "../" links are resolved against the album page URL
    aum.url = Util.clean_link(@url,aum.url) if aum.url =~ /\A\.\.?\//

    # Filesize: getting header data is slow, so only do it when not testing
    unless DevOpts.instance.test?()
      begin
        header = Util.get_url_header_data(aum.url)
        aum.filesize = header['content-length']
        aum.filesize = aum.filesize[0] if aum.filesize.is_a?(Array)
      rescue => e
        raise e.exception("#{e.message}; couldn't get header data for #{aum.url}")
      end
    end

    # Subtitle
    subtitles = @local_dump[:aum_subtitle]

    if aum_index >= subtitles.length
      log.warn("No subtitle for: #{aum.filename},#{aum.url}")
    else
      aum.subtitle = subtitles[aum_index]
    end

    # Languages
    languages = @local_dump[:aum_languages]
    aum.languages = languages[aum_index] if aum_index < languages.length

    # Title
    titles = @local_dump[:aum_title]

    if aum_index < titles.length
      aum.title = titles[aum_index]
    else
      # Set title to something at least
      fallback_name = aum.filename

      if fallback_name.nil?() || fallback_name.strip().empty?()
        aum.title = aum.subtitle
        log.warn("Using subtitle as title: #{aum.title}")
      else
        # More descriptive than subtitle
        aum.title = fallback_name.gsub(mp3_regex,'').strip()
        log.warn("Using filename as title: #{aum.title}")
      end
    end

    # Timespan
    timespans = @local_dump[:aum_timespan]

    if aum_index < timespans.length
      aum.timespan = timespans[aum_index]
    else
      log.warn("No timespan for: #{aum.title},#{aum.subtitle},#{aum.filename},#{aum.url}")
    end

    # Filesize, if the header lookup did not provide one
    if (aum.filesize.nil?() || aum.filesize.strip().empty?) && aum_index < @local_dump[:aum_filesize].length
      aum.filesize = @local_dump[:aum_filesize][aum_index]
      log.warn("Using local dump filesize: #{aum.filesize}")
    end

    aum_index += 1

    # An aum equal to the stored one keeps its old timestamp; anything else
    # marks the whole album as updated.
    unchanged = album.aums.key?(aum.url) && aum == album.aums[aum.url]

    if unchanged
      aum.updated_on = album.aums[aum.url].updated_on
    else
      album.updated_on = @updated_on
    end

    album.aums[aum.url] = aum
  end
end
|
251
|
+
|
252
|
+
# Walks every table cell of +doc+ and sorts its text into @local_dump (aum
# timespans, filesizes, subtitles, languages, titles, and album flags), the
# album's mini/main descriptions, or +album.dump+ for anything unrecognized.
#
# When @training is set, paragraphs are only fed to the trainers and nothing
# is tagged.
#
# @param doc   object answering +css('td')+ with cells that answer +content+
# @param album album whose +dump+, +mini_desc+ and +main_desc+ are written
def parse_dump(doc,album)
  album.dump = []
  cells = doc.css('td')

  return if cells.nil?

  filename_regex = /\.mp3[[:space:]]*\z/i
  # 2017 "Petra, Jordan (5)" has a ":" in the megabytes cell
  size_regex = /\A[[:space:]]*[[:digit:]]+(\.|\:|[[:digit:]]|[[:space:]])*megabytes[[:space:]]*\z/i
  # 2017 "Monument Valley Tour (11)" has a "." in the minutes cell
  # 2017 "SUMMER LIGHT CONFERENCE PANEL (1)" is a special case ("One hour 6 minutes - (66 minutes)")
  time_regex = /
    \A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*(minutes|Min)[[:space:]]*\z|
    \([[:space:]]*[[:digit:]]+[[:space:]]+minutes[[:space:]]*\)[[:space:]]*\z
  /ix
  # 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" doesn't have the word "megabytes"
  time_or_size_regex = /\A[[:space:]]*[[:digit:]]+(\:|\.|[[:digit:]]|[[:space:]])*\z/i
  # 2015 ones have a lot of "13:12 Min - 15.9 megs"
  time_and_size_regex = /\A[[:space:]]*[[:digit:]]+[\:\.][[:digit:]]+[[:space:]]+Min[[:space:]]+\-[[:space:]]+[[:digit:]]+\.?[[:digit:]]*[[:space:]]*megs/i

  size_count = 0
  time_count = 0

  cells.each do |cell|
    next if cell.nil?
    next if cell.content.nil?

    raw_text = Util.clean_charset(cell.content)
    text = Util.clean_data(raw_text)

    next if text.empty?

    # Cleared as soon as any part of the cell is recognized
    keep_in_dump = true

    case text
    when time_regex
      @local_dump[:aum_timespan].push(TimespanData.new(text).to_s())
      keep_in_dump = false
      time_count += 1
    when size_regex
      @local_dump[:aum_filesize].push(text)
      keep_in_dump = false
      size_count += 1
    when time_or_size_regex
      # Bare number: time is usually before size, so equal counts mean a time is due
      if time_count == size_count
        @local_dump[:aum_timespan].push(TimespanData.new(text).to_s())
        time_count += 1
      else
        @local_dump[:aum_filesize].push(text)
        size_count += 1
      end

      keep_in_dump = false
    when time_and_size_regex
      time_part,size_part = text.split(/[[:space:]]*\-[[:space:]]*/) # Split on '-'

      @local_dump[:aum_timespan].push(TimespanData.new(time_part).to_s())
      time_count += 1
      @local_dump[:aum_filesize].push(size_part)
      size_count += 1

      keep_in_dump = false
    when filename_regex
      @local_dump[:aums] += 1
      keep_in_dump = false
    else
      # Paragraphs: trim the cell, then split on runs of 2+ line separators
      paragraphs = raw_text.gsub(/\A[[:space:]]+/,'').gsub(/[[:space:]]+\z/,'')
      paragraphs = paragraphs.split(/[\r\n\p{Zl}\p{Zp}]{2,}/)

      paragraphs.each() do |par|
        par = Util.fix_shortwith_text(par.gsub(/[[:blank:]]+/,' ').strip())

        next if par.empty?()

        if @training
          # Mini descriptions are additionally trained line by line
          if @trainers['aum_year_album'].train(par) == 'album_mini_desc'
            par.split(/\n+/).each() do |line|
              @trainers['aum_year_album_mini_desc'].train(line)
            end
          end

          next
        end

        tag = @trainers['aum_year_album'].tag(par)

        # For 2017 "RETURN TO LEMURIA (7)"
        if par =~ /\A[[:space:]]*MEDITATION[[:space:]]+-[[:space:]]+Kalei[[:space:]]+-[[:space:]]+John[[:space:]]+-[[:space:]]+Amber[[:space:]]*\z/i
          tag = 'aum_title'
          log.warn("Changing tag to aum_title: #{Util.clean_data(par)}")
        end

        case tag
        when 'album_title'
          # Only the flag is recorded; the text itself still goes to the dump
          @local_dump[:album_title] ||= true
        when 'album_dates'
          @local_dump[:album_dates] ||= true
        when 'album_locations'
          @local_dump[:album_locations] ||= true
        when 'album_mini_desc'
          par.split(/\n+/).each() do |line|
            line = Util.clean_data(line)

            next if line.empty?()

            case @trainers['aum_year_album_mini_desc'].tag(line)
            when 'desc'
              if @local_dump[:album_mini_desc]
                # Later desc lines are appended, separated by ' | '
                album.mini_desc << ' | ' if !album.mini_desc.strip().empty?()
                album.mini_desc << line
              else
                @local_dump[:album_mini_desc] = true
                album.mini_desc = line
              end
            when 'ignore'
              log.warn("Excluding mini desc content: #{line}")
            end
          end

          keep_in_dump = false
        when 'album_main_desc'
          if @local_dump[:album_main_desc]
            album.main_desc << "\n\n" if !album.main_desc.strip().empty?()
          else
            @local_dump[:album_main_desc] = true
            album.main_desc = ''.dup()
          end

          par.split(/\n+/).each() do |line|
            album.main_desc << Util.clean_data(line) << "\n"
          end

          album.main_desc = album.main_desc.strip() # Remove last newline
          keep_in_dump = false
        when 'ignore'
          log.warn("Excluding content: #{Util.clean_data(par)}")
          keep_in_dump = false
        when 'aum_subtitle'
          @local_dump[:aum_subtitle].push(Util.clean_data(par))
          keep_in_dump = false
        when 'aum_languages'
          # A language line is recorded both as a language and as the subtitle
          language_text = Util.clean_data(par)
          @local_dump[:aum_languages].push(Iso.languages.find_by_kryon(language_text))
          @local_dump[:aum_subtitle].push(language_text)
          keep_in_dump = false
        when 'aum_title'
          @local_dump[:aum_title].push(Util.clean_data(par))

          # Special case for 2017 "LISBON, PORTUGAL (Fatima Tour) (3)"
          if par =~ /\A[[:space:]]*Lisbon[[:space:]]+Channeling[[:space:]]+1[[:space:]]*\z/i
            @local_dump[:aum_title].push('Lisbon Channeling 2')
            @local_dump[:aum_title].push('Lisbon Channeling 3')
            log.warn("Adding aum_titles for: #{Util.clean_data(par)}")
          end
          # For 2017 "KRYON INDIA-NEPAL TOUR PART 1 (10)" & "KRYON INDIA-NEPAL TOUR PART 2 (8)"
          if par =~ /\A[[:space:]]*PAGE[[:space:]]*(ONE|TWO)[[:space:]]*\z/i
            dropped_title = @local_dump[:aum_title].pop()
            log.warn("Ignoring aum title: #{dropped_title}")
          end

          keep_in_dump = false
        when 'aum_filename'
          keep_in_dump = false
        end
      end
    end

    # For now, only the cleaned text is kept (not the cell's html); if the
    # font size is big, it's bad for mobile anyway
    album.dump.push(text) if keep_in_dump
  end
end
|
447
|
+
|
448
|
+
# Turns every non-excluded image in +doc+ into a PicData entry stored in
# +album.pics+, keyed by the pic's URL.
#
# @param doc   object answering +css('img')+ with image nodes that answer
#              +['src']+ and +['alt']+
# @param album album whose +pics+ and +updated_on+ are updated
def parse_pics(doc,album)
  images = doc.css('img')

  return if images.nil?

  # Images matched by filename and skipped
  skipped_images = /
    buttonMP3\.png|
    freedownloadtype\.gif|
    handani\.gif|
    Kryonglobe\.jpg|
    MP3\-download\.jpg|
    MP3\-graphic\(SM\)\.jpg|
    NavMenu\_AUDIOmaster\.png|
    NavMenu\_master\.png|
    testimonials\.png
  /ix

  images.each do |image|
    next if image.nil?

    src = image['src']

    next if src.nil? || src.empty?

    if src =~ skipped_images
      log.warn("Excluding image: #{src}")
      next
    end

    pic = PicData.new()

    pic.url = Util.clean_link(url,src)
    pic.filename = Util.parse_url_filename(pic.url)

    pic.alt = image['alt']
    pic.alt = '' if Util.empty_s?(pic.alt)
    pic.caption = ''

    # Without alt text, name the pic after its filename minus the extension
    pic.name =
      if Util.empty_s?(pic.alt)
        File.basename(pic.filename,File.extname(pic.filename))
      else
        pic.alt
      end
    pic.updated_on = @updated_on

    # A pic equal to the stored one keeps its old timestamp; anything else
    # marks the whole album as updated.
    unchanged = album.pics.key?(pic.url) && pic == album.pics[pic.url]

    if unchanged
      pic.updated_on = album.pics[pic.url].updated_on
    else
      album.updated_on = @updated_on
    end

    album.pics[pic.url] = pic
  end
end
|
498
|
+
end
|
499
|
+
end
|