unclekryon 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -19
- data/Gemfile.lock +19 -23
- data/README.md +3 -2
- data/Rakefile +11 -30
- data/bin/unclekryon +4 -15
- data/hax/kryon.yaml +28 -0
- data/hax/kryon_aums_2002-2005.yaml +460 -0
- data/hax/kryon_aums_2006.yaml +601 -0
- data/hax/kryon_aums_2007.yaml +1024 -0
- data/hax/kryon_aums_2008.yaml +950 -0
- data/hax/kryon_aums_2009.yaml +496 -0
- data/hax/kryon_aums_2010.yaml +1443 -0
- data/hax/kryon_aums_2011.yaml +1458 -0
- data/hax/kryon_aums_2012.yaml +2123 -0
- data/hax/kryon_aums_2013.yaml +1647 -0
- data/hax/kryon_aums_2014.yaml +2478 -0
- data/hax/kryon_aums_2015.yaml +3386 -0
- data/hax/kryon_aums_2016.yaml +3476 -0
- data/hax/kryon_aums_2017.yaml +3712 -0
- data/hax/kryon_aums_2018.yaml +3654 -0
- data/lib/unclekryon.rb +165 -165
- data/lib/unclekryon/data/album_data.rb +74 -82
- data/lib/unclekryon/data/artist_data.rb +24 -36
- data/lib/unclekryon/data/artist_data_data.rb +29 -41
- data/lib/unclekryon/data/aum_data.rb +20 -32
- data/lib/unclekryon/data/base_data.rb +27 -39
- data/lib/unclekryon/data/pic_data.rb +25 -37
- data/lib/unclekryon/data/release_data.rb +14 -26
- data/lib/unclekryon/data/social_data.rb +6 -18
- data/lib/unclekryon/data/timespan_data.rb +16 -28
- data/lib/unclekryon/dev_opts.rb +7 -19
- data/lib/unclekryon/hacker.rb +119 -133
- data/lib/unclekryon/iso.rb +128 -138
- data/lib/unclekryon/iso/base_iso.rb +69 -81
- data/lib/unclekryon/iso/can_prov_terr.rb +34 -47
- data/lib/unclekryon/iso/country.rb +36 -49
- data/lib/unclekryon/iso/language.rb +86 -96
- data/lib/unclekryon/iso/region.rb +11 -25
- data/lib/unclekryon/iso/subregion.rb +11 -25
- data/lib/unclekryon/iso/usa_state.rb +28 -41
- data/lib/unclekryon/jsoner.rb +31 -50
- data/lib/unclekryon/log.rb +34 -46
- data/lib/unclekryon/parsers/kryon_aum_year_album_parser.rb +163 -167
- data/lib/unclekryon/parsers/kryon_aum_year_parser.rb +122 -127
- data/lib/unclekryon/server.rb +8 -17
- data/lib/unclekryon/trainer.rb +69 -83
- data/lib/unclekryon/uploader.rb +8 -17
- data/lib/unclekryon/util.rb +80 -92
- data/lib/unclekryon/version.rb +4 -16
- data/train/kryon.yaml +6077 -0
- data/unclekryon.gemspec +44 -42
- metadata +59 -16
data/lib/unclekryon/server.rb
CHANGED
@@ -1,29 +1,20 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
module UncleKryon
|
27
13
|
class Server
|
14
|
+
# TODO: server
|
28
15
|
end
|
29
16
|
end
|
17
|
+
|
18
|
+
if $PROGRAM_NAME == __FILE__
|
19
|
+
# Test here.
|
20
|
+
end
|
data/lib/unclekryon/trainer.rb
CHANGED
@@ -1,28 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
require 'nbayes'
|
27
13
|
|
28
14
|
require 'unclekryon/dev_opts'
|
@@ -35,197 +21,197 @@ module UncleKryon
|
|
35
21
|
attr_accessor :max_tag_length
|
36
22
|
attr_accessor :tags
|
37
23
|
attr_accessor :trainer
|
38
|
-
|
24
|
+
|
39
25
|
def self.to_tokens(text)
|
40
26
|
tokens = []
|
41
|
-
|
42
|
-
text.split(/[[:space:]]+/).each
|
27
|
+
|
28
|
+
text.split(/[[:space:]]+/).each do |t|
|
43
29
|
t.gsub!(/[[:punct:][:cntrl:]]+/,'')
|
44
|
-
tokens.push(t) if !t.empty?
|
30
|
+
tokens.push(t) if !t.empty?
|
45
31
|
end
|
46
|
-
|
32
|
+
|
47
33
|
return tokens
|
48
34
|
end
|
49
|
-
|
35
|
+
|
50
36
|
def initialize(tags={})
|
51
37
|
@max_tag_id_length = 0
|
52
38
|
@max_tag_length = 0
|
53
39
|
@tags = tags
|
54
|
-
@trainer = NBayes::Base.new
|
55
|
-
|
56
|
-
init_lengths
|
40
|
+
@trainer = NBayes::Base.new
|
41
|
+
|
42
|
+
init_lengths
|
57
43
|
end
|
58
|
-
|
59
|
-
def init_lengths
|
44
|
+
|
45
|
+
def init_lengths
|
60
46
|
@max_tag_id_length = 0
|
61
47
|
@max_tag_length = 0
|
62
|
-
|
48
|
+
|
63
49
|
@tags.each do |id,tag|
|
64
50
|
@max_tag_id_length = id.length if id.length > @max_tag_id_length
|
65
51
|
@max_tag_length = tag.length if tag.length > @max_tag_length
|
66
52
|
end
|
67
|
-
|
53
|
+
|
68
54
|
@max_tag_id_length += 2 # Indention
|
69
55
|
@max_tag_id_length = 7 if @max_tag_id_length < 7 # For "<Enter>" option
|
70
56
|
@max_tag_length = -@max_tag_length # Left justify
|
71
57
|
end
|
72
|
-
|
58
|
+
|
73
59
|
def train(text)
|
74
|
-
guess_tag =
|
60
|
+
guess_tag = tag(text) # Try and guess
|
75
61
|
tokens = self.class.to_tokens(text)
|
76
|
-
|
62
|
+
|
77
63
|
puts '#################'
|
78
64
|
puts '# Training Tags #'
|
79
65
|
puts '#################'
|
80
|
-
|
66
|
+
|
81
67
|
tf = '%%%is = %%%is' % [@max_tag_id_length,@max_tag_length]
|
82
68
|
@tags.each do |id,tag|
|
83
69
|
puts tf % [id,tag]
|
84
70
|
end
|
85
71
|
puts "<Enter> = Guess: #{guess_tag}"
|
86
|
-
|
72
|
+
|
87
73
|
puts '-----------------'
|
88
74
|
puts text
|
89
75
|
puts '-----------------'
|
90
76
|
print 'What is it? '
|
91
|
-
|
77
|
+
|
92
78
|
# Use -t/--test option
|
93
|
-
if DevOpts.instance.test?
|
94
|
-
puts
|
79
|
+
if DevOpts.instance.test?
|
80
|
+
puts(tag_id = @tags.keys.sample) # For testing purposes
|
95
81
|
else
|
96
|
-
tag_id =
|
82
|
+
tag_id = $stdin.gets.chomp.strip # $stdin because app accepts args
|
97
83
|
end
|
98
84
|
puts
|
99
|
-
|
100
|
-
if tag_id.empty?
|
85
|
+
|
86
|
+
if tag_id.empty?
|
101
87
|
raise "Invalid guess tag[#{guess_tag}]" if !@tags.value?(guess_tag)
|
102
88
|
tag = guess_tag
|
103
89
|
else
|
104
90
|
raise "Invalid tag ID[#{tag_id}]" if !@tags.include?(tag_id)
|
105
91
|
tag = @tags[tag_id]
|
106
92
|
end
|
107
|
-
|
93
|
+
|
108
94
|
@trainer.train(tokens,tag)
|
109
|
-
|
95
|
+
|
110
96
|
return tag
|
111
97
|
end
|
112
|
-
|
98
|
+
|
113
99
|
def tag(text)
|
114
100
|
return @trainer.classify(self.class.to_tokens(text)).max_class
|
115
101
|
end
|
116
|
-
|
117
|
-
def to_s
|
102
|
+
|
103
|
+
def to_s
|
118
104
|
s = ''
|
119
|
-
s << @trainer.to_yaml
|
105
|
+
s << @trainer.to_yaml
|
120
106
|
s << "\n"
|
121
|
-
s << @trainer.data.category_stats
|
122
|
-
|
107
|
+
s << @trainer.data.category_stats
|
108
|
+
|
123
109
|
return s
|
124
110
|
end
|
125
111
|
end
|
126
|
-
|
112
|
+
|
127
113
|
class Trainers
|
128
114
|
attr_accessor :filepath
|
129
115
|
attr_accessor :trainers
|
130
|
-
|
116
|
+
|
131
117
|
def initialize(filepath=nil)
|
132
118
|
@filepath = filepath
|
133
119
|
@trainers = {}
|
134
120
|
end
|
135
|
-
|
136
|
-
def load_file
|
137
|
-
if @filepath.nil?
|
121
|
+
|
122
|
+
def load_file
|
123
|
+
if @filepath.nil? || (@filepath = @filepath.strip).empty?
|
138
124
|
raise ArgumentError,'Training filepath cannot be empty'
|
139
125
|
end
|
140
|
-
|
126
|
+
|
141
127
|
if File.exist?(@filepath)
|
142
128
|
y = YAML.load_file(@filepath)
|
143
|
-
|
144
|
-
y.each
|
129
|
+
|
130
|
+
y.each do |id,trainer|
|
145
131
|
if !@trainers.key?(id)
|
146
132
|
@trainers[id] = trainer
|
147
133
|
else
|
148
134
|
@trainers[id].tags = trainer.tags.merge(@trainers[id].tags)
|
149
135
|
@trainers[id].trainer = trainer.trainer
|
150
136
|
end
|
151
|
-
|
152
|
-
@trainers[id].trainer.reset_after_import
|
153
|
-
@trainers[id].init_lengths
|
137
|
+
|
138
|
+
@trainers[id].trainer.reset_after_import
|
139
|
+
@trainers[id].init_lengths
|
154
140
|
end
|
155
141
|
end
|
156
142
|
end
|
157
|
-
|
158
|
-
def save_to_file
|
159
|
-
if @filepath.nil?
|
143
|
+
|
144
|
+
def save_to_file
|
145
|
+
if @filepath.nil? || (@filepath = @filepath.strip).empty?
|
160
146
|
raise ArgumentError,'Training filepath cannot be empty'
|
161
147
|
end
|
162
|
-
|
148
|
+
|
163
149
|
Util.mk_dirs_from_filepath(@filepath)
|
164
|
-
|
150
|
+
|
165
151
|
File.open(@filepath,'w') do |f|
|
166
|
-
f.write(to_s
|
152
|
+
f.write(to_s)
|
167
153
|
end
|
168
154
|
end
|
169
|
-
|
155
|
+
|
170
156
|
def [](id)
|
171
157
|
@trainers[id]
|
172
158
|
end
|
173
|
-
|
159
|
+
|
174
160
|
def []=(id,trainer)
|
175
161
|
@trainers[id] = trainer
|
176
162
|
end
|
177
|
-
|
178
|
-
def to_s
|
163
|
+
|
164
|
+
def to_s
|
179
165
|
return YAML.dump(@trainers)
|
180
166
|
end
|
181
167
|
end
|
182
168
|
end
|
183
169
|
|
184
|
-
if $
|
170
|
+
if $PROGRAM_NAME == __FILE__
|
185
171
|
fp = 'test.yaml'
|
186
172
|
ts = UncleKryon::Trainers.new(fp)
|
187
|
-
|
173
|
+
|
188
174
|
ctx = ['dark black bitter',
|
189
175
|
'double espresso steamed milk foam',
|
190
176
|
'espresso steamed milk']
|
191
177
|
ttx = ['no withering and oxidation',
|
192
178
|
'broom-like, South Africa',
|
193
179
|
'young, minimal']
|
194
|
-
|
180
|
+
|
195
181
|
if File.exist?(fp)
|
196
|
-
ts.load_file
|
182
|
+
ts.load_file
|
197
183
|
puts ts
|
198
184
|
puts
|
199
|
-
|
185
|
+
|
200
186
|
puts '[Coffee]'
|
201
187
|
ctx.each do |v|
|
202
188
|
puts "'#{v}' => #{ts['coffee'].tag(v)}"
|
203
189
|
end
|
204
190
|
puts
|
205
|
-
|
191
|
+
|
206
192
|
puts '[Tea]'
|
207
193
|
ttx.each do |v|
|
208
194
|
puts "'#{v}' => #{ts['tea'].tag(v)}"
|
209
195
|
end
|
210
196
|
puts
|
211
|
-
|
197
|
+
|
212
198
|
puts 'What kind of drink would you like?'
|
213
|
-
txt =
|
199
|
+
txt = $stdin.gets.chomp.strip
|
214
200
|
puts "coffee => #{ts['coffee'].tag(txt)}"
|
215
201
|
puts "tea => #{ts['tea'].tag(txt)}"
|
216
202
|
else
|
217
203
|
ts['coffee'] = UncleKryon::Trainer.new(
|
218
|
-
{'b'=>'black','c'=>'cappuccino','l'=>'latte'})
|
204
|
+
{'b' => 'black','c' => 'cappuccino','l' => 'latte'})
|
219
205
|
ts['tea'] = UncleKryon::Trainer.new(
|
220
|
-
{'g'=>'green','r'=>'red','w'=>'white'})
|
221
|
-
|
206
|
+
{'g' => 'green','r' => 'red','w' => 'white'})
|
207
|
+
|
222
208
|
ctx.each do |v|
|
223
209
|
ts['coffee'].train(v)
|
224
210
|
end
|
225
211
|
ttx.each do |v|
|
226
212
|
ts['tea'].train(v)
|
227
213
|
end
|
228
|
-
|
229
|
-
ts.save_to_file
|
214
|
+
|
215
|
+
ts.save_to_file
|
230
216
|
end
|
231
217
|
end
|
data/lib/unclekryon/uploader.rb
CHANGED
@@ -1,29 +1,20 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
|
-
require 'bundler/setup'
|
25
|
-
|
26
12
|
module UncleKryon
|
27
13
|
class Uploader
|
14
|
+
# TODO: uploader
|
28
15
|
end
|
29
16
|
end
|
17
|
+
|
18
|
+
if $PROGRAM_NAME == __FILE__
|
19
|
+
# Test here.
|
20
|
+
end
|
data/lib/unclekryon/util.rb
CHANGED
@@ -1,30 +1,18 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of UncleKryon-server.
|
7
|
-
# Copyright (c) 2017-
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# UncleKryon-server is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU General Public License
|
20
|
-
# along with UncleKryon-server. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2017-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
12
|
+
require 'cgi'
|
24
13
|
require 'date'
|
25
14
|
require 'fileutils'
|
26
15
|
require 'uri'
|
27
|
-
|
28
16
|
require 'net/http'
|
29
17
|
|
30
18
|
require 'unclekryon/dev_opts'
|
@@ -34,71 +22,71 @@ module UncleKryon
|
|
34
22
|
module Util
|
35
23
|
DATE_FORMAT = '%F'
|
36
24
|
DATETIME_FORMAT = '%F %T'
|
37
|
-
|
25
|
+
|
38
26
|
def self.add_trail_slash(url)
|
39
27
|
#url = url + '/' if url !~ /\/\z/
|
40
28
|
#return url
|
41
|
-
|
29
|
+
|
42
30
|
return File.join(url,'')
|
43
31
|
end
|
44
|
-
|
32
|
+
|
45
33
|
def self.clean_charset(str)
|
46
34
|
return str.encode('utf-8','MacRoman',universal_newline: true) # X-MAC-ROMAN
|
47
35
|
end
|
48
|
-
|
36
|
+
|
49
37
|
def self.clean_data(str)
|
50
38
|
# Have to use "[[:space:]]" for " " and "<br/>"
|
51
39
|
# This is necessary for "<br />\s+" (see 2015 "KRYON IN LIMA, PERU (2)")
|
52
|
-
str = str.clone
|
40
|
+
str = str.clone
|
53
41
|
str.gsub!(/[[:space:]]+/,' ') # Replace all spaces with one space
|
54
|
-
str.strip!
|
42
|
+
str.strip!
|
55
43
|
return clean_charset(str)
|
56
44
|
end
|
57
|
-
|
45
|
+
|
58
46
|
def self.clean_link(url,link)
|
59
|
-
if url !~
|
47
|
+
if url !~ %r{/\z}
|
60
48
|
# Don't know if the end is a filename or a dirname, so just assume it is a filename and chop it off
|
61
49
|
url = File.dirname(url)
|
62
50
|
url = add_trail_slash(url)
|
63
51
|
end
|
64
|
-
|
52
|
+
|
65
53
|
# 1st, handle "/" (because you won't have "/../filename", which is invalid)
|
66
|
-
slash_regex =
|
67
|
-
|
54
|
+
slash_regex = %r{\A(/+\.*/*)+}
|
55
|
+
|
68
56
|
if link =~ slash_regex
|
69
57
|
link = link.gsub(slash_regex,'')
|
70
58
|
link = get_top_link(url) + link # get_top_link(...) adds a slash
|
71
|
-
|
59
|
+
|
72
60
|
return link # Already handles "../" or "./" in the regex
|
73
61
|
end
|
74
|
-
|
62
|
+
|
75
63
|
# 2nd, handle "../" (and potentially "../././/" or "..//")
|
76
64
|
# - Ignores "./" if has it
|
77
|
-
dotdot_regex =
|
65
|
+
dotdot_regex = %r{\A(\.\./)((\./)*(/)*)*} # \A (../) ( (./)* (/)* )*
|
78
66
|
num_dirs = 0 # Could be a boolean; left as int because of legacy code
|
79
|
-
|
67
|
+
|
80
68
|
while link =~ dotdot_regex
|
81
|
-
num_dirs
|
69
|
+
num_dirs += 1
|
82
70
|
link = link.gsub(dotdot_regex,'')
|
83
71
|
url = File.dirname(url)
|
84
72
|
end
|
85
|
-
|
73
|
+
|
86
74
|
if num_dirs > 0
|
87
75
|
link = add_trail_slash(url) + link
|
88
|
-
|
76
|
+
|
89
77
|
return link # Already handled "./" in the regex
|
90
78
|
end
|
91
|
-
|
79
|
+
|
92
80
|
# 3rd, handle "./"
|
93
|
-
dot_regex =
|
94
|
-
|
81
|
+
dot_regex = %r{\A(\./+)+}
|
82
|
+
|
95
83
|
if link =~ dot_regex
|
96
84
|
link = link.gsub(dot_regex,'')
|
97
85
|
link = url + link # Slash already added at top of method
|
98
|
-
|
86
|
+
|
99
87
|
return link
|
100
88
|
end
|
101
|
-
|
89
|
+
|
102
90
|
# 4th, handle no path
|
103
91
|
#if link !~ /#{get_top_link(url)}/i
|
104
92
|
if link !~ /\Ahttps?:/i
|
@@ -106,123 +94,123 @@ module UncleKryon
|
|
106
94
|
else
|
107
95
|
link = link.sub(/\Ahttp:/i,'https:')
|
108
96
|
end
|
109
|
-
|
97
|
+
|
110
98
|
return link
|
111
99
|
end
|
112
|
-
|
100
|
+
|
113
101
|
def self.empty_s?(str)
|
114
|
-
return str.nil?
|
102
|
+
return str.nil? || str.gsub(/[[:space:]]+/,'').empty?
|
115
103
|
end
|
116
|
-
|
104
|
+
|
117
105
|
def self.fix_link(url)
|
118
106
|
# If we do URI.escape(), then if it's already "%20",
|
119
107
|
# then it will convert it to "%2520"
|
120
|
-
|
108
|
+
|
121
109
|
url = url.gsub(/[[:space:]]/,'%20')
|
122
|
-
|
110
|
+
|
123
111
|
return url
|
124
112
|
end
|
125
|
-
|
113
|
+
|
126
114
|
def self.fix_shortwith_text(text)
|
127
|
-
if text =~ /
|
115
|
+
if text =~ %r{w/[[:alnum:]]}i
|
128
116
|
# I think it looks better with a space, personally.
|
129
117
|
# Some grammar guides say no space, but the Chicago style guide says there should be a space when it
|
130
118
|
# is a word by itself.
|
131
|
-
text = text.gsub(/
|
119
|
+
text = text.gsub(%r{w/}i,'w/ ')
|
132
120
|
end
|
133
|
-
|
121
|
+
|
134
122
|
return text
|
135
123
|
end
|
136
|
-
|
124
|
+
|
137
125
|
def self.format_date(date)
|
138
|
-
return date.nil?
|
126
|
+
return date.nil? ? nil : date.strftime(DATE_FORMAT)
|
139
127
|
end
|
140
|
-
|
128
|
+
|
141
129
|
def self.format_datetime(datetime)
|
142
|
-
return datetime.nil?
|
130
|
+
return datetime.nil? ? nil : datetime.strftime(DATETIME_FORMAT)
|
143
131
|
end
|
144
|
-
|
132
|
+
|
145
133
|
def self.get_top_link(url)
|
146
|
-
raise "No top link: #{url}" if DevOpts.instance.dev?
|
147
|
-
|
134
|
+
raise "No top link: #{url}" if DevOpts.instance.dev? && url !~ /\Ahttps?\:/i
|
135
|
+
|
148
136
|
http_regex = /\Ahttps?\:|\A\./i # Check '.' to prevent infinite loop
|
149
|
-
|
137
|
+
|
150
138
|
while File.basename(File.dirname(url)) !~ http_regex
|
151
|
-
url = File.dirname(url).strip
|
152
|
-
|
153
|
-
break if url == '.' || url.empty?
|
139
|
+
url = File.dirname(url).strip
|
140
|
+
|
141
|
+
break if url == '.' || url.empty?
|
154
142
|
end
|
155
|
-
|
143
|
+
|
156
144
|
return add_trail_slash(url)
|
157
145
|
end
|
158
|
-
|
146
|
+
|
159
147
|
def self.get_url_header_data(url)
|
160
148
|
uri = URI(url)
|
161
149
|
r = {}
|
162
|
-
|
150
|
+
|
163
151
|
Net::HTTP.start(uri.host,uri.port) do |http|
|
164
152
|
resp = http.request_head(uri)
|
165
|
-
r = resp.to_hash
|
153
|
+
r = resp.to_hash
|
166
154
|
end
|
167
|
-
|
155
|
+
|
168
156
|
return r
|
169
157
|
end
|
170
|
-
|
158
|
+
|
171
159
|
def self.hash_def(hash,keys,value)
|
172
160
|
v = hash
|
173
|
-
|
174
|
-
|
161
|
+
|
162
|
+
(0..keys.length - 2).each do |i|
|
175
163
|
v = v[keys[i]]
|
176
164
|
end
|
177
|
-
|
178
|
-
v[keys[keys.length-1]] = value if v[keys[keys.length-1]].nil?
|
179
|
-
return v[keys[keys.length-1]]
|
165
|
+
|
166
|
+
v[keys[keys.length - 1]] = value if v[keys[keys.length - 1]].nil?
|
167
|
+
return v[keys[keys.length - 1]]
|
180
168
|
end
|
181
|
-
|
169
|
+
|
182
170
|
def self.hash_def_all(hash,keys,value)
|
183
171
|
v = hash
|
184
|
-
|
185
|
-
|
186
|
-
if v[keys[i]].nil?
|
172
|
+
|
173
|
+
(0..keys.length - 2).each do |i|
|
174
|
+
if v[keys[i]].nil?
|
187
175
|
v[keys[i]] = {}
|
188
176
|
v = v[keys[i]]
|
189
177
|
end
|
190
178
|
end
|
191
|
-
|
192
|
-
v[keys[keys.length-1]] = value if v[keys[keys.length-1]].nil?
|
193
|
-
return v[keys[keys.length-1]]
|
179
|
+
|
180
|
+
v[keys[keys.length - 1]] = value if v[keys[keys.length - 1]].nil?
|
181
|
+
return v[keys[keys.length - 1]]
|
194
182
|
end
|
195
|
-
|
183
|
+
|
196
184
|
def self.mk_dirs_from_filepath(filepath)
|
197
185
|
dirname = File.dirname(filepath)
|
198
|
-
|
199
|
-
if !dirname.nil?
|
200
|
-
raise "Spaces around dirname: '#{dirname}'" if dirname != dirname.strip
|
201
|
-
|
186
|
+
|
187
|
+
if !dirname.nil?
|
188
|
+
raise "Spaces around dirname: '#{dirname}'" if dirname != dirname.strip
|
189
|
+
|
202
190
|
if !Dir.exist?(dirname)
|
203
191
|
Log.instance.info("Making dirs: '#{dirname}'...")
|
204
192
|
FileUtils.mkdir_p(dirname)
|
205
193
|
end
|
206
194
|
end
|
207
195
|
end
|
208
|
-
|
196
|
+
|
209
197
|
def self.parse_date_s(str)
|
210
|
-
return
|
198
|
+
return empty_s?(str) ? nil : Date.strptime(str,DATE_FORMAT)
|
211
199
|
end
|
212
|
-
|
200
|
+
|
213
201
|
def self.parse_datetime_s(str)
|
214
|
-
return
|
202
|
+
return empty_s?(str) ? nil : DateTime.strptime(str,DATETIME_FORMAT)
|
215
203
|
end
|
216
|
-
|
204
|
+
|
217
205
|
def self.parse_url_filename(url)
|
218
206
|
uri = URI.parse(url)
|
219
207
|
r = File.basename(uri.path)
|
220
|
-
r =
|
221
|
-
return r.strip
|
208
|
+
r = CGI.unescape(r)
|
209
|
+
return r.strip
|
222
210
|
end
|
223
|
-
|
211
|
+
|
224
212
|
def self.safe_max(a,b)
|
225
|
-
return a.nil?
|
213
|
+
return a.nil? ? b : (b.nil? ? a : ((a > b) ? a : b))
|
226
214
|
end
|
227
215
|
end
|
228
216
|
end
|