blackstack-core 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/examples/example01.rb +300 -0
- data/examples/example02.rb +3 -0
- data/lib/blackstack-core.rb +26 -0
- data/lib/extend_datetime.rb +6 -0
- data/lib/extend_exception.rb +11 -0
- data/lib/extend_fixnum.rb +14 -0
- data/lib/extend_float.rb +6 -0
- data/lib/extend_string.rb +143 -0
- data/lib/extend_time.rb +11 -0
- data/lib/functions.rb +839 -0
- metadata +113 -0
data/lib/functions.rb
ADDED
@@ -0,0 +1,839 @@
|
|
1
|
+
|
2
|
+
module BlackStack
|
3
|
+
|
4
|
+
# -----------------------------------------------------------------------------------------
|
5
|
+
# PRY Supporting Functions
|
6
|
+
# -----------------------------------------------------------------------------------------
|
7
|
+
module Debugging
  @@allow_breakpoints = false
  @@verbose = false
  # true while Binding#pry is replaced by the no-op stub installed by `set`
  @@pry_stubbed = false

  # Returns true if breakpoints are allowed.
  def self.allow_breakpoints
    @@allow_breakpoints
  end

  # Configures the debugging support.
  #
  # h: hash of options.
  # => :allow_breakpoints: breakpoints become allowed only when the value is exactly `true`.
  # => :verbose: when exactly `true`, the pry stub prints a notice each time a breakpoint is skipped.
  #
  # While breakpoints are not allowed, `Binding#pry` is replaced by a stub that does not stop.
  # Once breakpoints are allowed, the previous `pry` implementation is put back.
  def self.set(h)
    @@allow_breakpoints = h[:allow_breakpoints] if h[:allow_breakpoints].is_a?(TrueClass)
    @@verbose = h[:verbose] if h[:verbose].is_a?(TrueClass)

    if @@allow_breakpoints
      enable_pry
    else
      disable_pry
    end
  end

  # Replaces Binding#pry with a stub that never stops.
  # Safe to call many times: the original method is aliased only once, so
  # `old_pry` never ends up pointing at the stub itself.
  def self.disable_pry
    return if @@pry_stubbed

    # monkey patching the pry method to not break on breakpoints
    new_pry = lambda do
      print "Breakpoint are not allowed" if @@verbose
    end

    Binding.class_eval do
      # pry may not be loaded at all; alias_method would raise NameError in that case
      alias_method :old_pry, :pry if method_defined?(:pry)
      define_method :pry, new_pry
    end

    @@pry_stubbed = true
  end
  private_class_method :disable_pry

  # Removes the stub installed by disable_pry and restores the previous
  # implementation of Binding#pry, if there was one.
  def self.enable_pry
    return unless @@pry_stubbed

    Binding.class_eval do
      if method_defined?(:old_pry)
        alias_method :pry, :old_pry
        remove_method :old_pry
      else
        remove_method :pry
      end
    end

    @@pry_stubbed = false
  end
  private_class_method :enable_pry
end
|
34
|
+
|
35
|
+
# -----------------------------------------------------------------------------------------
|
36
|
+
# OCRA Supporting Functions
|
37
|
+
# -----------------------------------------------------------------------------------------
|
38
|
+
module OCRA
  # Requires a file that lives in the same folder as another file.
  #
  # OCRA executables are unpacked and run inside a temp folder, so the current
  # folder of the running command is not the folder where the exe file is hosted.
  # This function helps to require a configuration file in that situation.
  #
  # filename: name of the file to require.
  # path: path of a file; only its folder is used, after expanding it to an absolute path.
  # show_path_info: if true, the resolved folder and file are written to the standard output.
  #
  # Returns the value returned by `require`.
  #
  # More information:
  # * https://stackoverflow.com/questions/1937743/how-to-get-the-current-working-directorys-absolute-path-from-irb
  # * https://stackoverflow.com/questions/8577223/ruby-get-the-file-being-executed
  # * https://stackoverflow.com/questions/7399882/ruby-getting-path-from-pathfilename/7400057
  def self.require_in_working_path(filename, path, show_path_info=false)
    puts '' if show_path_info

    base_dir = File.expand_path(File.dirname(path))
    target = "#{base_dir}/#{filename}"

    if show_path_info
      puts "require_in_working_path.path:#{base_dir}:."
      puts "require_in_working_path.file:#{target}:."
    end

    require target
  end
end # module OCRA
|
60
|
+
|
61
|
+
# -----------------------------------------------------------------------------------------
|
62
|
+
# DateTime Functions
|
63
|
+
# -----------------------------------------------------------------------------------------
|
64
|
+
module DateTime
  # -----------------------------------------------------------------------------------------
  # Encoding
  # -----------------------------------------------------------------------------------------
  module Encoding
    # Converts a date-time object to a string with sql-datetime format (yyyy-mm-dd hh:mm:ss).
    # o: any object responding to `strftime`.
    def self.datetime_to_sql(o)
      o.strftime("%Y-%m-%d %H:%M:%S")
    end
  end # module Encoding

  # -----------------------------------------------------------------------------------------
  # Miscellaneous
  # -----------------------------------------------------------------------------------------
  module Misc
    # Number of days of each month (January first) in a non-leap year.
    DAYS_IN_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31].freeze

    # Checks that the given values describe an existing date and time.
    # Every value is converted with `to_i` before being checked.
    #
    # => Return true if all the values are valid. Otherwise, return false.
    # => Year cannot be lower than 1900.
    # => Year cannot be higher or equal than 2100.
    # => Day cannot exceed the number of days of the month (leap years are considered).
    def self.datetime_values_check(year,month,day,hour,minute,second)
      return false unless (1900...2100).cover?(year.to_i)
      return false unless (1..12).cover?(month.to_i)
      return false unless (1..days_in_month(year.to_i, month.to_i)).cover?(day.to_i)
      return false unless (0..23).cover?(hour.to_i)
      return false unless (0..59).cover?(minute.to_i)
      return false unless (0..59).cover?(second.to_i)
      true
    end # datetime_values_check

    # Returns the number of days of the month (1..12) in the given year.
    def self.days_in_month(year, month)
      leap_year = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0
      return 29 if month == 2 && leap_year
      DAYS_IN_MONTH[month - 1]
    end
    private_class_method :days_in_month
  end # module Misc
end # module DateTime
|
109
|
+
|
110
|
+
# -----------------------------------------------------------------------------------------
|
111
|
+
# Numeric Functions
|
112
|
+
# -----------------------------------------------------------------------------------------
|
113
|
+
module Number
  # -----------------------------------------------------------------------------------------
  # Encoding
  # -----------------------------------------------------------------------------------------
  module Encoding
    # Converts number to a string with a format like xx,xxx,xxx.xxxx
    # number: it may be int or float
    def self.format_with_separator(number)
      integer_digits, fraction_digits = number.to_s.split('.')
      grouped = integer_digits.gsub(/(\d)(?=\d{3}+$)/, '\1,')
      fraction_digits.nil? ? grouped : [grouped, fraction_digits].join('.')
    end

    # Converts a number of minutes to a caption readable by the user.
    # Example: "2 days, 5 hours"
    # Example: "4 hours, 30 minutes"
    # Returns "?" when n is negative.
    def self.encode_minutes(n)
      # TODO: validate that n is an integer greater than 0
      return "?" if n < 0
      return "#{n} minutes" if n < 60

      if n < 24*60
        whole_hours = (n/60).to_i
        "#{whole_hours} hours, #{n-60*whole_hours} minutes"
      else
        whole_days = (n/(24*60)).to_i
        leftover_minutes = n-24*60*whole_days
        "#{whole_days} days, #{(leftover_minutes/60).to_i} hours"
      end
    end
  end # module Encoding
end # module Number
|
144
|
+
|
145
|
+
# -----------------------------------------------------------------------------------------
|
146
|
+
# String Functions
|
147
|
+
# -----------------------------------------------------------------------------------------
|
148
|
+
module Strings
|
149
|
+
|
150
|
+
# Length of a GUID written as 8-4-4-4-12 hexadecimal digits separated by dashes.
GUID_SIZE = 36
# At least 6 characters, with at least one letter and one digit.
MATCH_PASSWORD = /(?=.*[a-zA-Z])(?=.*[0-9]).{6,}/
# GUID with optional surrounding braces.
MATCH_GUID = /{?[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]\-[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]\-[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]-[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]\-[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]}?/
MATCH_FILENAME = /[\w\-\_\.]+/
MATCH_EMAIL = /[A-Z0-9._%a-z\-]+@(?:[A-Z0-9a-z\-]+\.)+[A-Za-z]{1,25}/
MATCH_DOMAIN = /(?:[A-Z0-9a-z\-]+\.)+[A-Za-z]{2,10}/
# Date with format yyyy-mm-dd (month and day may have 1 or 2 digits).
MATCH_DATE_STANDARD = /\d{4}\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])/
MATCH_PHONE = /(?:\+\d{1,2}\s)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}/

# Note: MATCH_URL gets the URL up to '?', but it doesn't retrieve the parameters.
# Example:
# https://foo.com/bar?param1=value1&param2=value2 --> https://foo.com/bar?
# https://foo.com/bar/?param1=value1&param2=value2 --> https://foo.com/bar/?
MATCH_URL = /(https?:\/\/)?([\da-z\.-]+)([\.\:])([\da-z]{2,6})([\/[\da-z\.\-]+]*[\da-z])(\/)?(\?)?/i

# Beginning of the URL of a LinkedIn company page, with optional protocol and optional 'www.'.
# The dot after 'www' is escaped with a single backslash: the previous pattern (www\\.)
# required a literal backslash after 'www', so the 'www.' prefix was never matched.
MATCH_LINKEDIN_COMPANY_URL = /(https?:\/\/)?(www\.)?linkedin\.com\/company\//
MATCH_FIXNUM = /[0-9]+/
# A spinning block: text between braces, not nested. Example: {Hi|Hello}
MATCH_CONTENT_SPINNING = /{[^}]+}/
MATCH_SPINNED_TEXT = /code me/ # TODO: define this regex for the issue #1226
|
169
|
+
|
170
|
+
# -----------------------------------------------------------------------------------------
|
171
|
+
# SQL Functions: Escape string values to embed them into SQL statements.
|
172
|
+
# -----------------------------------------------------------------------------------------
|
173
|
+
module SQL
  # Returns a copy of the string `s` where every single quote is doubled,
  # so the value can be written between single quotes in a SQL statement.
  # Only single quotes are processed; any other character is left untouched.
  def self.string_to_sql_string(s)
    escaped = s.gsub("'", "''")
    escaped.to_s
  end
end
|
179
|
+
|
180
|
+
# -----------------------------------------------------------------------------------------
|
181
|
+
# Fuzzy String Comparison Functions: How similar are 2 strings that are not exactly equal.
|
182
|
+
# -----------------------------------------------------------------------------------------
|
183
|
+
module Comparing
  # Returns the Levenshtein distance between the strings `s` and `t`, ignoring case.
  # Returns 0 if the strings are equal.
  # The parameters are not modified, so frozen strings are accepted.
  # https://stackoverflow.com/questions/16323571/measure-the-distance-between-two-strings-with-ruby
  def self.levenshtein_distance(s, t)
    s = s.downcase
    t = t.downcase

    m = s.length
    n = t.length
    return m if n == 0
    return n if m == 0
    d = Array.new(m+1) { Array.new(n+1) }

    (0..m).each { |i| d[i][0] = i }
    (0..n).each { |j| d[0][j] = j }
    (1..n).each do |j|
      (1..m).each do |i|
        d[i][j] = if s[i-1] == t[j-1] # adjust index into string
                    d[i-1][j-1] # no operation required
                  else
                    [
                      d[i-1][j]+1,   # deletion
                      d[i][j-1]+1,   # insertion
                      d[i-1][j-1]+1, # substitution
                    ].min
                  end
      end
    end
    d[m][n]
  end

  # Returns the number of words with more than 3 characters found in the parameter `s`.
  # The parameter is not modified.
  def self.max_sardi_distance(s)
    significant_words(s).size
  end

  # Returns the number of words with more than 3 characters of the parameter `s`
  # that are NOT found in the parameter `t`.
  # It is equal to max_sardi_distance(s) minus the number of such words found in `t`.
  # The parameters are not modified.
  def self.sardi_distance(s, t)
    words_of_t = words_in(t)
    significant_words(s).count { |word| !words_of_t.include?(word) }
  end

  # Returns the words of `text`: lower-cased, dashes taken as spaces, letters a-z only.
  def self.words_in(text)
    text.downcase.gsub(/-/, ' ').scan(/\b[a-z]+\b/)
  end
  private_class_method :words_in

  # Returns the words of `text` with more than 3 characters, to avoid trivial keywords like 'and'.
  def self.significant_words(text)
    words_in(text).select { |word| word.size > 3 }
  end
  private_class_method :significant_words
end # module Comparing
|
249
|
+
|
250
|
+
# -----------------------------------------------------------------------------------------
|
251
|
+
# Encoding: Make a string nice to be shown into an HTML string.
|
252
|
+
# -----------------------------------------------------------------------------------------
|
253
|
+
module Encoding
|
254
|
+
# Then it makes it compatible with UTF-8.
|
255
|
+
# More details here: https://bitbucket.org/leandro_sardi/blackstack/issues/961
|
256
|
+
# Returns the string `s` transcoded to UTF-8.
def self.encode_string(s)
  utf8 = s.encode("UTF-8")
  utf8
end
|
259
|
+
|
260
|
+
# Escape the string to be shown into an HTML screen.
|
261
|
+
# Then it makes it compatible with UTF-8.
|
262
|
+
# More details here: https://bitbucket.org/leandro_sardi/blackstack/issues/961
|
263
|
+
# s: any object; it is converted with `to_s` before being escaped.
# Returns the HTML-escaped text, passed through encode_string.
def self.encode_html(s)
  escaped = CGI.escapeHTML(s.to_s)
  encode_string(escaped)
end
|
266
|
+
|
267
|
+
# Generates a description string from an exception object.
|
268
|
+
# Eescapes the string to be shown into an HTML screen.
|
269
|
+
# Makes it compatible with UTF-8.
|
270
|
+
# More details here: https://bitbucket.org/leandro_sardi/blackstack/issues/961
|
271
|
+
# e: the exception to describe.
# include_backtrace: the backtrace lines are added only when this value is exactly `true`.
#
# Returns the escaped message, followed by one escaped line per backtrace entry,
# each one preceded by "<br/>".
# An exception that was never raised has no backtrace (nil); only its message is returned then.
def self.encode_exception(e, include_backtrace=true)
  ret = encode_html(e.to_s)
  if include_backtrace == true
    # Array(nil) is an empty array, so a missing backtrace adds nothing
    Array(e.backtrace).each { |line|
      ret += "<br/>" + encode_html(line)
    }
  end
  ret
end
|
280
|
+
|
281
|
+
# Returns a string with a description of a period of time, to be shown in the screen.
|
282
|
+
# period: it may be 'H', 'D', 'W', 'M', 'Y'
|
283
|
+
# units: it is a positive integer
|
284
|
+
# Returns "Last ", then the number of units when it is greater than 1, then the
# name of the period: singular when units is 1, plural otherwise.
# Nothing is added for a period code other than 'H', 'D', 'W', 'M', 'Y' (case insensitive).
def self.encode_period(period, units)
  count = units.to_i
  names = {
    "H" => ["Hour", "Hours"],
    "D" => ["Day", "Days"],
    "W" => ["Week", "Weeks"],
    "M" => ["Month", "Months"],
    "Y" => ["Year", "Years"],
  }

  caption = "Last "
  caption += count.to_s + " " if count > 1

  singular, plural = names[period.upcase]
  caption += (count == 1 ? singular : plural) unless singular.nil?
  caption
end
|
299
|
+
|
300
|
+
#
|
301
|
+
# Returns the string `s` without any brace character ('{' or '}') and in upper case.
def self.encode_guid(s)
  without_braces = s.delete('{}')
  without_braces.upcase
end
|
304
|
+
|
305
|
+
#
|
306
|
+
# Prepares a text to be written inside a single-quoted JavaScript string literal.
# s: any object; it is converted with `to_s`.
# => each single quote gets a backslash before it.
# => each carriage return and each line feed closes the literal, concatenates the
#    character built with String.fromCharCode, and opens the literal again.
def self.encode_javascript(s)
  replacements = {
    "'" => "\\'",
    "\r" => "' + String.fromCharCode(13) + '",
    "\n" => "' + String.fromCharCode(10) + '",
  }
  # with a hash, every replacement text is used literally
  s.to_s.gsub(/['\r\n]/, replacements)
end
|
309
|
+
|
310
|
+
end # module Encoding
|
311
|
+
|
312
|
+
# -----------------------------------------------------------------------------------------
|
313
|
+
# DateTime
|
314
|
+
# -----------------------------------------------------------------------------------------
|
315
|
+
module DateTime
  # api-datetime: exactly 14 digits (yyyymmddhhmmss).
  API_PATTERN = /\A(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\z/
  # sql-datetime: yyyy-mm-dd hh:mm:ss, with 2 digits for each value but the year.
  SQL_PATTERN = /\A(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})\z/
  private_constant :API_PATTERN, :SQL_PATTERN

  # Check the string has the format yyyymmddhhmmss.
  # => Return true if success. Otherwise, return false.
  # => Every position must be a digit; the values are then validated by
  #    BlackStack::DateTime::Misc::datetime_values_check.
  # => Year cannot be lower than 1900.
  # => Year cannot be higher or equal than 2100.
  def self.datetime_api_check(s)
    parts = API_PATTERN.match(s.to_s)
    return false if parts.nil?
    BlackStack::DateTime::Misc::datetime_values_check(*parts.captures)
  end # def datetime_api_check

  # Check the string has the format yyyy-mm-dd hh:mm:ss.
  # => Return true if success. Otherwise, return false.
  # => Digits and separators must be exactly where the format says; the values are then
  #    validated by BlackStack::DateTime::Misc::datetime_values_check.
  # => Year cannot be lower than 1900.
  # => Year cannot be higher or equal than 2100.
  def self.datetime_sql_check(s)
    parts = SQL_PATTERN.match(s.to_s)
    return false if parts.nil?
    BlackStack::DateTime::Misc::datetime_values_check(*parts.captures)
  end # def datetime_sql_check

  # Converts a string with api-datetime format (yyyymmddhhmmss) to a string with sql-datetime format (yyyy-mm-dd hh:mm:ss).
  # Raises an exception if the string doesn't pass datetime_api_check.
  def self.datetime_api_to_sql(s)
    raise "Wrong Api DataTime Format." unless datetime_api_check(s)
    year, month, day, hour, minute, second = API_PATTERN.match(s.to_s).captures
    "#{year}-#{month}-#{day} #{hour}:#{minute}:#{second}"
  end # def datetime_api_to_sql

  # Converts a string with sql-datetime format (yyyy-mm-dd hh:mm:ss) to a string with api-datetime format (yyyymmddhhmmss).
  # Raises an exception if the string doesn't pass datetime_sql_check.
  def self.datetime_sql_to_api(s)
    raise "Wrong SQL DataTime Format." unless datetime_sql_check(s)
    SQL_PATTERN.match(s.to_s).captures.join
  end # def datetime_sql_to_api
end # module DateTime
|
372
|
+
|
373
|
+
|
374
|
+
# -----------------------------------------------------------------------------------------
|
375
|
+
# Spinning
|
376
|
+
# -----------------------------------------------------------------------------------------
|
377
|
+
module Spinning
  # Returns a random variation of the given text.
  # Each spinning block is processed by itself, to avoid the error "too big to product"
  # raised by the ContentSpinning library.
  # Every occurrence of the same spinning block gets the same variation.
  def self.random_spinning_variation(text)
    ret = text

    text.scan(MATCH_CONTENT_SPINNING).uniq.each { |s|
      a = ContentSpinning.new(s).spin
      rep = a[rand(a.size)]
      # block form: the variation is inserted as it is, even if it contains backslashes
      ret = ret.gsub(s) { rep }
    }

    ret
  end

  # Returns true if the syntax of the spinned text is correct.
  # Otherwise, returns false:
  # => closing char '}' without a previous opening char '{',
  # => opening char '{' inside another spinning block, or never closed,
  # => spinning block without variations delimited by '|'.
  # Nested spinnings are not supported. Example: {my|our|{a car of mine}}
  def self.valid_spinning_syntax?(s)
    depth = 0
    s.each_char { |c|
      depth += 1 if c == '{'
      depth -= 1 if c == '}'
      return false if depth < 0 # closing char with no previous opening char
      return false if depth > 1 # opening char inside another spinning block
    }
    return false if depth != 0 # a block was never closed

    # every spinning block must offer more than one variation
    s.scan(MATCH_CONTENT_SPINNING).all? { |block| block.split('|').size > 1 }
  end

  # returns true if the text is spinned.
  # otherwise, returns false.
  def self.spintax?(s)
    !(s =~ MATCH_CONTENT_SPINNING).nil?
  end
end # module Spinning
|
427
|
+
|
428
|
+
|
429
|
+
# -----------------------------------------------------------------------------------------
|
430
|
+
# Miscelaneus
|
431
|
+
# -----------------------------------------------------------------------------------------
|
432
|
+
module Misc
  # Makes a Ruby string safe for a filesystem.
  # => surrounding whitespace is removed,
  # => only the file name is kept, not the whole path ('/' and '\' are both taken as separators),
  # => every character other than 0-9, A-Z, a-z, '.' and '-' is replaced by '_'.
  # The parameter is not modified.
  #
  # References:
  # => https://stackoverflow.com/questions/1939333/how-to-make-a-ruby-string-safe-for-a-filesystem
  # => http://devblog.muziboo.com/2008/06/17/attachment-fu-sanitize-filename-regex-and-unicode-gotcha/
  def self.sanitize_filename(filename)
    # String#strip takes no block: a block given to it is never executed,
    # so each step is applied explicitly here.
    name = filename.strip

    # NOTE: File.basename doesn't work right with Windows paths on Unix
    # get only the filename, not the whole path
    name = name.gsub(/^.*(\\|\/)/, '')

    # replace every character out of the allowed set
    name.gsub(/[^0-9A-Za-z.\-]/, '_')
  end
end # module Misc
|
449
|
+
|
450
|
+
|
451
|
+
# -----------------------------------------------------------------------------------------
|
452
|
+
# Email Appending Functions
|
453
|
+
# -----------------------------------------------------------------------------------------
|
454
|
+
module Appending
  # Codes of the patterns supported by name_pattern.
  APPEND_PATTERN_FNAME_DOT_LNAME = 0
  APPEND_PATTERN_FNAME = 1
  APPEND_PATTERN_LNAME = 2
  APPEND_PATTERN_F_LNAME = 3
  APPEND_PATTERN_F_DOT_LNAME = 4

  # Returns the local part of an email address built with the first name and
  # the last name, following the pattern identified by the code `pattern`.
  # Raises an exception if the code is unknown.
  def self.name_pattern(pattern, fname, lname)
    case pattern
    when APPEND_PATTERN_FNAME_DOT_LNAME then "#{fname}.#{lname}"
    when APPEND_PATTERN_FNAME then "#{fname}"
    when APPEND_PATTERN_LNAME then "#{lname}"
    when APPEND_PATTERN_F_LNAME then "#{fname[0]}#{lname}"
    when APPEND_PATTERN_F_DOT_LNAME then "#{fname[0]}.#{lname}"
    else raise "getNamePattern: Unknown pattern code."
    end
  end

  # Returns an array with candidate email addresses for a person in a domain.
  # The 4 patterns that combine first name and last name are always included.
  # The patterns with only the last name or only the first name are added
  # when is_a_big_company is exactly `false`.
  def self.get_email_variations(first_name, last_name, domain, is_a_big_company)
    variations = [
      first_name + "." + last_name + "@" + domain,
      first_name[0] + last_name + "@" + domain,
      first_name + "_" + last_name + "@" + domain,
      first_name[0] + "." + last_name + "@" + domain,
    ]

    if is_a_big_company == false
      variations.push(last_name + "@" + domain)
      variations.push(first_name + "@" + domain)
    end

    variations
  end
end # module Appending
|
506
|
+
end # module String
|
507
|
+
|
508
|
+
# -----------------------------------------------------------------------------------------
|
509
|
+
# Network
|
510
|
+
# -----------------------------------------------------------------------------------------
|
511
|
+
module Netting
|
512
|
+
CALL_METHOD_GET = 'get'
CALL_METHOD_POST = 'post'
# NOTE(review): VERIFY_NONE turns off the verification of the server certificate for
# every call that relies on this default. Consider OpenSSL::SSL::VERIFY_PEER.
DEFAULT_SSL_VERIFY_MODE = OpenSSL::SSL::VERIFY_NONE
SUCCESS = 'success'

# lock files opened by `set`, one per channel
@@lockfiles = []

@@max_api_call_channels = 0 # 0 means infinite

# Returns the number of channels configured by `set` (0 means infinite).
def self.max_api_call_channels()
  @@max_api_call_channels
end

# Returns the array of lock files opened by `set`.
def self.lockfiles()
  @@lockfiles
end

# Configures the number of channels for API calls, and opens one lock file
# per channel in the current folder (./apicall.channel_<i>.lock).
#
# h: hash of options.
# => :max_api_call_channels: number of channels; a missing key means 0 (infinite).
#
# Lock files opened by a previous call are closed before opening the new ones.
def self.set(h)
  # release the file handles of a previous configuration
  @@lockfiles.each { |f| f.close unless f.closed? }

  # to_i: a missing key (nil) is taken as 0 instead of breaking the comparison below
  @@max_api_call_channels = h[:max_api_call_channels].to_i
  @@lockfiles = []

  i = 0
  while i<@@max_api_call_channels
    @@lockfiles << File.open("./apicall.channel_#{i.to_s}.lock", "w")
    i+=1
  end
end
|
539
|
+
|
540
|
+
|
541
|
+
# Error with a description. The description is also the text returned by `to_s`.
class ApiCallException < StandardError
  attr_accessor :description

  # s: description of the error.
  def initialize(s)
    @description = s
  end

  def to_s
    description
  end
end
|
552
|
+
|
553
|
+
# New call_get
|
554
|
+
# Does a GET request and returns the response.
#
# url: valid internet address (string or URI). Its query string is replaced by `params`.
# params: hash of params to send in the query string.
# ssl_verify_mode: SSL verification mode, used for the first request and for every redirection.
# support_redirections: if true, redirections are followed (up to 10); if false, a redirection returns nil.
#
# Returns the response if the request succeeds.
# Raises an exception if the response is an error, or if there are too many redirections.
def self.call_get(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  max_redirections = 10

  # work on a copy, so a URI object received from the caller is not modified
  uri = URI(url).dup
  uri.query = URI.encode_www_form(params)

  redirections = 0
  loop do
    res = Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https', :verify_mode => ssl_verify_mode) do |http|
      http.request(Net::HTTP::Get.new(uri))
    end

    case res
    when Net::HTTPSuccess
      return res
    when Net::HTTPRedirection
      return nil unless support_redirections
      location = res['location']
      res.error! if location.nil?
      redirections += 1
      raise "call_get: too many redirections." if redirections > max_redirections
      # the location may be relative to the current address
      uri = URI.join(uri.to_s, location)
      uri.query = URI.encode_www_form(params)
    else
      res.error!
    end
  end
end
|
569
|
+
|
570
|
+
# Call the API and return the result.
|
571
|
+
# url: valid internet address
|
572
|
+
# params: hash of params to attach in the call
|
573
|
+
# ssl_verify_mode: you can disable SSL verification here.
|
574
|
+
# max_channels: this method uses lockfiles to prevent an excessive number of API calls from each datacenter. No more than max_channels simultaneous calls are allowed.
|
575
|
+
# TODO: setup max_simultaneus_calls in the configurtion file.
|
576
|
+
# TODO: parameter support_redirections has been deprecated.
|
577
|
+
# Returns the response if the request succeeds; raises the exception
# of `Net::HTTPResponse#error!` otherwise. Redirections are not followed.
# The limitation of simultaneous calls with lockfiles is currently disabled,
# and the parameter support_redirections is not used.
def self.call_post(url, params = {}, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, support_redirections=true)
  endpoint = URI(url)
  connection_options = { :use_ssl => endpoint.scheme == 'https', :verify_mode => ssl_verify_mode }

  # do the call
  Net::HTTP.start(endpoint.host, endpoint.port, connection_options) do |http|
    request = Net::HTTP::Post.new(endpoint)
    request['Content-Type'] = 'application/json'
    # NOTE(review): set_form_data sends the params as form data and looks like it
    # sets its own content type after the line above - confirm which one is intended.
    request.set_form_data(params)

    response = http.request(request)
    response.error! unless response.is_a?(Net::HTTPSuccess)
    response
  end
end
|
623
|
+
|
624
|
+
#
|
625
|
+
# Calls an API that answers a JSON hash with the keys 'status' and 'value',
# retrying until the status is BlackStack::Netting::SUCCESS.
#
# url: valid internet address.
# params: hash of params to attach in the call.
# method: BlackStack::Netting::CALL_METHOD_POST or BlackStack::Netting::CALL_METHOD_GET.
# ssl_verify_mode: SSL verification mode passed to call_post or call_get.
# max_retries: max number of tries.
#
# Returns the parsed JSON response of the successful try.
# Raises an exception with the description of the last error if no try succeeds.
def self.api_call(url, params={}, method=BlackStack::Netting::CALL_METHOD_POST, ssl_verify_mode=BlackStack::Netting::DEFAULT_SSL_VERIFY_MODE, max_retries=5)
  nTries = 0
  sError = ""
  while nTries < max_retries
    begin
      nTries = nTries + 1
      uri = URI(url)
      res = BlackStack::Netting::call_post(uri, params, ssl_verify_mode) if method==BlackStack::Netting::CALL_METHOD_POST
      res = BlackStack::Netting::call_get(uri, params, ssl_verify_mode) if method==BlackStack::Netting::CALL_METHOD_GET
      parsed = JSON.parse(res.body)
      # the parsed response is the result of the call
      return parsed if parsed['status']==BlackStack::Netting::SUCCESS
      sError = "Status: #{parsed['status'].to_s}. Description: #{parsed['value'].to_s}."
    rescue Errno::ECONNREFUSED => e
      sError = "Errno::ECONNREFUSED:" + e.to_console
    rescue => e2
      sError = "Exception:" + e2.to_console
    end
  end # while

  raise "#{sError}"
end # apicall
|
653
|
+
|
654
|
+
# Download a file from an url to a local folder.
|
655
|
+
# url: must be somedomain.net instead of somedomain.net/, otherwise, it will throw exception.
|
656
|
+
# to: must be a valid path to a folder.
|
657
|
+
# The request is sent to the host and port of the url, with SSL when the scheme is https,
# and it includes the query string of the url.
#
# to: path of the file to write. If it is an existing folder, the file is written
# inside it, with the name of the last segment of the path of the url.
#
# Returns the value returned by the write of the file.
# Raises an exception if `to` is a folder and the url has no file name.
def self.download(url, to)
  uri = URI(url)

  target = to
  if File.directory?(to)
    filename = uri.path.to_s.split("/").last
    raise "download: cannot get a file name from #{url}" if filename.nil? || filename.empty?
    target = File.join(to, filename)
  end

  Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    resp = http.get(uri.request_uri)
    # File.open instead of Kernel#open: the target is always taken as a file path
    File.open(target, "wb") do |file|
      file.write(resp.body)
    end
  end
end
|
669
|
+
|
670
|
+
# Return the extension of the last path into an URL.
|
671
|
+
# Example: get_url_extension("http://connect.data.com/sitemap_index.xml?foo_param=foo_value") => ".xml"
|
672
|
+
# url: string with the address. Only its path is considered, not its parameters.
# Returns the extension including the dot, or an empty string if there is no extension.
def self.get_url_extension(url)
  url_path = URI.parse(url).path.to_s
  File.extname(url_path)
end
|
675
|
+
|
676
|
+
# Removes the 'www.' from an URL.
|
677
|
+
# url: string with the address. If it has no scheme, 'http://' is assumed.
# Returns the host in lower case, without the 'www.' at its beginning if any.
def self.get_host_without_www(url)
  parsed = URI.parse(url)
  parsed = URI.parse("http://#{url}") if parsed.scheme.nil?
  parsed.host.downcase.sub(/\Awww\./, '')
end
|
682
|
+
|
683
|
+
# Get the final URL if a web page is redirecting.
|
684
|
+
# Does one request to the url, and returns the address in the 'Location' header of the response.
# => A relative location is resolved against the requested url.
# => An absolute location is returned as it is, with its host in lower case.
# => If the response has no location (the page is not redirecting), the same url is returned.
def self.get_redirect(url)
  uri = URI.parse(url)
  res = Net::HTTP.get_response(uri)

  location = res['location']
  return url.to_s if location.nil? || location.empty?

  target = URI.join(uri.to_s, location)
  target.host = target.host.downcase unless target.host.nil?
  target.to_s
end
|
691
|
+
|
692
|
+
# returns the age in days of the given file
|
693
|
+
# The age is the time elapsed since `File.ctime` of the file.
# Returns a float number of days.
def self.file_age(filename)
  seconds_per_day = 24*3600
  elapsed_seconds = Time.now - File.ctime(filename)
  elapsed_seconds/seconds_per_day
end
|
696
|
+
|
697
|
+
|
698
|
+
# TODO: Is not guaranteed this function works with 100% of the redirect-urls. This problem requires analysis and development of a general purpose algorith
|
699
|
+
# This function gets the final url from a redirect url.
|
700
|
+
# Not all the redirect-urls works the same way.
|
701
|
+
# Below are 3 examples. Each one works with 1 of the 2 strategies applied by this funcion.
|
702
|
+
# => url = "https://www.google.com.ar/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CB0QFjAAahUKEwjCg8zMsNvGAhXMMj4KHWBfA50&url=https%3A%2F%2Fwww.linkedin.com%2Fpub%2Fdavid-bell%2F5%2F76a%2F12&ei=IGalVcLzFMzl-AHgvo3oCQ&usg=AFQjCNGMbF2vRIOWsRjF-bjjoG6Nl1wg_g&sig2=ZP6ZbZxpmTHw82rIP7YYew&bvm=bv.97653015,d.cWw"
|
703
|
+
# => url = "https://www.google.com.ar/url?q=https://www.linkedin.com/pub/mark-greene/2/bb8/b59&sa=U&ved=0CDoQFjAIahUKEwiqivi5sdvGAhWJg5AKHSzkB5o&usg=AFQjCNGE09H9hf92mfvwPVnComssDjBBCw"
|
704
|
+
# If the url is not a redirect-url, this function returns the same url.
|
705
|
+
=begin
|
706
|
+
def get_redirect(url)
|
707
|
+
begin
|
708
|
+
res = nil
|
709
|
+
httpc = HTTPClient.new
|
710
|
+
resp = httpc.get(url)
|
711
|
+
res = resp.header['Location']
|
712
|
+
|
713
|
+
if res.size == 0
|
714
|
+
uri = URI.parse(url)
|
715
|
+
uri_params = CGI.parse(uri.query)
|
716
|
+
redirected_url = uri_params['url'][0]
|
717
|
+
|
718
|
+
if ( redirected_url != nil )
|
719
|
+
res = redirected_url
|
720
|
+
else
|
721
|
+
res = url
|
722
|
+
end
|
723
|
+
else
|
724
|
+
res = res[0]
|
725
|
+
end
|
726
|
+
rescue
|
727
|
+
res = url
|
728
|
+
end
|
729
|
+
return res
|
730
|
+
end
|
731
|
+
=end
|
732
|
+
# returns a hash with the parametes in the url
|
733
|
+
def self.params(url)
|
734
|
+
# TODO: Corregir este parche:
|
735
|
+
# => El codigo de abajo usa la URL de una busqueda en google. Esta url generara una excepcion cuando se intenta parsear sus parametros.
|
736
|
+
# => Ejecutar las 2 lineas de abajo para verificar.
|
737
|
+
# => url = "https://www.google.com/webhp#q=[lead+generation]+%22John%22+%22Greater+New+York+City+Area+*+Financial+Services%22+site:linkedin.com%2Fpub+-site:linkedin.com%2Fpub%2Fdir"
|
738
|
+
# => p = CGI::parse(URI.parse(url).query)
|
739
|
+
# => La linea de abajo hace un gsbub que hace que esta url siga funcionando como busqueda de google, y ademas se posible parsearla.
|
740
|
+
url = url.gsub("webhp#q=", "webhp?q=")
|
741
|
+
|
742
|
+
return CGI::parse(URI.parse(url).query)
|
743
|
+
end
|
744
|
+
|
745
|
+
# Add a parameter to the url. It doesn't validate if the param already exists.
|
746
|
+
# url: string with the address.
# param_name: name of the parameter to add.
# param_value: value of the parameter; it may be any object (it is converted to string).
#
# The new parameter is added at the end of the existing ones, and both its name and
# its value are URL-encoded, no matter if the url already has parameters or not.
# Returns the new url.
def self.add_param(url, param_name, param_value)
  uri = URI(url)
  params = URI.decode_www_form(uri.query || '')
  params << [param_name, param_value]
  uri.query = URI.encode_www_form(params)
  uri.to_s
end
|
759
|
+
|
760
|
+
# Changes the value of a parameter in the url. It doesn't validate if the param already exists.
|
761
|
+
def self.change_param(url, param_name, param_value)
|
762
|
+
uri = URI(url)
|
763
|
+
# params = URI.decode_www_form(uri.query || [])
|
764
|
+
params = CGI.parse(uri.query)
|
765
|
+
params["start"] = param_value
|
766
|
+
uri.query = URI.encode_www_form(params)
|
767
|
+
uri.to_s
|
768
|
+
end
|
769
|
+
|
770
|
+
# Change or add the value of a parameter in the url, depending if the parameter already exists or not.
|
771
|
+
# The parameter is changed with change_param if its name is one of the keys returned
# by params(url); otherwise, it is added with add_param.
# Returns the new url.
def self.set_param(url, param_name, param_value)
  current = BlackStack::Netting::params(url)
  return BlackStack::Netting::change_param(url, param_name, param_value) if current.has_key?(param_name)

  BlackStack::Netting::add_param(url, param_name, param_value)
end
|
780
|
+
|
781
|
+
# get the domain from any url
|
782
|
+
# url: string with the address. If it doesn't start with 'http://' or 'https://'
# (case insensitive), 'http://' is added at its beginning.
# Returns the host of the url, without the 'www.' at its beginning if any.
# Raises an exception if the url has no host.
def self.getDomainFromUrl(url)
  url = "http://#{url}" unless url =~ /^http:\/\//i || url =~ /^https:\/\//i

  host = URI.parse(url).host
  raise "Cannot get domain for #{url}" if host == nil

  host.sub(/^www\./, '')
end
|
797
|
+
|
798
|
+
# email: string with an email address; it is validated with its method `email?`.
# Returns the text after the last '@' of the address.
# Raises an exception if the string is not accepted by `email?`.
def self.getDomainFromEmail(email)
  raise "getDomainFromEmail: Wrong email format." unless email.email?

  email.split("@").last
end
|
805
|
+
|
806
|
+
# Looks up the whois record of the domain, and returns an array with the domains of the
# first email address found for each one of the 'Registrant Email', 'Admin Email' and
# 'Tech Email' labels, in lower case and without duplicates.
#
# domain: the domain to look up.
# allow_heuristic_to_avoid_hosting_companies: not developed yet; it has no effect.
def self.getWhoisDomains(domain, allow_heuristic_to_avoid_hosting_companies=false)
  record = Whois::Client.new.lookup(domain)

  patterns = [
    /Registrant Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Admin Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
    /Tech Email: (#{BlackStack::Strings::MATCH_EMAIL})/,
  ]

  domains = []
  patterns.each do |pattern|
    first_match = record.to_s.scan(pattern).first
    next if first_match == nil
    domains << BlackStack::Netting::getDomainFromEmail(first_match[0].downcase)
  end

  # remove duplicates
  domains = domains.uniq

  # TODO: develop the heuristic to avoid hosting companies
  # (parameter allow_heuristic_to_avoid_hosting_companies)

  domains
end
|
836
|
+
|
837
|
+
end # module Netting
|
838
|
+
|
839
|
+
end # module BlackStack
|