web-utils 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +18 -0
- data/lib/web_utils.rb +61 -14
- data/test/test_web_utils.rb +38 -0
- data/web-utils.gemspec +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9092bbc88a4edc5b36139415b3ab2696b94a2cf1
|
4
|
+
data.tar.gz: ef75ff41c727b948930fd406b11c9c4ed568c2e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccb2d2015f992091d2ca80dddb580820789ecd946944e46c683bec2324db658182d57f19821b1f3550cab62d3c98b1b4475fb06b97f8e1dc5c9d6e85756d1a1d
|
7
|
+
data.tar.gz: 24e85f2f6d86858861d9b6d872abee962c1ac39e431e264ed41adbbfe820731f3073a5fd4e1afe3f787bc02f605e999a7f17f1a7073accad9df73909a9e6b247
|
data/README.md
CHANGED
@@ -238,5 +238,23 @@ You basically pass the `Request` object to the method and it
|
|
238
238
|
looks at the referrer and returns true if it was not on the same
|
239
239
|
domain. Essentially tells you if the visitor just arrived.
|
240
240
|
|
241
|
+
`beeing_crawled?(request)`
|
242
|
+
--------------------------
|
243
|
+
|
244
|
+
While this method is useful, it only checks the presence of
|
245
|
+
these words `/bot|crawl|slurp|spider/i` to determine if the user
|
246
|
+
agent is a crawler or not. So it is pretty weak. If you have a
|
247
|
+
better way, please make a pull request.
|
248
|
+
|
249
|
+
`h(text)`
|
250
|
+
---------
|
251
|
+
|
252
|
+
Just the usual shortcut for `Rack::Utils.escape_html`.
|
253
|
+
|
254
|
+
`u(text)`
|
255
|
+
---------
|
256
|
+
|
257
|
+
Just the usual shortcut for `Rack::Utils.escape` for escaping
|
258
|
+
what you want to put in a URL.
|
241
259
|
|
242
260
|
|
data/lib/web_utils.rb
CHANGED
@@ -106,12 +106,17 @@ module WebUtils
|
|
106
106
|
end
|
107
107
|
module_function :ensure_key
|
108
108
|
|
109
|
-
|
109
|
+
ACCENTS =
|
110
110
|
"ÀÁÂÃÄÅàáâãäåĀāĂ㥹ÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞ"
|
111
|
-
|
111
|
+
WITHOUT_ACCENTS =
|
112
112
|
"AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssT"
|
113
113
|
def slugify s, force_lower=true
|
114
|
-
s = s.to_s
|
114
|
+
s = s.to_s
|
115
|
+
.tr(ACCENTS, WITHOUT_ACCENTS)
|
116
|
+
.tr(' .,;:?!/\'"()[]{}<>','-')
|
117
|
+
.gsub(/&/, 'and')
|
118
|
+
.gsub(/-+/,'-')
|
119
|
+
.gsub(/(^-|-$)/,'')
|
115
120
|
s = s.downcase if force_lower
|
116
121
|
escape(s)
|
117
122
|
end
|
@@ -135,15 +140,26 @@ module WebUtils
|
|
135
140
|
end
|
136
141
|
module_function :each_stub
|
137
142
|
|
138
|
-
|
143
|
+
TYPECASTABLE = [:bool, :boolean, :nil, :int, :integer, :float]
|
144
|
+
def automatic_typecast str, casted=TYPECASTABLE
|
139
145
|
return str unless str.is_a?(String)
|
140
|
-
|
146
|
+
casted = casted.map do |sym|
|
147
|
+
case sym
|
148
|
+
when :int
|
149
|
+
:integer
|
150
|
+
when :bool
|
151
|
+
:boolean
|
152
|
+
else
|
153
|
+
sym
|
154
|
+
end
|
155
|
+
end
|
156
|
+
if casted.include?(:boolean) and str=='true'
|
141
157
|
true
|
142
|
-
elsif casted.include?(:
|
158
|
+
elsif casted.include?(:boolean) and str=='false'
|
143
159
|
false
|
144
160
|
elsif casted.include?(:nil) and str==''
|
145
161
|
nil
|
146
|
-
elsif casted.include?(:
|
162
|
+
elsif casted.include?(:integer) and str=~/^-?\d+$/
|
147
163
|
str.to_i
|
148
164
|
elsif casted.include?(:float) and str=~/^-?\d*\.\d+$/
|
149
165
|
str.to_f
|
@@ -181,38 +197,53 @@ module WebUtils
|
|
181
197
|
end
|
182
198
|
module_function :external_link?
|
183
199
|
|
200
|
+
EMAIL_REGEX = /([^\s]+@[^\s]*[a-zA-Z])/
|
201
|
+
LINK_REGEX = /\b((https?:\/\/|ftps?:\/\/|www\.)([A-Za-z0-9\-_=%&@\?\.\/]+))\b/
|
184
202
|
def automatic_html s, br='<br>'
|
185
203
|
replaced = s.to_s.
|
186
|
-
gsub(
|
204
|
+
gsub(LINK_REGEX) do |str|
|
187
205
|
url = complete_link $1
|
188
206
|
"<a href='#{url}' target='_blank'>#{$1}</a>"
|
189
207
|
end.
|
190
|
-
gsub(
|
208
|
+
gsub(EMAIL_REGEX) do |str|
|
191
209
|
"<a href='mailto:#{$1.downcase}'>#{$1}</a>"
|
192
210
|
end
|
193
211
|
nl2br(replaced,br).gsub("@", "&#64;")
|
194
212
|
end
|
195
213
|
module_function :automatic_html
|
196
214
|
|
215
|
+
TAG_REGEX = /<[^>]*>/
|
197
216
|
def truncate s,c=320,ellipsis='...'
|
198
|
-
s.to_s
|
217
|
+
s.to_s
|
218
|
+
.gsub(TAG_REGEX, '')
|
219
|
+
.gsub(/\n/, ' ')
|
220
|
+
.sub(/^(.{#{c}}\w*).*$/m, '\1'+ellipsis)
|
199
221
|
end
|
200
222
|
module_function :truncate
|
201
223
|
|
202
224
|
def display_price int
|
203
|
-
|
204
|
-
|
225
|
+
unless int.is_a?(Integer)
|
226
|
+
raise(TypeError, 'The price needs to be the price in cents/pence as an integer')
|
227
|
+
end
|
228
|
+
("%.2f" % (int/100.0))
|
229
|
+
.sub(/\.00/, '')
|
230
|
+
.reverse
|
231
|
+
.gsub(/(\d{3})(?=\d)/, '\\1,')
|
232
|
+
.reverse
|
205
233
|
end
|
206
234
|
module_function :display_price
|
207
235
|
|
208
236
|
def parse_price string
|
209
|
-
|
237
|
+
unless string.is_a?(String)
|
238
|
+
raise(TypeError, 'The price needs to be parsed from a String')
|
239
|
+
end
|
210
240
|
("%.2f" % string.gsub(/[^\d\.\-]/, '')).gsub(/\./,'').to_i
|
211
241
|
end
|
212
242
|
module_function :parse_price
|
213
243
|
|
214
244
|
def branded_filename path, brand='WebUtils'
|
215
|
-
"#{File.dirname(path)}/#{brand}-#{File.basename(path)}"
|
245
|
+
"#{File.dirname(path)}/#{brand}-#{File.basename(path)}"
|
246
|
+
.sub(/^\.\//,'')
|
216
247
|
end
|
217
248
|
module_function :branded_filename
|
218
249
|
|
@@ -228,5 +259,21 @@ module WebUtils
|
|
228
259
|
end
|
229
260
|
module_function :initial_request?
|
230
261
|
|
262
|
+
BOT_REGEX = /bot|crawl|slurp|spider/i
|
263
|
+
def beeing_crawled? request
|
264
|
+
request.user_agent =~ BOT_REGEX
|
265
|
+
end
|
266
|
+
module_function :beeing_crawled?
|
267
|
+
|
268
|
+
def h text
|
269
|
+
escape_html text
|
270
|
+
end
|
271
|
+
module_function :h
|
272
|
+
|
273
|
+
def u text
|
274
|
+
escape text
|
275
|
+
end
|
276
|
+
module_function :u
|
277
|
+
|
231
278
|
end
|
232
279
|
|
data/test/test_web_utils.rb
CHANGED
@@ -329,6 +329,8 @@ describe WebUtils do
|
|
329
329
|
it 'Can change what is typecasted' do
|
330
330
|
assert_equal '10', utils.automatic_typecast('10', [:bool,:nil])
|
331
331
|
assert_equal true, utils.automatic_typecast('true', [:bool,:nil])
|
332
|
+
assert_equal '10', utils.automatic_typecast('10', [])
|
333
|
+
assert_equal '10', utils.automatic_typecast('10', [:fake])
|
332
334
|
end
|
333
335
|
end
|
334
336
|
describe 'when not a string' do
|
@@ -520,5 +522,41 @@ describe WebUtils do
|
|
520
522
|
end
|
521
523
|
end
|
522
524
|
|
525
|
+
describe '#beeing_crawled?' do
|
526
|
+
let(:req) {
|
527
|
+
Rack::Request.new(
|
528
|
+
Rack::MockRequest.env_for(
|
529
|
+
'/path',
|
530
|
+
{'HTTP_USER_AGENT'=>user_agent}
|
531
|
+
)
|
532
|
+
)
|
533
|
+
}
|
534
|
+
describe 'When user agent matches' do
|
535
|
+
let(:user_agent) {'Mega Bot from hell Version 6.6.6'}
|
536
|
+
it 'Returns true' do
|
537
|
+
assert utils.beeing_crawled?(req)
|
538
|
+
end
|
539
|
+
end
|
540
|
+
describe 'When user does not match' do
|
541
|
+
let(:user_agent) {'Firefox'}
|
542
|
+
it 'Returns false' do
|
543
|
+
refute utils.beeing_crawled?(req)
|
544
|
+
end
|
545
|
+
end
|
546
|
+
end
|
547
|
+
|
548
|
+
describe '#h' do
|
549
|
+
it 'Is sugar for Rack::Utils#escape_html' do
|
550
|
+
assert_equal '&lt;tag&gt;Boom&lt;&#x2F;tag&gt;', utils.h('<tag>Boom</tag>')
|
551
|
+
end
|
552
|
+
end
|
553
|
+
|
554
|
+
describe '#u' do
|
555
|
+
it 'Is sugar for Rack::Utils#escape' do
|
556
|
+
assert_equal '%2Ffilthy%2Furl%3Fmother%3Dfucker', utils.u('/filthy/url?mother=fucker')
|
557
|
+
end
|
558
|
+
end
|
559
|
+
|
560
|
+
|
523
561
|
end
|
524
562
|
|
data/web-utils.gemspec
CHANGED