web-utils 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +18 -0
- data/lib/web_utils.rb +61 -14
- data/test/test_web_utils.rb +38 -0
- data/web-utils.gemspec +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9092bbc88a4edc5b36139415b3ab2696b94a2cf1
|
4
|
+
data.tar.gz: ef75ff41c727b948930fd406b11c9c4ed568c2e7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccb2d2015f992091d2ca80dddb580820789ecd946944e46c683bec2324db658182d57f19821b1f3550cab62d3c98b1b4475fb06b97f8e1dc5c9d6e85756d1a1d
|
7
|
+
data.tar.gz: 24e85f2f6d86858861d9b6d872abee962c1ac39e431e264ed41adbbfe820731f3073a5fd4e1afe3f787bc02f605e999a7f17f1a7073accad9df73909a9e6b247
|
data/README.md
CHANGED
@@ -238,5 +238,23 @@ You basically pass the `Request` object to the method and it
|
|
238
238
|
looks at the referrer and returns true if it was not on the same
|
239
239
|
domain. Essentially tells you if the visitor just arrived.
|
240
240
|
|
241
|
+
`beeing_crawled?(request)`
|
242
|
+
--------------------------
|
243
|
+
|
244
|
+
While this method is useful, it only checks the presence of
|
245
|
+
these words `/bot|crawl|slurp|spider/i` to determine if the user
|
246
|
+
agent is a crawler or not. So it is pretty weak. If you have a
|
247
|
+
better way, please make a pull request.
|
248
|
+
|
249
|
+
`h(text)`
|
250
|
+
---------
|
251
|
+
|
252
|
+
Just the usual shortcut for `Rack::Utils.escape_html`.
|
253
|
+
|
254
|
+
`u(text)`
|
255
|
+
---------
|
256
|
+
|
257
|
+
Just the usual shortcut for `Rack::Utils.escape` for escaping
|
258
|
+
what you want to put in a URL.
|
241
259
|
|
242
260
|
|
data/lib/web_utils.rb
CHANGED
@@ -106,12 +106,17 @@ module WebUtils
|
|
106
106
|
end
|
107
107
|
module_function :ensure_key
|
108
108
|
|
109
|
-
|
109
|
+
ACCENTS =
|
110
110
|
"ÀÁÂÃÄÅàáâãäåĀāĂ㥹ÇçĆćĈĉĊċČčÐðĎďĐđÈÉÊËèéêëĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħÌÍÎÏìíîïĨĩĪīĬĭĮįİıĴĵĶķĸĹĺĻļĽľĿŀŁłÑñŃńŅņŇňʼnŊŋÒÓÔÕÖØòóôõöøŌōŎŏŐőŔŕŖŗŘřŚśŜŝŞ"
|
111
|
-
|
111
|
+
WITHOUT_ACCENTS =
|
112
112
|
"AAAAAAaaaaaaAaAaAaCcCcCcCcCcDdDdDdEEEEeeeeEeEeEeEeEeGgGgGgGgHhHhIIIIiiiiIiIiIiIiIiJjKkkLlLlLlLlLlNnNnNnNnnNnOOOOOOooooooOoOoOoRrRrRrSsSsSsSssT"
|
113
113
|
def slugify s, force_lower=true
|
114
|
-
s = s.to_s
|
114
|
+
s = s.to_s
|
115
|
+
.tr(ACCENTS, WITHOUT_ACCENTS)
|
116
|
+
.tr(' .,;:?!/\'"()[]{}<>','-')
|
117
|
+
.gsub(/&/, 'and')
|
118
|
+
.gsub(/-+/,'-')
|
119
|
+
.gsub(/(^-|-$)/,'')
|
115
120
|
s = s.downcase if force_lower
|
116
121
|
escape(s)
|
117
122
|
end
|
@@ -135,15 +140,26 @@ module WebUtils
|
|
135
140
|
end
|
136
141
|
module_function :each_stub
|
137
142
|
|
138
|
-
|
143
|
+
TYPECASTABLE = [:bool, :boolean, :nil, :int, :integer, :float]
|
144
|
+
def automatic_typecast str, casted=TYPECASTABLE
|
139
145
|
return str unless str.is_a?(String)
|
140
|
-
|
146
|
+
casted = casted.map do |sym|
|
147
|
+
case sym
|
148
|
+
when :int
|
149
|
+
:integer
|
150
|
+
when :bool
|
151
|
+
:boolean
|
152
|
+
else
|
153
|
+
sym
|
154
|
+
end
|
155
|
+
end
|
156
|
+
if casted.include?(:boolean) and str=='true'
|
141
157
|
true
|
142
|
-
elsif casted.include?(:
|
158
|
+
elsif casted.include?(:boolean) and str=='false'
|
143
159
|
false
|
144
160
|
elsif casted.include?(:nil) and str==''
|
145
161
|
nil
|
146
|
-
elsif casted.include?(:
|
162
|
+
elsif casted.include?(:integer) and str=~/^-?\d+$/
|
147
163
|
str.to_i
|
148
164
|
elsif casted.include?(:float) and str=~/^-?\d*\.\d+$/
|
149
165
|
str.to_f
|
@@ -181,38 +197,53 @@ module WebUtils
|
|
181
197
|
end
|
182
198
|
module_function :external_link?
|
183
199
|
|
200
|
+
EMAIL_REGEX = /([^\s]+@[^\s]*[a-zA-Z])/
|
201
|
+
LINK_REGEX = /\b((https?:\/\/|ftps?:\/\/|www\.)([A-Za-z0-9\-_=%&@\?\.\/]+))\b/
|
184
202
|
def automatic_html s, br='<br>'
|
185
203
|
replaced = s.to_s.
|
186
|
-
gsub(
|
204
|
+
gsub(LINK_REGEX) do |str|
|
187
205
|
url = complete_link $1
|
188
206
|
"<a href='#{url}' target='_blank'>#{$1}</a>"
|
189
207
|
end.
|
190
|
-
gsub(
|
208
|
+
gsub(EMAIL_REGEX) do |str|
|
191
209
|
"<a href='mailto:#{$1.downcase}'>#{$1}</a>"
|
192
210
|
end
|
193
211
|
nl2br(replaced,br).gsub("@", "&#64;")
|
194
212
|
end
|
195
213
|
module_function :automatic_html
|
196
214
|
|
215
|
+
TAG_REGEX = /<[^>]*>/
|
197
216
|
def truncate s,c=320,ellipsis='...'
|
198
|
-
s.to_s
|
217
|
+
s.to_s
|
218
|
+
.gsub(TAG_REGEX, '')
|
219
|
+
.gsub(/\n/, ' ')
|
220
|
+
.sub(/^(.{#{c}}\w*).*$/m, '\1'+ellipsis)
|
199
221
|
end
|
200
222
|
module_function :truncate
|
201
223
|
|
202
224
|
def display_price int
|
203
|
-
|
204
|
-
|
225
|
+
unless int.is_a?(Integer)
|
226
|
+
raise(TypeError, 'The price needs to be the price in cents/pence as an integer')
|
227
|
+
end
|
228
|
+
("%.2f" % (int/100.0))
|
229
|
+
.sub(/\.00/, '')
|
230
|
+
.reverse
|
231
|
+
.gsub(/(\d{3})(?=\d)/, '\\1,')
|
232
|
+
.reverse
|
205
233
|
end
|
206
234
|
module_function :display_price
|
207
235
|
|
208
236
|
def parse_price string
|
209
|
-
|
237
|
+
unless string.is_a?(String)
|
238
|
+
raise(TypeError, 'The price needs to be parsed from a String')
|
239
|
+
end
|
210
240
|
("%.2f" % string.gsub(/[^\d\.\-]/, '')).gsub(/\./,'').to_i
|
211
241
|
end
|
212
242
|
module_function :parse_price
|
213
243
|
|
214
244
|
def branded_filename path, brand='WebUtils'
|
215
|
-
"#{File.dirname(path)}/#{brand}-#{File.basename(path)}"
|
245
|
+
"#{File.dirname(path)}/#{brand}-#{File.basename(path)}"
|
246
|
+
.sub(/^\.\//,'')
|
216
247
|
end
|
217
248
|
module_function :branded_filename
|
218
249
|
|
@@ -228,5 +259,21 @@ module WebUtils
|
|
228
259
|
end
|
229
260
|
module_function :initial_request?
|
230
261
|
|
262
|
+
BOT_REGEX = /bot|crawl|slurp|spider/i
|
263
|
+
def beeing_crawled? request
|
264
|
+
request.user_agent =~ BOT_REGEX
|
265
|
+
end
|
266
|
+
module_function :beeing_crawled?
|
267
|
+
|
268
|
+
def h text
|
269
|
+
escape_html text
|
270
|
+
end
|
271
|
+
module_function :h
|
272
|
+
|
273
|
+
def u text
|
274
|
+
escape text
|
275
|
+
end
|
276
|
+
module_function :u
|
277
|
+
|
231
278
|
end
|
232
279
|
|
data/test/test_web_utils.rb
CHANGED
@@ -329,6 +329,8 @@ describe WebUtils do
|
|
329
329
|
it 'Can change what is typecasted' do
|
330
330
|
assert_equal '10', utils.automatic_typecast('10', [:bool,:nil])
|
331
331
|
assert_equal true, utils.automatic_typecast('true', [:bool,:nil])
|
332
|
+
assert_equal '10', utils.automatic_typecast('10', [])
|
333
|
+
assert_equal '10', utils.automatic_typecast('10', [:fake])
|
332
334
|
end
|
333
335
|
end
|
334
336
|
describe 'when not a string' do
|
@@ -520,5 +522,41 @@ describe WebUtils do
|
|
520
522
|
end
|
521
523
|
end
|
522
524
|
|
525
|
+
describe '#beeing_crawled?' do
|
526
|
+
let(:req) {
|
527
|
+
Rack::Request.new(
|
528
|
+
Rack::MockRequest.env_for(
|
529
|
+
'/path',
|
530
|
+
{'HTTP_USER_AGENT'=>user_agent}
|
531
|
+
)
|
532
|
+
)
|
533
|
+
}
|
534
|
+
describe 'When user agent matches' do
|
535
|
+
let(:user_agent) {'Mega Bot from hell Version 6.6.6'}
|
536
|
+
it 'Returns true' do
|
537
|
+
assert utils.beeing_crawled?(req)
|
538
|
+
end
|
539
|
+
end
|
540
|
+
describe 'When user does not match' do
|
541
|
+
let(:user_agent) {'Firefox'}
|
542
|
+
it 'Returns false' do
|
543
|
+
refute utils.beeing_crawled?(req)
|
544
|
+
end
|
545
|
+
end
|
546
|
+
end
|
547
|
+
|
548
|
+
describe '#h' do
|
549
|
+
it 'Is sugar for Rack::Utils#escape_html' do
|
550
|
+
assert_equal '<tag>Boom</tag>', utils.h('<tag>Boom</tag>')
|
551
|
+
end
|
552
|
+
end
|
553
|
+
|
554
|
+
describe '#u' do
|
555
|
+
it 'Is sugar for Rack::Utils#escape' do
|
556
|
+
assert_equal '%2Ffilthy%2Furl%3Fmother%3Dfucker', utils.u('/filthy/url?mother=fucker')
|
557
|
+
end
|
558
|
+
end
|
559
|
+
|
560
|
+
|
523
561
|
end
|
524
562
|
|
data/web-utils.gemspec
CHANGED