crawler_detect 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/crawler_detect.gemspec +1 -1
- data/lib/crawler_detect/library/crawlers.rb +107 -58
- data/lib/crawler_detect/library/exclusions.rb +1 -1
- data/lib/crawler_detect/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9f469a4f042b4ce7600daf8732b42a0943af6c466f028db67366381846155df1
|
|
4
|
+
data.tar.gz: 469b493cacd0d76d0a9bfb94d7905dc39298d297a6eff084c3461351c22e60ce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5ac0fe561eea7de310abc90f2265770218f004d7c069baaf80e22ecee02ef1b9ed01dc68b2b35e166738320e3d7c269f156bae4580e74c05ed505f64d6942596
|
|
7
|
+
data.tar.gz: dafeb5060813d91c75a1ffbcf063f5fd229b635ea3d83883e5cf546fe6286fa68e5ea1c9834caeda40997e43c0494e6fc90f842d680eaaeca783517c967e2cb2
|
data/crawler_detect.gemspec
CHANGED
|
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
|
25
25
|
spec.require_paths = ["lib"]
|
|
26
26
|
|
|
27
27
|
spec.add_development_dependency "bundler", "~> 1.15"
|
|
28
|
-
spec.add_development_dependency "rake", "
|
|
28
|
+
spec.add_development_dependency "rake", ">= 10.0"
|
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
|
30
30
|
|
|
31
31
|
spec.add_development_dependency "activesupport", "~> 5.2.0"
|
|
data/lib/crawler_detect/library/crawlers.rb
CHANGED
|
@@ -7,12 +7,6 @@ module CrawlerDetect
|
|
|
7
7
|
CRAWLERS = %q[
|
|
8
8
|
.*Java.*outbrain
|
|
9
9
|
YLT
|
|
10
|
-
008\/
|
|
11
|
-
192\.comAgent
|
|
12
|
-
2ip\.ru
|
|
13
|
-
404checker
|
|
14
|
-
404enemy
|
|
15
|
-
80legs
|
|
16
10
|
^b0t$
|
|
17
11
|
^bluefish
|
|
18
12
|
^Calypso v\/
|
|
@@ -39,9 +33,17 @@ module CrawlerDetect
|
|
|
39
33
|
^WordPress\.com
|
|
40
34
|
^XRL\/[0-9]
|
|
41
35
|
^ZmEu
|
|
36
|
+
008\/
|
|
37
|
+
13TABS
|
|
38
|
+
192\.comAgent
|
|
39
|
+
2ip\.ru
|
|
40
|
+
404checker
|
|
41
|
+
404enemy
|
|
42
|
+
7Siters
|
|
43
|
+
80legs
|
|
44
|
+
a\.pr-cy\.ru
|
|
42
45
|
a3logics\.in
|
|
43
46
|
A6-Indexer
|
|
44
|
-
a\.pr-cy\.ru
|
|
45
47
|
Abonti
|
|
46
48
|
Aboundex
|
|
47
49
|
aboutthedomain
|
|
@@ -61,6 +63,7 @@ AHC
|
|
|
61
63
|
aihit
|
|
62
64
|
aiohttp\/
|
|
63
65
|
Airmail
|
|
66
|
+
Akamai_Site_Analyzer
|
|
64
67
|
akka-http\/
|
|
65
68
|
akula\/
|
|
66
69
|
alertra
|
|
@@ -90,13 +93,15 @@ AportWorm\/[0-9]
|
|
|
90
93
|
AppBeat\/[0-9]
|
|
91
94
|
AppEngine-Google
|
|
92
95
|
AppStoreScraperZ
|
|
96
|
+
Aprc\/[0-9]
|
|
93
97
|
Arachmo
|
|
94
98
|
arachnode
|
|
95
99
|
Arachnophilia
|
|
96
100
|
aria2
|
|
97
101
|
Arukereso
|
|
98
|
-
asafaweb
|
|
102
|
+
asafaweb\.com
|
|
99
103
|
AskQuickly
|
|
104
|
+
Ask Jeeves
|
|
100
105
|
ASPSeek
|
|
101
106
|
Asterias
|
|
102
107
|
Astute
|
|
@@ -108,12 +113,14 @@ axios\/
|
|
|
108
113
|
B-l-i-t-z-B-O-T
|
|
109
114
|
Backlink-Ceck
|
|
110
115
|
backlink-check
|
|
116
|
+
BacklinkHttpStatus
|
|
111
117
|
BackStreet
|
|
112
118
|
BackWeb
|
|
113
119
|
Bad-Neighborhood
|
|
114
120
|
Badass
|
|
115
121
|
baidu\.com
|
|
116
122
|
Bandit
|
|
123
|
+
basicstate
|
|
117
124
|
BatchFTP
|
|
118
125
|
Battleztar\ Bazinga
|
|
119
126
|
baypup\/[0-9]
|
|
@@ -150,6 +157,7 @@ Braintree-Webhooks
|
|
|
150
157
|
Branch Metrics API
|
|
151
158
|
Branch-Passthrough
|
|
152
159
|
Brandprotect
|
|
160
|
+
BrandVerity\/[0-9]
|
|
153
161
|
Brandwatch
|
|
154
162
|
Brodie\/
|
|
155
163
|
Browsershots
|
|
@@ -165,12 +173,13 @@ BuzzSumo
|
|
|
165
173
|
CAAM\/[0-9]
|
|
166
174
|
CakePHP
|
|
167
175
|
Calculon
|
|
176
|
+
Canary%20Mail
|
|
168
177
|
CapsuleChecker
|
|
169
178
|
CaretNail
|
|
170
179
|
catexplorador
|
|
171
|
-
cb crawl
|
|
172
180
|
CC Metadata Scaper
|
|
173
181
|
Cegbfeieh
|
|
182
|
+
censys
|
|
174
183
|
Cerberian Drtrs
|
|
175
184
|
CERT\.at-Statistics-Survey
|
|
176
185
|
cg-eye
|
|
@@ -184,6 +193,7 @@ ChinaClaw
|
|
|
184
193
|
Chirp\/[0-9]
|
|
185
194
|
chkme\.com
|
|
186
195
|
Chlooe
|
|
196
|
+
Chromaxa
|
|
187
197
|
CirrusExplorer\/
|
|
188
198
|
CISPA Vulnerability Notification
|
|
189
199
|
Citoid
|
|
@@ -210,19 +220,20 @@ CopyRightCheck
|
|
|
210
220
|
Copyscape
|
|
211
221
|
Cosmos4j\.feedback
|
|
212
222
|
Covario-IDS
|
|
213
|
-
CrawlForMe\/[0-9]
|
|
214
223
|
Crescent
|
|
215
|
-
cron-job\.org
|
|
216
224
|
Crowsnest
|
|
225
|
+
Criteo
|
|
217
226
|
CSHttp
|
|
218
227
|
curb
|
|
219
228
|
Curious George
|
|
220
229
|
curl
|
|
221
230
|
cuwhois\/[0-9]
|
|
222
231
|
cybo\.com
|
|
232
|
+
DAP\/NetHTTP
|
|
223
233
|
DareBoost
|
|
224
234
|
DatabaseDriverMysqli
|
|
225
235
|
DataCha0s
|
|
236
|
+
Datafeedwatch
|
|
226
237
|
Datanyze
|
|
227
238
|
DataparkSearch
|
|
228
239
|
dataprovider
|
|
@@ -237,6 +248,7 @@ Digg
|
|
|
237
248
|
Digincore
|
|
238
249
|
DigitalPebble
|
|
239
250
|
Dirbuster
|
|
251
|
+
Disqus\/
|
|
240
252
|
Dispatch\/
|
|
241
253
|
DittoSpyder
|
|
242
254
|
dlvr
|
|
@@ -267,16 +279,16 @@ ec2linkfinder
|
|
|
267
279
|
eCairn-Grabber
|
|
268
280
|
eCatch
|
|
269
281
|
ECCP
|
|
270
|
-
echocrawl
|
|
271
282
|
eContext\/
|
|
272
283
|
Ecxi
|
|
273
284
|
EirGrabber
|
|
274
285
|
ElectricMonk
|
|
275
286
|
elefent
|
|
276
287
|
EMail Exractor
|
|
277
|
-
Email%20Extractor%20Lite
|
|
278
288
|
EMail\ Wolf
|
|
289
|
+
Email%20Extractor
|
|
279
290
|
EmailWolf
|
|
291
|
+
Embarcadero
|
|
280
292
|
Embed PHP Library
|
|
281
293
|
Embedly
|
|
282
294
|
endo\/
|
|
@@ -289,8 +301,7 @@ Evrinid
|
|
|
289
301
|
ExactSearch
|
|
290
302
|
ExaleadCloudview
|
|
291
303
|
Excel\/
|
|
292
|
-
|
|
293
|
-
ExperianCrawlUK
|
|
304
|
+
exif
|
|
294
305
|
Exploratodo
|
|
295
306
|
Express WebPictures
|
|
296
307
|
ExtractorPro
|
|
@@ -304,6 +315,7 @@ Faraday v
|
|
|
304
315
|
fasthttp
|
|
305
316
|
Faveeo
|
|
306
317
|
Favicon downloader
|
|
318
|
+
faviconkit
|
|
307
319
|
FavOrg
|
|
308
320
|
Feed Wrangler
|
|
309
321
|
Feedable\/
|
|
@@ -337,6 +349,7 @@ Flunky
|
|
|
337
349
|
flynxapp
|
|
338
350
|
forensiq
|
|
339
351
|
FoundSeoTool\/[0-9]
|
|
352
|
+
http:\/\/www.neomo.de\/
|
|
340
353
|
free thumbnails
|
|
341
354
|
Freeuploader
|
|
342
355
|
FreeWebMonitoring SiteChecker
|
|
@@ -349,10 +362,12 @@ geek-tools
|
|
|
349
362
|
Genderanalyzer
|
|
350
363
|
Genieo
|
|
351
364
|
GentleSource
|
|
365
|
+
GetCode
|
|
352
366
|
Getintent
|
|
353
367
|
GetLinkInfo
|
|
354
368
|
getprismatic\.com
|
|
355
369
|
GetRight
|
|
370
|
+
getroot
|
|
356
371
|
GetURLInfo\/[0-9]
|
|
357
372
|
GetWeb
|
|
358
373
|
Ghost Inspector
|
|
@@ -362,15 +377,16 @@ github-camo
|
|
|
362
377
|
github\.com\/
|
|
363
378
|
Go [\d\.]* package http
|
|
364
379
|
Go http package
|
|
365
|
-
Go!Zilla
|
|
366
380
|
Go-Ahead-Got-It
|
|
367
381
|
Go-http-client
|
|
382
|
+
Go!Zilla
|
|
368
383
|
gobyus
|
|
369
384
|
gofetch
|
|
370
385
|
GomezAgent
|
|
371
386
|
gooblog
|
|
372
387
|
Goodzer\/[0-9]
|
|
373
|
-
|
|
388
|
+
Google AppsViewer
|
|
389
|
+
Google Desktop
|
|
374
390
|
Google favicon
|
|
375
391
|
Google Keyword Suggestion
|
|
376
392
|
Google Keyword Tool
|
|
@@ -378,6 +394,7 @@ Google Page Speed Insights
|
|
|
378
394
|
Google PP Default
|
|
379
395
|
Google Search Console
|
|
380
396
|
Google Web Preview
|
|
397
|
+
google_partner_monitoring
|
|
381
398
|
Google-Adwords
|
|
382
399
|
Google-Apps-Script
|
|
383
400
|
Google-Calendar-Importer
|
|
@@ -388,10 +405,13 @@ Google-SearchByImage
|
|
|
388
405
|
Google-Site-Verification
|
|
389
406
|
Google-Structured-Data-Testing-Tool
|
|
390
407
|
Google-Youtube-Links
|
|
391
|
-
|
|
408
|
+
google-xrawler
|
|
409
|
+
GoogleCloudMonitoring
|
|
392
410
|
GoogleDocs
|
|
393
411
|
GoogleHC\/
|
|
394
412
|
GoogleProducer
|
|
413
|
+
GoogleSites
|
|
414
|
+
Google-Transparency-Report
|
|
395
415
|
Gookey
|
|
396
416
|
GoScraper
|
|
397
417
|
GoSpotCheck
|
|
@@ -404,6 +424,8 @@ GrabNet
|
|
|
404
424
|
Grafula
|
|
405
425
|
Grammarly
|
|
406
426
|
GrapeFX
|
|
427
|
+
Gregarius
|
|
428
|
+
GRequests
|
|
407
429
|
grokkit
|
|
408
430
|
grouphigh
|
|
409
431
|
grub-client
|
|
@@ -415,14 +437,17 @@ gvfs\/
|
|
|
415
437
|
HAA(A)?RTLAND http client
|
|
416
438
|
Haansoft
|
|
417
439
|
hackney\/
|
|
440
|
+
Hadi Agent
|
|
418
441
|
Hatena
|
|
419
442
|
Havij
|
|
420
443
|
hawkReader
|
|
444
|
+
HeadlessChrome
|
|
421
445
|
HEADMasterSEO
|
|
422
446
|
HeartRails_Capture
|
|
423
447
|
help@dataminr\.com
|
|
424
448
|
heritrix
|
|
425
449
|
historious\/
|
|
450
|
+
hkedcity
|
|
426
451
|
hledejLevne\.cz\/[0-9]
|
|
427
452
|
Hloader
|
|
428
453
|
HMView
|
|
@@ -436,6 +461,10 @@ ht:\/\/check
|
|
|
436
461
|
htdig
|
|
437
462
|
HTMLparser
|
|
438
463
|
htmlyse\.com
|
|
464
|
+
HTTP Banner Detection
|
|
465
|
+
HTTP_Compression_Test
|
|
466
|
+
http_request2
|
|
467
|
+
http_requester
|
|
439
468
|
http-get
|
|
440
469
|
HTTP-Header-Abfrage
|
|
441
470
|
http-kit
|
|
@@ -443,22 +472,20 @@ http-request\/
|
|
|
443
472
|
HTTP-Tiny
|
|
444
473
|
HTTP::Lite
|
|
445
474
|
http\.rb\/
|
|
446
|
-
|
|
447
|
-
http_request2
|
|
448
|
-
http_requester
|
|
475
|
+
http_get
|
|
449
476
|
HttpComponents
|
|
450
477
|
httphr
|
|
451
478
|
HTTPMon
|
|
479
|
+
httpRequest
|
|
452
480
|
httpscheck
|
|
453
481
|
httpssites_power
|
|
454
482
|
httpunit
|
|
455
483
|
HttpUrlConnection
|
|
456
484
|
httrack
|
|
457
485
|
huaweisymantec
|
|
458
|
-
HubPages.*crawlingpolicy
|
|
459
486
|
HubSpot
|
|
460
487
|
Humanlinks
|
|
461
|
-
HyperZbozi
|
|
488
|
+
HyperZbozi\.cz Feeder
|
|
462
489
|
i2kconnect\/
|
|
463
490
|
Iblog
|
|
464
491
|
ichiro
|
|
@@ -469,11 +496,12 @@ IDwhois\/[0-9]
|
|
|
469
496
|
Iframely
|
|
470
497
|
igdeSpyder
|
|
471
498
|
IlTrovatore
|
|
472
|
-
ImageVisu\/
|
|
473
499
|
Image\ Fetch
|
|
474
500
|
Image\ Sucker
|
|
475
501
|
ImageEngine\/
|
|
502
|
+
ImageVisu\/
|
|
476
503
|
Imagga
|
|
504
|
+
imagineeasy
|
|
477
505
|
imgsizer
|
|
478
506
|
InAGist
|
|
479
507
|
inbound\.li parser
|
|
@@ -491,8 +519,8 @@ Integrity
|
|
|
491
519
|
integromedb
|
|
492
520
|
Intelliseek
|
|
493
521
|
InterGET
|
|
494
|
-
Internet\ Ninja
|
|
495
522
|
internet_archive
|
|
523
|
+
Internet\ Ninja
|
|
496
524
|
InternetSeer
|
|
497
525
|
internetVista monitor
|
|
498
526
|
intraVnews
|
|
@@ -507,7 +535,10 @@ Iria
|
|
|
507
535
|
Irokez
|
|
508
536
|
isitup\.org
|
|
509
537
|
iskanie
|
|
538
|
+
isUp\.li
|
|
539
|
+
iThemes Sync\/[0-9]
|
|
510
540
|
iZSearch
|
|
541
|
+
JAHHO
|
|
511
542
|
janforman
|
|
512
543
|
Jaunt\/
|
|
513
544
|
Jbrofuzz
|
|
@@ -555,7 +586,7 @@ LibVLC
|
|
|
555
586
|
LibWeb
|
|
556
587
|
Libwhisker
|
|
557
588
|
libwww
|
|
558
|
-
Licorne
|
|
589
|
+
Licorne
|
|
559
590
|
Liferea\/
|
|
560
591
|
Lightspeedsystems
|
|
561
592
|
Likse
|
|
@@ -590,7 +621,7 @@ LYT\.SR
|
|
|
590
621
|
mabontland
|
|
591
622
|
Mag-Net
|
|
592
623
|
MagpieRSS
|
|
593
|
-
Mail
|
|
624
|
+
Mail\.Ru
|
|
594
625
|
MailChimp
|
|
595
626
|
Majestic12
|
|
596
627
|
makecontact\/
|
|
@@ -605,6 +636,7 @@ Mata\ Hari
|
|
|
605
636
|
Mediapartners-Google
|
|
606
637
|
mediawords
|
|
607
638
|
MegaIndex\.ru
|
|
639
|
+
MeltwaterNews
|
|
608
640
|
Melvil Rawi\/
|
|
609
641
|
MergeFlow-PageReader
|
|
610
642
|
Metaspinner
|
|
@@ -619,7 +651,7 @@ Microsoft\ Data\ Access
|
|
|
619
651
|
MIDown\ tool
|
|
620
652
|
MIIxpc
|
|
621
653
|
Mindjet
|
|
622
|
-
Miniature
|
|
654
|
+
Miniature\.io\/
|
|
623
655
|
Miniflux
|
|
624
656
|
Mister\ PiX
|
|
625
657
|
mixdata dot com
|
|
@@ -630,8 +662,8 @@ Mnogosearch
|
|
|
630
662
|
mogimogi
|
|
631
663
|
Mojeek
|
|
632
664
|
Mojolicious \(Perl\)
|
|
633
|
-
monitis
|
|
634
665
|
Monit\/
|
|
666
|
+
monitis
|
|
635
667
|
Monitority\/[0-9]
|
|
636
668
|
montastic
|
|
637
669
|
MonTools
|
|
@@ -687,6 +719,8 @@ Nmap Scripting Engine
|
|
|
687
719
|
node-superagent
|
|
688
720
|
node-urllib\/
|
|
689
721
|
node\.io
|
|
722
|
+
Nodemeter
|
|
723
|
+
NodePing
|
|
690
724
|
nominet\.org\.uk
|
|
691
725
|
Norton-Safeweb
|
|
692
726
|
Notifixious
|
|
@@ -700,9 +734,9 @@ NYU
|
|
|
700
734
|
Ocelli\/[0-9]
|
|
701
735
|
Octopus
|
|
702
736
|
oegp
|
|
703
|
-
og-scraper\/
|
|
704
737
|
Offline Explorer
|
|
705
738
|
Offline\ Navigator
|
|
739
|
+
og-scraper\/
|
|
706
740
|
okhttp
|
|
707
741
|
Omea Reader
|
|
708
742
|
omgili
|
|
@@ -724,9 +758,9 @@ ownCloud News
|
|
|
724
758
|
OxfordCloudService\/[0-9]
|
|
725
759
|
Page Analyzer
|
|
726
760
|
Page Valet
|
|
727
|
-
page2rss
|
|
728
|
-
page\ scorer
|
|
729
761
|
page_verifier
|
|
762
|
+
page\ scorer
|
|
763
|
+
page2rss
|
|
730
764
|
PageAnalyzer
|
|
731
765
|
PageGrabber
|
|
732
766
|
PagePeeker
|
|
@@ -740,7 +774,6 @@ Pavuk
|
|
|
740
774
|
PayPal IPN
|
|
741
775
|
pcBrowser
|
|
742
776
|
Pcore-HTTP
|
|
743
|
-
PEAR HTTPRequest
|
|
744
777
|
Pearltrees
|
|
745
778
|
PECL::HTTP
|
|
746
779
|
peerindex
|
|
@@ -750,7 +783,6 @@ Perlu -
|
|
|
750
783
|
PhantomJS Screenshoter
|
|
751
784
|
PhantomJS\/
|
|
752
785
|
Photon\/
|
|
753
|
-
phpcrawl
|
|
754
786
|
phpservermon
|
|
755
787
|
Pi-Monster
|
|
756
788
|
Picscout
|
|
@@ -767,10 +799,10 @@ pinterest\.com
|
|
|
767
799
|
Pixray
|
|
768
800
|
Pizilla
|
|
769
801
|
Plagger\/
|
|
770
|
-
PleaseCrawl
|
|
771
802
|
Ploetz \+ Zeller
|
|
772
803
|
Plukkie
|
|
773
804
|
plumanalytics
|
|
805
|
+
PocketImageCache
|
|
774
806
|
PocketParser
|
|
775
807
|
Pockey
|
|
776
808
|
POE-Component-Client-HTTP
|
|
@@ -788,7 +820,7 @@ PritTorrent\/[0-9]
|
|
|
788
820
|
Prlog
|
|
789
821
|
probethenet
|
|
790
822
|
Project 25499
|
|
791
|
-
Promotion_Tools_www
|
|
823
|
+
Promotion_Tools_www\.searchenginepromotionhelp\.com
|
|
792
824
|
prospectb2b
|
|
793
825
|
Protopage
|
|
794
826
|
ProWebWalker
|
|
@@ -806,15 +838,16 @@ Qirina Hurdler
|
|
|
806
838
|
QQDownload
|
|
807
839
|
QrafterPro
|
|
808
840
|
Qseero
|
|
809
|
-
Qualidator
|
|
841
|
+
Qualidator\.com SiteAnalyzer
|
|
810
842
|
QueryN\ Metasearch
|
|
843
|
+
queuedriver
|
|
811
844
|
Quora Link Preview
|
|
812
845
|
Qwantify
|
|
813
846
|
Radian6
|
|
814
847
|
RankActive
|
|
815
848
|
RankFlex
|
|
816
849
|
RankSonicSiteAuditor
|
|
817
|
-
|
|
850
|
+
Re-re Studio
|
|
818
851
|
Readability
|
|
819
852
|
RealDownload
|
|
820
853
|
RealPlayer%20Downloader
|
|
@@ -835,28 +868,31 @@ Robosourcer
|
|
|
835
868
|
Robozilla\/[0-9]
|
|
836
869
|
ROI Hunter
|
|
837
870
|
RPT-HTTPClient
|
|
871
|
+
rss reader
|
|
838
872
|
RSSOwl
|
|
839
873
|
RssReader\/
|
|
840
874
|
safe-agent-scanner
|
|
841
875
|
SalesIntelligent
|
|
842
876
|
Saleslift
|
|
877
|
+
Sendsay\.Ru
|
|
843
878
|
SauceNAO
|
|
844
879
|
SBIder
|
|
845
880
|
scalaj-http
|
|
846
881
|
scan\.lol
|
|
847
882
|
ScanAlert
|
|
848
|
-
ScrapeBox Page Scanner
|
|
849
883
|
Scoop
|
|
850
884
|
scooter
|
|
851
885
|
ScoutJet
|
|
852
886
|
ScoutURLMonitor
|
|
887
|
+
ScrapeBox Page Scanner
|
|
888
|
+
SimpleScraper
|
|
853
889
|
Scrapy
|
|
854
890
|
Screaming
|
|
855
891
|
ScreenShotService\/[0-9]
|
|
856
892
|
Scrubby
|
|
857
893
|
Scrutiny\/
|
|
858
|
-
Search37\/
|
|
859
894
|
search\.thunderstone
|
|
895
|
+
Search37\/
|
|
860
896
|
Searchestate
|
|
861
897
|
SearchExpress
|
|
862
898
|
SearchSight
|
|
@@ -868,19 +904,20 @@ Semrush
|
|
|
868
904
|
sentry\/
|
|
869
905
|
SEO Browser
|
|
870
906
|
Seo Servis
|
|
871
|
-
seo-nastroj
|
|
907
|
+
seo-nastroj\.cz
|
|
908
|
+
seo4ajax
|
|
872
909
|
Seobility
|
|
873
910
|
SEOCentro
|
|
874
911
|
SeoCheck
|
|
875
912
|
SEOkicks
|
|
876
913
|
Seomoz
|
|
877
914
|
SEOprofiler
|
|
915
|
+
SEOsearch\/
|
|
878
916
|
SeopultContentAnalyzer
|
|
879
917
|
seoscanners
|
|
880
918
|
SEOstats
|
|
881
919
|
Server Density Service Monitoring
|
|
882
920
|
servernfo\.com
|
|
883
|
-
SetCronJob\/
|
|
884
921
|
sexsearcher
|
|
885
922
|
Seznam
|
|
886
923
|
Shelob
|
|
@@ -897,8 +934,8 @@ SimplyFast
|
|
|
897
934
|
Siphon
|
|
898
935
|
SISTRIX
|
|
899
936
|
Site-Shot\/
|
|
900
|
-
Site24x7
|
|
901
937
|
Site\ Sucker
|
|
938
|
+
Site24x7
|
|
902
939
|
SiteBar
|
|
903
940
|
Sitebeam
|
|
904
941
|
Sitebulb\/
|
|
@@ -908,6 +945,7 @@ SiteGuardian
|
|
|
908
945
|
Siteimprove
|
|
909
946
|
SiteIndexed
|
|
910
947
|
Sitemap(s)? Generator
|
|
948
|
+
SitemapGenerator
|
|
911
949
|
SiteMonitor
|
|
912
950
|
Siteshooter B0t
|
|
913
951
|
SiteSnagger
|
|
@@ -923,9 +961,10 @@ SlySearch
|
|
|
923
961
|
SmartDownload
|
|
924
962
|
SMRF URL Expander
|
|
925
963
|
SMUrlExpander
|
|
926
|
-
Snarfer\/
|
|
927
964
|
Snake
|
|
928
965
|
Snappy
|
|
966
|
+
SnapSearch
|
|
967
|
+
Snarfer\/
|
|
929
968
|
SniffRSS
|
|
930
969
|
sniptracker
|
|
931
970
|
Snoopy
|
|
@@ -935,6 +974,7 @@ SortSite
|
|
|
935
974
|
Sottopop
|
|
936
975
|
sovereign\.ai
|
|
937
976
|
SpaceBison
|
|
977
|
+
SpamExperts
|
|
938
978
|
Spammen
|
|
939
979
|
Spanner
|
|
940
980
|
spaziodati
|
|
@@ -956,8 +996,9 @@ Statastico\/
|
|
|
956
996
|
StatusCake
|
|
957
997
|
Steeler
|
|
958
998
|
Stratagems Kumo
|
|
959
|
-
Stroke
|
|
999
|
+
Stroke\.cz
|
|
960
1000
|
StudioFACA
|
|
1001
|
+
StumbleUpon
|
|
961
1002
|
suchen
|
|
962
1003
|
Sucuri
|
|
963
1004
|
summify
|
|
@@ -976,6 +1017,7 @@ T0PHackTeam
|
|
|
976
1017
|
tAkeOut
|
|
977
1018
|
Tarantula\/
|
|
978
1019
|
Taringa UGC
|
|
1020
|
+
TarmotGezgin
|
|
979
1021
|
Teleport
|
|
980
1022
|
Telesoft
|
|
981
1023
|
Telesphoreo
|
|
@@ -995,18 +1037,21 @@ theoldreader\.com
|
|
|
995
1037
|
Thinklab
|
|
996
1038
|
Thumbshots
|
|
997
1039
|
ThumbSniper
|
|
1040
|
+
timewe\.net
|
|
998
1041
|
TinEye
|
|
999
1042
|
Tiny Tiny RSS
|
|
1000
1043
|
TLSProbe\/
|
|
1001
1044
|
Toata
|
|
1002
1045
|
topster
|
|
1003
|
-
touche
|
|
1004
|
-
Traackr
|
|
1046
|
+
touche\.com
|
|
1047
|
+
Traackr\.com
|
|
1048
|
+
tracemyfile
|
|
1005
1049
|
TrapitAgent
|
|
1006
1050
|
Trendiction
|
|
1007
|
-
Trendsmap
|
|
1051
|
+
Trendsmap
|
|
1008
1052
|
trendspottr\.com
|
|
1009
1053
|
truwoGPS
|
|
1054
|
+
TryJsoup
|
|
1010
1055
|
TulipChain
|
|
1011
1056
|
Turingos
|
|
1012
1057
|
Turnitin
|
|
@@ -1021,7 +1066,6 @@ Twisted PageGetter
|
|
|
1021
1066
|
Typhoeus
|
|
1022
1067
|
ubermetrics-technologies
|
|
1023
1068
|
uclassify
|
|
1024
|
-
uCrawlr\/
|
|
1025
1069
|
UdmSearch
|
|
1026
1070
|
unirest-java
|
|
1027
1071
|
UniversalFeedParser
|
|
@@ -1034,7 +1078,7 @@ Upflow
|
|
|
1034
1078
|
Uptimia
|
|
1035
1079
|
URL Verifier
|
|
1036
1080
|
URLChecker
|
|
1037
|
-
URLitor
|
|
1081
|
+
URLitor\.com
|
|
1038
1082
|
urlresolver
|
|
1039
1083
|
Urlstat
|
|
1040
1084
|
UrlTrends Ranking Updater
|
|
@@ -1059,16 +1103,18 @@ VSB-TUO\/[0-9]
|
|
|
1059
1103
|
Vulnbusters Meter
|
|
1060
1104
|
VYU2
|
|
1061
1105
|
w3af\.org
|
|
1062
|
-
W3C-checklink
|
|
1063
|
-
W3C-mobileOK
|
|
1064
1106
|
W3C_I18n-Checker
|
|
1065
1107
|
W3C_Unicorn
|
|
1108
|
+
W3C-checklink
|
|
1109
|
+
W3C-mobileOK
|
|
1110
|
+
WAC-OFU
|
|
1066
1111
|
Wallpapers\/[0-9]+
|
|
1067
1112
|
WallpapersHD
|
|
1068
1113
|
wangling
|
|
1069
1114
|
Wappalyzer
|
|
1070
1115
|
WatchMouse
|
|
1071
1116
|
WbSrch\/
|
|
1117
|
+
WDT\.io
|
|
1072
1118
|
web-capture\.net
|
|
1073
1119
|
Web-Monitoring
|
|
1074
1120
|
Web-sniffer
|
|
@@ -1089,6 +1135,7 @@ webcollage
|
|
|
1089
1135
|
WebCookies
|
|
1090
1136
|
WebCopier
|
|
1091
1137
|
WebCorp
|
|
1138
|
+
WebDataStats\/[0-9]
|
|
1092
1139
|
WebDoc
|
|
1093
1140
|
WebEnhancer
|
|
1094
1141
|
WebFetch
|
|
@@ -1121,7 +1168,6 @@ Webthumb\/[0-9]
|
|
|
1121
1168
|
WebThumbnail
|
|
1122
1169
|
WebWhacker
|
|
1123
1170
|
WebZIP
|
|
1124
|
-
WeCrawlForThePeace
|
|
1125
1171
|
WeLikeLinks
|
|
1126
1172
|
WEPA
|
|
1127
1173
|
WeSEE
|
|
@@ -1136,7 +1182,6 @@ Whibse
|
|
|
1136
1182
|
WhoRunsCoinHive
|
|
1137
1183
|
Whynder Magnet
|
|
1138
1184
|
Windows-RSS-Platform
|
|
1139
|
-
WinHttpRequest
|
|
1140
1185
|
WinPodder
|
|
1141
1186
|
wkhtmlto
|
|
1142
1187
|
wmtips
|
|
@@ -1153,6 +1198,7 @@ wscheck
|
|
|
1153
1198
|
Wtrace
|
|
1154
1199
|
WWW-Collector-E
|
|
1155
1200
|
WWW-Mechanize
|
|
1201
|
+
WWW::Document
|
|
1156
1202
|
WWW::Mechanize
|
|
1157
1203
|
www\.monitor\.us
|
|
1158
1204
|
WWWOFFLE
|
|
@@ -1161,9 +1207,9 @@ x22Mozilla
|
|
|
1161
1207
|
XaxisSemanticsClassifier
|
|
1162
1208
|
Xenu Link Sleuth
|
|
1163
1209
|
XING-contenttabreceiver\/[0-9]
|
|
1164
|
-
XmlSitemapGenerator
|
|
1165
1210
|
xpymep([0-9]?)\.exe
|
|
1166
1211
|
Y!J-(ASR|BSC)
|
|
1212
|
+
Y\!J-BRW
|
|
1167
1213
|
Yaanb
|
|
1168
1214
|
yacy
|
|
1169
1215
|
Yahoo Ad monitoring
|
|
@@ -1180,20 +1226,23 @@ yoogliFetchAgent
|
|
|
1180
1226
|
YottaaMonitor
|
|
1181
1227
|
Your-Website-Sucks\/[0-9]
|
|
1182
1228
|
yourls\.org
|
|
1229
|
+
YoYs\.net
|
|
1230
|
+
YP\.PL
|
|
1231
|
+
Zabbix
|
|
1183
1232
|
Zade
|
|
1184
1233
|
Zao
|
|
1185
1234
|
Zauba
|
|
1186
1235
|
Zemanta Aggregator
|
|
1187
|
-
Zend\\\\Http\\\\Client
|
|
1188
1236
|
Zend_Http_Client
|
|
1237
|
+
Zend\\\Http\\\Client
|
|
1189
1238
|
Zermelo
|
|
1190
1239
|
Zeus
|
|
1191
1240
|
zgrab
|
|
1192
1241
|
ZnajdzFoto
|
|
1193
1242
|
Zombie\.js
|
|
1243
|
+
Zoom\.Mac
|
|
1194
1244
|
ZyBorg
|
|
1195
|
-
|
|
1196
|
-
[a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)
|
|
1245
|
+
[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron)
|
|
1197
1246
|
].strip.split(/\n+/).freeze
|
|
1198
1247
|
end
|
|
1199
1248
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: crawler_detect
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.4
|
|
4
|
+
version: 0.1.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Pavel Kozlov
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2018-
|
|
11
|
+
date: 2018-12-31 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -28,14 +28,14 @@ dependencies:
|
|
|
28
28
|
name: rake
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
|
-
- - "
|
|
31
|
+
- - ">="
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
33
|
version: '10.0'
|
|
34
34
|
type: :development
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
|
-
- - "
|
|
38
|
+
- - ">="
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: '10.0'
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
166
166
|
version: '0'
|
|
167
167
|
requirements: []
|
|
168
168
|
rubyforge_project:
|
|
169
|
-
rubygems_version: 2.7.
|
|
169
|
+
rubygems_version: 2.7.6
|
|
170
170
|
signing_key:
|
|
171
171
|
specification_version: 4
|
|
172
172
|
summary: 'CrawlerDetect: detect bots/crawlers'
|