crawler_detect 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/crawler_detect.gemspec +1 -1
- data/lib/crawler_detect/library/crawlers.rb +107 -58
- data/lib/crawler_detect/library/exclusions.rb +1 -1
- data/lib/crawler_detect/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9f469a4f042b4ce7600daf8732b42a0943af6c466f028db67366381846155df1
|
4
|
+
data.tar.gz: 469b493cacd0d76d0a9bfb94d7905dc39298d297a6eff084c3461351c22e60ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5ac0fe561eea7de310abc90f2265770218f004d7c069baaf80e22ecee02ef1b9ed01dc68b2b35e166738320e3d7c269f156bae4580e74c05ed505f64d6942596
|
7
|
+
data.tar.gz: dafeb5060813d91c75a1ffbcf063f5fd229b635ea3d83883e5cf546fe6286fa68e5ea1c9834caeda40997e43c0494e6fc90f842d680eaaeca783517c967e2cb2
|
data/crawler_detect.gemspec
CHANGED
@@ -25,7 +25,7 @@ Gem::Specification.new do |spec|
|
|
25
25
|
spec.require_paths = ["lib"]
|
26
26
|
|
27
27
|
spec.add_development_dependency "bundler", "~> 1.15"
|
28
|
-
spec.add_development_dependency "rake", "~> 10.0"
|
28
|
+
spec.add_development_dependency "rake", ">= 10.0"
|
29
29
|
spec.add_development_dependency "rspec", "~> 3.0"
|
30
30
|
|
31
31
|
spec.add_development_dependency "activesupport", "~> 5.2.0"
|
@@ -7,12 +7,6 @@ module CrawlerDetect
|
|
7
7
|
CRAWLERS = %q[
|
8
8
|
.*Java.*outbrain
|
9
9
|
YLT
|
10
|
-
008\/
|
11
|
-
192\.comAgent
|
12
|
-
2ip\.ru
|
13
|
-
404checker
|
14
|
-
404enemy
|
15
|
-
80legs
|
16
10
|
^b0t$
|
17
11
|
^bluefish
|
18
12
|
^Calypso v\/
|
@@ -39,9 +33,17 @@ module CrawlerDetect
|
|
39
33
|
^WordPress\.com
|
40
34
|
^XRL\/[0-9]
|
41
35
|
^ZmEu
|
36
|
+
008\/
|
37
|
+
13TABS
|
38
|
+
192\.comAgent
|
39
|
+
2ip\.ru
|
40
|
+
404checker
|
41
|
+
404enemy
|
42
|
+
7Siters
|
43
|
+
80legs
|
44
|
+
a\.pr-cy\.ru
|
42
45
|
a3logics\.in
|
43
46
|
A6-Indexer
|
44
|
-
a\.pr-cy\.ru
|
45
47
|
Abonti
|
46
48
|
Aboundex
|
47
49
|
aboutthedomain
|
@@ -61,6 +63,7 @@ AHC
|
|
61
63
|
aihit
|
62
64
|
aiohttp\/
|
63
65
|
Airmail
|
66
|
+
Akamai_Site_Analyzer
|
64
67
|
akka-http\/
|
65
68
|
akula\/
|
66
69
|
alertra
|
@@ -90,13 +93,15 @@ AportWorm\/[0-9]
|
|
90
93
|
AppBeat\/[0-9]
|
91
94
|
AppEngine-Google
|
92
95
|
AppStoreScraperZ
|
96
|
+
Aprc\/[0-9]
|
93
97
|
Arachmo
|
94
98
|
arachnode
|
95
99
|
Arachnophilia
|
96
100
|
aria2
|
97
101
|
Arukereso
|
98
|
-
asafaweb
|
102
|
+
asafaweb\.com
|
99
103
|
AskQuickly
|
104
|
+
Ask Jeeves
|
100
105
|
ASPSeek
|
101
106
|
Asterias
|
102
107
|
Astute
|
@@ -108,12 +113,14 @@ axios\/
|
|
108
113
|
B-l-i-t-z-B-O-T
|
109
114
|
Backlink-Ceck
|
110
115
|
backlink-check
|
116
|
+
BacklinkHttpStatus
|
111
117
|
BackStreet
|
112
118
|
BackWeb
|
113
119
|
Bad-Neighborhood
|
114
120
|
Badass
|
115
121
|
baidu\.com
|
116
122
|
Bandit
|
123
|
+
basicstate
|
117
124
|
BatchFTP
|
118
125
|
Battleztar\ Bazinga
|
119
126
|
baypup\/[0-9]
|
@@ -150,6 +157,7 @@ Braintree-Webhooks
|
|
150
157
|
Branch Metrics API
|
151
158
|
Branch-Passthrough
|
152
159
|
Brandprotect
|
160
|
+
BrandVerity\/[0-9]
|
153
161
|
Brandwatch
|
154
162
|
Brodie\/
|
155
163
|
Browsershots
|
@@ -165,12 +173,13 @@ BuzzSumo
|
|
165
173
|
CAAM\/[0-9]
|
166
174
|
CakePHP
|
167
175
|
Calculon
|
176
|
+
Canary%20Mail
|
168
177
|
CapsuleChecker
|
169
178
|
CaretNail
|
170
179
|
catexplorador
|
171
|
-
cb crawl
|
172
180
|
CC Metadata Scaper
|
173
181
|
Cegbfeieh
|
182
|
+
censys
|
174
183
|
Cerberian Drtrs
|
175
184
|
CERT\.at-Statistics-Survey
|
176
185
|
cg-eye
|
@@ -184,6 +193,7 @@ ChinaClaw
|
|
184
193
|
Chirp\/[0-9]
|
185
194
|
chkme\.com
|
186
195
|
Chlooe
|
196
|
+
Chromaxa
|
187
197
|
CirrusExplorer\/
|
188
198
|
CISPA Vulnerability Notification
|
189
199
|
Citoid
|
@@ -210,19 +220,20 @@ CopyRightCheck
|
|
210
220
|
Copyscape
|
211
221
|
Cosmos4j\.feedback
|
212
222
|
Covario-IDS
|
213
|
-
CrawlForMe\/[0-9]
|
214
223
|
Crescent
|
215
|
-
cron-job\.org
|
216
224
|
Crowsnest
|
225
|
+
Criteo
|
217
226
|
CSHttp
|
218
227
|
curb
|
219
228
|
Curious George
|
220
229
|
curl
|
221
230
|
cuwhois\/[0-9]
|
222
231
|
cybo\.com
|
232
|
+
DAP\/NetHTTP
|
223
233
|
DareBoost
|
224
234
|
DatabaseDriverMysqli
|
225
235
|
DataCha0s
|
236
|
+
Datafeedwatch
|
226
237
|
Datanyze
|
227
238
|
DataparkSearch
|
228
239
|
dataprovider
|
@@ -237,6 +248,7 @@ Digg
|
|
237
248
|
Digincore
|
238
249
|
DigitalPebble
|
239
250
|
Dirbuster
|
251
|
+
Disqus\/
|
240
252
|
Dispatch\/
|
241
253
|
DittoSpyder
|
242
254
|
dlvr
|
@@ -267,16 +279,16 @@ ec2linkfinder
|
|
267
279
|
eCairn-Grabber
|
268
280
|
eCatch
|
269
281
|
ECCP
|
270
|
-
echocrawl
|
271
282
|
eContext\/
|
272
283
|
Ecxi
|
273
284
|
EirGrabber
|
274
285
|
ElectricMonk
|
275
286
|
elefent
|
276
287
|
EMail Exractor
|
277
|
-
Email%20Extractor%20Lite
|
278
288
|
EMail\ Wolf
|
289
|
+
Email%20Extractor
|
279
290
|
EmailWolf
|
291
|
+
Embarcadero
|
280
292
|
Embed PHP Library
|
281
293
|
Embedly
|
282
294
|
endo\/
|
@@ -289,8 +301,7 @@ Evrinid
|
|
289
301
|
ExactSearch
|
290
302
|
ExaleadCloudview
|
291
303
|
Excel\/
|
292
|
-
|
293
|
-
ExperianCrawlUK
|
304
|
+
exif
|
294
305
|
Exploratodo
|
295
306
|
Express WebPictures
|
296
307
|
ExtractorPro
|
@@ -304,6 +315,7 @@ Faraday v
|
|
304
315
|
fasthttp
|
305
316
|
Faveeo
|
306
317
|
Favicon downloader
|
318
|
+
faviconkit
|
307
319
|
FavOrg
|
308
320
|
Feed Wrangler
|
309
321
|
Feedable\/
|
@@ -337,6 +349,7 @@ Flunky
|
|
337
349
|
flynxapp
|
338
350
|
forensiq
|
339
351
|
FoundSeoTool\/[0-9]
|
352
|
+
http:\/\/www.neomo.de\/
|
340
353
|
free thumbnails
|
341
354
|
Freeuploader
|
342
355
|
FreeWebMonitoring SiteChecker
|
@@ -349,10 +362,12 @@ geek-tools
|
|
349
362
|
Genderanalyzer
|
350
363
|
Genieo
|
351
364
|
GentleSource
|
365
|
+
GetCode
|
352
366
|
Getintent
|
353
367
|
GetLinkInfo
|
354
368
|
getprismatic\.com
|
355
369
|
GetRight
|
370
|
+
getroot
|
356
371
|
GetURLInfo\/[0-9]
|
357
372
|
GetWeb
|
358
373
|
Ghost Inspector
|
@@ -362,15 +377,16 @@ github-camo
|
|
362
377
|
github\.com\/
|
363
378
|
Go [\d\.]* package http
|
364
379
|
Go http package
|
365
|
-
Go!Zilla
|
366
380
|
Go-Ahead-Got-It
|
367
381
|
Go-http-client
|
382
|
+
Go!Zilla
|
368
383
|
gobyus
|
369
384
|
gofetch
|
370
385
|
GomezAgent
|
371
386
|
gooblog
|
372
387
|
Goodzer\/[0-9]
|
373
|
-
|
388
|
+
Google AppsViewer
|
389
|
+
Google Desktop
|
374
390
|
Google favicon
|
375
391
|
Google Keyword Suggestion
|
376
392
|
Google Keyword Tool
|
@@ -378,6 +394,7 @@ Google Page Speed Insights
|
|
378
394
|
Google PP Default
|
379
395
|
Google Search Console
|
380
396
|
Google Web Preview
|
397
|
+
google_partner_monitoring
|
381
398
|
Google-Adwords
|
382
399
|
Google-Apps-Script
|
383
400
|
Google-Calendar-Importer
|
@@ -388,10 +405,13 @@ Google-SearchByImage
|
|
388
405
|
Google-Site-Verification
|
389
406
|
Google-Structured-Data-Testing-Tool
|
390
407
|
Google-Youtube-Links
|
391
|
-
|
408
|
+
google-xrawler
|
409
|
+
GoogleCloudMonitoring
|
392
410
|
GoogleDocs
|
393
411
|
GoogleHC\/
|
394
412
|
GoogleProducer
|
413
|
+
GoogleSites
|
414
|
+
Google-Transparency-Report
|
395
415
|
Gookey
|
396
416
|
GoScraper
|
397
417
|
GoSpotCheck
|
@@ -404,6 +424,8 @@ GrabNet
|
|
404
424
|
Grafula
|
405
425
|
Grammarly
|
406
426
|
GrapeFX
|
427
|
+
Gregarius
|
428
|
+
GRequests
|
407
429
|
grokkit
|
408
430
|
grouphigh
|
409
431
|
grub-client
|
@@ -415,14 +437,17 @@ gvfs\/
|
|
415
437
|
HAA(A)?RTLAND http client
|
416
438
|
Haansoft
|
417
439
|
hackney\/
|
440
|
+
Hadi Agent
|
418
441
|
Hatena
|
419
442
|
Havij
|
420
443
|
hawkReader
|
444
|
+
HeadlessChrome
|
421
445
|
HEADMasterSEO
|
422
446
|
HeartRails_Capture
|
423
447
|
help@dataminr\.com
|
424
448
|
heritrix
|
425
449
|
historious\/
|
450
|
+
hkedcity
|
426
451
|
hledejLevne\.cz\/[0-9]
|
427
452
|
Hloader
|
428
453
|
HMView
|
@@ -436,6 +461,10 @@ ht:\/\/check
|
|
436
461
|
htdig
|
437
462
|
HTMLparser
|
438
463
|
htmlyse\.com
|
464
|
+
HTTP Banner Detection
|
465
|
+
HTTP_Compression_Test
|
466
|
+
http_request2
|
467
|
+
http_requester
|
439
468
|
http-get
|
440
469
|
HTTP-Header-Abfrage
|
441
470
|
http-kit
|
@@ -443,22 +472,20 @@ http-request\/
|
|
443
472
|
HTTP-Tiny
|
444
473
|
HTTP::Lite
|
445
474
|
http\.rb\/
|
446
|
-
|
447
|
-
http_request2
|
448
|
-
http_requester
|
475
|
+
http_get
|
449
476
|
HttpComponents
|
450
477
|
httphr
|
451
478
|
HTTPMon
|
479
|
+
httpRequest
|
452
480
|
httpscheck
|
453
481
|
httpssites_power
|
454
482
|
httpunit
|
455
483
|
HttpUrlConnection
|
456
484
|
httrack
|
457
485
|
huaweisymantec
|
458
|
-
HubPages.*crawlingpolicy
|
459
486
|
HubSpot
|
460
487
|
Humanlinks
|
461
|
-
HyperZbozi
|
488
|
+
HyperZbozi\.cz Feeder
|
462
489
|
i2kconnect\/
|
463
490
|
Iblog
|
464
491
|
ichiro
|
@@ -469,11 +496,12 @@ IDwhois\/[0-9]
|
|
469
496
|
Iframely
|
470
497
|
igdeSpyder
|
471
498
|
IlTrovatore
|
472
|
-
ImageVisu\/
|
473
499
|
Image\ Fetch
|
474
500
|
Image\ Sucker
|
475
501
|
ImageEngine\/
|
502
|
+
ImageVisu\/
|
476
503
|
Imagga
|
504
|
+
imagineeasy
|
477
505
|
imgsizer
|
478
506
|
InAGist
|
479
507
|
inbound\.li parser
|
@@ -491,8 +519,8 @@ Integrity
|
|
491
519
|
integromedb
|
492
520
|
Intelliseek
|
493
521
|
InterGET
|
494
|
-
Internet\ Ninja
|
495
522
|
internet_archive
|
523
|
+
Internet\ Ninja
|
496
524
|
InternetSeer
|
497
525
|
internetVista monitor
|
498
526
|
intraVnews
|
@@ -507,7 +535,10 @@ Iria
|
|
507
535
|
Irokez
|
508
536
|
isitup\.org
|
509
537
|
iskanie
|
538
|
+
isUp\.li
|
539
|
+
iThemes Sync\/[0-9]
|
510
540
|
iZSearch
|
541
|
+
JAHHO
|
511
542
|
janforman
|
512
543
|
Jaunt\/
|
513
544
|
Jbrofuzz
|
@@ -555,7 +586,7 @@ LibVLC
|
|
555
586
|
LibWeb
|
556
587
|
Libwhisker
|
557
588
|
libwww
|
558
|
-
Licorne
|
589
|
+
Licorne
|
559
590
|
Liferea\/
|
560
591
|
Lightspeedsystems
|
561
592
|
Likse
|
@@ -590,7 +621,7 @@ LYT\.SR
|
|
590
621
|
mabontland
|
591
622
|
Mag-Net
|
592
623
|
MagpieRSS
|
593
|
-
Mail
|
624
|
+
Mail\.Ru
|
594
625
|
MailChimp
|
595
626
|
Majestic12
|
596
627
|
makecontact\/
|
@@ -605,6 +636,7 @@ Mata\ Hari
|
|
605
636
|
Mediapartners-Google
|
606
637
|
mediawords
|
607
638
|
MegaIndex\.ru
|
639
|
+
MeltwaterNews
|
608
640
|
Melvil Rawi\/
|
609
641
|
MergeFlow-PageReader
|
610
642
|
Metaspinner
|
@@ -619,7 +651,7 @@ Microsoft\ Data\ Access
|
|
619
651
|
MIDown\ tool
|
620
652
|
MIIxpc
|
621
653
|
Mindjet
|
622
|
-
Miniature
|
654
|
+
Miniature\.io\/
|
623
655
|
Miniflux
|
624
656
|
Mister\ PiX
|
625
657
|
mixdata dot com
|
@@ -630,8 +662,8 @@ Mnogosearch
|
|
630
662
|
mogimogi
|
631
663
|
Mojeek
|
632
664
|
Mojolicious \(Perl\)
|
633
|
-
monitis
|
634
665
|
Monit\/
|
666
|
+
monitis
|
635
667
|
Monitority\/[0-9]
|
636
668
|
montastic
|
637
669
|
MonTools
|
@@ -687,6 +719,8 @@ Nmap Scripting Engine
|
|
687
719
|
node-superagent
|
688
720
|
node-urllib\/
|
689
721
|
node\.io
|
722
|
+
Nodemeter
|
723
|
+
NodePing
|
690
724
|
nominet\.org\.uk
|
691
725
|
Norton-Safeweb
|
692
726
|
Notifixious
|
@@ -700,9 +734,9 @@ NYU
|
|
700
734
|
Ocelli\/[0-9]
|
701
735
|
Octopus
|
702
736
|
oegp
|
703
|
-
og-scraper\/
|
704
737
|
Offline Explorer
|
705
738
|
Offline\ Navigator
|
739
|
+
og-scraper\/
|
706
740
|
okhttp
|
707
741
|
Omea Reader
|
708
742
|
omgili
|
@@ -724,9 +758,9 @@ ownCloud News
|
|
724
758
|
OxfordCloudService\/[0-9]
|
725
759
|
Page Analyzer
|
726
760
|
Page Valet
|
727
|
-
page2rss
|
728
|
-
page\ scorer
|
729
761
|
page_verifier
|
762
|
+
page\ scorer
|
763
|
+
page2rss
|
730
764
|
PageAnalyzer
|
731
765
|
PageGrabber
|
732
766
|
PagePeeker
|
@@ -740,7 +774,6 @@ Pavuk
|
|
740
774
|
PayPal IPN
|
741
775
|
pcBrowser
|
742
776
|
Pcore-HTTP
|
743
|
-
PEAR HTTPRequest
|
744
777
|
Pearltrees
|
745
778
|
PECL::HTTP
|
746
779
|
peerindex
|
@@ -750,7 +783,6 @@ Perlu -
|
|
750
783
|
PhantomJS Screenshoter
|
751
784
|
PhantomJS\/
|
752
785
|
Photon\/
|
753
|
-
phpcrawl
|
754
786
|
phpservermon
|
755
787
|
Pi-Monster
|
756
788
|
Picscout
|
@@ -767,10 +799,10 @@ pinterest\.com
|
|
767
799
|
Pixray
|
768
800
|
Pizilla
|
769
801
|
Plagger\/
|
770
|
-
PleaseCrawl
|
771
802
|
Ploetz \+ Zeller
|
772
803
|
Plukkie
|
773
804
|
plumanalytics
|
805
|
+
PocketImageCache
|
774
806
|
PocketParser
|
775
807
|
Pockey
|
776
808
|
POE-Component-Client-HTTP
|
@@ -788,7 +820,7 @@ PritTorrent\/[0-9]
|
|
788
820
|
Prlog
|
789
821
|
probethenet
|
790
822
|
Project 25499
|
791
|
-
Promotion_Tools_www
|
823
|
+
Promotion_Tools_www\.searchenginepromotionhelp\.com
|
792
824
|
prospectb2b
|
793
825
|
Protopage
|
794
826
|
ProWebWalker
|
@@ -806,15 +838,16 @@ Qirina Hurdler
|
|
806
838
|
QQDownload
|
807
839
|
QrafterPro
|
808
840
|
Qseero
|
809
|
-
Qualidator
|
841
|
+
Qualidator\.com SiteAnalyzer
|
810
842
|
QueryN\ Metasearch
|
843
|
+
queuedriver
|
811
844
|
Quora Link Preview
|
812
845
|
Qwantify
|
813
846
|
Radian6
|
814
847
|
RankActive
|
815
848
|
RankFlex
|
816
849
|
RankSonicSiteAuditor
|
817
|
-
|
850
|
+
Re-re Studio
|
818
851
|
Readability
|
819
852
|
RealDownload
|
820
853
|
RealPlayer%20Downloader
|
@@ -835,28 +868,31 @@ Robosourcer
|
|
835
868
|
Robozilla\/[0-9]
|
836
869
|
ROI Hunter
|
837
870
|
RPT-HTTPClient
|
871
|
+
rss reader
|
838
872
|
RSSOwl
|
839
873
|
RssReader\/
|
840
874
|
safe-agent-scanner
|
841
875
|
SalesIntelligent
|
842
876
|
Saleslift
|
877
|
+
Sendsay\.Ru
|
843
878
|
SauceNAO
|
844
879
|
SBIder
|
845
880
|
scalaj-http
|
846
881
|
scan\.lol
|
847
882
|
ScanAlert
|
848
|
-
ScrapeBox Page Scanner
|
849
883
|
Scoop
|
850
884
|
scooter
|
851
885
|
ScoutJet
|
852
886
|
ScoutURLMonitor
|
887
|
+
ScrapeBox Page Scanner
|
888
|
+
SimpleScraper
|
853
889
|
Scrapy
|
854
890
|
Screaming
|
855
891
|
ScreenShotService\/[0-9]
|
856
892
|
Scrubby
|
857
893
|
Scrutiny\/
|
858
|
-
Search37\/
|
859
894
|
search\.thunderstone
|
895
|
+
Search37\/
|
860
896
|
Searchestate
|
861
897
|
SearchExpress
|
862
898
|
SearchSight
|
@@ -868,19 +904,20 @@ Semrush
|
|
868
904
|
sentry\/
|
869
905
|
SEO Browser
|
870
906
|
Seo Servis
|
871
|
-
seo-nastroj
|
907
|
+
seo-nastroj\.cz
|
908
|
+
seo4ajax
|
872
909
|
Seobility
|
873
910
|
SEOCentro
|
874
911
|
SeoCheck
|
875
912
|
SEOkicks
|
876
913
|
Seomoz
|
877
914
|
SEOprofiler
|
915
|
+
SEOsearch\/
|
878
916
|
SeopultContentAnalyzer
|
879
917
|
seoscanners
|
880
918
|
SEOstats
|
881
919
|
Server Density Service Monitoring
|
882
920
|
servernfo\.com
|
883
|
-
SetCronJob\/
|
884
921
|
sexsearcher
|
885
922
|
Seznam
|
886
923
|
Shelob
|
@@ -897,8 +934,8 @@ SimplyFast
|
|
897
934
|
Siphon
|
898
935
|
SISTRIX
|
899
936
|
Site-Shot\/
|
900
|
-
Site24x7
|
901
937
|
Site\ Sucker
|
938
|
+
Site24x7
|
902
939
|
SiteBar
|
903
940
|
Sitebeam
|
904
941
|
Sitebulb\/
|
@@ -908,6 +945,7 @@ SiteGuardian
|
|
908
945
|
Siteimprove
|
909
946
|
SiteIndexed
|
910
947
|
Sitemap(s)? Generator
|
948
|
+
SitemapGenerator
|
911
949
|
SiteMonitor
|
912
950
|
Siteshooter B0t
|
913
951
|
SiteSnagger
|
@@ -923,9 +961,10 @@ SlySearch
|
|
923
961
|
SmartDownload
|
924
962
|
SMRF URL Expander
|
925
963
|
SMUrlExpander
|
926
|
-
Snarfer\/
|
927
964
|
Snake
|
928
965
|
Snappy
|
966
|
+
SnapSearch
|
967
|
+
Snarfer\/
|
929
968
|
SniffRSS
|
930
969
|
sniptracker
|
931
970
|
Snoopy
|
@@ -935,6 +974,7 @@ SortSite
|
|
935
974
|
Sottopop
|
936
975
|
sovereign\.ai
|
937
976
|
SpaceBison
|
977
|
+
SpamExperts
|
938
978
|
Spammen
|
939
979
|
Spanner
|
940
980
|
spaziodati
|
@@ -956,8 +996,9 @@ Statastico\/
|
|
956
996
|
StatusCake
|
957
997
|
Steeler
|
958
998
|
Stratagems Kumo
|
959
|
-
Stroke
|
999
|
+
Stroke\.cz
|
960
1000
|
StudioFACA
|
1001
|
+
StumbleUpon
|
961
1002
|
suchen
|
962
1003
|
Sucuri
|
963
1004
|
summify
|
@@ -976,6 +1017,7 @@ T0PHackTeam
|
|
976
1017
|
tAkeOut
|
977
1018
|
Tarantula\/
|
978
1019
|
Taringa UGC
|
1020
|
+
TarmotGezgin
|
979
1021
|
Teleport
|
980
1022
|
Telesoft
|
981
1023
|
Telesphoreo
|
@@ -995,18 +1037,21 @@ theoldreader\.com
|
|
995
1037
|
Thinklab
|
996
1038
|
Thumbshots
|
997
1039
|
ThumbSniper
|
1040
|
+
timewe\.net
|
998
1041
|
TinEye
|
999
1042
|
Tiny Tiny RSS
|
1000
1043
|
TLSProbe\/
|
1001
1044
|
Toata
|
1002
1045
|
topster
|
1003
|
-
touche
|
1004
|
-
Traackr
|
1046
|
+
touche\.com
|
1047
|
+
Traackr\.com
|
1048
|
+
tracemyfile
|
1005
1049
|
TrapitAgent
|
1006
1050
|
Trendiction
|
1007
|
-
Trendsmap
|
1051
|
+
Trendsmap
|
1008
1052
|
trendspottr\.com
|
1009
1053
|
truwoGPS
|
1054
|
+
TryJsoup
|
1010
1055
|
TulipChain
|
1011
1056
|
Turingos
|
1012
1057
|
Turnitin
|
@@ -1021,7 +1066,6 @@ Twisted PageGetter
|
|
1021
1066
|
Typhoeus
|
1022
1067
|
ubermetrics-technologies
|
1023
1068
|
uclassify
|
1024
|
-
uCrawlr\/
|
1025
1069
|
UdmSearch
|
1026
1070
|
unirest-java
|
1027
1071
|
UniversalFeedParser
|
@@ -1034,7 +1078,7 @@ Upflow
|
|
1034
1078
|
Uptimia
|
1035
1079
|
URL Verifier
|
1036
1080
|
URLChecker
|
1037
|
-
URLitor
|
1081
|
+
URLitor\.com
|
1038
1082
|
urlresolver
|
1039
1083
|
Urlstat
|
1040
1084
|
UrlTrends Ranking Updater
|
@@ -1059,16 +1103,18 @@ VSB-TUO\/[0-9]
|
|
1059
1103
|
Vulnbusters Meter
|
1060
1104
|
VYU2
|
1061
1105
|
w3af\.org
|
1062
|
-
W3C-checklink
|
1063
|
-
W3C-mobileOK
|
1064
1106
|
W3C_I18n-Checker
|
1065
1107
|
W3C_Unicorn
|
1108
|
+
W3C-checklink
|
1109
|
+
W3C-mobileOK
|
1110
|
+
WAC-OFU
|
1066
1111
|
Wallpapers\/[0-9]+
|
1067
1112
|
WallpapersHD
|
1068
1113
|
wangling
|
1069
1114
|
Wappalyzer
|
1070
1115
|
WatchMouse
|
1071
1116
|
WbSrch\/
|
1117
|
+
WDT\.io
|
1072
1118
|
web-capture\.net
|
1073
1119
|
Web-Monitoring
|
1074
1120
|
Web-sniffer
|
@@ -1089,6 +1135,7 @@ webcollage
|
|
1089
1135
|
WebCookies
|
1090
1136
|
WebCopier
|
1091
1137
|
WebCorp
|
1138
|
+
WebDataStats\/[0-9]
|
1092
1139
|
WebDoc
|
1093
1140
|
WebEnhancer
|
1094
1141
|
WebFetch
|
@@ -1121,7 +1168,6 @@ Webthumb\/[0-9]
|
|
1121
1168
|
WebThumbnail
|
1122
1169
|
WebWhacker
|
1123
1170
|
WebZIP
|
1124
|
-
WeCrawlForThePeace
|
1125
1171
|
WeLikeLinks
|
1126
1172
|
WEPA
|
1127
1173
|
WeSEE
|
@@ -1136,7 +1182,6 @@ Whibse
|
|
1136
1182
|
WhoRunsCoinHive
|
1137
1183
|
Whynder Magnet
|
1138
1184
|
Windows-RSS-Platform
|
1139
|
-
WinHttpRequest
|
1140
1185
|
WinPodder
|
1141
1186
|
wkhtmlto
|
1142
1187
|
wmtips
|
@@ -1153,6 +1198,7 @@ wscheck
|
|
1153
1198
|
Wtrace
|
1154
1199
|
WWW-Collector-E
|
1155
1200
|
WWW-Mechanize
|
1201
|
+
WWW::Document
|
1156
1202
|
WWW::Mechanize
|
1157
1203
|
www\.monitor\.us
|
1158
1204
|
WWWOFFLE
|
@@ -1161,9 +1207,9 @@ x22Mozilla
|
|
1161
1207
|
XaxisSemanticsClassifier
|
1162
1208
|
Xenu Link Sleuth
|
1163
1209
|
XING-contenttabreceiver\/[0-9]
|
1164
|
-
XmlSitemapGenerator
|
1165
1210
|
xpymep([0-9]?)\.exe
|
1166
1211
|
Y!J-(ASR|BSC)
|
1212
|
+
Y\!J-BRW
|
1167
1213
|
Yaanb
|
1168
1214
|
yacy
|
1169
1215
|
Yahoo Ad monitoring
|
@@ -1180,20 +1226,23 @@ yoogliFetchAgent
|
|
1180
1226
|
YottaaMonitor
|
1181
1227
|
Your-Website-Sucks\/[0-9]
|
1182
1228
|
yourls\.org
|
1229
|
+
YoYs\.net
|
1230
|
+
YP\.PL
|
1231
|
+
Zabbix
|
1183
1232
|
Zade
|
1184
1233
|
Zao
|
1185
1234
|
Zauba
|
1186
1235
|
Zemanta Aggregator
|
1187
|
-
Zend\\\\Http\\\\Client
|
1188
1236
|
Zend_Http_Client
|
1237
|
+
Zend\\\Http\\\Client
|
1189
1238
|
Zermelo
|
1190
1239
|
Zeus
|
1191
1240
|
zgrab
|
1192
1241
|
ZnajdzFoto
|
1193
1242
|
Zombie\.js
|
1243
|
+
Zoom\.Mac
|
1194
1244
|
ZyBorg
|
1195
|
-
|
1196
|
-
[a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider|uptime|validator|fetcher)
|
1245
|
+
[a-z0-9\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron)
|
1197
1246
|
].strip.split(/\n+/).freeze
|
1198
1247
|
end
|
1199
1248
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crawler_detect
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pavel Kozlov
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -28,14 +28,14 @@ dependencies:
|
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- - "~>"
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '10.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- - "~>"
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
@@ -166,7 +166,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
166
|
version: '0'
|
167
167
|
requirements: []
|
168
168
|
rubyforge_project:
|
169
|
-
rubygems_version: 2.7.
|
169
|
+
rubygems_version: 2.7.6
|
170
170
|
signing_key:
|
171
171
|
specification_version: 4
|
172
172
|
summary: 'CrawlerDetect: detect bots/crawlers'
|