crawler_detect 0.1.11 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,57 +2,13 @@
2
2
 
3
3
  module CrawlerDetect
4
4
  module Library
5
+ # @since 0.1.0
5
6
  module Exclusions
6
- EXCLUSIONS = %q[
7
- Safari.[\d\.]*
8
- Firefox.[\d\.]*
9
- Chrome.[\d\.]*
10
- Chromium.[\d\.]*
11
- MSIE.[\d\.]
12
- Opera\/[\d\.]*
13
- Mozilla.[\d\.]*
14
- AppleWebKit.[\d\.]*
15
- Trident.[\d\.]*
16
- Windows NT.[\d\.]*
17
- Android [\d\.]*
18
- Macintosh.
19
- Ubuntu
20
- Linux
21
- [ ]Intel
22
- Mac OS X [\d_]*
23
- (like )?Gecko(.[\d\.]*)?
24
- KHTML,
25
- CriOS.[\d\.]*
26
- CPU iPhone OS ([0-9_])* like Mac OS X
27
- CPU OS ([0-9_])* like Mac OS X
28
- iPod
29
- compatible
30
- x86_..
31
- i686
32
- x64
33
- X11
34
- rv:[\d\.]*
35
- Version.[\d\.]*
36
- WOW64
37
- Win64
38
- Dalvik.[\d\.]*
39
- \.NET CLR [\d\.]*
40
- Presto.[\d\.]*
41
- Media Center PC
42
- BlackBerry
43
- Build
44
- Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.
45
- Opera
46
- \.NET[\d\.]*
47
- cubot
48
- ; M bot
49
- ; CRONO
50
- ; B bot
51
- ; IDbot
52
- ; ID bot
53
- ; POWER BOT
54
- ;
55
- ].strip.split(/\n+/).freeze
7
+ extend Loader
8
+
9
+ def self.data
10
+ @data ||= load_raw(CrawlerDetect.config.settings.raw_exclusions_path).freeze
11
+ end
56
12
  end
57
13
  end
58
14
  end
@@ -2,24 +2,13 @@
2
2
 
3
3
  module CrawlerDetect
4
4
  module Library
5
+ # @since 0.1.0
5
6
  module Headers
6
- HEADERS = [
7
- # The default User-Agent string.
8
- "HTTP_USER_AGENT",
9
- # Header can occur on devices using Opera Mini.
10
- "HTTP_X_OPERAMINI_PHONE_UA",
11
- # Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
12
- "HTTP_X_DEVICE_USER_AGENT",
13
- "HTTP_X_ORIGINAL_USER_AGENT",
14
- "HTTP_X_SKYFIRE_PHONE",
15
- "HTTP_X_BOLT_PHONE_UA",
16
- "HTTP_DEVICE_STOCK_UA",
17
- "HTTP_X_UCBROWSER_DEVICE_UA",
18
- # Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address
19
- "HTTP_FROM",
20
- # Seen in use by Netsparker
21
- "HTTP_X_SCANNER",
22
- ].freeze
7
+ extend Loader
8
+
9
+ def self.data
10
+ @data ||= load_raw(CrawlerDetect.config.settings.raw_headers_path).freeze
11
+ end
23
12
  end
24
13
  end
25
14
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CrawlerDetect
4
+ module Library
5
+ # since 1.0.0
6
+ module Loader
7
+ # Load JSON raw file
8
+ def load_raw(path)
9
+ JSON.parse(File.read(path))
10
+ end
11
+
12
+ # Remove cached raw data
13
+ def reload_data
14
+ remove_instance_variable(:@data) if instance_variable_defined?(:@data)
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1 @@
1
+ [" YLT","^b0t$","^bluefish ","^Calypso v\\\/","^Corax","^COMODO DCV","^DangDang","^DavClnt","^DHSH","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^HTTPClient\\\/","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^NG\\\/[0-9\\.]","^NING\\\/","^Nuclei","^PHP\\\/","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^Swurl ","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2GDPR\\\/","2ip\\.ru","404enemy","7Siters","80legs","a\\.pr-cy\\.ru","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","Adstxtaggregator","adstxt-worker","adstxt\\.com","AffiliateLabz\\\/","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","^Amazon Simple Notification Service Agent$","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Anthill","Ant\\.com","Anturis Agent","AnyEvent-HTTP\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","apimon\\.de","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","Asana\\\/","AskQuickly","Ask Jeeves","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","axios\\\/","AWS Security Scanner","B-l-i-t-z-B-O-T","Backlink-Ceck","backlink-check","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","biz_Directory","BKCTwitterUnshortener\\\/","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","BrandVerity","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cf-facebook","cg-eye","changedetection","ChangesMeter","Charlotte","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","Contextual Code Sites Explorer","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Crowsnest","Criteo","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Disqus\\\/","Dispatch\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Domains Project\\\/","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconkit","faviconarchive","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","Feedshow\\\/","FeedshowOnline","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","Florienzh\\\/","fluffy","Flunky","flynxapp","forensiq","FoundSeoTool","http:\\\/\\\/www.neomo.de\\\/","free thumbnails","Freeuploader","FreshRSS","Funnelback","Fuzz Faster U Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","gdnplus\\.com","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Goldfire Server","Go [\\d\\.]* package http","Go http package","Go-Ahead-Got-It","Go-http-client","Go!Zilla","gobyus","Gofeed","gofetch","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google-Podcast","Google PP Default","Google Search Console","Google Web Preview","Google-Ads-Overview","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Youtube-Links","google-xrawler","GoogleDocs","GoogleHC\\\/","GoogleProducer","GoogleSites","Google-Transparency-Report","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hatena","Havij","HaxerMen","HeadlessChrome","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","HTTP_Compression_Test","http_request2","http_requester","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http\\.rb\\\/","http_get","HttpComponents","httphr","HTTPMon","HTTPie","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","httrack","huaweisymantec","HubSpot ","HubSpot-Link-Resolver","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","internet_archive","Internet Ninja","InternetSeer","internetVista monitor","internetwache","intraVnews","IODC","IOI","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","kubectl","kube-probe","kulturarw3","KumKie","L\\.webis","Larbin","Lavf\\\/","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lightspeedsystems","Lighthouse","Likse","limber\\.io","Link Valet","link_thumbnailer","LinkAnalyser","LinkAlarm\\\/","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkScan","LinksManager","LinkPreview","LinkTiger","LinkWalker","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","ltx71","lua-resty-http","Lush Http Client","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","mabontland","MacOutlook\\\/","Mag-Net","MagpieRSS","Mail\\.Ru","MailChimp","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","mattermost","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft\\.Data\\.Mashup","Microsoft Office","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft Data Access","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","Mister PiX","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Monit\\\/","monitis","Monitority\\\/","montastic","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mr\\.4x3 Powered","Mrcgiguy","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","myseosnapshot","nagios","Najdi\\.si","Name Intelligence","NameFo\\.com","Nameprotect","nationalarchives","Navroad","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","NetSystemsResearch","Nexgate Ruby Client","nghttp2","NG-Search","Nibbler","NICErsPRO","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-superagent","node-urllib","node\\.io","Nodemeter","NodePing","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Open Source RSS","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","ow\\.ly","Owler","Owlin","ownCloud News","OxfordCloudService","Page Valet","page_verifier","page scorer","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","PDF24 URL To PDF","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","phpservermon","php-requests","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","ping\\.blo\\.gs","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","PodcastAddict\\\/","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","PostmanRuntime","postplanner\\.com","PostPost","postrank","PowerPoint\\\/","Prebid","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probethenet","Project ?25499","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","Pump","python-httpx","Python-httplib2","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","QuiteRSS","Quora Link Preview","Qwantify","Radian6","Railgun\\\/","RankActive","RankFlex","RankSonicSiteAuditor","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSOwl","RSSMix\\\/","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","Sendsay\\.Ru","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","scan\\.lol","ScanAlert","Scoop","scooter","ScoutJet","ScopeContentAG-HTTP-Client","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","search\\.thunderstone","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","SearchWP","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","SEOkicks","SEOlizer","Seomoz","SEOprofiler","SEOsearch","seoscanners","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","shortURL lengthener","ShortLinkTranslate","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site-Shot\\\/","Site Sucker","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","slider\\.com","sli-systems\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","spaziodati","SPDYCheck","Specificfeeds","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","Statically-Screenshot","StatusCake","Steeler","Stratagems Kumo","Stripe\\\/","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","Syndirella\\\/","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","techiaith\\.cymru","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Knowledge AI","The Intraformant","theinternetrules","TheNomad","Thinklab","Thumbshots","ThumbSniper","Thumbor","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","unchaos","unirest-java","ultimate_sitemap_parser","UniversalFeedParser","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","Urlcheckr","URL Verifier","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","Vacuum","Vagabondo","VB Project","vBSEO","VCI","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C_Unicorn","W3C-checklink","W3C-mobileOK","WAC-OFU","Wallpapers\\\/[0-9]+","WallpapersHD","WakeletLinkExpander","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","web-capture\\.net","Web-sniffer","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","Webthumb\\\/","WebThumbnail","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder Magnet","WinHTTP\\\/","WinHttp-Autoproxy-Service","Windows-RSS-Platform","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","Word\\\/","WordPress\\\/","worldping-api","WordupinfoSearch","wotbox","WP Engine Install Performance API","WP Rocket","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","www\\.monitor\\.us","WWWOFFLE","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","XenForo\\\/","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-(ASR|BSC)","Y\\!J-BRW","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yomins\\.com","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zauba","Zemanta Aggregator","Zend_Http_Client","Zend\\\\Http\\\\Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZoteroTranslationServer","ZyBorg","[a-z0-9\\-_]*(bot|crawl|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"]
@@ -0,0 +1 @@
1
+ ["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT",";"]
@@ -0,0 +1 @@
1
+ ["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER"]
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # @since 0.1.0
3
4
  module CrawlerDetect
4
- VERSION = "0.1.11"
5
+ VERSION = "1.1.0"
5
6
  end
@@ -1,6 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rack
4
+ # Rack-based interface to detect crawlers
5
+ #
6
+ # @since 0.1.0
4
7
  class CrawlerDetect
5
8
  def initialize(app, options = {})
6
9
  Rack::Request::Helpers.module_eval do
@@ -22,24 +25,24 @@ module Rack
22
25
 
23
26
  private
24
27
 
25
- def set_env_variables!(env)
26
- ua = user_agent(env)
27
- return unless ua
28
- detector = ::CrawlerDetect::Detector.new(ua)
29
- env["rack.crawler_detect"] = {
30
- is_crawler: detector.is_crawler?,
31
- crawler_name: detector.crawler_name,
32
- }
33
- end
28
+ def set_env_variables!(env)
29
+ ua = user_agent(env)
30
+ return unless ua
31
+ detector = ::CrawlerDetect::Detector.new(ua)
32
+ env["rack.crawler_detect"] = {
33
+ is_crawler: detector.is_crawler?,
34
+ crawler_name: detector.crawler_name
35
+ }
36
+ end
34
37
 
35
- def user_agent(env)
36
- user_agent_headers.map do |header|
37
- env[header]
38
- end.compact.join(" ")
39
- end
38
+ def user_agent(env)
39
+ user_agent_headers.map do |header|
40
+ env[header]
41
+ end.compact.join(" ")
42
+ end
40
43
 
41
- def user_agent_headers
42
- ::CrawlerDetect::Library.get_array("headers")
43
- end
44
+ def user_agent_headers
45
+ ::CrawlerDetect::Library.get_array("headers")
46
+ end
44
47
  end
45
48
  end
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crawler_detect
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pavel Kozlov
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-12-26 00:00:00.000000000 Z
11
+ date: 2020-11-04 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: qonfig
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.24'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.24'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: activesupport
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
31
  - - "~>"
18
32
  - !ruby/object:Gem::Version
19
- version: 5.2.0
33
+ version: 6.0.3
20
34
  type: :development
21
35
  prerelease: false
22
36
  version_requirements: !ruby/object:Gem::Requirement
23
37
  requirements:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
- version: 5.2.0
40
+ version: 6.0.3
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -122,6 +136,20 @@ dependencies:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
138
  version: '3.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: armitage-rubocop
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '='
144
+ - !ruby/object:Gem::Version
145
+ version: '0.82'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '='
151
+ - !ruby/object:Gem::Version
152
+ version: '0.82'
125
153
  description: CrawlerDetect is a library to detect bots/crawlers via the user agent
126
154
  email:
127
155
  - loadkpi@gmail.com
@@ -133,17 +161,25 @@ files:
133
161
  - ".rspec"
134
162
  - ".rubocop.yml"
135
163
  - ".travis.yml"
164
+ - CHANGELOG.md
136
165
  - Gemfile
166
+ - Gemfile.lock
137
167
  - LICENSE.txt
138
168
  - README.md
139
169
  - Rakefile
170
+ - bin/update_raw_files
140
171
  - crawler_detect.gemspec
141
172
  - lib/crawler_detect.rb
173
+ - lib/crawler_detect/config.rb
142
174
  - lib/crawler_detect/detector.rb
143
175
  - lib/crawler_detect/library.rb
144
176
  - lib/crawler_detect/library/crawlers.rb
145
177
  - lib/crawler_detect/library/exclusions.rb
146
178
  - lib/crawler_detect/library/headers.rb
179
+ - lib/crawler_detect/library/loader.rb
180
+ - lib/crawler_detect/library/raw/Crawlers.json
181
+ - lib/crawler_detect/library/raw/Exclusions.json
182
+ - lib/crawler_detect/library/raw/Headers.json
147
183
  - lib/crawler_detect/version.rb
148
184
  - lib/rack/crawler_detect.rb
149
185
  homepage: https://github.com/loadkpi/crawler_detect
@@ -165,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
165
201
  - !ruby/object:Gem::Version
166
202
  version: '0'
167
203
  requirements: []
168
- rubygems_version: 3.0.6
204
+ rubygems_version: 3.1.2
169
205
  signing_key:
170
206
  specification_version: 4
171
207
  summary: 'CrawlerDetect: detect bots/crawlers'