impressionist-cody 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/main.yml +25 -0
- data/.gitignore +17 -0
- data/.rspec +1 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +660 -0
- data/CHANGELOG.rdoc +96 -0
- data/Gemfile +22 -0
- data/LICENSE.txt +20 -0
- data/README.md +265 -0
- data/Rakefile +20 -0
- data/UPGRADE_GUIDE.md +13 -0
- data/app/assets/config/manifest.js +3 -0
- data/app/controllers/impressionist_controller.rb +166 -0
- data/app/models/impression.rb +2 -0
- data/app/models/impressionist/bots.rb +1468 -0
- data/app/models/impressionist/impressionable.rb +62 -0
- data/impressionist.gemspec +30 -0
- data/lib/generators/active_record/impressionist_generator.rb +22 -0
- data/lib/generators/active_record/templates/create_impressions_table.rb.erb +32 -0
- data/lib/generators/impressionist_generator.rb +13 -0
- data/lib/generators/mongo_mapper/impressionist_generator.rb +8 -0
- data/lib/generators/mongoid/impressionist_generator.rb +8 -0
- data/lib/generators/templates/impression.rb.erb +8 -0
- data/lib/impressionist/bots.rb +21 -0
- data/lib/impressionist/controllers/mongoid/impressionist_controller.rb +10 -0
- data/lib/impressionist/counter_cache.rb +76 -0
- data/lib/impressionist/engine.rb +45 -0
- data/lib/impressionist/is_impressionable.rb +23 -0
- data/lib/impressionist/load.rb +11 -0
- data/lib/impressionist/models/active_record/impression.rb +14 -0
- data/lib/impressionist/models/active_record/impressionist/impressionable.rb +12 -0
- data/lib/impressionist/models/mongo_mapper/impression.rb +18 -0
- data/lib/impressionist/models/mongo_mapper/impressionist/impressionable.rb +21 -0
- data/lib/impressionist/models/mongoid/impression.rb +26 -0
- data/lib/impressionist/models/mongoid/impressionist/impressionable.rb +28 -0
- data/lib/impressionist/rails_toggle.rb +26 -0
- data/lib/impressionist/setup_association.rb +53 -0
- data/lib/impressionist/update_counters.rb +77 -0
- data/lib/impressionist/version.rb +3 -0
- data/lib/impressionist.rb +12 -0
- data/logo.png +0 -0
- data/spec/controllers/articles_controller_spec.rb +113 -0
- data/spec/controllers/dummy_controller_spec.rb +13 -0
- data/spec/controllers/impressionist_uniqueness_spec.rb +463 -0
- data/spec/controllers/posts_controller_spec.rb +36 -0
- data/spec/controllers/widgets_controller_spec.rb +103 -0
- data/spec/counter_caching_spec.rb +49 -0
- data/spec/dummy/.ruby-version +1 -0
- data/spec/dummy/Rakefile +6 -0
- data/spec/dummy/app/assets/config/manifest.js +1 -0
- data/spec/dummy/app/assets/images/.keep +0 -0
- data/spec/dummy/app/assets/stylesheets/application.css +15 -0
- data/spec/dummy/app/channels/application_cable/channel.rb +4 -0
- data/spec/dummy/app/channels/application_cable/connection.rb +4 -0
- data/spec/dummy/app/controllers/application_controller.rb +2 -0
- data/spec/dummy/app/controllers/articles_controller.rb +22 -0
- data/spec/dummy/app/controllers/concerns/.keep +0 -0
- data/spec/dummy/app/controllers/dummy_controller.rb +6 -0
- data/spec/dummy/app/controllers/posts_controller.rb +23 -0
- data/spec/dummy/app/controllers/profiles_controller.rb +14 -0
- data/spec/dummy/app/controllers/widgets_controller.rb +12 -0
- data/spec/dummy/app/helpers/application_helper.rb +2 -0
- data/spec/dummy/app/javascript/packs/application.js +15 -0
- data/spec/dummy/app/jobs/application_job.rb +7 -0
- data/spec/dummy/app/mailers/application_mailer.rb +4 -0
- data/spec/dummy/app/models/application_record.rb +3 -0
- data/spec/dummy/app/models/article.rb +3 -0
- data/spec/dummy/app/models/concerns/.keep +0 -0
- data/spec/dummy/app/models/dummy.rb +7 -0
- data/spec/dummy/app/models/post.rb +3 -0
- data/spec/dummy/app/models/profile.rb +6 -0
- data/spec/dummy/app/models/user.rb +3 -0
- data/spec/dummy/app/models/widget.rb +3 -0
- data/spec/dummy/app/views/articles/index.html.erb +1 -0
- data/spec/dummy/app/views/articles/show.html.erb +1 -0
- data/spec/dummy/app/views/dummy/index.html.erb +0 -0
- data/spec/dummy/app/views/layouts/application.html.erb +14 -0
- data/spec/dummy/app/views/layouts/mailer.html.erb +13 -0
- data/spec/dummy/app/views/layouts/mailer.text.erb +1 -0
- data/spec/dummy/app/views/posts/edit.html.erb +0 -0
- data/spec/dummy/app/views/posts/index.html.erb +0 -0
- data/spec/dummy/app/views/posts/show.html.erb +0 -0
- data/spec/dummy/app/views/profiles/show.html.erb +3 -0
- data/spec/dummy/app/views/widgets/index.html.erb +0 -0
- data/spec/dummy/app/views/widgets/new.html.erb +0 -0
- data/spec/dummy/app/views/widgets/show.html.erb +0 -0
- data/spec/dummy/bin/rails +4 -0
- data/spec/dummy/bin/rake +4 -0
- data/spec/dummy/bin/setup +33 -0
- data/spec/dummy/config/application.rb +20 -0
- data/spec/dummy/config/boot.rb +5 -0
- data/spec/dummy/config/cable.yml +10 -0
- data/spec/dummy/config/database.yml +25 -0
- data/spec/dummy/config/environment.rb +5 -0
- data/spec/dummy/config/environments/development.rb +62 -0
- data/spec/dummy/config/environments/production.rb +112 -0
- data/spec/dummy/config/environments/test.rb +49 -0
- data/spec/dummy/config/initializers/application_controller_renderer.rb +8 -0
- data/spec/dummy/config/initializers/assets.rb +12 -0
- data/spec/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/spec/dummy/config/initializers/content_security_policy.rb +28 -0
- data/spec/dummy/config/initializers/cookies_serializer.rb +5 -0
- data/spec/dummy/config/initializers/filter_parameter_logging.rb +4 -0
- data/spec/dummy/config/initializers/impression.rb +8 -0
- data/spec/dummy/config/initializers/inflections.rb +16 -0
- data/spec/dummy/config/initializers/mime_types.rb +4 -0
- data/spec/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/spec/dummy/config/locales/en.yml +33 -0
- data/spec/dummy/config/puma.rb +38 -0
- data/spec/dummy/config/routes.rb +4 -0
- data/spec/dummy/config/spring.rb +6 -0
- data/spec/dummy/config/storage.yml +34 -0
- data/spec/dummy/config.ru +5 -0
- data/spec/dummy/config.ru2 +4 -0
- data/spec/dummy/db/development.sqlite3 +0 -0
- data/spec/dummy/db/migrate/20110201153144_create_articles.rb +13 -0
- data/spec/dummy/db/migrate/20110210205028_create_posts.rb +13 -0
- data/spec/dummy/db/migrate/20111127184039_create_widgets.rb +15 -0
- data/spec/dummy/db/migrate/20150207135825_create_profiles.rb +10 -0
- data/spec/dummy/db/migrate/20150207140310_create_friendly_id_slugs.rb +18 -0
- data/spec/dummy/db/migrate/20200720143817_create_impressions_table.rb +32 -0
- data/spec/dummy/db/schema.rb +77 -0
- data/spec/dummy/lib/assets/.keep +0 -0
- data/spec/dummy/log/.keep +0 -0
- data/spec/dummy/log/development.log +129 -0
- data/spec/dummy/public/404.html +67 -0
- data/spec/dummy/public/422.html +67 -0
- data/spec/dummy/public/500.html +66 -0
- data/spec/dummy/public/apple-touch-icon-precomposed.png +0 -0
- data/spec/dummy/public/apple-touch-icon.png +0 -0
- data/spec/dummy/public/favicon.ico +0 -0
- data/spec/dummy/storage/.keep +0 -0
- data/spec/fixtures/articles.yml +3 -0
- data/spec/fixtures/impressions.yml +43 -0
- data/spec/fixtures/posts.yml +3 -0
- data/spec/fixtures/profiles.yml +4 -0
- data/spec/fixtures/widgets.yml +4 -0
- data/spec/initializers_spec.rb +21 -0
- data/spec/models/bots_spec.rb +25 -0
- data/spec/models/impression_spec.rb +66 -0
- data/spec/rails_generators/rails_generators_spec.rb +23 -0
- data/spec/rails_helper.rb +11 -0
- data/spec/rails_toggle_spec.rb +31 -0
- data/spec/setup_association_spec.rb +48 -0
- data/spec/spec_helper.rb +43 -0
- data/upgrade_migrations/version_0_3_0.rb +27 -0
- data/upgrade_migrations/version_0_4_0.rb +9 -0
- data/upgrade_migrations/version_1_5_2.rb +12 -0
- metadata +302 -0
@@ -0,0 +1,1468 @@
|
|
1
|
+
module Impressionist
|
2
|
+
module Bots
|
3
|
+
|
4
|
+
# Reports whether the given user agent string is considered a bot.
#
# A user agent counts as a bot when its lowercased form contains any of the
# WILD_CARDS fragments, or when it exactly matches (case-sensitive) one of
# the entries in LIST.
#
# user_agent - the user agent string to test; may be nil.
#
# Returns false when user_agent is nil, otherwise true/false per the rules
# above.
def self.bot?(user_agent = nil)
  return false if user_agent.nil?

  # Lowercase once up front rather than once per wildcard inside the block.
  normalized = user_agent.downcase
  WILD_CARDS.any? { |wc| normalized.include?(wc) } || LIST.include?(user_agent)
end
|
8
|
+
|
9
|
+
# Lowercase fragments; bot? checks whether the lowercased user agent
# contains any of them.
WILD_CARDS = %w[bot yahoo slurp google msn crawler]
|
10
|
+
|
11
|
+
LIST = ["<a href='http://www.unchaos.com/'> UnChaos </a> From Chaos To Order Hybrid Web Search Engine.(vadim_gonchar@unchaos.com)",
|
12
|
+
"<a href='http://www.unchaos.com/'> UnChaos Bot Hybrid Web Search Engine. </a> (vadim_gonchar@unchaos.com)",
|
13
|
+
"<b> UnChaosBot From Chaos To Order UnChaos Hybrid Web Search Engine at www.unchaos.com </b> (info@unchaos.com)",
|
14
|
+
"<http://www.sygol.com/> http://www.sygol.com",
|
15
|
+
"*/Nutch-0.9-dev",
|
16
|
+
"+SitiDi.net/SitiDiBot/1.0 (+Have Good Day)",
|
17
|
+
"-DIE-KRAEHE- META-SEARCH-ENGINE/1.1 http://www.die-kraehe.de",
|
18
|
+
"192.comAgent",
|
19
|
+
"4anything.com LinkChecker v2.0",
|
20
|
+
"8484 Boston Project v 1.0",
|
21
|
+
":robot/1.0 (linux) ( admin e-mail: undefined http://www.neofonie.de/loesungen/search/robot.html )",
|
22
|
+
"A-Online Search",
|
23
|
+
"A1 Sitemap Generator/1.0 (+http://www.micro-sys.dk/products/sitemap-generator/) miggibot/2006.01.24",
|
24
|
+
"aardvark-crawler",
|
25
|
+
"AbachoBOT",
|
26
|
+
"AbachoBOT (Mozilla compatible)",
|
27
|
+
"ABCdatos BotLink/5.xx.xxx#BBL",
|
28
|
+
"Aberja Checkomat",
|
29
|
+
"abot/0.1 (abot; http://www.abot.com; abot@abot.com)",
|
30
|
+
"About/0.1libwww-perl/5.47",
|
31
|
+
"Accelatech RSSCrawler/0.4",
|
32
|
+
"accoona",
|
33
|
+
"Accoona-AI-Agent/1.1.1 (crawler at accoona dot com)",
|
34
|
+
"Accoona-AI-Agent/1.1.2 (aicrawler at accoonabot dot com)",
|
35
|
+
"Ack (http://www.ackerm.com/)",
|
36
|
+
"AcoiRobot",
|
37
|
+
"Acoon Robot v1.50.001",
|
38
|
+
"Acoon Robot v1.52 (http://www.acoon.de)",
|
39
|
+
"Acoon-Robot 4.0.x.[xx] (http://www.acoon.de)",
|
40
|
+
"Acoon-Robot v3.xx (http://www.acoon.de and http://www.acoon.com)",
|
41
|
+
"Acorn/Nutch-0.9 (Non-Profit Search Engine; acorn.isara.org; acorn at isara dot org)",
|
42
|
+
"AESOP_com_SpiderMan",
|
43
|
+
"agadine/1.x.x (+http://www.agada.de)",
|
44
|
+
"Agent-SharewarePlazaFileCheckBot/2.0+(+http://www.SharewarePlaza.com)",
|
45
|
+
"AgentName/0.1 libwww-perl/5.48",
|
46
|
+
"AIBOT/2.1 By +(www.21seek.com A Real artificial intelligence search engine China)",
|
47
|
+
"aipbot/1.0 (aipbot; http://www.aipbot.com; aipbot@aipbot.com)",
|
48
|
+
"aipbot/2-beta (aipbot dev; http://aipbot.com; aipbot@aipbot.com)",
|
49
|
+
"Aladin/3.324",
|
50
|
+
"Aleksika Spider/1.0 (+http://www.aleksika.com/)",
|
51
|
+
"AlkalineBOT/1.3",
|
52
|
+
"AlkalineBOT/1.4 (1.4.0326.0 RTM)",
|
53
|
+
"Allesklar/0.1 libwww-perl/5.46",
|
54
|
+
"Allrati/1.1 (+)",
|
55
|
+
"AltaVista Intranet V2.0 AVS EVAL search@freeit.com",
|
56
|
+
"AltaVista Intranet V2.0 Compaq Altavista Eval sveand@altavista.net",
|
57
|
+
"AltaVista Intranet V2.0 evreka.com crawler@evreka.com",
|
58
|
+
"AltaVista V2.0B crawler@evreka.com",
|
59
|
+
"AmfibiBOT",
|
60
|
+
"Amfibibot/0.06 (Amfibi Web Search; http://www.amfibi.com; agent@amfibi.com)",
|
61
|
+
"Amfibibot/0.07 (Amfibi Robot; http://www.amfibi.com; agent@amfibi.com)",
|
62
|
+
"amibot",
|
63
|
+
"AnnoMille spider 0.1 alpha - http://www.annomille.it",
|
64
|
+
"AnswerBus (http://www.answerbus.com/)",
|
65
|
+
"antibot-V1.1.5/i586-linux-2.2",
|
66
|
+
"AnzwersCrawl/2.0 (anzwerscrawl@anzwers.com.au;Engine)",
|
67
|
+
"Apexoo Spider 1.x",
|
68
|
+
"Aport",
|
69
|
+
"appie 1.1 (www.walhello.com)",
|
70
|
+
"ArabyBot (compatible; Mozilla/5.0; GoogleBot; FAST Crawler 6.4; http://www.araby.com;)",
|
71
|
+
"ArachBot",
|
72
|
+
"Arachnoidea (arachnoidea@euroseek.com)",
|
73
|
+
"ArchitextSpider",
|
74
|
+
"archive.org_bot",
|
75
|
+
"Arikus_Spider",
|
76
|
+
"Arquivo-web-crawler (compatible; heritrix/1.12.1 +http://arquivo-web.fccn.pt)",
|
77
|
+
"ASAHA Search Engine Turkey V.001 (http://www.asaha.com/)",
|
78
|
+
"Asahina-Antenna/1.x",
|
79
|
+
"Asahina-Antenna/1.x (libhina.pl/x.x ; libtime.pl/x.x)",
|
80
|
+
"ask.24x.info",
|
81
|
+
"AskAboutOil/0.06-rcp (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@askaboutoil.com)",
|
82
|
+
"asked/Nutch-0.8 (web crawler; http://asked.jp; epicurus at gmail dot com)",
|
83
|
+
"ASPSeek/1.2.5",
|
84
|
+
"ASPseek/1.2.9d",
|
85
|
+
"ASPSeek/1.2.x",
|
86
|
+
"ASPSeek/1.2.xa",
|
87
|
+
"ASPseek/1.2.xx",
|
88
|
+
"ASPSeek/1.2.xxpre",
|
89
|
+
"ASSORT/0.10",
|
90
|
+
"asterias/2.0",
|
91
|
+
"AtlocalBot/1.1 +(http://www.atlocal.com/local-web-site-owner.html)",
|
92
|
+
"Atomic_Email_Hunter/4.0",
|
93
|
+
"Atomz/1.0",
|
94
|
+
"atSpider/1.0",
|
95
|
+
"Attentio/Nutch-0.9-dev (Attentio's beta blog crawler; www.attentio.com; info@attentio.com)",
|
96
|
+
"augurfind",
|
97
|
+
"augurnfind V-1.x",
|
98
|
+
"autoemailspider",
|
99
|
+
"autowebdir 1.1 (www.autowebdir.com)",
|
100
|
+
"AV Fetch 1.0",
|
101
|
+
"AVSearch-1.0(peter.turney@nrc.ca)",
|
102
|
+
"AVSearch-3.0(AltaVista/AVC)",
|
103
|
+
"axadine/ (Axadine Crawler; http://www.axada.de/; )",
|
104
|
+
"AxmoRobot - Crawling your site for better indexing on www.axmo.com search engine.",
|
105
|
+
"BabalooSpider/1.3 (BabalooSpider; http://www.babaloo.si; spider@babaloo.si)",
|
106
|
+
"BaboomBot/1.x.x (+http://www.baboom.us)",
|
107
|
+
"BaiduImagespider+(+http://www.baidu.jp/search/s308.html)",
|
108
|
+
"BaiDuSpider",
|
109
|
+
"Baiduspider+(+http://help.baidu.jp/system/05.html)",
|
110
|
+
"Baiduspider+(+http://www.baidu.com/search/spider.htm)",
|
111
|
+
"Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
|
112
|
+
"Balihoo/Nutch-1.0-dev (Crawler for Balihoo.com search engine - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
|
113
|
+
"BarraHomeCrawler (albertof@barrahome.org)",
|
114
|
+
"bdcindexer_2.6.2 (research@bdc)",
|
115
|
+
"BDFetch",
|
116
|
+
"BDNcentral Crawler v2.3 [en] (http://www.bdncentral.com/robot.html) (X11; I; Linux 2.0.44 i686)",
|
117
|
+
"beautybot/1.0 (+http://www.uchoose.de/crawler/beautybot/)",
|
118
|
+
"BebopBot/2.5.1 ( crawler http://www.apassion4jazz.net/bebopbot.html )",
|
119
|
+
"BigCliqueBOT/1.03-dev (bigclicbot; http://www.bigclique.com; bot@bigclique.com)",
|
120
|
+
"BIGLOTRON (Beta 2;GNU/Linux)",
|
121
|
+
"Bigsearch.ca/Nutch-x.x-dev (Bigsearch.ca Internet Spider; http://www.bigsearch.ca/; info@enhancededge.com)",
|
122
|
+
"BilgiBetaBot/0.8-dev (bilgi.com (Beta) ; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
|
123
|
+
"BilgiBot/1.0(beta) (http://www.bilgi.com/; bilgi at bilgi dot com)",
|
124
|
+
"Bitacle bot/1.1",
|
125
|
+
"Bitacle Robot (V:1.0;) (http://www.bitacle.com)",
|
126
|
+
"BlackWidow",
|
127
|
+
"Blaiz-Bee/1.0 (+http://www.blaiz.net)",
|
128
|
+
"Blaiz-Bee/2.00.8222 (BE Internet Search Engine http://www.rawgrunt.com)",
|
129
|
+
"Blaiz-Bee/2.00.xxxx (+http://www.blaiz.net)",
|
130
|
+
"BlitzBOT@tricus.net",
|
131
|
+
"BlitzBOT@tricus.net (Mozilla compatible)",
|
132
|
+
"BlogBot/1.x",
|
133
|
+
"Bloglines Title Fetch/1.0 (http://www.bloglines.com)",
|
134
|
+
"Bloglines-Images/0.1 (http://www.bloglines.com)",
|
135
|
+
"Bloglines/3.1 (http://www.bloglines.com)",
|
136
|
+
"Blogpulse (info@blogpulse.com)",
|
137
|
+
"BlogPulseLive (support@blogpulse.com)",
|
138
|
+
"BlogSearch/1.x +http://www.icerocket.com/",
|
139
|
+
"blogsearchbot-pumpkin-3",
|
140
|
+
"BlogsNowBot, V 2.01 (+http://www.blogsnow.com/)",
|
141
|
+
"BlogVibeBot-v1.1 (spider@blogvibe.nl)",
|
142
|
+
"blogWatcher_Spider/0.1 (http://www.lr.pi.titech.ac.jp/blogWatcher/)",
|
143
|
+
"BlogzIce/1.0 (+http://icerocket.com; rhodes@icerocket.com)",
|
144
|
+
"BlogzIce/1.0 +http://www.icerocket.com/",
|
145
|
+
"BloobyBot",
|
146
|
+
"Bloodhound/Nutch-0.9 (Testing Crawler for Research - obeys robots.txt and robots meta tags ; http://balihoo.com/index.aspx; robot at balihoo dot com)",
|
147
|
+
"boitho.com-dc/0.xx (http://www.boitho.com/dcbot.html)",
|
148
|
+
"boitho.com-robot/1.x",
|
149
|
+
"boitho.com-robot/1.x (http://www.boitho.com/bot.html)",
|
150
|
+
"BPImageWalker/2.0 (www.bdbrandprotect.com)",
|
151
|
+
"BravoBrian SpiderEngine MarcoPolo",
|
152
|
+
"BruinBot (+http://webarchive.cs.ucla.edu/bruinbot.html) ",
|
153
|
+
"BSDSeek/1.0",
|
154
|
+
"BTbot/0.x (+http://www.btbot.com/btbot.html)",
|
155
|
+
"BuildCMS crawler (http://www.buildcms.com/crawler)",
|
156
|
+
"BullsEye",
|
157
|
+
"bumblebee@relevare.com",
|
158
|
+
"BurstFindCrawler/1.1 (crawler.burstfind.com; http://crawler.burstfind.com; crawler@burstfind.com)",
|
159
|
+
"Buscaplus Robi/1.0 (http://www.buscaplus.com/robi/)",
|
160
|
+
"bwh3_user_agent",
|
161
|
+
"Cabot/Nutch-0.9 (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
|
162
|
+
"Cabot/Nutch-1.0-dev (Amfibi's web-crawling robot; http://www.amfibi.com/cabot/; agent@amfibi.com)",
|
163
|
+
"carleson/1.0",
|
164
|
+
"Carnegie_Mellon_University_Research_WebBOT-->PLEASE READ-->http://www.andrew.cmu.edu/~brgordon/webbot/index.html http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
|
165
|
+
"Carnegie_Mellon_University_WebCrawler http://www.andrew.cmu.edu/~brgordon/webbot/index.html",
|
166
|
+
"Catall Spider",
|
167
|
+
"CazoodleBot/CazoodleBot-0.1 (CazoodleBot Crawler; http://www.cazoodle.com/cazoodlebot; cazoodlebot@cazoodle.com)",
|
168
|
+
"CCBot/1.0 (+http://www.commoncrawl.org/bot.html)",
|
169
|
+
"ccubee/x.x",
|
170
|
+
"Ceramic Tile Installation Guide (http://www.floorstransformed.com)",
|
171
|
+
"cfetch/1.0",
|
172
|
+
"China Local Browse 2.6",
|
173
|
+
"ChristCRAWLER 2.0",
|
174
|
+
"CipinetBot (http://www.cipinet.com/bot.html)",
|
175
|
+
"ClariaBot/1.0",
|
176
|
+
"Claymont.com",
|
177
|
+
"CloakDetect/0.9 (+http://fulltext.seznam.cz/)",
|
178
|
+
"Clushbot/2.x (+http://www.clush.com/bot.html)",
|
179
|
+
"Clushbot/3.x-BinaryFury (+http://www.clush.com/bot.html)",
|
180
|
+
"Clushbot/3.xx-Ajax (+http://www.clush.com/bot.html)",
|
181
|
+
"Clushbot/3.xx-Hector (+http://www.clush.com/bot.html)",
|
182
|
+
"Clushbot/3.xx-Peleus (+http://www.clush.com/bot.html)",
|
183
|
+
"Cogentbot/1.X (+http://www.cogentsoftwaresolutions.com/bot.html)",
|
184
|
+
"combine/0.0",
|
185
|
+
"Combine/2.0 http://combine.it.lth.se/",
|
186
|
+
"Combine/3 http://combine.it.lth.se/",
|
187
|
+
"Combine/x.0",
|
188
|
+
"cometrics-bot, http://www.cometrics.de",
|
189
|
+
"Computer_and_Automation_Research_Institute_Crawler crawler@ilab.sztaki.hu",
|
190
|
+
"Comrite/0.7.1 (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
|
191
|
+
"ContactBot/0.2",
|
192
|
+
"ContentSmartz",
|
193
|
+
"Convera Internet Spider V6.x",
|
194
|
+
"ConveraCrawler/0.2",
|
195
|
+
"ConveraCrawler/0.9d (+http://www.authoritativeweb.com/crawl)",
|
196
|
+
"ConveraMultiMediaCrawler/0.1 (+http://www.authoritativeweb.com/crawl)",
|
197
|
+
"CoolBot",
|
198
|
+
"cosmos/0.8_(robot@xyleme.com)",
|
199
|
+
"cosmos/0.9_(robot@xyleme.com)",
|
200
|
+
"CougarSearch/0.x (+http://www.cougarsearch.com/faq.shtml)",
|
201
|
+
"Covac TexAs Arachbot",
|
202
|
+
"Cowbot-0.1 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
|
203
|
+
"Cowbot-0.1.x (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
|
204
|
+
"CrawlConvera0.1 (CrawlConvera@yahoo.com)",
|
205
|
+
"Crawler (cometsearch@cometsystems.com)",
|
206
|
+
"Crawler admin@crawler.de",
|
207
|
+
"Crawler V 0.2.x admin@crawler.de",
|
208
|
+
"crawler@alexa.com",
|
209
|
+
"CrawlerBoy Pinpoint.com",
|
210
|
+
"Crawllybot/0.1 (Crawllybot; +http://www.crawlly.com; crawler@crawlly.com)",
|
211
|
+
"CreativeCommons/0.06-dev (Nutch; http://www.nutch.org/docs/en/bot.html; nutch-agent@lists.sourceforge.net)",
|
212
|
+
"CrocCrawler vx.3 [en] (http://www.croccrawler.com) (X11; I; Linux 2.0.44 i686)",
|
213
|
+
"csci_b659/0.13",
|
214
|
+
"Cuasarbot/0.9b http://www.cuasar.com/spider_beta/ ",
|
215
|
+
"CurryGuide SiteScan 1.1",
|
216
|
+
"Custom Spider www.bisnisseek.com /1.0",
|
217
|
+
"CyberPatrol SiteCat Webbot (http://www.cyberpatrol.com/cyberpatrolcrawler.asp)",
|
218
|
+
"CydralSpider/1.x (Cydral Web Image Search; http://www.cydral.com)",
|
219
|
+
"CydralSpider/3.0 (Cydral Image Search; http://www.cydral.com)",
|
220
|
+
"DataCha0s/2.0",
|
221
|
+
"DataCha0s/2.0",
|
222
|
+
"DataFountains/DMOZ Downloader",
|
223
|
+
"DataFountains/Dmoz Downloader (http://ivia.ucr.edu/useragents.shtml)",
|
224
|
+
"DataFountains/DMOZ Feature Vector Corpus Creator (http://ivia.ucr.edu/useragents.shtml)",
|
225
|
+
"DataparkSearch/4.47 (+http://dataparksearch.org/bot)",
|
226
|
+
"DataparkSearch/4.xx (http://www.dataparksearch.org/)",
|
227
|
+
"DataSpear/1.0 (Spider; http://www.dataspear.com/spider.html; spider@dataspear.com)",
|
228
|
+
"DataSpearSpiderBot/0.2 (DataSpear Spider Bot; http://dssb.dataspear.com/bot.html; dssb@dataspear.com)",
|
229
|
+
"DatenBot( http://www.sicher-durchs-netz.de/bot.html)",
|
230
|
+
"DaviesBot/1.7 (www.wholeweb.net)",
|
231
|
+
"daypopbot/0.x",
|
232
|
+
"dbDig(http://www.prairielandconsulting.com)",
|
233
|
+
"DBrowse 1.4b",
|
234
|
+
"DBrowse 1.4d",
|
235
|
+
"dCSbot/1.1",
|
236
|
+
"de.searchengine.comBot 1.2 (http://de.searchengine.com/spider)",
|
237
|
+
"deepak-USC/ISI",
|
238
|
+
"DeepIndex",
|
239
|
+
"DeepIndex ( http://www.zetbot.com )",
|
240
|
+
"DeepIndex (www.en.deepindex.com)",
|
241
|
+
"DeepIndexer.ca",
|
242
|
+
"Demo Bot DOT 16b",
|
243
|
+
"Demo Bot Z 16b",
|
244
|
+
"Denmex websearch (http://search.denmex.com)",
|
245
|
+
"dev-spider2.searchpsider.com/1.3b",
|
246
|
+
"DiaGem/1.1 (http://www.skyrocket.gr.jp/diagem.html)",
|
247
|
+
"Diamond/x.0",
|
248
|
+
"DiamondBot",
|
249
|
+
"Digger/1.0 JDK/1.3.0rc3",
|
250
|
+
"DigOut4U",
|
251
|
+
"DIIbot/1.2",
|
252
|
+
"disco/Nutch-0.9 (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
|
253
|
+
"disco/Nutch-1.0-dev (experimental crawler; www.discoveryengine.com; disco-crawl@discoveryengine.com)",
|
254
|
+
"DittoSpyder",
|
255
|
+
"dloader(NaverRobot)/1.0",
|
256
|
+
"DoCoMo/1.0/Nxxxi/c10",
|
257
|
+
"DoCoMo/1.0/Nxxxi/c10/TB",
|
258
|
+
"DoCoMo/2.0 P900iV(c100;TB;W24H11) ",
|
259
|
+
"DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
|
260
|
+
"DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)",
|
261
|
+
"dodgebot/experimental",
|
262
|
+
"Download-Tipp Linkcheck (http://download-tipp.de/)",
|
263
|
+
"Drecombot/1.0 (http://career.drecom.jp/bot.html)",
|
264
|
+
"DSurf15a 01",
|
265
|
+
"DSurf15a 71",
|
266
|
+
"DSurf15a 81",
|
267
|
+
"DSurf15a VA",
|
268
|
+
"dtSearchSpider",
|
269
|
+
"DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
|
270
|
+
"Dumbot(version 0.1 beta - dumbfind.com)",
|
271
|
+
"Dumbot(version 0.1 beta - http://www.dumbfind.com/dumbot.html)",
|
272
|
+
"Dumbot(version 0.1 beta)",
|
273
|
+
"e-sense 1.0 ea(www.vigiltech.com/esensedisclaim.html)",
|
274
|
+
"e-SocietyRobot(http://www.yama.info.waseda.ac.jp/~yamana/es/)",
|
275
|
+
"eApolloBot/2.0 (compatible; heritrix/2.0.0-SNAPSHOT-20071024.170148 +http://www.eapollo-opto.com)",
|
276
|
+
"EARTHCOM.info/1.x [www.earthcom.info]",
|
277
|
+
"EARTHCOM.info/1.xbeta [www.earthcom.info]",
|
278
|
+
"EasyDL/3.xx",
|
279
|
+
"EasyDL/3.xx http://keywen.com/Encyclopedia/Bot",
|
280
|
+
"EBrowse 1.4b",
|
281
|
+
"EchO!/2.0",
|
282
|
+
"Educate Search VxB",
|
283
|
+
"egothor/3.0a (+http://www.xdefine.org/robot.html)",
|
284
|
+
"EgotoBot/4.8 (+http://www.egoto.com/about.htm)",
|
285
|
+
"ejupiter.com",
|
286
|
+
"elfbot/1.0 (+http://www.uchoose.de/crawler/elfbot/)",
|
287
|
+
"ELI/20070402:2.0 (DAUM RSS Robot, Daum Communications Corp.; +http://ws.daum.net/aboutkr.html)",
|
288
|
+
"EmailSiphon",
|
289
|
+
"EmailSpider",
|
290
|
+
"EmailWolf 1.00",
|
291
|
+
"EMPAS_ROBOT",
|
292
|
+
"EnaBot/1.x (http://www.enaball.com/crawler.html)",
|
293
|
+
"Enfish Tracker",
|
294
|
+
"Enterprise_Search/1.0",
|
295
|
+
"Enterprise_Search/1.0.xxx",
|
296
|
+
"Enterprise_Search/1.00.xxx;MSSQL (http://www.innerprise.net/es-spider.asp)",
|
297
|
+
"envolk/1.7 (+http://www.envolk.com/envolkspiderinfo.php)",
|
298
|
+
"envolk[ITS]spider/1.6(+http://www.envolk.com/envolkspider.html)",
|
299
|
+
"EroCrawler",
|
300
|
+
"ES.NET_Crawler/2.0 (http://search.innerprise.net/)",
|
301
|
+
"eseek-larbin_2.6.2 (crawler@exactseek.com)",
|
302
|
+
"ESISmartSpider",
|
303
|
+
"eStyleSearch 4 (compatible; MSIE 6.0; Windows NT 5.0)",
|
304
|
+
"ESurf15a 15",
|
305
|
+
"EuripBot/0.x (+http://www.eurip.com) GetFile",
|
306
|
+
"EuripBot/0.x (+http://www.eurip.com) GetRobots",
|
307
|
+
"EuripBot/0.x (+http://www.eurip.com) PreCheck",
|
308
|
+
"Eurobot/1.0 (http://www.ayell.eu)",
|
309
|
+
"EvaalSE - bot@evaal.com",
|
310
|
+
"eventax/1.3 (eventax; http://www.eventax.de/; info@eventax.de)",
|
311
|
+
"Everest-Vulcan Inc./0.1 (R&D project; host=e-1-24; http://everest.vulcan.com/crawlerhelp)",
|
312
|
+
"Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)",
|
313
|
+
"Exabot-Images/1.0",
|
314
|
+
"Exabot-Test/1.0",
|
315
|
+
"Exabot/2.0",
|
316
|
+
"Exabot/3.0",
|
317
|
+
"ExactSeek Crawler/0.1",
|
318
|
+
"exactseek-crawler-2.63 (crawler@exactseek.com)",
|
319
|
+
"exactseek-pagereaper-2.63 (crawler@exactseek.com)",
|
320
|
+
"exactseek.com",
|
321
|
+
"Exalead NG/MimeLive Client (convert/http/0.120)",
|
322
|
+
"Excalibur Internet Spider V6.5.4",
|
323
|
+
"Execrawl/1.0 (Execrawl; http://www.execrawl.com/; bot@execrawl.com)",
|
324
|
+
"exooba crawler/exooba crawler (crawler for exooba.com; http://www.exooba.com/; info at exooba dot com)",
|
325
|
+
"exooba/exooba crawler (exooba; exooba)",
|
326
|
+
"ExperimentalHenrytheMiragoRobot",
|
327
|
+
"ExtractorPro",
|
328
|
+
"EyeCatcher (Download-tipp.de)/1.0",
|
329
|
+
"Factbot 1.09 (see http://www.factbites.com/webmasters.php)",
|
330
|
+
"factbot : http://www.factbites.com/robots",
|
331
|
+
"Fast Crawler Gold Edition",
|
332
|
+
"FAST Enterprise Crawler 6 (Experimental)",
|
333
|
+
"FAST Enterprise Crawler 6 / Scirus scirus-crawler@fast.no; http://www.scirus.com/srsapp/contactus/",
|
334
|
+
"FAST Enterprise Crawler 6 used by Cobra Development (admin@fastsearch.com)",
|
335
|
+
"FAST Enterprise Crawler 6 used by Comperio AS (sts@comperio.no)",
|
336
|
+
"FAST Enterprise Crawler 6 used by FAST (FAST)",
|
337
|
+
"FAST Enterprise Crawler 6 used by Pages Jaunes (pvincent@pagesjaunes.fr)",
|
338
|
+
"FAST Enterprise Crawler 6 used by Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
|
339
|
+
"FAST Enterprise Crawler 6 used by Singapore Press Holdings (crawler@sphsearch.sg)",
|
340
|
+
"FAST Enterprise Crawler/6 (www.fastsearch.com)",
|
341
|
+
"FAST Enterprise Crawler/6.4 (helpdesk at fast.no)",
|
342
|
+
"FAST FirstPage retriever (compatible; MSIE 5.5; Mozilla/4.0)",
|
343
|
+
"FAST MetaWeb Crawler (helpdesk at fastsearch dot com)",
|
344
|
+
"Fast PartnerSite Crawler",
|
345
|
+
"FAST-WebCrawler/2.2.10 (Multimedia Search) (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
|
346
|
+
"FAST-WebCrawler/2.2.6 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)",
|
347
|
+
"FAST-WebCrawler/2.2.7 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
|
348
|
+
"FAST-WebCrawler/2.2.8 (crawler@fast.no; http://www.fast.no/faq/faqfastwebsearch/faqfastwebcrawler.html)http://www.fast.no",
|
349
|
+
"FAST-WebCrawler/3.2 test",
|
350
|
+
"FAST-WebCrawler/3.3 (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
|
351
|
+
"FAST-WebCrawler/3.4/Nirvana (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
|
352
|
+
"FAST-WebCrawler/3.4/PartnerSite (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
|
353
|
+
"FAST-WebCrawler/3.5 (atw-crawler at fast dot no; http://fast.no/support.php?c=faqs/crawler)",
|
354
|
+
"FAST-WebCrawler/3.6 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
|
355
|
+
"FAST-WebCrawler/3.6/FirstPage (crawler@fast.no; http://fast.no/support.php?c=faqs/crawler)",
|
356
|
+
"FAST-WebCrawler/3.7 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
|
357
|
+
"FAST-WebCrawler/3.7/FirstPage (atw-crawler at fast dot no;http://fast.no/support/crawler.asp)",
|
358
|
+
"FAST-WebCrawler/3.8 (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
|
359
|
+
"FAST-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
|
360
|
+
"FAST-WebCrawler/3.x Multimedia",
|
361
|
+
"FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no)",
|
362
|
+
"fastbot crawler beta 2.0 (+http://www.fastbot.de)",
|
363
|
+
"FastBug http://www.ay-up.com",
|
364
|
+
"FastCrawler 3.0.1 (crawler@1klik.dk)",
|
365
|
+
"FastSearch Web Crawler for Verizon SuperPages (kevin.watters@fastsearch.com)",
|
366
|
+
"Favcollector/2.0 (info@favcollector.com http://www.favcollector.com/)",
|
367
|
+
"favo.eu crawler/0.6 (http://www.favo.eu)",
|
368
|
+
"Faxobot/1.0",
|
369
|
+
"Feed Seeker Bot (RSS Feed Seeker http://www.MyNewFavoriteThing.com/fsb.php)",
|
370
|
+
"Feed24.com",
|
371
|
+
"FeedChecker/0.01",
|
372
|
+
"Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)",
|
373
|
+
"FeedHub FeedDiscovery/1.0 (http://www.feedhub.com)",
|
374
|
+
"FeedHub MetaDataFetcher/1.0 (http://www.feedhub.com)",
|
375
|
+
"Feedjit Favicon Crawler 1.0",
|
376
|
+
"Feedster Crawler/3.0; Feedster, Inc.",
|
377
|
+
"Felix - Mixcat Crawler (+http://mixcat.com)",
|
378
|
+
"FFC Trap Door Spider",
|
379
|
+
"Filtrbox/1.0",
|
380
|
+
"Findexa Crawler (http://www.findexa.no/gulesider/article26548.ece)",
|
381
|
+
"findlinks/x.xxx (+http://wortschatz.uni-leipzig.de/findlinks/) ",
|
382
|
+
"FineBot",
|
383
|
+
"Firefly/1.0",
|
384
|
+
"Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5)",
|
385
|
+
"Firefox (kastaneta03@hotmail.com)",
|
386
|
+
"Firefox_1.0.6 (kasparek@naparek.cz)",
|
387
|
+
"FirstGov.gov Search - POC:firstgov.webmasters@gsa.gov",
|
388
|
+
"firstsbot",
|
389
|
+
"Flapbot/0.7.2 (Flaptor Crawler; http://www.flaptor.com; crawler at flaptor period com)",
|
390
|
+
"Flexum spider",
|
391
|
+
"Flexum/2.0",
|
392
|
+
"FlickBot 2.0 RPT-HTTPClient/0.3-3",
|
393
|
+
"flunky",
|
394
|
+
"FnooleBot/2.5.2 (+http://www.fnoole.com/addurl.html)",
|
395
|
+
"FocusedSampler/1.0",
|
396
|
+
"Folkd.com Spider/0.1 beta 1 (www.folkd.com)",
|
397
|
+
"Fooky.com/ScorpionBot/ScoutOut; http://www.fooky.com/scorpionbots",
|
398
|
+
"Francis/1.0 (francis@neomo.de http://www.neomo.de/)",
|
399
|
+
"Franklin Locator 1.8",
|
400
|
+
"FreeFind.com-SiteSearchEngine/1.0 (http://freefind.com; spiderinfo@freefind.com)",
|
401
|
+
"FreshNotes crawler< report problems to crawler-at-freshnotes-dot-com",
|
402
|
+
"FSurf15a 01",
|
403
|
+
"FTB-Bot http://www.findthebest.co.uk/",
|
404
|
+
"Full Web Bot 0416B",
|
405
|
+
"Full Web Bot 0516B",
|
406
|
+
"Full Web Bot 2816B",
|
407
|
+
"FuseBulb.Com",
|
408
|
+
"FyberSpider (+http://www.fybersearch.com/fyberspider.php)",
|
409
|
+
"GAIS Robot/1.0B2",
|
410
|
+
"Gaisbot/3.0 (indexer@gais.cs.ccu.edu.tw; http://gais.cs.ccu.edu.tw/robot.php)",
|
411
|
+
"Gaisbot/3.0+(robot06@gais.cs.ccu.edu.tw;+http://gais.cs.ccu.edu.tw/robot.php)",
|
412
|
+
"GalaxyBot/1.0 (http://www.galaxy.com/galaxybot.html)",
|
413
|
+
"Gallent Search Spider v1.4 Robot 2 (http://robot.GallentSearch.com)",
|
414
|
+
"gamekitbot/1.0 (+http://www.uchoose.de/crawler/gamekitbot/)",
|
415
|
+
"GammaSpider/1.0",
|
416
|
+
"gazz/x.x (gazz@nttrd.com)",
|
417
|
+
"generic_crawler/01.0217/",
|
418
|
+
"genieBot (http://64.5.245.11/faq/faq.html)",
|
419
|
+
"geniebot wgao@genieknows.com",
|
420
|
+
"GeonaBot 1.x; http://www.geona.com/",
|
421
|
+
"gigabaz/3.1x (baz@gigabaz.com; http://gigabaz.com/gigabaz/)",
|
422
|
+
"Gigabot/2.0 (gigablast.com)",
|
423
|
+
"Gigabot/2.0/gigablast.com/spider.html",
|
424
|
+
"Gigabot/2.0; http://www.gigablast.com/spider.html",
|
425
|
+
"Gigabot/2.0att",
|
426
|
+
"Gigabot/3.0 (http://www.gigablast.com/spider.html)",
|
427
|
+
"Gigabot/x.0",
|
428
|
+
"GigabotSiteSearch/2.0 (sitesearch.gigablast.com)",
|
429
|
+
"GNODSPIDER (www.gnod.net)",
|
430
|
+
"Goblin/0.9 (http://www.goguides.org/)",
|
431
|
+
"Goblin/0.9.x (http://www.goguides.org/goblin-info.html)",
|
432
|
+
"GoForIt.com",
|
433
|
+
"GOFORITBOT ( http://www.goforit.com/about/ )",
|
434
|
+
"gonzo1[P] +http://www.suchen.de/popups/faq.jsp",
|
435
|
+
"gonzo2[P] +http://www.suchen.de/faq.html",
|
436
|
+
"Goofer/0.2",
|
437
|
+
"Googlebot-Image/1.0",
|
438
|
+
"Googlebot-Image/1.0 ( http://www.googlebot.com/bot.html)",
|
439
|
+
"Googlebot/2.1 ( http://www.google.com/bot.html)",
|
440
|
+
"Googlebot/2.1 ( http://www.googlebot.com/bot.html)",
|
441
|
+
"Googlebot/Test ( http://www.googlebot.com/bot.html)",
|
442
|
+
"GrapeFX/0.3 libwww/5.4.0",
|
443
|
+
"great-plains-web-spider/flatlandbot (Flatland Industries Web Spider; http://www.flatlandindustries.com/flatlandbot.php; jason@flatlandindustries.com)",
|
444
|
+
"GrigorBot 0.8 (http://www.grigor.biz/bot.html)",
|
445
|
+
"Gromit/1.0",
|
446
|
+
"grub crawler(http://www.grub.org)",
|
447
|
+
"grub-client",
|
448
|
+
"gsa-crawler (Enterprise; GID-01422; jplastiras@google.com)",
|
449
|
+
"gsa-crawler (Enterprise; GID-01742;gsatesting@rediffmail.com)",
|
450
|
+
"gsa-crawler (Enterprise; GIX-02057; dm@enhesa.com)",
|
451
|
+
"gsa-crawler (Enterprise; GIX-03519; cknuetter@stubhub.com)",
|
452
|
+
"gsa-crawler (Enterprise; GIX-0xxxx; enterprise-training@google.com)",
|
453
|
+
"Guestbook Auto Submitter",
|
454
|
+
"Gulliver/1.3",
|
455
|
+
"Gulper Web Bot 0.2.4 (www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
|
456
|
+
"Gungho/0.08004 (http://code.google.com/p/gungho-crawler/wiki/Index)",
|
457
|
+
"GurujiBot/1.0 (+http://www.guruji.com/WebmasterFAQ.html)",
|
458
|
+
"GurujiImageBot/1.0 (+http://www.guruji.com/en/WebmasterFAQ.html)",
|
459
|
+
"HappyFunBot/1.1",
|
460
|
+
"Harvest-NG/1.0.2",
|
461
|
+
"Hatena Antenna/0.4 (http://a.hatena.ne.jp/help#robot)",
|
462
|
+
"Hatena Pagetitle Agent/1.0",
|
463
|
+
"Hatena RSS/0.3 (http://r.hatena.ne.jp)",
|
464
|
+
"hbtronix.spider.2 -- http://hbtronix.de/spider.php",
|
465
|
+
"HeinrichderMiragoRobot",
|
466
|
+
"HeinrichderMiragoRobot (http://www.miragorobot.com/scripts/deinfo.asp)",
|
467
|
+
"Helix/1.x ( http://www.sitesearch.ca/helix/)",
|
468
|
+
"HenriLeRobotMirago (http://www.miragorobot.com/scripts/frinfo.asp)",
|
469
|
+
"HenrytheMiragoRobot",
|
470
|
+
"HenryTheMiragoRobot (http://www.miragorobot.com/scripts/mrinfo.asp)",
|
471
|
+
"Hi! I'm CsCrawler my homepage: http://www.kde.cs.uni-kassel.de/lehre/ss2005/googlespam/crawler.html RPT-HTTPClient/0.3-3",
|
472
|
+
"Hippias/0.9 Beta",
|
473
|
+
"HitList",
|
474
|
+
"Hitwise Spider v1.0 http://www.hitwise.com",
|
475
|
+
"holmes/3.11 (http://morfeo.centrum.cz/bot)",
|
476
|
+
"holmes/3.9 (onet.pl)",
|
477
|
+
"holmes/3.xx (OnetSzukaj/5.0; +http://szukaj.onet.pl)",
|
478
|
+
"holmes/x.x",
|
479
|
+
"HolmesBot (http://holmes.ge)",
|
480
|
+
"HomePageSearch(hpsearch.uni-trier.de)",
|
481
|
+
"Homerbot: www.homerweb.com",
|
482
|
+
"Honda-Search/0.7.2 (Nutch; http://lucene.apache.org/nutch/bot.html; search@honda-search.com)",
|
483
|
+
"HooWWWer/2.1.3 (debugging run) (+http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-info<at>hiit.fi)",
|
484
|
+
"HooWWWer/2.1.x ( http://cosco.hiit.fi/search/hoowwwer/ | mailto:crawler-info<at>hiit.fi)",
|
485
|
+
"HPL/Nutch-0.9 -",
|
486
|
+
"htdig/3.1.6 (http://computerorgs.com)",
|
487
|
+
"htdig/3.1.6 (unconfigured@htdig.searchengine.maintainer)",
|
488
|
+
"htdig/3.1.x (root@localhost)",
|
489
|
+
"http://Ask.24x.Info/ (http://narres.it/)",
|
490
|
+
"http://hilfe.acont.de/bot.html ACONTBOT",
|
491
|
+
"http://www.almaden.ibm.com/cs/crawler",
|
492
|
+
"http://www.almaden.ibm.com/cs/crawler [rc1.wf.ibm.com]",
|
493
|
+
"http://www.almaden.ibm.com/cs/crawler [wf216]",
|
494
|
+
"http://www.istarthere.com_spider@istarthere.com",
|
495
|
+
"http://www.monogol.de",
|
496
|
+
"http://www.trendtech.dk/spider.asp)",
|
497
|
+
"i1searchbot/2.0 (i1search web crawler; http://www.i1search.com; crawler@i1search.com)",
|
498
|
+
"IAArchiver-1.0",
|
499
|
+
"iaskspider2 (iask@staff.sina.com.cn)",
|
500
|
+
"ia_archiver",
|
501
|
+
"ia_archiver-web.archive.org",
|
502
|
+
"ia_archiver/1.6",
|
503
|
+
"ICC-Crawler(Mozilla-compatible; http://kc.nict.go.jp/icc/crawl.html; icc-crawl(at)ml(dot)nict(dot)go(dot)jp)",
|
504
|
+
"ICC-Crawler(Mozilla-compatible;http://kc.nict.go.jp/icc/crawl.html;icc-crawl-contact(at)ml(dot)nict(dot)go(dot)jp)",
|
505
|
+
"iCCrawler (http://www.iccenter.net)",
|
506
|
+
"ICCrawler - ICjobs (http://www.icjobs.de/bot.htm)",
|
507
|
+
"ichiro/x.0 (http://help.goo.ne.jp/door/crawler.html)",
|
508
|
+
"ichiro/x.0 (ichiro@nttr.co.jp)",
|
509
|
+
"IconSurf/2.0 favicon finder (see http://iconsurf.com/robot.html)",
|
510
|
+
"IconSurf/2.0 favicon monitor (see http://iconsurf.com/robot.html)",
|
511
|
+
"ICRA_label_spider/x.0",
|
512
|
+
"icsbot-0.1",
|
513
|
+
"ideare - SignSite/1.x",
|
514
|
+
"iFeed.jp/2.0 (www.psychedelix.com/agents/agents.rss; 0 subscribers)",
|
515
|
+
"igdeSpyder (compatible; igde.ru; +http://igde.ru/doc/tech.html)",
|
516
|
+
"IIITBOT/1.1 (Indian Language Web Search Engine; http://webkhoj.iiit.net; pvvpr at iiit dot ac dot in)",
|
517
|
+
"ilial/Nutch-0.9 (Ilial, Inc. is a Los Angeles based Internet startup company. For more information please visit http://www.ilial.com/crawler; http://www.ilial.com/crawler; crawl@ilial.com)",
|
518
|
+
"ilial/Nutch-0.9-dev",
|
519
|
+
"IlseBot/1.x",
|
520
|
+
"IlTrovatore-Setaccio ( http://www.iltrovatore.it)",
|
521
|
+
"Iltrovatore-Setaccio/0.3-dev (Indexing; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
|
522
|
+
"IlTrovatore-Setaccio/1.2 ( http://www.iltrovatore.it/aiuto/faq.html)",
|
523
|
+
"Iltrovatore-Setaccio/1.2 (It-bot; http://www.iltrovatore.it/bot.html; info@iltrovatore.it)",
|
524
|
+
"iltrovatore-setaccio/1.2-dev (spidering; http://www.iltrovatore.it/aiuto/.....)",
|
525
|
+
"IlTrovatore/1.2 (IlTrovatore; http://www.iltrovatore.it/bot.html; bot@iltrovatore.it)",
|
526
|
+
"ImageWalker/2.0 (www.bdbrandprotect.com)",
|
527
|
+
"IncyWincy data gatherer(webmaster@loopimprovements.com",
|
528
|
+
"IncyWincy page crawler(webmaster@loopimprovements.com",
|
529
|
+
"IncyWincy(http://www.look.com)",
|
530
|
+
"IncyWincy(http://www.loopimprovements.com/robot.html)",
|
531
|
+
"IncyWincy/2.1(loopimprovements.com/robot.html)",
|
532
|
+
"IndexTheWeb.com Crawler7",
|
533
|
+
"Industry Program 1.0.x",
|
534
|
+
"Inet library",
|
535
|
+
"info@pubblisito.com- (http://www.pubblisito.com) il Sud dei Motori di Ricerca",
|
536
|
+
"InfoFly/1.0 (http://www.versions-project.org/)",
|
537
|
+
"INFOMINE/8.0 Adders",
|
538
|
+
"INFOMINE/8.0 RemoteServices",
|
539
|
+
"INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)",
|
540
|
+
"InfoNaviRobot(F107)",
|
541
|
+
"InfoSeek Sidewinder/0.9",
|
542
|
+
"InfoSeek Sidewinder/1.0A",
|
543
|
+
"InfoSeek Sidewinder/1.1A",
|
544
|
+
"Infoseek SideWinder/1.45 (Compatible; MSIE 10.0; UNIX)",
|
545
|
+
"Infoseek SideWinder/2.0B (Linux 2.4 i686)",
|
546
|
+
"INGRID/3.0 MT (webcrawler@NOSPAMexperimental.net; http://webmaster.ilse.nl/jsp/webmaster.jsp)",
|
547
|
+
"Inktomi Search",
|
548
|
+
"InnerpriseBot/1.0 (http://www.innerprise.com/)",
|
549
|
+
"Insitor.com search and find world wide!",
|
550
|
+
"Insitornaut",
|
551
|
+
"Internet Ninja x.0",
|
552
|
+
"InternetArchive/0.8-dev(Nutch;http://lucene.apache.org/nutch/bot.html;nutch-agent@lucene.apache",
|
553
|
+
"InternetSeer.com",
|
554
|
+
"IOI/2.0 (ISC Open Index crawler; http://index.isc.org/; bot@index.isc.org)",
|
555
|
+
"IPiumBot laurion(dot)com",
|
556
|
+
"IpselonBot/0.xx-beta (Ipselon; http://www.ipselon.com; ipselonbot@ipselon.com)",
|
557
|
+
"IRLbot/1.0 ( http://irl.cs.tamu.edu/crawler)",
|
558
|
+
"IRLbot/3.0 (compatible; MSIE 6.0; http://irl.cs.tamu.edu/crawler/)",
|
559
|
+
"ISC Systems iRc Search 2.1",
|
560
|
+
"IUPUI Research Bot v 1.9a",
|
561
|
+
"IWAgent/ 1.0 - www.brandprotect.com",
|
562
|
+
"Jabot/6.x (http://odin.ingrid.org/)",
|
563
|
+
"Jabot/7.x.x (http://odin.ingrid.org/)",
|
564
|
+
"Jack",
|
565
|
+
"Jambot/0.1.x (Jambot; http://www.jambot.com/blog; crawler@jambot.com)",
|
566
|
+
"Jambot/0.2.1 (Jambot; http://www.jambot.com/blog/static.php?page=webmaster-robot; crawler@jambot.com)",
|
567
|
+
"Jayde Crawler. http://www.jayde.com",
|
568
|
+
"Jetbot/1.0",
|
569
|
+
"JobSpider_BA/1.1",
|
570
|
+
"Jyxobot/x",
|
571
|
+
"k2spider",
|
572
|
+
"KAIST AITrc Crawler",
|
573
|
+
"KakleBot - www.kakle.com/0.1 (KakleBot - www.kakle.com; http:// www.kakle.com/bot.html; support@kakle.com)",
|
574
|
+
"kalooga/kalooga-4.0-dev-datahouse (Kalooga; http://www.kalooga.com; info@kalooga.com)",
|
575
|
+
"kalooga/KaloogaBot (Kalooga; http://www.kalooga.com/info.html?page=crawler; crawler@kalooga.com)",
|
576
|
+
"Kenjin Spider",
|
577
|
+
"Kevin http://dznet.com/kevin/",
|
578
|
+
"Kevin http://websitealert.net/kevin/",
|
579
|
+
"KE_1.0/2.0 libwww/5.2.8",
|
580
|
+
"KFSW-Bot (Version: 1.01 powered by KFSW www.kfsw.de)",
|
581
|
+
"kinja-imagebot (http://www.kinja.com/)",
|
582
|
+
"kinjabot (http://www.kinja.com)",
|
583
|
+
"KIT-Fireball/2.0",
|
584
|
+
"KIT-Fireball/2.0 (compatible; Mozilla 4.0; MSIE 5.5)",
|
585
|
+
"KnowItAll(knowitall@cs.washington.edu)",
|
586
|
+
"Knowledge.com/0.x",
|
587
|
+
"Krugle/Krugle,Nutch/0.8+ (Krugle web crawler; http://www.krugle.com/crawler/info.html; webcrawler@krugle.com)",
|
588
|
+
"KSbot/1.0 (KnowledgeStorm crawler; http://www.knowledgestorm.com/resources/content/crawler/index.html; crawleradmin@knowledgestorm.com)",
|
589
|
+
"kuloko-bot/0.x",
|
590
|
+
"kulokobot www.kuloko.com kuloko@backweave.com",
|
591
|
+
"kulturarw3/0.1",
|
592
|
+
"LapozzBot/1.4 ( http://robot.lapozz.com)",
|
593
|
+
"LapozzBot/1.5 (+http://robot.lapozz.hu)",
|
594
|
+
"larbin (samualt9@bigfoot.com)",
|
595
|
+
"LARBIN-EXPERIMENTAL (efp@gmx.net)",
|
596
|
+
"larbin_2.1.1 larbin2.1.1@somewhere.com",
|
597
|
+
"larbin_2.2.0 (crawl@compete.com)",
|
598
|
+
"larbin_2.2.1_de_Viennot (Laurent.Viennot@inria.fr)",
|
599
|
+
"larbin_2.2.2 (sugayama@lab7.kuis.kyoto-u.ac.jp)",
|
600
|
+
"larbin_2.2.2_guillaume (guillaume@liafa.jussieu.fr)",
|
601
|
+
"larbin_2.6.0 (larbin2.6.0@unspecified.mail)",
|
602
|
+
"larbin_2.6.1 (larbin2.6.1@unspecified.mail)",
|
603
|
+
"larbin_2.6.2 (hamasaki@grad.nii.ac.jp)",
|
604
|
+
"larbin_2.6.2 (larbin2.6.2@unspecified.mail)",
|
605
|
+
"larbin_2.6.2 (listonATccDOTgatechDOTedu)",
|
606
|
+
"larbin_2.6.2 (pimenas@systems.tuc.gr)",
|
607
|
+
"larbin_2.6.2 (tom@lemurconsulting.com)",
|
608
|
+
"larbin_2.6.2 (vitalbox1@hotmail.com)",
|
609
|
+
"larbin_2.6.3 (ltaa_web_crawler@groupes.epfl.ch)",
|
610
|
+
"larbin_2.6.3 (wgao@genieknows.com)",
|
611
|
+
"larbin_2.6.3_for_(http://cosco.hiit.fi/search/) tsilande@hiit.fi",
|
612
|
+
"larbin_2.6_basileocaml (basile.starynkevitch@cea.fr)",
|
613
|
+
"larbin_devel (http://pauillac.inria.fr/~ailleret/prog/larbin/)",
|
614
|
+
"lawinfo-crawler/Nutch-0.9-dev (Crawler for lawinfo.com pages; http://www.lawinfo.com; webmaster@lawinfo.com)",
|
615
|
+
"LECodeChecker/3.0 libgetdoc/1.0",
|
616
|
+
"LEIA/2.90",
|
617
|
+
"LEIA/3.01pr (LEIAcrawler; [SNIP])",
|
618
|
+
"LetsCrawl.com/1.0 +http://letscrawl.com/",
|
619
|
+
"LexiBot/1.00",
|
620
|
+
"Libby_1.1/libwww-perl/5.47",
|
621
|
+
"LibertyW (+http://www.lw01.com)",
|
622
|
+
"libWeb/clsHTTP -- hiongun@kt.co.kr",
|
623
|
+
"libwww-perl/5.41",
|
624
|
+
"libwww-perl/5.45",
|
625
|
+
"libwww-perl/5.48",
|
626
|
+
"libwww-perl/5.52 FP/2.1",
|
627
|
+
"libwww-perl/5.52 FP/4.0",
|
628
|
+
"libwww-perl/5.65",
|
629
|
+
"libwww-perl/5.800",
|
630
|
+
"libwww/5.3.2",
|
631
|
+
"LijitSpider/Nutch-0.9 (Reports crawler; http://www.lijit.com/; info(a)lijit(d)com)",
|
632
|
+
"Lincoln State Web Browser",
|
633
|
+
"linkbot",
|
634
|
+
"linknzbot",
|
635
|
+
"Links 2.0 (http://gossamer-threads.com/scripts/links/)",
|
636
|
+
"Links SQL (http://gossamer-threads.com/scripts/links-sql/)",
|
637
|
+
"LinkScan/11.0beta2 UnixShareware robot from Elsop.com (used by Indiafocus/Indiainfo)",
|
638
|
+
"LinkScan/9.0g Unix",
|
639
|
+
"LinkScan/x.x Unix",
|
640
|
+
"LiveTrans/Nutch-0.9 (maintainer: cobain at iis dot sinica dot edu dot tw; http://wkd.iis.sinica.edu.tw/LiveTrans/)",
|
641
|
+
"Llaut/1.0 (http://mnm.uib.es/~gallir/llaut/bot.html)",
|
642
|
+
"LMQueueBot/0.2",
|
643
|
+
"lmspider (lmspider@scansoft.com)",
|
644
|
+
"LNSpiderguy",
|
645
|
+
"LocalBot/1.0 ( http://www.localbot.co.uk/)",
|
646
|
+
"LocalcomBot/1.2.x ( http://www.local.com/bot.htm)",
|
647
|
+
"Lockstep Spider/1.0",
|
648
|
+
"Look.com",
|
649
|
+
"Lovel as 1.0 ( +http://www.everatom.com)",
|
650
|
+
"LTI/LemurProject Nutch Spider/Nutch-1.0-dev (lti crawler for CMU; http://www.lti.cs.cmu.edu; changkuk at cmu dot edu)",
|
651
|
+
"LTI/LemurProject Nutch Spider/Nutch-1.0-dev (Research spider using Nutch; http://www.lemurproject.org; mhoy@cs.cmu.edu)",
|
652
|
+
"lwp-trivial/1.32",
|
653
|
+
"lwp-trivial/1.34",
|
654
|
+
"lwp-trivial/1.34",
|
655
|
+
"LWP::Simple/5.22",
|
656
|
+
"LWP::Simple/5.36",
|
657
|
+
"LWP::Simple/5.48",
|
658
|
+
"LWP::Simple/5.50",
|
659
|
+
"LWP::Simple/5.51",
|
660
|
+
"LWP::Simple/5.53",
|
661
|
+
"LWP::Simple/5.63",
|
662
|
+
"LWP::Simple/5.803",
|
663
|
+
"Lycos_Spider_(modspider)",
|
664
|
+
"Lycos_Spider_(T-Rex)",
|
665
|
+
"Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6c (human-guided@lerly.net)",
|
666
|
+
"Mac Finder 1.0.xx",
|
667
|
+
"Mackster( http://www.ukwizz.com )",
|
668
|
+
"Mahiti.Com/Mahiti Crawler-1.0 (Mahiti.Com; http://mahiti.com ; mahiti.com)",
|
669
|
+
"Mail.Ru/1.0",
|
670
|
+
"mailto:webcraft@bea.com",
|
671
|
+
"mammoth/1.0 ( http://www.sli-systems.com/)",
|
672
|
+
"MantraAgent",
|
673
|
+
"MapoftheInternet.com ( http://MapoftheInternet.com)",
|
674
|
+
"Mariner/5.1b [de] (Win95; I ;Kolibri gncwebbot)",
|
675
|
+
"Marketwave Hit List",
|
676
|
+
"Martini",
|
677
|
+
"MARTINI",
|
678
|
+
"Marvin v0.3",
|
679
|
+
"MaSagool/1.0 (MaSagool; http://sagool.jp/; info@sagool.jp)",
|
680
|
+
"MasterSeek",
|
681
|
+
"Mata Hari/2.00 ",
|
682
|
+
"Matrix S.p.A. - FAST Enterprise Crawler 6 (Unknown admin e-mail address)",
|
683
|
+
"maxomobot/dev-20051201 (maxomo; http://67.102.134.34:4047/MAXOMO/MAXOMObot.html; maxomobot@maxomo.com)",
|
684
|
+
"MDbot/1.0 (+http://www.megadownload.net/bot.html)",
|
685
|
+
"MediaCrawler-1.0 (Experimental)",
|
686
|
+
"Mediapartners-Google/2.1 ( http://www.googlebot.com/bot.html)",
|
687
|
+
"MediaSearch/0.1",
|
688
|
+
"MegaSheep v1.0 (www.searchuk.com internet sheep)",
|
689
|
+
"Megite2.0 (http://www.megite.com)",
|
690
|
+
"Mercator-1.x",
|
691
|
+
"Mercator-2.0",
|
692
|
+
"Mercator-Scrub-1.1",
|
693
|
+
"Metaeuro Web Crawler/0.2 (MetaEuro Web Search Clustering Engine; http://www.metaeuro.com; crawler at metaeuro dot com)",
|
694
|
+
"MetaGer-LinkChecker",
|
695
|
+
"MetagerBot/0.8-dev (MetagerBot; http://metager.de; )",
|
696
|
+
"MetaGer_PreChecker0.1",
|
697
|
+
"Metaspinner/0.01 (Metaspinner; http://www.meta-spinner.de/; support@meta-spinner.de/)",
|
698
|
+
"metatagsdir/0.7 (+http://metatagsdir.com/directory/)",
|
699
|
+
"MFC Foundation Class Library 4.0",
|
700
|
+
"MicroBaz",
|
701
|
+
"Microsoft Small Business Indexer",
|
702
|
+
"Microsoft URL Control - 6.00.8xxx",
|
703
|
+
"MicrosoftPrototypeCrawler (How's my crawling? mailto:newbiecrawler@hotmail.com)",
|
704
|
+
"Missauga Locate 1.0.0",
|
705
|
+
"Missigua Locator 1.9",
|
706
|
+
"Missouri College Browse",
|
707
|
+
"Misterbot-Nutch/0.7.1 (Misterbot-Nutch; http://www.misterbot.fr; admin@misterbot.fr)",
|
708
|
+
"Miva (AlgoFeedback@miva.com)",
|
709
|
+
"Mizzu Labs 2.2",
|
710
|
+
"MJ12bot/vx.x.x (http://majestic12.co.uk/bot.php?+)",
|
711
|
+
"MJ12bot/vx.x.x (http://www.majestic12.co.uk/projects/dsearch/mj12bot.php)",
|
712
|
+
"MJBot (SEO assessment)",
|
713
|
+
"MLBot (www.metadatalabs.com)",
|
714
|
+
"MnogoSearch/3.2.xx",
|
715
|
+
"Mo College 1.9",
|
716
|
+
"moget/x.x (moget@goo.ne.jp)",
|
717
|
+
"mogimogi/1.0",
|
718
|
+
"MojeekBot/0.x (archi; http://www.mojeek.com/bot.html)",
|
719
|
+
"Morris - Mixcat Crawler ( http://mixcat.com)",
|
720
|
+
"Mouse-House/7.4 (spider_monkey spider info at www.mobrien.com/sm.shtml)",
|
721
|
+
"mozDex/0.xx-dev (mozDex; http://www.mozdex.com/en/bot.html; spider@mozdex.com)",
|
722
|
+
"Mozilla (Mozilla@somewhere.com)",
|
723
|
+
"Mozilla 4.0(compatible; BotSeer/1.0; +http://botseer.ist.psu.edu)",
|
724
|
+
"Mozilla/2.0 (compatible; Ask Jeeves)",
|
725
|
+
"Mozilla/2.0 (compatible; Ask Jeeves/Teoma)",
|
726
|
+
"Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml) ",
|
727
|
+
"Mozilla/2.0 (compatible; Ask Jeeves/Teoma; http://sp.ask.com/docs/about/tech_crawling.html)",
|
728
|
+
"Mozilla/2.0 (compatible; EZResult -- Internet Search Engine)",
|
729
|
+
"Mozilla/2.0 (compatible; NEWT ActiveX; Win32)",
|
730
|
+
"Mozilla/2.0 (compatible; T-H-U-N-D-E-R-S-T-O-N-E)",
|
731
|
+
"Mozilla/3.0 (compatible; Fluffy the spider; http://www.searchhippo.com/; info@searchhippo.com)",
|
732
|
+
"Mozilla/3.0 (compatible; Indy Library)",
|
733
|
+
"Mozilla/3.0 (compatible; MuscatFerret/1.5.4; claude@euroferret.com)",
|
734
|
+
"Mozilla/3.0 (compatible; MuscatFerret/1.5; olly@muscat.co.uk)",
|
735
|
+
"Mozilla/3.0 (compatible; MuscatFerret/1.6.x; claude@euroferret.com)",
|
736
|
+
"Mozilla/3.0 (compatible; scan4mail (advanced version) http://www.peterspages.net/?scan4mail)",
|
737
|
+
"Mozilla/3.0 (compatible; ScollSpider; http://www.webwobot.com)",
|
738
|
+
"Mozilla/3.0 (compatible; Webinator-DEV01.home.iprospect.com/2.56)",
|
739
|
+
"Mozilla/3.0 (compatible; Webinator-indexer.cyberalert.com/2.56)",
|
740
|
+
"Mozilla/3.0 (INGRID/3.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
|
741
|
+
"Mozilla/3.0 (Slurp.so/Goo; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
742
|
+
"Mozilla/3.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
743
|
+
"Mozilla/3.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
744
|
+
"Mozilla/3.0 (Vagabondo/1.1 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
|
745
|
+
"Mozilla/3.0 (Vagabondo/1.x MT; webagent@wise-guys.nl; http://webagent.wise-guys.nl/)",
|
746
|
+
"Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMexperimental.net; http://aanmelden.ilse.nl/?aanmeld_mode=webhints)",
|
747
|
+
"Mozilla/3.0 (Vagabondo/2.0 MT; webcrawler@NOSPAMwise-guys.nl; http://webagent.wise-guys.nl/)",
|
748
|
+
"Mozilla/3.01 (Compatible; Links2Go Similarity Engine)",
|
749
|
+
"Mozilla/4.0",
|
750
|
+
"Mozilla/4.0 (agadine3.0) www.agada.de",
|
751
|
+
"Mozilla/4.0 (compatible: AstraSpider V.2.1 : astrafind.com)",
|
752
|
+
"Mozilla/4.0 (compatible; Vagabondo/2.2; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
|
753
|
+
"Mozilla/4.0 (compatible; Vagabondo/4.0Beta; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
|
754
|
+
"Mozilla/4.0 (compatible; Advanced Email Extractor v2.xx)",
|
755
|
+
"Mozilla/4.0 (compatible; B_L_I_T_Z_B_O_T)",
|
756
|
+
"Mozilla/4.0 (compatible; ChristCrawler.com ChristCrawler@ChristCENTRAL.com)",
|
757
|
+
"Mozilla/4.0 (compatible; crawlx, crawler@trd.overture.com)",
|
758
|
+
"Mozilla/4.0 (compatible; DAUMOA-video; +http://ws.daum.net/aboutkr.html)",
|
759
|
+
"Mozilla/4.0 (compatible; FastCrawler3 support-fastcrawler3@fast.no)",
|
760
|
+
"Mozilla/4.0 (compatible; FDSE robot)",
|
761
|
+
"Mozilla/4.0 (compatible; GPU p2p crawler http://gpu.sourceforge.net/search_engine.php)",
|
762
|
+
"Mozilla/4.0 (compatible; grub-client-0.2.x; Crawl your stuff with http://grub.org)",
|
763
|
+
"Mozilla/4.0 (compatible; grub-client-0.3.x; Crawl your own stuff with http://grub.org)",
|
764
|
+
"Mozilla/4.0 (compatible; grub-client-2.x)",
|
765
|
+
"Mozilla/4.0 (compatible; Iplexx Spider/1.0 http://www.iplexx.at)",
|
766
|
+
"Mozilla/4.0 (compatible; MSIE 4.01; Vonna.com b o t)",
|
767
|
+
"Mozilla/4.0 (compatible; MSIE 4.01; Windows CE; PPC; 240x320; SPV M700; OpVer 19.123.2.733) OrangeBot-Mobile 2008.0 (mobilesearch.support@orange-ftgroup.com)",
|
768
|
+
"Mozilla/4.0 (compatible; MSIE 4.0; Windows NT; Site Server 3.0 Robot) Indonesia Interactive",
|
769
|
+
"Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0) (samualt9@bigfoot.com)",
|
770
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; NetNose-Crawler 2.0; A New Search Experience: http://www.netnose.com)",
|
771
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) TrueRobot; 1.5",
|
772
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot BETA 1.2 (http://www.voila.com/)",
|
773
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; Windows 95) VoilaBot; 1.6",
|
774
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt; DTS Agent",
|
775
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com)",
|
776
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; www.galaxy.com; www.psychedelix.com/; http://www.galaxy.com/info/crawler.html)",
|
777
|
+
"Mozilla/4.0 (compatible; MSIE 5.0; YANDEX)",
|
778
|
+
"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; obot)",
|
779
|
+
"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 4.0; QXW03018)",
|
780
|
+
"Mozilla/4.0 (compatible; MSIE 6.0 compatible; Asterias Crawler v4; +http://www.singingfish.com/help/spider.html; webmaster@singingfish.com); SpiderThread Revision: 3.10",
|
781
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Skampy/0.9.x [en]",
|
782
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; TargetSeek/1.0; +http://www.targetgroups.net/TargetSeek.html)",
|
783
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP entries t_st; http://tuezilla.de/t_st-odp-entries-agent.html)",
|
784
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ODP links test; http://tuezilla.de/test-odp-links-agent.html)",
|
785
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; ZoomSpider.net bot; .NET CLR 1.1.4322)",
|
786
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; heritrix/1.3.0 http://www.cs.washington.edu/research/networking/websys/)",
|
787
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; QihooBot 1.0 qihoobot@qihoo.net)",
|
788
|
+
"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)",
|
789
|
+
"Mozilla/4.0 (compatible; MSIE enviable; DAUMOA 2.0; DAUM Web Robot; Daum Communications Corp., Korea; +http://ws.daum.net/aboutkr.html)",
|
790
|
+
"Mozilla/4.0 (compatible; MSIE is not me; DAUMOA/1.0.1; DAUM Web Robot; Daum Communications Corp., Korea)",
|
791
|
+
"Mozilla/4.0 (compatible; NaverBot/1.0; http://help.naver.com/delete_main.asp)",
|
792
|
+
"Mozilla/4.0 (compatible; SpeedySpider; www.entireweb.com)",
|
793
|
+
"Mozilla/4.0 (compatible; www.galaxy.com)",
|
794
|
+
"Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)",
|
795
|
+
"Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)",
|
796
|
+
"Mozilla/4.0 (JemmaTheTourist;http://www.activtourist.com)",
|
797
|
+
"Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
|
798
|
+
"Mozilla/4.0 (MobilePhone SCP-5500/US/1.0) NetFront/3.0 MMP/2.0 FAKE (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
|
799
|
+
"Mozilla/4.0 (Mozilla; http://www.mozilla.org/docs/en/bot.html; master@mozilla.com)",
|
800
|
+
"Mozilla/4.0 (Sleek Spider/1.2)",
|
801
|
+
"Mozilla/4.0 compatible FurlBot/Furl Search 2.0 (FurlBot; http://www.furl.net; wn.furlbot@looksmart.net)",
|
802
|
+
"Mozilla/4.0 compatible ZyBorg/1.0 (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
|
803
|
+
"Mozilla/4.0 compatible ZyBorg/1.0 (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
|
804
|
+
"Mozilla/4.0 compatible ZyBorg/1.0 Dead Link Checker (wn.zyborg@looksmart.net; http://www.WISEnutbot.com)",
|
805
|
+
"Mozilla/4.0 compatible ZyBorg/1.0 for Homepage (ZyBorg@WISEnutbot.com; http://www.WISEnutbot.com)",
|
806
|
+
"Mozilla/4.0 efp@gmx.net",
|
807
|
+
"Mozilla/4.0 [en] (Ask Jeeves Corporate Spider)",
|
808
|
+
"Mozilla/4.0(compatible; Zealbot 1.0)",
|
809
|
+
"Mozilla/4.04 (compatible; Dulance bot; +http://www.dulance.com/bot.jsp)",
|
810
|
+
"Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_TrueRobot/1.4 libwww/5.2.8",
|
811
|
+
"Mozilla/4.0_(compatible;_MSIE_5.0;_Windows_95)_VoilaBot/1.6 libwww/5.3.2",
|
812
|
+
"Mozilla/4.6 [en] (http://www.cnet.com/)",
|
813
|
+
"Mozilla/4.7",
|
814
|
+
"Mozilla/4.7 (compatible; http://eidetica.com/spider)",
|
815
|
+
"Mozilla/4.7 (compatible; Intelliseek; http://www.intelliseek.com)",
|
816
|
+
"Mozilla/4.7 (compatible; Whizbang)",
|
817
|
+
"Mozilla/4.7 (compatible; WhizBang; http://www.whizbang.com/crawler)",
|
818
|
+
"Mozilla/4.7 [en](BecomeBot@exava.com)",
|
819
|
+
"Mozilla/4.7 [en](Exabot@exava.com)",
|
820
|
+
"Mozilla/4.72 [en] (BACS http://www.ba.be)",
|
821
|
+
"Mozilla/5.0",
|
822
|
+
"Mozilla/5.0 (+http://www.eurekster.com/mammoth) Mammoth/0.1",
|
823
|
+
"Mozilla/5.0 (+http://www.sli-systems.com/) Mammoth/0.1",
|
824
|
+
"Mozilla/5.0 (Clustered-Search-Bot/1.0; support@clush.com; http://www.clush.com/)",
|
825
|
+
"Mozilla/5.0 (compatible; +http://www.evri.com/evrinid)",
|
826
|
+
"Mozilla/5.0 (compatible; 008/0.83; http://www.80legs.com/spider.html;) Gecko/2008032620",
|
827
|
+
"Mozilla/5.0 (compatible; Abonti/0.8 - http://www.abonti.com)",
|
828
|
+
"Mozilla/5.0 (compatible; aiHitBot/1.0; +http://www.aihit.com/)",
|
829
|
+
"Mozilla/5.0 (compatible; AnsearchBot/1.x; +http://www.ansearch.com.au/)",
|
830
|
+
"Mozilla/5.0 (compatible; archive.org_bot/1.10.0 +http://www.loc.gov/minerva/crawl.html)",
|
831
|
+
"Mozilla/5.0 (compatible; archive.org_bot/1.13.1x http://crawler.archive.org)",
|
832
|
+
"Mozilla/5.0 (compatible; archive.org_bot/1.5.0-200506132127 http://crawler.archive.org) Hurricane Katrina",
|
833
|
+
"Mozilla/5.0 (compatible; Ask Jeeves/Teoma; http://about.ask.com/en/docs/about/webmasters.shtml)",
|
834
|
+
"Mozilla/5.0 (compatible; BecomeBot/1.23; http://www.become.com/webmasters.html)",
|
835
|
+
"Mozilla/5.0 (compatible; BecomeBot/1.xx; MSIE 6.0 compatible; http://www.become.com/webmasters.html)",
|
836
|
+
"Mozilla/5.0 (compatible; BecomeBot/2.0beta; http://www.become.com/webmasters.html)",
|
837
|
+
"Mozilla/5.0 (compatible; BecomeBot/2.x; MSIE 6.0 compatible; http://www.become.com/site_owners.html)",
|
838
|
+
"Mozilla/5.0 (compatible; BecomeJPBot/2.3; MSIE 6.0 compatible; +http://www.become.co.jp/site_owners.html)",
|
839
|
+
"Mozilla/5.0 (compatible; BlogRefsBot/0.1; http://www.blogrefs.com/about/bloggers)",
|
840
|
+
"Mozilla/5.0 (compatible; Bot; +http://pressemitteilung.ws/spamfilter",
|
841
|
+
"Mozilla/5.0 (compatible; BuzzRankingBot/1.0; +http://www.buzzrankingbot.com/)",
|
842
|
+
"Mozilla/5.0 (compatible; Charlotte/1.0b; charlotte@betaspider.com)",
|
843
|
+
"Mozilla/5.0 (compatible; Charlotte/1.0b; http://www.searchme.com/support/)",
|
844
|
+
"Mozilla/5.0 (compatible; Crawling jpeg; http://www.yama.info.waseda.ac.jp)",
|
845
|
+
"Mozilla/5.0 (compatible; de/1.13.2 +http://www.de.com)",
|
846
|
+
"Mozilla/5.0 (compatible; Diffbot/0.1; +http://www.diffbot.com)",
|
847
|
+
"Mozilla/5.0 (compatible; DNS-Digger-Explorer/1.0; +http://www.dnsdigger.com)",
|
848
|
+
"Mozilla/5.0 (compatible; DNS-Digger/1.0; +http://www.dnsdigger.com)",
|
849
|
+
"Mozilla/5.0 (compatible; EARTHCOM.info/2.01; http://www.earthcom.info)",
|
850
|
+
"Mozilla/5.0 (compatible; EARTHCOM/2.2; +http://enter4u.eu)",
|
851
|
+
"Mozilla/5.0 (compatible; Exabot Test/3.0; +http://www.exabot.com/go/robot)",
|
852
|
+
"Mozilla/5.0 (compatible; FatBot 2.0; http://www.thefind.com/main/CrawlerFAQs.fhtml)",
|
853
|
+
"Mozilla/5.0 (compatible; Galbot/1.0; +http://www.galbot.com/bot.html)",
|
854
|
+
"mozilla/5.0 (compatible; genevabot http://www.healthdash.com)",
|
855
|
+
"Mozilla/5.0 (compatible; Googlebot/2.1; http://www.google.com/bot.html)",
|
856
|
+
"mozilla/5.0 (compatible; heritrix/1.0.4 http://innovationblog.com)",
|
857
|
+
"Mozilla/5.0 (compatible; heritrix/1.10.2 +http://i.stanford.edu/)",
|
858
|
+
"Mozilla/5.0 (compatible; heritrix/1.12.1 +http://newstin.com/)",
|
859
|
+
"Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com)",
|
860
|
+
"Mozilla/5.0 (compatible; heritrix/1.12.1 +http://www.page-store.com) [email:paul@page-store.com]",
|
861
|
+
"mozilla/5.0 (compatible; heritrix/1.3.0 http://archive.crawler.org)",
|
862
|
+
"Mozilla/5.0 (compatible; heritrix/1.4.0 +http://www.chepi.net)",
|
863
|
+
"Mozilla/5.0 (compatible; heritrix/1.4t http://www.truveo.com/)",
|
864
|
+
"Mozilla/5.0 (compatible; heritrix/1.5.0 http://www.l3s.de/~kohlschuetter/projects/crawling/)",
|
865
|
+
"Mozilla/5.0 (compatible; heritrix/1.5.0-200506231921 http://pandora.nla.gov.au/crawl.html)",
|
866
|
+
"Mozilla/5.0 (compatible; heritrix/1.6.0 http://www.worio.com/)",
|
867
|
+
"Mozilla/5.0 (compatible; heritrix/1.7.0 +http://www.greaterera.com/)",
|
868
|
+
"Mozilla/5.0 (compatible; heritrix/1.x.x +http://www.accelobot.com)",
|
869
|
+
"Mozilla/5.0 (compatible; heritrix/2.0.0-RC1 +http://www.aol.com)",
|
870
|
+
"Mozilla/5.0 (compatible; Hermit Search. Com; +http://www.hermitsearch.com)",
|
871
|
+
"Mozilla/5.0 (compatible; HyperixScoop/1.3; +http://www.hyperix.com)",
|
872
|
+
"Mozilla/5.0 (compatible; IDBot/1.0; +http://www.id-search.org/bot.html)",
|
873
|
+
"Mozilla/5.0 (compatible; InterseekWeb/3.x)",
|
874
|
+
"Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)",
|
875
|
+
"Mozilla/5.0 (compatible; LemSpider 0.1)",
|
876
|
+
"Mozilla/5.0 (compatible; MojeekBot/2.0; http://www.mojeek.com/bot.html)",
|
877
|
+
"Mozilla/5.0 (compatible; MSIE 6.0; Podtech Network; crawler_admin@podtech.net)",
|
878
|
+
"Mozilla/5.0 (compatible; OnetSzukaj/5.0; http://szukaj.onet.pl)",
|
879
|
+
"Mozilla/5.0 (compatible; PalmeraBot; http://www.links24h.com/help/palmera) Version 0.001",
|
880
|
+
"Mozilla/5.0 (compatible; pogodak.ba/3.x)",
|
881
|
+
"Mozilla/5.0 (compatible; Pogodak.hr/3.1)",
|
882
|
+
"Mozilla/5.0 (compatible; PWeBot/3.1; http://www.programacionweb.net/robot.php)",
|
883
|
+
"Mozilla/5.0 (compatible; Quantcastbot/1.0; www.quantcast.com)",
|
884
|
+
"Mozilla/5.0 (compatible; ScoutJet; +http://www.scoutjet.com/)",
|
885
|
+
"Mozilla/5.0 (compatible; Scrubby/2.2; http://www.scrubtheweb.com/)",
|
886
|
+
"Mozilla/5.0 (compatible; ShunixBot/1.x.x +http://www.shunix.com/robot.htm)",
|
887
|
+
"Mozilla/5.0 (compatible; ShunixBot/1.x; http://www.shunix.com/bot.htm)",
|
888
|
+
"Mozilla/5.0 (compatible; SkreemRBot +http://skreemr.com)",
|
889
|
+
"Mozilla/5.0 (compatible; SummizeBot +http://www.summize.com)",
|
890
|
+
"Mozilla/5.0 (compatible; Synoobot/0.9; http://www.synoo.com/search/bot.html)",
|
891
|
+
"Mozilla/5.0 (compatible; Theophrastus/x.x; http://users.cs.cf.ac.uk/N.A.Smith/theophrastus.php)",
|
892
|
+
"Mozilla/5.0 (compatible; TridentSpider/3.1)",
|
893
|
+
"Mozilla/5.0 (compatible; Vagabondo/2.1; webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)",
|
894
|
+
"Mozilla/5.0 (compatible; Webduniabot/1.0; +http://search.webdunia.com/bot.aspx)",
|
895
|
+
"Mozilla/5.0 (compatible; worio bot heritrix/1.10.0 +http://worio.com)",
|
896
|
+
"Mozilla/5.0 (compatible; WoW Lemmings Kathune/2.0;http://www.wowlemmings.com/kathune.html)",
|
897
|
+
"Mozilla/5.0 (compatible; Yahoo! DE Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
|
898
|
+
"Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)",
|
899
|
+
"Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
|
900
|
+
"Mozilla/5.0 (compatible; Yoono; http://www.yoono.com/)",
|
901
|
+
"Mozilla/5.0 (compatible; YoudaoBot/1.0; http://www.youdao.com/help/webmaster/spider/; )",
|
902
|
+
"Mozilla/5.0 (compatible; Zenbot/1.3; +http://zen.co.za/webmasters/)",
|
903
|
+
"Mozilla/5.0 (compatible; zermelo +http://www.powerset.com) [email:paul@page-store.com,crawl@powerset.com]",
|
904
|
+
"Mozilla/5.0 (compatible;archive.org_bot/1.7.1; collectionId=316; Archive-It; +http://www.archive-it.org)",
|
905
|
+
"Mozilla/5.0 (compatible;archive.org_bot/heritrix-1.9.0-200608171144 +http://pandora.nla.gov.au/crawl.html)",
|
906
|
+
"Mozilla/5.0 (compatible;MAINSEEK_BOT)",
|
907
|
+
"Mozilla/5.0 (Slurp/cat; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
908
|
+
"Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
909
|
+
"Mozilla/5.0 (Twiceler-0.9 http://www.cuill.com/twiceler/robot.html)",
|
910
|
+
"Mozilla/5.0 (Version: xxxx Type:xx)",
|
911
|
+
"Mozilla/5.0 (wgao@genieknows.com)",
|
912
|
+
"Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.7) NimbleCrawler 1.11 obeys UserAgent NimbleCrawler For problems contact: crawler_at_dataalchemy.com",
|
913
|
+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
|
914
|
+
"Mozilla/5.0 (Windows; U; Windows NT 5.1; fr; rv:1.8.1) VoilaBot BETA 1.2 (support.voilabot@orange-ftgroup.com)",
|
915
|
+
"Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@health",
|
916
|
+
"Mozilla/5.0 (Windows;) NimbleCrawler 1.12 obeys UserAgent NimbleCrawler For problems contact: crawler@healthline.com",
|
917
|
+
"Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:Spinn3r (Spinn3r 3.1); http://spinn3r.com/robot) Gecko/20021130",
|
918
|
+
"Mozilla/5.0 URL-Spider",
|
919
|
+
"Mozilla/5.0 usww.com-Spider-for-w8.net",
|
920
|
+
"Mozilla/5.0 wgao@genieknows.com",
|
921
|
+
"Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot)",
|
922
|
+
"MQbot metaquerier.cs.uiuc.edu/crawler",
|
923
|
+
"MQBOT/Nutch-0.9-dev (MQBOT Nutch Crawler; http://falcon.cs.uiuc.edu; mqbot@cs.uiuc.edu)",
|
924
|
+
"msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)",
|
925
|
+
"msnbot-Products/1.0 (+http://search.msn.com/msnbot.htm)",
|
926
|
+
"MSNBOT/0.xx (http://search.msn.com/msnbot.htm)",
|
927
|
+
"msnbot/x.xx ( http://search.msn.com/msnbot.htm)",
|
928
|
+
"MSNBOT_Mobile MSMOBOT Mozilla/2.0 (compatible; MSIE 4.02; Windows CE; Default)",
|
929
|
+
"MSNPTC/1.0",
|
930
|
+
"MSRBOT (http://research.microsoft.com/research/sv/msrbot)",
|
931
|
+
"multicrawler ( http://sw.deri.org/2006/04/multicrawler/robots.html)",
|
932
|
+
"MultiText/0.1",
|
933
|
+
"MusicWalker2.0 ( http://www.somusical.com)",
|
934
|
+
"MVAClient",
|
935
|
+
"Mylinea.com Crawler 2.0",
|
936
|
+
"Naamah 1.0.1/Blogbot (http://blogbot.de/)",
|
937
|
+
"Naamah 1.0a/Blogbot (http://blogbot.de/)",
|
938
|
+
"NABOT/5.0",
|
939
|
+
"nabot_1.0",
|
940
|
+
"NameOfAgent (CMS Spider)",
|
941
|
+
"NASA Search 1.0",
|
942
|
+
"NationalDirectory-WebSpider/1.3",
|
943
|
+
"NationalDirectoryAddURL/1.0",
|
944
|
+
"NaverBot-1.0 (NHN Corp. / +82-2-3011-1954 / nhnbot@naver.com)",
|
945
|
+
"NaverBot_dloader/1.5",
|
946
|
+
"NavissoBot",
|
947
|
+
"NavissoBot/1.7 (+http://navisso.com/)",
|
948
|
+
"NCSA Beta 1 (http://vias.ncsa.uiuc.edu/viasarchivinginformation.html)",
|
949
|
+
"Nebullabot/2.2 (http://bot.nebulla.info)",
|
950
|
+
"NEC Research Agent -- compuman at research.nj.nec.com",
|
951
|
+
"Net-Seekr Bot/Net-Seekr Bot V1 (http://www.net-seekr.com)",
|
952
|
+
"NetinfoBot/1.0 (http://netinfo.bg/netinfobot.html)",
|
953
|
+
"NetLookout/2.24",
|
954
|
+
"Netluchs/0.8-dev ( ; http://www.netluchs.de/; ___don't___spam_me_@netluchs.de)",
|
955
|
+
"NetNoseCrawler/v1.0",
|
956
|
+
"Netprospector JavaCrawler",
|
957
|
+
"NetResearchServer(http://www.look.com)",
|
958
|
+
"NetResearchServer/x.x(loopimprovements.com/robot.html)",
|
959
|
+
"NetSeer/Nutch-0.9 (NetSeer Crawler; http://www.netseer.com; crawler@netseer.com)",
|
960
|
+
"NetSprint -- 2.0",
|
961
|
+
"NetWhatCrawler/0.06-dev (NetWhatCrawler from NetWhat.com; http://www.netwhat.com; support@netwhat.com)",
|
962
|
+
"NetZippy",
|
963
|
+
"NextGenSearchBot 1 (for information visit http://www.eliyon.com/NextGenSearchBot)",
|
964
|
+
"NextopiaBOT (+http://www.nextopia.com) distributed crawler client beta v0.x",
|
965
|
+
"NG-Search/0.90 (NG-SearchBot; http://www.ng-search.com; )",
|
966
|
+
"NG/1.0",
|
967
|
+
"NG/4.0.1229",
|
968
|
+
"NITLE Blog Spider/0.01",
|
969
|
+
"Noago Spider",
|
970
|
+
"Nokia-WAPToolkit/1.2 googlebot(at)googlebot.com",
|
971
|
+
"Nokia6610/1.0 (3.09) Profile/MIDP-1.0 Configuration/CLDC-1.0 (compatible;YahooSeeker/M1A1-R2D2; http://help.yahoo.com/help/us/ysearch/crawling/crawling-01.html)",
|
972
|
+
"NokodoBot/1.x (+http://nokodo.com/bot.htm)",
|
973
|
+
"Norbert the Spider(Burf.com)",
|
974
|
+
"noxtrumbot/1.0 (crawler@noxtrum.com)",
|
975
|
+
"noyona_0_1",
|
976
|
+
"NP/0.1 (NP; http://www.nameprotect.com; npbot@nameprotect.com)",
|
977
|
+
"NPBot (http://www.nameprotect.com/botinfo.html)",
|
978
|
+
"NPBot-1/2.0",
|
979
|
+
"Nsauditor/1.x",
|
980
|
+
"nsyght.com/Nutch-1.0-dev (nsyght.com; Nsyght.com)",
|
981
|
+
"nsyght.com/Nutch-x.x (nsyght.com; search.nsyght.com)",
|
982
|
+
"nttdirectory_robot/0.9 (super-robot@super.navi.ocn.ne.jp)",
|
983
|
+
"nuSearch Spider <a href='http://www.nusearch.com'>www.nusearch.com</a> (compatible; MSIE 4.01)",
|
984
|
+
"NuSearch Spider (compatible; MSIE 6.0)",
|
985
|
+
"NuSearch Spider www.nusearch.com",
|
986
|
+
"Nutch",
|
987
|
+
"Nutch crawler/Nutch-0.9 (picapage.com; admin@picapage.com)",
|
988
|
+
"Nutch/Nutch-0.9 (Eurobot; http://www.ayell.eu )",
|
989
|
+
"NutchCVS/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
|
990
|
+
"NutchCVS/0.7.1 (Nutch running at UW; http://www.nutch.org/docs/en/bot.html; sycrawl@cs.washington.edu)",
|
991
|
+
"NutchEC2Test/Nutch-0.9-dev (Testing Nutch on Amazon EC2.; http://lucene.apache.org/nutch/bot.html; ec2test at lucene.com)",
|
992
|
+
"NutchOrg/0.0x-dev (Nutch; http://www.nutch.org/docs/bot.html; nutch-agent@lists.sourceforge.net)",
|
993
|
+
"nutchsearch/Nutch-0.9 (Nutch Search 1.0; herceg_novi at yahoo dot com)",
|
994
|
+
"NutchVinegarCrawl/Nutch-0.8.1 (Vinegar; http://www.cs.washington.edu; eytanadar at gmail dot com)",
|
995
|
+
"obidos-bot (just looking for books.)",
|
996
|
+
"ObjectsSearch/0.01-dev (ObjectsSearch;http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
|
997
|
+
"ObjectsSearch/0.0x (ObjectsSearch; http://www.ObjectsSearch.com/bot.html; support@thesoftwareobjects.com)",
|
998
|
+
"oBot ((compatible;Win32))",
|
999
|
+
"Ocelli/1.x (http://www.globalspec.com/Ocelli)",
|
1000
|
+
"Octora Beta - www.octora.com",
|
1001
|
+
"Octora Beta Bot - www.octora.com",
|
1002
|
+
"OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Internet CategorizerOmniExplorer http://www.omni-explorer.com/ car & shopping search (64.62.175.xxx)",
|
1003
|
+
"OmniExplorer_Bot/1.0x (+http://www.omni-explorer.com) Job Crawler",
|
1004
|
+
"OmniExplorer_Bot/1.1x (+http://www.omni-explorer.com) Torrent Crawler",
|
1005
|
+
"OmniExplorer_Bot/x.xx (+http://www.omni-explorer.com) WorldIndexer",
|
1006
|
+
"Onet.pl SA- http://szukaj.onet.pl",
|
1007
|
+
"OntoSpider/1.0 libwww-perl/5.65",
|
1008
|
+
"OOZBOT/0.20 ( http://www.setooz.com/oozbot.html ; agentname at setooz dot_com )",
|
1009
|
+
"OpenAcoon v4.0.x (www.openacoon.de)",
|
1010
|
+
"Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
|
1011
|
+
"Openfind data gatherer- Openbot/3.0+(robot-response@openfind.com.tw;+http://www.openfind.com.tw/robot.html)",
|
1012
|
+
"Openfind Robot/1.1A2",
|
1013
|
+
"OpenISearch/1.x (www.openisearch.com)",
|
1014
|
+
"OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)",
|
1015
|
+
"OpenTextSiteCrawler/2.9.2",
|
1016
|
+
"OpenWebSpider/0.x.x (http://www.openwebspider.org)",
|
1017
|
+
"OpenWebSpider/x",
|
1018
|
+
"OpidooBOT (larbin2.6.3@unspecified.mail)",
|
1019
|
+
"Oracle Ultra Search",
|
1020
|
+
"OrangeSpider",
|
1021
|
+
"Orbiter/T-2.0 (+http://www.dailyorbit.com/bot.htm)",
|
1022
|
+
"Overture-WebCrawler/3.8/Fresh (atw-crawler at fast dot no; http://fast.no/support/crawler.asp)",
|
1023
|
+
"ozelot/2.7.3 (Search engine indexer; www.flying-cat.de/ozelot; ozelot@flying-cat.de)",
|
1024
|
+
"PADLibrary Spider",
|
1025
|
+
"PageBitesHyperBot/600 (http://www.pagebites.com/)",
|
1026
|
+
"Pagebull http://www.pagebull.com/",
|
1027
|
+
"page_verifier (http://www.securecomputing.com/goto/pv)",
|
1028
|
+
"parallelContextFocusCrawler1.1parallelContextFocusCrawler1.1",
|
1029
|
+
"ParaSite/1.0b (http://www.ianett.com/parasite/)",
|
1030
|
+
"Patwebbot (http://www.herz-power.de/technik.html)",
|
1031
|
+
"PBrowse 1.4b",
|
1032
|
+
"pd02_1.0.0 pd02_1.0.0@dzimi@post.sk",
|
1033
|
+
"PEERbot www.peerbot.com",
|
1034
|
+
"PEval 1.4b",
|
1035
|
+
"PicoSearch/1.0",
|
1036
|
+
"Piffany_Web_Scraper_v0.x",
|
1037
|
+
"Piffany_Web_Spider_v0.x",
|
1038
|
+
"pipeLiner/0.3a (PipeLine Spider;http://www.pipeline-search.com/webmaster.html; webmaster'at'pipeline-search.com)",
|
1039
|
+
"pipeLiner/0.xx (PipeLine Spider; http://www.pipeline-search.com/webmaster.html)",
|
1040
|
+
"Pita",
|
1041
|
+
"PJspider/3.0 (pjspider@portaljuice.com; http://www.portaljuice.com)",
|
1042
|
+
"PlagiarBot/1.0",
|
1043
|
+
"PluckFeedCrawler/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://www.pluck.com; 1 subscribers)",
|
1044
|
+
"Pluggd/Nutch-0.9 (automated crawler http://www.pluggd.com;support at pluggd dot com)",
|
1045
|
+
"Poirot",
|
1046
|
+
"polybot 1.0 (http://cis.poly.edu/polybot/)",
|
1047
|
+
"Pompos/1.x http://dir.com/pompos.html",
|
1048
|
+
"Pompos/1.x pompos@iliad.fr",
|
1049
|
+
"Popdexter/1.0",
|
1050
|
+
"Port Huron Labs",
|
1051
|
+
"PortalBSpider/2.0 (spider@portalb.com)",
|
1052
|
+
"potbot 1.0",
|
1053
|
+
"PRCrawler/Nutch-0.9 (data mining development project; crawler@projectrialto.com)",
|
1054
|
+
"PrivacyFinder Cache Bot v1.0",
|
1055
|
+
"PrivacyFinder/1.1",
|
1056
|
+
"Production Bot 0116B",
|
1057
|
+
"Production Bot 2016B",
|
1058
|
+
"Production Bot DOT 3016B",
|
1059
|
+
"Program Shareware 1.0.2",
|
1060
|
+
"Project XP5 [2.03.07-111203]",
|
1061
|
+
"PROve AnswerBot 4.0",
|
1062
|
+
"ProWebGuide Link Checker (http://www.prowebguide.com)",
|
1063
|
+
"psbot/0.1 (+http://www.picsearch.com/bot.html)",
|
1064
|
+
"PSurf15a 11",
|
1065
|
+
"PSurf15a 51",
|
1066
|
+
"PSurf15a VA",
|
1067
|
+
"psycheclone",
|
1068
|
+
"PubCrawl (pubcrawl.stanford.edu)",
|
1069
|
+
"pulseBot (pulse Web Miner)",
|
1070
|
+
"PWeBot/1.2 Inspector (http://www.programacionweb.net/robot.php)",
|
1071
|
+
"PycURL",
|
1072
|
+
"Python-urllib/1.1x",
|
1073
|
+
"Python-urllib/2.0a1",
|
1074
|
+
"Qango.com Web Directory (http://www.qango.com/)",
|
1075
|
+
"QEAVis Agent/Nutch-0.9 (Quantitative Evaluation of Academic Websites Visibility; http://nlp.uned.es/qeavis",
|
1076
|
+
"QPCreep Test Rig ( We are not indexing- just testing )",
|
1077
|
+
"QuepasaCreep ( crawler@quepasacorp.com )",
|
1078
|
+
"QuepasaCreep v0.9.1x",
|
1079
|
+
"QueryN Metasearch",
|
1080
|
+
"QweeryBot/3.01 ( http://qweerybot.qweery.nl)",
|
1081
|
+
"Qweery_robot.txt_CheckBot/3.01 (http://qweerybot.qweery.com)",
|
1082
|
+
"R6_CommentReader_(www.radian6.com/crawler)",
|
1083
|
+
"R6_FeedFetcher_(www.radian6.com/crawler)",
|
1084
|
+
"rabaz (rabaz at gigabaz dot com)",
|
1085
|
+
"RaBot/1.0 Agent-admin/phortse@hanmail.net",
|
1086
|
+
"ramBot xtreme x.x",
|
1087
|
+
"RAMPyBot - www.giveRAMP.com/0.1 (RAMPyBot - www.giveRAMP.com; http://www.giveramp.com/bot.html; support@giveRAMP.com)",
|
1088
|
+
"RAMPyBot/0.8-dev (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)",
|
1089
|
+
"Rankivabot/3.2 (www.rankiva.com; 3.2; vzmxikn)",
|
1090
|
+
"Rational SiteCheck (Windows NT)",
|
1091
|
+
"Reaper [2.03.10-031204] (http://www.sitesearch.ca/reaper/)",
|
1092
|
+
"Reaper/2.0x (+http://www.sitesearch.ca/reaper)",
|
1093
|
+
"RedCarpet/1.2 (http://www.redcarpet-inc.com/robots.html)",
|
1094
|
+
"RedCell/0.1 (InfoSec Search Bot (Coming Soon); http://www.telegenetic.net/bot.html; lhall@telegenetic.net)",
|
1095
|
+
"RedCell/0.1 (RedCell; telegenetic.net/bot.html; lhall_at_telegenetic.net)",
|
1096
|
+
"RedKernel WWW-Spider 2/0 (+http://www-spider.redkernel-softwares.com/)",
|
1097
|
+
"rico/0.1",
|
1098
|
+
"RixBot (http://babelserver.org/rix)",
|
1099
|
+
"RoboCrawl (http://www.canadiancontent.net)",
|
1100
|
+
"RoboCrawl (www.canadiancontent.net)",
|
1101
|
+
"RoboPal (http://www.findpal.com/)",
|
1102
|
+
"Robot/www.pj-search.com",
|
1103
|
+
"Robot: NutchCrawler- Owner: wdavies@acm.org",
|
1104
|
+
"Robot@SuperSnooper.Com",
|
1105
|
+
"Robozilla/1.0",
|
1106
|
+
"Rotondo/3.1 libwww/5.3.1",
|
1107
|
+
"RRC (crawler_admin@bigfoot.com)",
|
1108
|
+
"RSSMicro.com RSS/Atom Feed Robot",
|
1109
|
+
"RSurf15a 41",
|
1110
|
+
"RSurf15a 51",
|
1111
|
+
"RSurf15a 81",
|
1112
|
+
"RufusBot (Rufus Web Miner; http://64.124.122.252/feedback.html)",
|
1113
|
+
"RufusBot (Rufus Web Miner; http://www.webaroo.com/rooSiteOwners.html)",
|
1114
|
+
"sait/Nutch-0.9 (SAIT Research; http://www.samsung.com)",
|
1115
|
+
"SandCrawler - Compatibility Testing",
|
1116
|
+
"SapphireWebCrawler/1.0 (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
|
1117
|
+
"SapphireWebCrawler/Nutch-1.0-dev (Sapphire Web Crawler using Nutch; http://boston.lti.cs.cmu.edu/crawler/; mhoy@cs.cmu.edu)",
|
1118
|
+
"savvybot/0.2",
|
1119
|
+
"SBIder/0.7 (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
|
1120
|
+
"SBIder/0.8-dev (SBIder; http://www.sitesell.com/sbider.html; http://support.sitesell.com/contact-support.html)",
|
1121
|
+
"ScanWeb",
|
1122
|
+
"ScholarUniverse/0.8 (Nutch;+http://scholaruniverse.com/bot.jsp; fetch-agent@scholaruniverse.com)",
|
1123
|
+
"schwarzmann.biz-Spider_for_paddel.org+(http://www.innerprise.net/usp-spider.asp)",
|
1124
|
+
"ScollSpider/2.0 (+http://www.webwobot.com/ScollSpider.php)",
|
1125
|
+
"Scooter-3.0.EU",
|
1126
|
+
"Scooter-3.0.FS",
|
1127
|
+
"Scooter-3.0.HD",
|
1128
|
+
"Scooter-3.0.VNS",
|
1129
|
+
"Scooter-3.0QI",
|
1130
|
+
"Scooter-3.2",
|
1131
|
+
"Scooter-3.2.BT",
|
1132
|
+
"Scooter-3.2.DIL",
|
1133
|
+
"Scooter-3.2.EX",
|
1134
|
+
"Scooter-3.2.JT",
|
1135
|
+
"Scooter-3.2.NIV",
|
1136
|
+
"Scooter-3.2.SF0",
|
1137
|
+
"Scooter-3.2.snippet",
|
1138
|
+
"Scooter-3.3dev",
|
1139
|
+
"Scooter-ARS-1.1",
|
1140
|
+
"Scooter-ARS-1.1-ih",
|
1141
|
+
"scooter-venus-3.0.vns",
|
1142
|
+
"Scooter-W3-1.0",
|
1143
|
+
"Scooter-W3.1.2",
|
1144
|
+
"Scooter/1.0",
|
1145
|
+
"Scooter/1.0 scooter@pa.dec.com",
|
1146
|
+
"Scooter/1.1 (custom)",
|
1147
|
+
"Scooter/2.0 G.R.A.B. V1.1.0",
|
1148
|
+
"Scooter/2.0 G.R.A.B. X2.0",
|
1149
|
+
"Scooter/3.3",
|
1150
|
+
"Scooter/3.3.QA.pczukor",
|
1151
|
+
"Scooter/3.3.vscooter",
|
1152
|
+
"Scooter/3.3_SF",
|
1153
|
+
"Scooter2_Mercator_x-x.0",
|
1154
|
+
"Scooter_bh0-3.0.3",
|
1155
|
+
"Scooter_trk3-3.0.3",
|
1156
|
+
"ScoutAbout",
|
1157
|
+
"ScoutAnt/0.1; +http://www.ant.com/what_is_ant.com/",
|
1158
|
+
"scoutmaster",
|
1159
|
+
"Scrubby/2.x (http://www.scrubtheweb.com/)",
|
1160
|
+
"Scrubby/3.0 (+http://www.scrubtheweb.com/help/technology.html)",
|
1161
|
+
"Search+",
|
1162
|
+
"Search-Engine-Studio",
|
1163
|
+
"search.ch V1.4",
|
1164
|
+
"search.ch V1.4.2 (spiderman@search.ch; http://www.search.ch)",
|
1165
|
+
"Search/1.0 (http://www.innerprise.net/es-spider.asp)",
|
1166
|
+
"searchbot admin@google.com",
|
1167
|
+
"SearchByUsa/2 (SearchByUsa; http://www.SearchByUsa.com/bot.html; info@SearchByUsa.com)",
|
1168
|
+
"SearchdayBot",
|
1169
|
+
"SearchExpress Spider0.99",
|
1170
|
+
"SearchGuild/DMOZ/Experiment (searchguild@gmail.com)",
|
1171
|
+
"SearchGuild_DMOZ_Experiment (chris@searchguild.com)",
|
1172
|
+
"Searchit-Now Robot/2.2 (+http://www.searchit-now.co.uk)",
|
1173
|
+
"Searchmee! Spider v0.98a",
|
1174
|
+
"SearchSight/2.0 (http://SearchSight.com/)",
|
1175
|
+
"SearchSpider.com/1.1",
|
1176
|
+
"Searchspider/1.2 (SearchSpider; http://www.searchspider.com; webmaster@searchspider.com)",
|
1177
|
+
"SearchTone2.0 - IDEARE",
|
1178
|
+
"Seekbot/1.0 (http://www.seekbot.net/bot.html) HTTPFetcher/0.3",
|
1179
|
+
"Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.0 (XDF)",
|
1180
|
+
"Seekbot/1.0 (http://www.seekbot.net/bot.html) RobotsTxtFetcher/1.2",
|
1181
|
+
"Seeker.lookseek.com",
|
1182
|
+
"Semager/1.1 (http://www.semager.de/blog/semager-bots/)",
|
1183
|
+
"Semager/1.x (http://www.semager.de)",
|
1184
|
+
"Sensis Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
|
1185
|
+
"Sensis.com.au Web Crawler (search_comments\\at\\sensis\\dot\\com\\dot\\au)",
|
1186
|
+
"SeznamBot/1.0",
|
1187
|
+
"SeznamBot/1.0 (+http://fulltext.seznam.cz/)",
|
1188
|
+
"SeznamBot/2.0-test (+http://fulltext.sblog.cz/)",
|
1189
|
+
"ShablastBot 1.0",
|
1190
|
+
"Shim Crawler",
|
1191
|
+
"Shim-Crawler(Mozilla-compatible; http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp)",
|
1192
|
+
"ShopWiki/1.0 ( +http://www.shopwiki.com/)",
|
1193
|
+
"ShopWiki/1.0 ( +http://www.shopwiki.com/wiki/Help:Bot)",
|
1194
|
+
"Shoula.com Crawler 2.0",
|
1195
|
+
"SietsCrawler/1.1 (+http://www.siets.biz)",
|
1196
|
+
"Sigram/Nutch-1.0-dev (Test agent for Nutch development; http://www.sigram.com/bot.html; bot at sigram dot com)",
|
1197
|
+
"Siigle Orumcex v.001 Turkey (http://www.siigle.com)",
|
1198
|
+
"silk/1.0",
|
1199
|
+
"silk/1.0 (+http://www.slider.com/silk.htm)/3.7",
|
1200
|
+
"Sirketcebot/v.01 (http://www.sirketce.com/bot.html)",
|
1201
|
+
"SiteSpider +(http://www.SiteSpider.com/)",
|
1202
|
+
"SiteTruth.com site rating system",
|
1203
|
+
"SiteXpert",
|
1204
|
+
"Skampy/0.9.x (http://www.skaffe.com/skampy-info.html)",
|
1205
|
+
"Skimpy/0.x (http://www.skaffe.com/skampy-info.html)",
|
1206
|
+
"Skywalker/0.1 (Skywalker; anonymous; anonymous)",
|
1207
|
+
"Slarp/0.1",
|
1208
|
+
"Slider_Search_v1-de",
|
1209
|
+
"Slurp/2.0 (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
1210
|
+
"Slurp/2.0-KiteWeekly (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
1211
|
+
"Slurp/si (slurp@inktomi.com; http://www.inktomi.com/slurp.html)",
|
1212
|
+
"Slurpy Verifier/1.0",
|
1213
|
+
"SlySearch (slysearch@slysearch.com)",
|
1214
|
+
"SlySearch/1.0 http://www.plagiarism.org/crawler/robotinfo.html",
|
1215
|
+
"SlySearch/1.x http://www.slysearch.com",
|
1216
|
+
"smartwit.com",
|
1217
|
+
"SmiffyDCMetaSpider/1.0",
|
1218
|
+
"snap.com beta crawler v0",
|
1219
|
+
"Snapbot/1.0",
|
1220
|
+
"Snapbot/1.0 (Snap Shots, +http://www.snap.com)",
|
1221
|
+
"SnykeBot/0.6 (http://www.snyke.com)",
|
1222
|
+
"SocSciBot ()",
|
1223
|
+
"SoftHypermarketFileCheckBot/1.0+(+http://www.softhypermaket.com)",
|
1224
|
+
"sogou develop spider",
|
1225
|
+
"Sogou Orion spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
|
1226
|
+
"sogou spider",
|
1227
|
+
"Sogou web spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
|
1228
|
+
"sohu agent",
|
1229
|
+
"sohu-search",
|
1230
|
+
"Sosospider+(+http://help.soso.com/webspider.htm)",
|
1231
|
+
"speedfind ramBot xtreme 8.1",
|
1232
|
+
"Speedy Spider (Beta/x.x; speedy@entireweb.com)",
|
1233
|
+
"Speedy Spider (Entireweb; Beta/1.0; http://www.entireweb.com/about/search_tech/speedyspider/)",
|
1234
|
+
"Speedy_Spider (http://www.entireweb.com)",
|
1235
|
+
"Sphere Scout&v4.0 - scout at sphere dot com",
|
1236
|
+
"Sphider",
|
1237
|
+
"Spida/0.1",
|
1238
|
+
"Spider-Sleek/2.0 (+http://search-info.com/linktous.html)",
|
1239
|
+
"spider.batsch.com",
|
1240
|
+
"spider.yellopet.com - www.yellopet.com",
|
1241
|
+
"Spider/maxbot.com admin@maxbot.com",
|
1242
|
+
"SpiderKU/0.x",
|
1243
|
+
"SpiderMan",
|
1244
|
+
"SpiderMonkey/7.0x (SpiderMonkey.ca info at http://spidermonkey.ca/sm.shtml)",
|
1245
|
+
"Spinne/2.0",
|
1246
|
+
"Spinne/2.0 med",
|
1247
|
+
"Spinne/2.0 med_AH",
|
1248
|
+
"Spock Crawler (http://www.spock.com/crawler)",
|
1249
|
+
"sportsuchmaschine.de-Robot (Version: 1.02- powered by www.sportsuchmaschine.de)",
|
1250
|
+
"sproose/0.1-alpha (sproose crawler; http://www.sproose.com/bot.html; crawler@sproose.com)",
|
1251
|
+
"Sqworm/2.9.81-BETA (beta_release; 20011102-760; i686-pc-linux-gnu)",
|
1252
|
+
"Sqworm/2.9.85-BETA (beta_release; 20011115-775; i686-pc-linux-gnu)",
|
1253
|
+
"SSurf15a 11 ",
|
1254
|
+
"StackRambler/x.x ",
|
1255
|
+
"stat statcrawler@gmail.com",
|
1256
|
+
"Steeler/1.x (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
|
1257
|
+
"Steeler/3.3 (http://www.tkl.iis.u-tokyo.ac.jp/~crawler/)",
|
1258
|
+
"Strategic Board Bot (+http://www.strategicboard.com)",
|
1259
|
+
"Strategic Board Bot (+http://www.strategicboard.com)",
|
1260
|
+
"Submission Spider at surfsafely.com",
|
1261
|
+
"suchbaer.de",
|
1262
|
+
"suchbaer.de (CrawlerAgent v0.103)",
|
1263
|
+
"suchbot",
|
1264
|
+
"Suchknecht.at-Robot",
|
1265
|
+
"suchpadbot/1.0 (+http://www.suchpad.de)",
|
1266
|
+
"SurferF3 1/0",
|
1267
|
+
"suzuran",
|
1268
|
+
"Swooglebot/2.0. (+http://swoogle.umbc.edu/swooglebot.htm)",
|
1269
|
+
"SWSBot-Images/1.2 http://www.smartwaresoft.com/swsbot12.html",
|
1270
|
+
"SygolBot http://www.sygol.net",
|
1271
|
+
"SynoBot",
|
1272
|
+
"Syntryx ANT Scout Chassis Pheromone; Mozilla/4.0 compatible crawler",
|
1273
|
+
"Szukacz/1.x",
|
1274
|
+
"Szukacz/1.x (robot; www.szukacz.pl/jakdzialarobot.html; szukacz@proszynski.pl)",
|
1275
|
+
"tags2dir.com/0.8 (+http://tags2dir.com/directory/)",
|
1276
|
+
"Tagword (http://tagword.com/dmoz_survey.php)",
|
1277
|
+
"Talkro Web-Shot/1.0 (E-mail: webshot@daumsoft.com- Home: http://222.122.15.190/webshot)",
|
1278
|
+
"TCDBOT/Nutch-0.8 (PhD student research;http://www.tcd.ie; mcgettrs at t c d dot IE)",
|
1279
|
+
"TECOMAC-Crawler/0.x",
|
1280
|
+
"Tecomi Bot (http://www.tecomi.com/bot.htm)",
|
1281
|
+
"Teemer (NetSeer, Inc. is a Los Angeles based Internet startup company.; http://www.netseer.com/crawler.html; crawler@netseer.com)",
|
1282
|
+
"Teoma MP",
|
1283
|
+
"teomaagent crawler-admin@teoma.com",
|
1284
|
+
"teomaagent1 [crawler-admin@teoma.com]",
|
1285
|
+
"teoma_agent1",
|
1286
|
+
"Teradex Mapper; mapper@teradex.com; http://www.teradex.com",
|
1287
|
+
"terraminds-bot/1.0 (support@terraminds.de)",
|
1288
|
+
"TerrawizBot/1.0 (+http://www.terrawiz.com/bot.html)",
|
1289
|
+
"Test spider",
|
1290
|
+
"TestCrawler/Nutch-0.9 (Testing Crawler for Research ; http://balihoo.com/index.aspx; tgautier at balihoo dot com)",
|
1291
|
+
"TheRarestParser/0.2a (http://therarestwords.com/)",
|
1292
|
+
"TheSuBot/0.1 (www.thesubot.de)",
|
1293
|
+
"thumbshots-de-Bot (Version: 1.02- powered by www.thumbshots.de)",
|
1294
|
+
"timboBot/0.9 http://www.breakingblogs.com/timbo_bot.html",
|
1295
|
+
"TinEye/1.1 (http://tineye.com/crawler.html)",
|
1296
|
+
"tivraSpider/1.0 (crawler@tivra.com)",
|
1297
|
+
"TJG/Spider",
|
1298
|
+
"Tkensaku/x.x(http://www.tkensaku.com/q.html)",
|
1299
|
+
"Topodia/1.2-dev (Topodia - Crawler for HTTP content indexing; http://www.topodia.com/; support@topodia.com)",
|
1300
|
+
"Toutatis x-xx.x (hoppa.com)",
|
1301
|
+
"Toutatis x.x (hoppa.com)",
|
1302
|
+
"Toutatis x.x-x",
|
1303
|
+
"traazibot/testengine (+http://www.traazi.de)",
|
1304
|
+
"Trampelpfad-Spider",
|
1305
|
+
"Trampelpfad-Spider-v0.1",
|
1306
|
+
"TSurf15a 11",
|
1307
|
+
"Tumblr/1.0 RSS syndication (+http://www.tumblr.com/) (support@tumblr.com)",
|
1308
|
+
"TurnitinBot/x.x (http://www.turnitin.com/robot/crawlerinfo.html)",
|
1309
|
+
"Turnpike Emporium LinkChecker/0.1",
|
1310
|
+
"TutorGig/1.5 (+http://www.tutorgig.com/crawler)",
|
1311
|
+
"Tutorial Crawler 1.4 (http://www.tutorgig.com/crawler)",
|
1312
|
+
"Twiceler www.cuill.com/robots.html",
|
1313
|
+
"Twiceler-0.9 http://www.cuill.com/twiceler/robot.html",
|
1314
|
+
"Tycoon Agent/Nutch-1.0-dev",
|
1315
|
+
"TygoBot",
|
1316
|
+
"TygoProwler",
|
1317
|
+
"UIowaCrawler/1.0",
|
1318
|
+
"UKWizz/Nutch-0.8.1 (UKWizz Nutch crawler; http://www.ukwizz.com/)",
|
1319
|
+
"Ultraseek",
|
1320
|
+
"Under the Rainbow 2.2",
|
1321
|
+
"UofTDB_experiment (leehyun@cs.toronto.edu)",
|
1322
|
+
"updated/0.1-alpha (updated crawler; http://www.updated.com; crawler@updated.com)",
|
1323
|
+
"updated/0.1beta (updated.com; http://www.updated.com; crawler@updated.om)",
|
1324
|
+
"Uptimebot",
|
1325
|
+
"UptimeBot(www.uptimebot.com)",
|
1326
|
+
"URL Spider Pro/x.xx (innerprise.net)",
|
1327
|
+
"urlfan-bot/1.0; +http://www.urlfan.com/site/bot/350.html",
|
1328
|
+
"URL_Spider_Pro/x.x",
|
1329
|
+
"URL_Spider_Pro/x.x+(http://www.innerprise.net/usp-spider.asp)",
|
1330
|
+
"User-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)",
|
1331
|
+
"User-Agent: Mozilla/4.0 (SKIZZLE! Distributed Internet Spider v1.0 - www.SKIZZLE.com)",
|
1332
|
+
"USyd-NLP-Spider (http://www.it.usyd.edu.au/~vinci/bot.html)",
|
1333
|
+
"VadixBot",
|
1334
|
+
"Vagabondo-WAP/2.0 (webcrawler at wise-guys dot nl; http://webagent.wise-guys.nl/)/1.0 Profile",
|
1335
|
+
"Vagabondo/1.x MT (webagent@wise-guys.nl)",
|
1336
|
+
"Vagabondo/2.0 MT",
|
1337
|
+
"Vagabondo/2.0 MT (webagent at wise-guys dot nl)",
|
1338
|
+
"Vagabondo/2.0 MT (webagent@NOSPAMwise-guys.nl)",
|
1339
|
+
"Vagabondo/3.0 (webagent at wise-guys dot nl)",
|
1340
|
+
"Vakes/0.01 (Vakes; http://www.vakes.com/; search@vakes.com)",
|
1341
|
+
"versus 0.2 (+http://versus.integis.ch)",
|
1342
|
+
"versus crawler eda.baykan@epfl.ch",
|
1343
|
+
"VeryGoodSearch.com.DaddyLongLegs",
|
1344
|
+
"verzamelgids.nl - Networking4all Bot/x.x",
|
1345
|
+
"Verzamelgids/2.2 (http://www.verzamelgids.nl)",
|
1346
|
+
"Vespa Crawler",
|
1347
|
+
"VisBot/2.0 (Visvo.com Crawler; http://www.visvo.com/bot.html; bot@visvo.com)",
|
1348
|
+
"Vision Research Lab image spider at vision.ece.ucsb.edu",
|
1349
|
+
"VMBot/0.x.x (VMBot; http://www.VerticalMatch.com/; vmbot@tradedot.com)",
|
1350
|
+
"Vortex/2.2 (+http://marty.anstey.ca/robots/vortex/)",
|
1351
|
+
"voyager-hc/1.0",
|
1352
|
+
"voyager/1.0",
|
1353
|
+
"voyager/2.0 (http://www.kosmix.com/html/crawler.html)",
|
1354
|
+
"VSE/1.0 (testcrawler@hotmail.com)",
|
1355
|
+
"VSE/1.0 (testcrawler@vivisimo.com)",
|
1356
|
+
"vspider",
|
1357
|
+
"vspider/3.x",
|
1358
|
+
"VWBOT/Nutch-0.9-dev (VWBOT Nutch Crawler; http://vwbot.cs.uiuc.edu;+vwbot@cs.uiuc.edu",
|
1359
|
+
"W3SiteSearch Crawler_v1.1 http://www.w3sitesearch.de",
|
1360
|
+
"wadaino.jp-crawler 0.2 (http://wadaino.jp/)",
|
1361
|
+
"Wavefire/0.8-dev (Wavefire; http://www.wavefire.com; info@wavefire.com)",
|
1362
|
+
"Waypath development crawler - info at waypath dot com",
|
1363
|
+
"Waypath Scout v2.x - info at waypath dot com",
|
1364
|
+
"Web Snooper",
|
1365
|
+
"web2express.org/Nutch-0.9-dev (leveled playing field; http://web2express.org/; info at web2express.org)",
|
1366
|
+
"WebAlta Crawler/1.2.1 (http://www.webalta.ru/bot.html)",
|
1367
|
+
"WebarooBot (Webaroo Bot; http://64.124.122.252/feedback.html)",
|
1368
|
+
"WebarooBot (Webaroo Bot; http://www.webaroo.com/rooSiteOwners.html)",
|
1369
|
+
"webbandit/4.xx.0",
|
1370
|
+
"Webclipping.com",
|
1371
|
+
"WebCompass 2.0",
|
1372
|
+
"WebCorp/1.0",
|
1373
|
+
"webcrawl.net",
|
1374
|
+
"WebFindBot(http://www.web-find.com)",
|
1375
|
+
"Webglimpse 2.xx.x (http://webglimpse.net)",
|
1376
|
+
"Weblog Attitude Diffusion 1.0",
|
1377
|
+
"webmeasurement-bot, http://rvs.informatik.uni-leipzig.de",
|
1378
|
+
"WebRankSpider/1.37 (+http://ulm191.server4you.de/crawler/)",
|
1379
|
+
"WebSearch.COM.AU/3.0.1 (The Australian Search Engine; http://WebSearch.COM.AU; Search@WebSearch.COM.AU)",
|
1380
|
+
"WebSearchBench WebCrawler v0.1(Experimental)",
|
1381
|
+
"WebsiteWorth v1.0",
|
1382
|
+
"Webspinne/1.0 webmaster@webspinne.de",
|
1383
|
+
"Websquash.com (Add url robot)",
|
1384
|
+
"WebStat/1.0 (Unix; beta; 20040314)",
|
1385
|
+
"Webster v0.3 ( http://webster.healeys.net/ )",
|
1386
|
+
"WebVac (webmaster@pita.stanford.edu)",
|
1387
|
+
"Webverzeichnis.de - Telefon: 01908 / 26005",
|
1388
|
+
"WebVulnCrawl.unknown/1.0 libwww-perl/5.803",
|
1389
|
+
"Wells Search II",
|
1390
|
+
"WEP Search 00",
|
1391
|
+
"WFARC",
|
1392
|
+
"whatUseek_winona/3.0",
|
1393
|
+
"WhizBang! Lab",
|
1394
|
+
"Willow Internet Crawler by Twotrees V2.1",
|
1395
|
+
"WinHTTP Example/1.0",
|
1396
|
+
"WinkBot/0.06 (Wink.com search engine web crawler; http://www.wink.com/Wink:WinkBot; winkbot@wink.com)",
|
1397
|
+
"WIRE/0.11 (Linux; i686; Bot,Robot,Spider,Crawler,aromano@cli.di.unipi.it)",
|
1398
|
+
"WIRE/0.x (Linux; i686; Bot,Robot,Spider,Crawler)",
|
1399
|
+
"WISEbot/1.0 (WISEbot@koreawisenut.com; http://wisebot.koreawisenut.com)",
|
1400
|
+
"worio heritrix bot (+http://worio.com/)",
|
1401
|
+
"woriobot ( http://www.worio.com/)",
|
1402
|
+
"WorldLight",
|
1403
|
+
"Wotbox/alpha0.6 (bot@wotbox.com; http://www.wotbox.com)",
|
1404
|
+
"Wotbox/alpha0.x.x (bot@wotbox.com; http://www.wotbox.com) Java/1.4.1_02",
|
1405
|
+
"WSB WebCrawler V1.0 (Beta)- cl@cs.uni-dortmund.de",
|
1406
|
+
"WSB, http://websearchbench.cs.uni-dortmund.de",
|
1407
|
+
"wume_crawler/1.1 (http://wume.cse.lehigh.edu/~xiq204/crawler/)",
|
1408
|
+
"Wwlib/Linux",
|
1409
|
+
"www.arianna.it",
|
1410
|
+
"WWWeasel Robot v1.00 (http://wwweasel.de)",
|
1411
|
+
"wwwster/1.x (Beta- mailto:gue@cis.uni-muenchen.de)",
|
1412
|
+
"X-Crawler ",
|
1413
|
+
"xirq/0.1-beta (xirq; http://www.xirq.com; xirq@xirq.com)",
|
1414
|
+
"xyro_(xcrawler@cosmos.inria.fr)",
|
1415
|
+
"Y!J-BSC/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
|
1416
|
+
"Y!J-SRD/1.0",
|
1417
|
+
"Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)",
|
1418
|
+
"yacy (www.yacy.net; v20040602; i386 Linux 2.4.26-gentoo-r13; java 1.4.2_06; MET/en)",
|
1419
|
+
"yacybot (x86 Windows XP 5.1; java 1.5.0_06; Europe/de) yacy.net",
|
1420
|
+
"Yahoo Pipes 1.0",
|
1421
|
+
"Yahoo! Mindset",
|
1422
|
+
"Yahoo-Blogs/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
|
1423
|
+
"Yahoo-MMAudVid/1.0 (mms dash mmaudvidcrawler dash support at yahoo dash inc dot com)",
|
1424
|
+
"Yahoo-MMAudVid/2.0(mms dash mm aud vid crawler dash support at yahoo dash inc.com ;Mozilla 4.0 compatible; MSIE 7.0;Windows NT 5.0; .NET CLR 2.0)",
|
1425
|
+
"Yahoo-MMCrawler/3.x (mm dash crawler at trd dot overture dot com)",
|
1426
|
+
"Yahoo-Test/4.0",
|
1427
|
+
"Yahoo-VerticalCrawler-FormerWebCrawler/3.9 crawler at trd dot overture dot com; http://www.alltheweb.com/help/webmaster/crawler",
|
1428
|
+
"YahooFeedSeeker/2.0 (compatible; Mozilla 4.0; MSIE 5.5; http://publisher.yahoo.com/rssguide)",
|
1429
|
+
"YahooSeeker-Testing/v3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/)",
|
1430
|
+
"YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
|
1431
|
+
"YahooSeeker/1.0 (compatible; Mozilla 4.0; MSIE 5.5; http://search.yahoo.com/yahooseeker.html)",
|
1432
|
+
"YahooSeeker/1.1 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/shop/merchant/)",
|
1433
|
+
"YahooSeeker/bsv3.9 (compatible; Mozilla 4.0; MSIE 5.5; http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html )",
|
1434
|
+
"YahooSeeker/CafeKelsa-dev (compatible; Konqueror/3.2; FreeBSD ;cafekelsa-dev-webmaster@yahoo-inc.com )",
|
1435
|
+
"Yandex/1.01.001 (compatible; Win16; I)",
|
1436
|
+
"Yanga WorldSearch Bot v1.1/beta (http://www.yanga.co.uk/)",
|
1437
|
+
"yarienavoir.net/0.2",
|
1438
|
+
"Yeti",
|
1439
|
+
"Yeti/0.01 (nhn/1noon, yetibot@naver.com, check robots.txt daily and follows it)",
|
1440
|
+
"Yeti/1.0 (NHN Corp.; http://help.naver.com/robots/)",
|
1441
|
+
"yggdrasil/Nutch-0.9 (yggdrasil biorelated search engine; www dot biotec dot tu minus dresden do de slash schroeder; heiko dot dietze at biotec dot tu minus dresden dot de)",
|
1442
|
+
"YodaoBot/1.0 (http://www.yodao.com/help/webmaster/spider/; )",
|
1443
|
+
"yoofind/yoofind-0.1-dev (yoono webcrawler; http://www.yoono.com ; MyEmail)",
|
1444
|
+
"yoogliFetchAgent/0.1",
|
1445
|
+
"yoono/1.0 web-crawler/1.0",
|
1446
|
+
"YottaCars_Bot/4.12 (+http://www.yottacars.com) Car Search Engine ",
|
1447
|
+
"YottaShopping_Bot/4.12 (+http://www.yottashopping.com) Shopping Search Engine",
|
1448
|
+
"Zao-Crawler",
|
1449
|
+
"Zao-Crawler 0.2b",
|
1450
|
+
"Zao/0.1 (http://www.kototoi.org/zao/)",
|
1451
|
+
"ZBot/1.00 (icaulfield@zeus.com)",
|
1452
|
+
"Zearchit",
|
1453
|
+
"ZeBot_lseek.net (bot@ze.bz)",
|
1454
|
+
"ZeBot_www.ze.bz (ze.bz@hotmail.com)",
|
1455
|
+
"zedzo.digest/0.1 (http://www.zedzo.com/)",
|
1456
|
+
"zermelo Mozilla/5.0 compatible; heritrix/1.12.1 (+http://www.powerset.com) [email:crawl@powerset.com,email:paul@page-store.com]",
|
1457
|
+
"zerxbot/Version 0.6 libwww-perl/5.79",
|
1458
|
+
"Zeus ThemeSite Viewer Webster Pro V2.9 Win32",
|
1459
|
+
"Zeus xxxxx Webster Pro V2.9 Win32",
|
1460
|
+
"Zeusbot/0.07 (Ulysseek's web-crawling robot; http://www.zeusbot.com; agent@zeusbot.com)",
|
1461
|
+
"ZipppBot/0.xx (ZipppBot; http://www.zippp.net; webmaster@zippp.net)",
|
1462
|
+
"ZIPPPCVS/0.xx (ZipppBot/.xx;http://www.zippp.net; webmaster@zippp.net)",
|
1463
|
+
"Zippy v2.0 - Zippyfinder.com",
|
1464
|
+
"ZoomSpider - wrensoft.com",
|
1465
|
+
"zspider/0.9-dev http://feedback.redkolibri.com/",
|
1466
|
+
"ZyBorg/1.0 (ZyBorg@WISEnut.com; http://www.WISEnut.com)"]
|
1467
|
+
end
|
1468
|
+
end
|