human_power 0.0.2 → 0.0.3
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/lib/human_power/version.rb +1 -1
- data/lib/human_power.rb +10 -2
- data/user_agents.yml +170 -0
- metadata +4 -4
- data/lib/human_power/user_agents.rb +0 -7
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 85d862a0641b75ed9ca834a320e636899b0a707e
+  data.tar.gz: 8526d1ddd6b2a51439f7efb4339f397c5f575b07
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ff9ebfe171ddc808a642e6bda45e3475d0da4ea07d1f063694ae7d92807ff4cfd9042dbd643d11dcf30c6449377f5437c4c84139cd2153149c8dff7adbb347fb
+  data.tar.gz: becb2c0552d849252a8ee195cb5cb0e0dd9274ad093a3dc4ddf31dc6c64b92b6630b598d45ce8d605d54a74de7003801b03814c1b1d4ccbc61ef54b474e12b72
```
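The four new values are digests of the two members of the packaged .gem archive. A quick way to double-check them locally (a sketch; assumes the gem was fetched with `gem fetch human_power -v 0.0.3` and unpacked with `tar -xf human_power-0.0.3.gem`, a .gem file being a plain tar archive):

```ruby
require "digest"

# Hash the two unpacked archive members; the output should match
# the SHA1 values in checksums.yaml above.
%w[metadata.gz data.tar.gz].each do |f|
  puts "#{f}: #{Digest::SHA1.file(f).hexdigest}"
end
```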
data/README.md
CHANGED
````diff
@@ -24,7 +24,7 @@ If you are using Rails, you can add a sample *config/robots.rb* configuration file
 
     $ rails g human_power:install
 
-It will allow crawlers
+It will allow crawlers access to the whole site by default.
 
 Now you can restart your server and visit `/robots.txt` to see what's generated from the new configuration file.
 
@@ -70,6 +70,13 @@ sitemap sitemap_url
 sitemap one_url, two_url
 ```
 
+Then visit `/robots.txt` in your browser.
+
+## Crawlers
+
+Please see [user_agents.yml](https://github.com/lassebunk/human_power/blob/master/user_agents.yml) for a list of 170+ built-in user agents/crawlers you can use as shown above.
+The list is from [UserAgentString.com](http://www.useragentstring.com/pages/Crawlerlist/).
+
 ## Caveats
 
 Human Power is great for adding rules to your robots.txt.
````
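Per the new README paragraph, the generated configuration allows crawlers access to the whole site. A minimal sketch of what such a *config/robots.rb* might contain — hypothetical contents: only `sitemap` appears verbatim in the README excerpt above, while the `allow` rule and the example URL are assumptions:

```ruby
# config/robots.rb — a sketch, not the generator's literal output
allow "/"                                 # assumed rule: open the whole site to crawlers
sitemap "http://example.com/sitemap.xml"  # `sitemap` as shown in the README
```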
data/lib/human_power/version.rb
CHANGED
data/lib/human_power.rb
CHANGED
```diff
@@ -1,7 +1,6 @@
 require "human_power/version"
 require "human_power/generator"
 require "human_power/rule"
-require "human_power/user_agents"
 require "human_power/rails" if defined?(Rails)
 
 module HumanPower
@@ -20,8 +19,17 @@ module HumanPower
       user_agents[key] = user_agent_string
     end
 
+    # Hash of registered user agents.
     def user_agents
-      @user_agents ||=
+      @user_agents ||= load_user_agents
     end
+
+    private
+
+    # Loads the built-in user agents from user_agents.yml.
+    def load_user_agents
+      path = File.expand_path("../../user_agents.yml", __FILE__)
+      Hash[YAML.load(open(path).read).map { |k, v| [k.to_sym, v] }]
+    end
   end
 end
```
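With this change, the built-in list is read lazily from user_agents.yml on the first call to `user_agents`, memoized in `@user_agents`, and keyed by symbols. Assuming these are module-level methods (the nesting of the closing `end`s in the hunk suggests a `class << self` block), usage would look like:

```ruby
require "human_power"

# Keys and values come from the bundled user_agents.yml below.
HumanPower.user_agents[:googlebot]   # => "Googlebot"
HumanPower.user_agents[:yahoo_slurp] # => "Yahoo! Slurp"
```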
data/user_agents.yml
ADDED
```diff
@@ -0,0 +1,170 @@
+008: 008
+abacho_bot: ABACHOBot
+accoona_ai_agent: Accoona-AI-Agent
+add_sugar_spider_bot: AddSugarSpiderBot
+any_apex_bot: AnyApexBot
+arachmo: Arachmo
+blitzbot: B-l-i-t-z-B-O-T
+baiduspider: Baiduspider
+become_bot: BecomeBot
+beslist_bot: BeslistBot
+billy_bob_bot: BillyBobBot
+bimbot: Bimbot
+bingbot: Bingbot
+blitz_bot: BlitzBOT
+boitho_com_dc: boitho.com-dc
+boitho_com_robot: boitho.com-robot
+btbot: btbot
+catch_bot: CatchBot
+cerberian_drtrs: Cerberian Drtrs
+charlotte: Charlotte
+convera_crawler: ConveraCrawler
+cosmos: cosmos
+covario_ids: Covario IDS
+datapark_search: DataparkSearch
+diamond_bot: DiamondBot
+discobot: Discobot
+dotbot: Dotbot
+emerald_shield_com_web_bot: EmeraldShield.com WebBot
+envolk_its_spider: envolk[ITS]spider
+esperanza_bot: EsperanzaBot
+exabot: Exabot
+fast_enterprise_crawler: FAST Enterprise Crawler
+fast_web_crawler: FAST-WebCrawler
+fdse_robot: FDSE robot
+find_links: FindLinks
+furl_bot: FurlBot
+fyber_spider: FyberSpider
+g2crawler: g2crawler
+gaisbot: Gaisbot
+galaxy_bot: GalaxyBot
+genie_bot: genieBot
+gigabot: Gigabot
+girafabot: Girafabot
+googlebot: Googlebot
+googlebot_image: Googlebot-Image
+guruji_bot: GurujiBot
+happy_fun_bot: HappyFunBot
+hl_ftien_spider: hl_ftien_spider
+holmes: Holmes
+htdig: htdig
+iaskspider: iaskspider
+ia_archiver: ia_archiver
+ic_crawler: iCCrawler
+ichiro: ichiro
+igde_spyder: igdeSpyder
+irl_bot: IRLbot
+issue_crawler: IssueCrawler
+jaxified_bot: Jaxified Bot
+jyxobot: Jyxobot
+koepa_bot: KoepaBot
+l_webis: L.webis
+lapozz_bot: LapozzBot
+larbin: Larbin
+ld_spider: LDSpider
+lexxe_bot: LexxeBot
+linguee_bot: Linguee Bot
+link_walker: LinkWalker
+lmspider: lmspider
+lwp_trivial: lwp-trivial
+mabontland: mabontland
+magpie_crawler: magpie-crawler
+mediapartners_google: Mediapartners-Google
+mj12bot: MJ12bot
+mnogosearch: Mnogosearch
+mogimogi: mogimogi
+mojeek_bot: MojeekBot
+moreoverbot: Moreoverbot
+morning_paper: Morning Paper
+msnbot: msnbot
+msr_bot: MSRBot
+mva_client: MVAClient
+mxbot: mxbot
+net_research_server: NetResearchServer
+net_seer_crawler: NetSeer Crawler
+news_gator: NewsGator
+ng_search: NG-Search
+nicebot: nicebot
+noxtrumbot: noxtrumbot
+nusearch_spider: Nusearch Spider
+nutch_cvs: NutchCVS
+nymesis: Nymesis
+obot: obot
+oegp: oegp
+omgilibot: omgilibot
+omni_explorer_bot: OmniExplorer_Bot
+oozbot: OOZBOT
+orbiter: Orbiter
+page_bites_hyper_bot: PageBitesHyperBot
+peew: Peew
+polybot: polybot
+pompos: Pompos
+post_post: PostPost
+psbot: Psbot
+pyc_url: PycURL
+qseero: Qseero
+radian6: Radian6
+rampy_bot: RAMPyBot
+rufus_bot: RufusBot
+sand_crawler: SandCrawler
+sb_ider: SBIder
+scout_jet: ScoutJet
+scrubby: Scrubby
+search_sight: SearchSight
+seekbot: Seekbot
+semanticdiscovery: semanticdiscovery
+sensis_web_crawler: Sensis Web Crawler
+seo_chat_bot: SEOChat::Bot
+seznam_bot: SeznamBot
+shim_crawler: Shim-Crawler
+shop_wiki: ShopWiki
+shoula_robot: Shoula robot
+silk: silk
+sitebot: Sitebot
+snappy: Snappy
+sogou_spider: sogou spider
+sosospider: Sosospider
+speedy_spider: Speedy Spider
+sqworm: Sqworm
+stack_rambler: StackRambler
+suggybot: suggybot
+survey_bot: SurveyBot
+synoo_bot: SynooBot
+teoma: Teoma
+terrawiz_bot: TerrawizBot
+the_su_bot: TheSuBot
+thumbnail_cz_robot: Thumbnail.CZ robot
+tin_eye: TinEye
+truwo_gps: truwoGPS
+turnitin_bot: TurnitinBot
+tweeted_times_bot: TweetedTimes Bot
+twenga_bot: TwengaBot
+updated: updated
+urlfilebot: Urlfilebot
+vagabondo: Vagabondo
+voila_bot: VoilaBot
+vortex: Vortex
+voyager: voyager
+vyu2: VYU2
+webcollage: webcollage
+websquash_com: Websquash.com
+wf84: wf84
+wo_finde_ich_robot: WoFindeIch Robot
+womlpe_factory: WomlpeFactory
+xaldon_web_spider: Xaldon_WebSpider
+yacy: yacy
+yahoo_slurp: Yahoo! Slurp
+yahoo_slurp_china: Yahoo! Slurp China
+yahoo_seeker: YahooSeeker
+yahoo_seeker_testing: YahooSeeker-Testing
+yandex_bot: YandexBot
+yandex_images: YandexImages
+yasaklibot: Yasaklibot
+yeti: Yeti
+yodao_bot: YodaoBot
+yoogli_fetch_agent: yoogliFetchAgent
+youdao_bot: YoudaoBot
+zao: Zao
+zealbot: Zealbot
+zspider: zspider
+zy_borg: ZyBorg
```
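Each key on the left is the symbol referenced from Ruby (via `HumanPower.user_agents`); each value on the right is the user agent string that presumably ends up in the generated `User-agent:` line. A plain-Ruby illustration of the mapping — the robots.txt lines here are hand-written, not produced by the gem's DSL:

```ruby
require "human_power"

# :baiduspider and its value come straight from the list above.
agent = HumanPower.user_agents[:baiduspider] # => "Baiduspider"
puts "User-agent: #{agent}"
puts "Disallow: /private"
```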
metadata
CHANGED
```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: human_power
 version: !ruby/object:Gem::Version
-  version: 0.0.2
+  version: 0.0.3
 platform: ruby
 authors:
 - Lasse Bunk
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-12-
+date: 2013-12-09 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -90,7 +90,6 @@ files:
 - lib/human_power/rails/controller.rb
 - lib/human_power/rails/engine.rb
 - lib/human_power/rule.rb
-- lib/human_power/user_agents.rb
 - lib/human_power/version.rb
 - test/dummy/README.rdoc
 - test/dummy/Rakefile
@@ -134,6 +133,7 @@ files:
 - test/generator_test.rb
 - test/rails/integration_test.rb
 - test/test_helper.rb
+- user_agents.yml
 homepage: https://github.com/lassebunk/human_power
 licenses:
 - MIT
@@ -154,7 +154,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.0.3
 signing_key:
 specification_version: 4
 summary: Easy generation of robots.txt. Force the robots into submission!
```