indexable 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/indexable/indexable.rb +53 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0a6d2c79c0eb8d268fba3a60b0300c8ac464f32b
|
4
|
+
data.tar.gz: 795a31d66d9acfe8cc83e9dcf43d269886ae5833
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5bf720013d32fff089bdd6cece00eccc345592eb6def5c5d31670c7eb29af7d7398e8a1c51f898ee2427ccb1f2091bdd629938a006a2ac816a36a40b9dbd67f0
|
7
|
+
data.tar.gz: c76f9a4f1065c82c01a93d7c80c8ce8a71caf283b4f844d3b50b42d6041304c4bc8e1dd7358df0199cda77e98adaebf80837d4dbc59154c486365ef7accb1b3d
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rack/request'
|
2
|
+
require 'indexable/phantomjs'
|
3
|
+
|
4
|
+
# Tempfile is used by Middleware#render below; require it here so this file
# does not depend on some other library having loaded it first.
require 'tempfile'

module Indexable
  # Rack middleware that, for requests that look like they come from a
  # crawler, replaces a successful HTML response with the output of running
  # the response through Phantomjs (using render_page.js next to this file).
  # All other responses are passed through untouched.
  class Middleware
    # Patterns matched against the User-Agent header to recognise crawlers.
    CRAWLER_USER_AGENTS = [
      /^Twitterbot/, /^curl/, /Googlebot/, /Mediapartners/, /Adsbot-Google/,
      /\(.*http(s|\(s\))?:\/\/.*\)/
    ]

    # Output that is treated as a render failure; #call answers such
    # requests with a 500 status.
    RENDER_FAILURE_MESSAGE = "Couldn't render page... orz."

    # @param app [#call] the downstream Rack application
    def initialize(app)
      @app = app
    end

    # Detect whether the current request comes from a bot. Based on the logic used
    # by Bustle.com (https://www.dropbox.com/s/s4oibqsxqpo3hll/bustle%20slizzle.pdf)
    #
    # A request without a User-Agent header is never treated as a crawler.
    # Otherwise it is a crawler request when the User-Agent matches one of
    # CRAWLER_USER_AGENTS, when an `_escaped_fragment_` parameter is present,
    # or when the `nojs` parameter equals "true".
    #
    # @param env [Hash] the Rack environment
    # @return [Boolean]
    def request_from_crawler?(env)
      user_agent = env["HTTP_USER_AGENT"]
      return false unless user_agent
      return true if CRAWLER_USER_AGENTS.any? { |pattern| user_agent.match(pattern) }

      # Parameters are only parsed once the cheap header checks are
      # inconclusive, so a missing User-Agent never triggers param parsing.
      params = Rack::Request.new(env).params
      params.key?('_escaped_fragment_') || params['nojs'] == 'true'
    end

    # Rack entry point.
    #
    # @param env [Hash] the Rack environment
    # @return [Array] the Rack response triplet [status, headers, body]
    def call(env)
      status, headers, content = *@app.call(env)

      unless html_ok?(status, headers) && request_from_crawler?(env)
        return [status, headers, content]
      end

      rendered = render(read_body(content), Rack::Request.new(env).url)
      status = 500 if rendered == RENDER_FAILURE_MESSAGE
      update_content_length(headers, rendered)

      [status, headers, [rendered]]
    end

    private

    # True for a 200 response whose Content-Type starts with text/html.
    # A missing Content-Type header counts as "not HTML" instead of raising.
    def html_ok?(status, headers)
      return false unless status.to_i == 200

      key = header_key(headers, 'Content-Type')
      !key.nil? && !(headers[key].to_s =~ %r{\Atext/html}).nil?
    end

    # Finds the key actually used in +headers+ for +name+, ignoring case.
    # Returns nil when the header is absent.
    def header_key(headers, name)
      return name if headers.key?(name)

      headers.keys.find { |candidate| candidate.to_s.casecmp(name).zero? }
    end

    # Collects the response body into a single String.
    #
    # Response-like objects exposing #body are unwrapped first. Anything that
    # is not already a String is consumed through #each, which is the only
    # iteration method this code relies on. The original body is closed when
    # it responds to #close, because it is about to be replaced.
    def read_body(content)
      body = content.respond_to?(:body) ? content.body : content
      return body if body.is_a?(String)

      parts = []
      body.each { |part| parts << part.to_s }
      parts.join
    ensure
      content.close if content.respond_to?(:close)
    end

    # Writes +html+ to a temporary file and runs it through Phantomjs.
    # The temporary file is removed even when writing or rendering raises.
    def render(html, url)
      script = ::File.dirname(__FILE__) + "/render_page.js"
      file = Tempfile.new(['indexable', '.html'])
      begin
        file.write(html)
        file.close
        Phantomjs.new(script, file.path, url).run
      ensure
        file.close!
      end
    end

    # Keeps an existing Content-Length header in sync with the replaced
    # body. When the header is absent it is left absent.
    def update_content_length(headers, body)
      key = header_key(headers, 'Content-Length')
      headers[key] = body.to_s.bytesize.to_s if key
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indexable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vikhyat Korrapati
|
@@ -31,6 +31,7 @@ extensions: []
|
|
31
31
|
extra_rdoc_files: []
|
32
32
|
files:
|
33
33
|
- lib/indexable.rb
|
34
|
+
- lib/indexable/indexable.rb
|
34
35
|
- lib/indexable/render_page.js
|
35
36
|
- lib/indexable/phantomjs.rb
|
36
37
|
- lib/indexable/railtie.rb
|