indexable 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/indexable/indexable.rb +53 -0
  3. metadata +2 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64a1e1b305b64588744f3df8c892511cb49936e0
4
- data.tar.gz: 13a2987bcff19059f7f1bed603b6ca48219917f9
3
+ metadata.gz: 0a6d2c79c0eb8d268fba3a60b0300c8ac464f32b
4
+ data.tar.gz: 795a31d66d9acfe8cc83e9dcf43d269886ae5833
5
5
  SHA512:
6
- metadata.gz: 4cc15e9a4ccff9f84f43bdf21b6bba08ca59c6b04463560f03da641305505fcb0d72e255e8824803fed0c7b0ba1838f3e589941a0a3f2242387d7e482c2488ca
7
- data.tar.gz: 18f3d230291c6bb07f628ebf7fd0b7f48811a7abda1aca2c026c87e9e3f6a20ef9c212af5335c89a5cb2b09772733f68b23868b2c76c054b827beb4ee95abccf
6
+ metadata.gz: 5bf720013d32fff089bdd6cece00eccc345592eb6def5c5d31670c7eb29af7d7398e8a1c51f898ee2427ccb1f2091bdd629938a006a2ac816a36a40b9dbd67f0
7
+ data.tar.gz: c76f9a4f1065c82c01a93d7c80c8ce8a71caf283b4f844d3b50b42d6041304c4bc8e1dd7358df0199cda77e98adaebf80837d4dbc59154c486365ef7accb1b3d
@@ -0,0 +1,53 @@
1
+ require 'rack/request'
2
+ require 'indexable/phantomjs'
3
+
4
require 'tempfile' # Tempfile backs Middleware#render_snapshot

module Indexable
  # Rack middleware that replaces successful HTML responses with a snapshot
  # rendered by PhantomJS when the request appears to come from a crawler.
  # Every other response is passed through untouched.
  class Middleware
    # User-agent patterns treated as crawlers. The last pattern matches any
    # agent that advertises a URL inside parentheses.
    # NOTE(review): intentionally left unfrozen -- external code may already
    # append patterns to this list; confirm before freezing.
    CRAWLER_USER_AGENTS = [
      /^Twitterbot/, /^curl/, /Googlebot/, /Mediapartners/, /Adsbot-Google/,
      /\(.*http(s|\(s\))?:\/\/.*\)/
    ]

    # Output that is treated as a rendering failure and mapped to HTTP 500.
    # NOTE(review): presumably produced by render_page.js -- verify there.
    RENDER_FAILURE_MESSAGE = "Couldn't render page... orz."

    # @param app [#call] the downstream Rack application
    def initialize(app)
      @app = app
    end

    # Detect whether the current request comes from a bot. Based on the logic used
    # by Bustle.com (https://www.dropbox.com/s/s4oibqsxqpo3hll/bustle%20slizzle.pdf)
    #
    # A request without a User-Agent header is never treated as a crawler, even
    # when it carries `_escaped_fragment_` or `nojs=true`.
    #
    # @param env [Hash] the Rack environment
    # @return [Boolean]
    def request_from_crawler?(env)
      user_agent = env["HTTP_USER_AGENT"]
      return false unless user_agent
      return true if CRAWLER_USER_AGENTS.any? { |pattern| user_agent.match(pattern) }

      # Query parameters are only parsed once the cheap header check has failed.
      params = Rack::Request.new(env).params
      params.key?('_escaped_fragment_') || params['nojs'].eql?('true')
    end

    # Calls the downstream app and, for crawler requests that received a
    # 200 text/html response, swaps the body for the PhantomJS rendering.
    #
    # @param env [Hash] the Rack environment
    # @return [Array] the Rack response triplet [status, headers, body]
    def call(env)
      status, headers, content = *@app.call(env)
      return [status, headers, content] unless html_ok?(status, headers) && request_from_crawler?(env)

      rendered = render_snapshot(read_body(content), Rack::Request.new(env).url)
      status = 500 if rendered == RENDER_FAILURE_MESSAGE

      # The body changed, so a Content-Length set downstream would be stale.
      if headers.key?('Content-Length') && rendered.respond_to?(:bytesize)
        headers['Content-Length'] = rendered.bytesize.to_s
      end

      [status, headers, [rendered]]
    end

    private

    # True for a 200 response whose Content-Type starts with text/html.
    # A missing Content-Type header counts as "not HTML" instead of raising.
    def html_ok?(status, headers)
      status.to_i == 200 && headers['Content-Type'].to_s =~ /^text\/html/
    end

    # Collects the downstream body into one string and closes it, since the
    # body is being replaced and will not be handed to the server.
    def read_body(content)
      return content.body if content.respond_to?(:body)

      # Rack bodies are only guaranteed to respond to #each, not #join.
      parts = []
      content.each { |part| parts << part }
      parts.join('')
    ensure
      content.close if content.respond_to?(:close)
    end

    # Writes the HTML to a temporary file, runs render_page.js against it via
    # Phantomjs and returns that output. The temp file is always removed.
    def render_snapshot(html, url)
      script = ::File.dirname(__FILE__) + "/render_page.js"
      file = Tempfile.new(['indexable', '.html'])
      begin
        file.write html
        file.close # flush to disk so the external process can read it
        Phantomjs.new(script, file.path, url).run
      ensure
        file.close!
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vikhyat Korrapati
@@ -31,6 +31,7 @@ extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
33
  - lib/indexable.rb
34
+ - lib/indexable/indexable.rb
34
35
  - lib/indexable/render_page.js
35
36
  - lib/indexable/phantomjs.rb
36
37
  - lib/indexable/railtie.rb