indexable 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/indexable/indexable.rb +53 -0
  3. metadata +2 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 64a1e1b305b64588744f3df8c892511cb49936e0
4
- data.tar.gz: 13a2987bcff19059f7f1bed603b6ca48219917f9
3
+ metadata.gz: 0a6d2c79c0eb8d268fba3a60b0300c8ac464f32b
4
+ data.tar.gz: 795a31d66d9acfe8cc83e9dcf43d269886ae5833
5
5
  SHA512:
6
- metadata.gz: 4cc15e9a4ccff9f84f43bdf21b6bba08ca59c6b04463560f03da641305505fcb0d72e255e8824803fed0c7b0ba1838f3e589941a0a3f2242387d7e482c2488ca
7
- data.tar.gz: 18f3d230291c6bb07f628ebf7fd0b7f48811a7abda1aca2c026c87e9e3f6a20ef9c212af5335c89a5cb2b09772733f68b23868b2c76c054b827beb4ee95abccf
6
+ metadata.gz: 5bf720013d32fff089bdd6cece00eccc345592eb6def5c5d31670c7eb29af7d7398e8a1c51f898ee2427ccb1f2091bdd629938a006a2ac816a36a40b9dbd67f0
7
+ data.tar.gz: c76f9a4f1065c82c01a93d7c80c8ce8a71caf283b4f844d3b50b42d6041304c4bc8e1dd7358df0199cda77e98adaebf80837d4dbc59154c486365ef7accb1b3d
@@ -0,0 +1,53 @@
1
require 'tempfile'

require 'rack/request'
require 'indexable/phantomjs'
3
+
4
module Indexable
  # Rack middleware that replaces successful HTML responses with a
  # PhantomJS-rendered snapshot when the request appears to come from a crawler.
  #
  # Responses that are not 200, are not text/html, or are not requested by a
  # crawler are passed through untouched.
  class Middleware
    # User-agent patterns treated as crawlers. The last pattern matches agents
    # that embed a URL in parentheses, e.g. "(+http://example.com/bot)".
    CRAWLER_USER_AGENTS = [
      /^Twitterbot/, /^curl/, /Googlebot/, /Mediapartners/, /Adsbot-Google/,
      /\(.*http(s|\(s\))?:\/\/.*\)/
    ]

    # Sentinel output that signals a failed render; it is mapped to a 500.
    RENDER_FAILURE_MESSAGE = "Couldn't render page... orz.".freeze

    # @param app [#call] the downstream Rack application
    def initialize(app)
      @app = app
    end

    # Detect whether the current request comes from a bot. Based on the logic used
    # by Bustle.com (https://www.dropbox.com/s/s4oibqsxqpo3hll/bustle%20slizzle.pdf)
    #
    # @param env [Hash] the Rack environment
    # @return [Boolean] true when the user agent matches a crawler pattern, or
    #   the request carries `_escaped_fragment_` or `nojs=true`; always false
    #   when no user agent is present
    def request_from_crawler?(env)
      user_agent = env['HTTP_USER_AGENT']
      return false unless user_agent
      return true if CRAWLER_USER_AGENTS.any? { |pattern| user_agent =~ pattern }

      # Parse the query only once the cheap user-agent checks are inconclusive.
      params = Rack::Request.new(env).params
      params.key?('_escaped_fragment_') || params['nojs'] == 'true'
    end

    # @param env [Hash] the Rack environment
    # @return [Array] the Rack response triple; for crawler requests to HTML
    #   pages the body is the rendered snapshot, and the status becomes 500
    #   when rendering reports failure
    def call(env)
      status, headers, content = *@app.call(env)
      return [status, headers, content] unless renderable?(status, headers, env)

      html = read_body(content)
      rendered = render(html, Rack::Request.new(env).url)
      status = 500 if rendered == RENDER_FAILURE_MESSAGE

      # The body changed, so a Content-Length set upstream would now be stale.
      if headers['Content-Length'] && rendered.respond_to?(:bytesize)
        headers['Content-Length'] = rendered.bytesize.to_s
      end

      [status, headers, [rendered]]
    end

    private

    # True when the response is a 200 HTML page requested by a crawler.
    # Tolerates a missing Content-Type header and string statuses like "200".
    def renderable?(status, headers, env)
      status.to_i == 200 &&
        headers['Content-Type'].to_s =~ /^text\/html/ &&
        request_from_crawler?(env)
    end

    # Collect the response body into a single string. Uses `body` when the
    # object exposes one, otherwise iterates with `each`. The original body is
    # closed afterwards because it is being replaced.
    def read_body(content)
      body = content.respond_to?(:body) ? content.body : content
      return body.to_s unless body.respond_to?(:each)

      parts = []
      body.each { |part| parts << part }
      parts.join
    ensure
      content.close if content.respond_to?(:close)
    end

    # Write the HTML to a temporary file and run the PhantomJS render script on
    # it. The temporary file is removed even when writing or rendering raises.
    def render(html, url)
      script = ::File.join(::File.dirname(__FILE__), 'render_page.js')
      file = Tempfile.new(['indexable', '.html'])
      begin
        file.write(html)
        file.close
        Phantomjs.new(script, file.path, url).run
      ensure
        file.close!
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indexable
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vikhyat Korrapati
@@ -31,6 +31,7 @@ extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
33
  - lib/indexable.rb
34
+ - lib/indexable/indexable.rb
34
35
  - lib/indexable/render_page.js
35
36
  - lib/indexable/phantomjs.rb
36
37
  - lib/indexable/railtie.rb