spiderable 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in spiderable.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Nisarg Shah
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # Spiderable
2
+
3
+ Adds Spiderable support to Rails applications. (Visit http://www.spiderable.org to sign up if you don't have an account.)
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'spiderable'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install spiderable
18
+
19
+ ## Usage
20
+
21
+ Add a configuration file (config/spiderable.rb):
22
+
23
+ Spiderable::Config.token = '[YOUR TOKEN]'
24
+
25
+ That's it! To test it out, visit a URL with ?_escaped_fragment_= at the end.
26
+
27
+ http://mysite.com?_escaped_fragment_=
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,11 @@
1
module Spiderable
  # Module-level settings for the gem.
  #
  # The module extends itself, so the accessors and +map+ defined below are
  # callable directly on the module:
  #
  #   Spiderable::Config.token = '[YOUR TOKEN]'
  #   Spiderable::Config.map { |config| config.token = '[YOUR TOKEN]' }
  module Config
    extend self

    # API token; read by Spiderable::Connect when it builds a request.
    attr_accessor :token

    # Yields the configuration module itself so several settings can be
    # assigned inside one block. Returns whatever the block returns.
    def map
      yield(self)
    end
  end
end
@@ -0,0 +1,16 @@
1
require 'json'
require 'uri'

module Spiderable
  # Thin HTTP client for the Spiderable rendering API.
  module Connect
    BASE_URL = 'http://www.spiderable.org'

    # Fetches the rendered contents of +url+ from the Spiderable API.
    #
    # @param url [String] absolute URL of the page to render
    # @return [String, nil] the "contents" field of the JSON response when the
    #   API answers 200; otherwise the raw response body
    # @raise [JSON::ParserError] if a 200 response body is not valid JSON
    def self.get_url_contents(url)
      # Form-encode both parameters: the page URL normally contains "?", "&"
      # or "#", which would otherwise be parsed as part of the API request's
      # own query string and truncate or corrupt the "url" parameter.
      query = URI.encode_www_form(url: url.to_s, token: Config.token.to_s)
      response = Faraday.get("#{BASE_URL}/api/v1/pages.json?#{query}")

      if response.status == 200
        JSON.parse(response.body)['contents']
      else
        response.body
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
+ require 'spiderable/railtie/middleware'
2
+
3
# NOTE(review): this namespace is Hashbang while the middleware it installs
# lives under Spiderable. The constant name is kept as-is so existing
# references keep working; confirm whether it should be renamed.
module Hashbang
  module Railtie
    # Rails engine that installs the Spiderable middleware and loads any rake
    # tasks found in the gem's tasks directory.
    class Engine < Rails::Engine
      initializer "application_controller.initialize_spiderable" do |app|
        # Only the development and production environments get the middleware.
        if %w[development production].include?(Rails.env)
          # Pass the class itself rather than its name as a String: Rails 5+
          # no longer accepts string middleware names, and the class is
          # already loaded by the require at the top of this file.
          app.config.middleware.use Spiderable::Railtie::Middleware
        end
      end

      rake_tasks do
        Dir[File.expand_path('../../tasks/*.rake', __FILE__)].each { |f| load f }
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
require 'uri'

module Spiderable
  # Helpers that turn a crawler request (a URL carrying the
  # "_escaped_fragment_" query parameter) back into the URL of the page the
  # crawler is asking about.
  module Crawler
    extend self

    ESCAPED_FRAGMENT_KEY = '_escaped_fragment_'.freeze

    # Rebuilds the requested page URL from a Rack environment.
    #
    # The "_escaped_fragment_" parameter is removed from the query string; a
    # non-empty value is restored as a "#!" fragment, as described by the
    # AJAX crawling scheme.
    #
    # @param environment [Hash] Rack environment
    # @return [String] absolute URL without the "_escaped_fragment_" parameter
    def url_from_rack(environment)
      # REQUEST_PATH is only set by some servers; fall back to the keys the
      # Rack specification defines.
      path = environment['REQUEST_PATH'] ||
             "#{environment['SCRIPT_NAME']}#{environment['PATH_INFO']}"
      base = "#{environment['rack.url_scheme']}://#{environment['HTTP_HOST']}#{path}"

      rebuild_url(base, environment['QUERY_STRING'])
    end
    # Original camelCase name, kept for backward compatibility.
    alias_method :urlFromRack, :url_from_rack

    # Same transformation as #url_from_rack, starting from a full URL string.
    #
    # @param url [String] URL that may contain "_escaped_fragment_"
    # @return [String] URL without the "_escaped_fragment_" parameter
    def url_from_url(url)
      base, query = url.to_s.split('?', 2)
      rebuild_url(base, query)
    end
    # Original camelCase name, kept for backward compatibility.
    alias_method :urlFromUrl, :url_from_url

    private

    # Joins +base+ with +query+ minus the escaped-fragment parameter. "?" is
    # appended only when other parameters remain, and "#!" only when the
    # escaped fragment carried a value.
    def rebuild_url(base, query)
      fragment = nil

      remaining = query.to_s.split('&').reject do |pair|
        key, value = pair.split('=', 2)
        next false unless key == ESCAPED_FRAGMENT_KEY

        fragment = decode_fragment(value.to_s)
        true
      end

      url = base.to_s.dup
      url << '?' << remaining.join('&') unless remaining.empty?
      url << '#!' << fragment unless fragment.nil? || fragment.empty?
      url
    end

    # URL-decodes the fragment value. Malformed percent-encoding comes from
    # the client, so fall back to the raw value instead of raising.
    def decode_fragment(value)
      URI.decode_www_form_component(value)
    rescue ArgumentError
      value
    end
  end

  module Railtie
    # Rack middleware that answers crawler requests with contents fetched via
    # Spiderable::Connect and passes every other request to the application.
    class Middleware
      # User agents that embed a URL in parentheses, e.g.
      # "(compatible; Googlebot/2.1; +http://www.google.com/bot.html)".
      BOT_USER_AGENT = %r{\(.*https?://.*\)}

      # @param application [#call] the next Rack application in the stack
      def initialize(application)
        @application = application
      end

      # @param environment [Hash] Rack environment
      # @return [Array] Rack response triplet
      def call(environment)
        return @application.call(environment) unless crawler_request?(environment)

        url = Crawler.url_from_rack(environment)
        html = Connect.get_url_contents(url)

        # to_s: the fetched contents may be nil, which is not a valid Rack
        # body element.
        [200, {"Content-Type" => "text/html; charset=utf-8"}, [html.to_s]]
      end

      private

      # True when the user agent looks like a bot or the query string carries
      # "_escaped_fragment_". Both headers are coerced with to_s because a
      # request may omit the User-Agent header entirely.
      def crawler_request?(environment)
        user_agent = environment['HTTP_USER_AGENT'].to_s
        query = environment['QUERY_STRING'].to_s

        !!(user_agent =~ BOT_USER_AGENT) || query.include?("_escaped_fragment_")
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Spiderable
  # Gem version, read by spiderable.gemspec. Frozen so the shared constant
  # cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
data/lib/spiderable.rb ADDED
@@ -0,0 +1,5 @@
1
+ require "spiderable/version"
2
+ require "faraday"
3
+ require "spiderable/config"
4
+ require "spiderable/connect"
5
+ require 'spiderable/railtie/engine' if defined? Rails
@@ -0,0 +1,21 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put the gem's lib directory on the load path so the version constant can be
# required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'spiderable/version'

Gem::Specification.new do |gem|
  gem.name          = "spiderable"
  gem.version       = Spiderable::VERSION
  gem.authors       = ["Spiderable"]
  gem.email         = ["hello@spiderable.org"]
  gem.description   = %q{Allows your rails application to be spiderable by crawlers}
  gem.summary       = %q{Allows your rails application to be spiderable by crawlers}
  gem.homepage      = "http://www.spiderable.org"
  # The gem ships LICENSE.txt with the MIT License; declare it so the built
  # gem's metadata carries the license too.
  gem.license       = "MIT"

  # Package every file tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency 'faraday'
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spiderable
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Spiderable
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-06 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: faraday
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Allows your rails application to be spiderable by crawlers
31
+ email:
32
+ - hello@spiderable.org
33
+ executables: []
34
+ extensions: []
35
+ extra_rdoc_files: []
36
+ files:
37
+ - .gitignore
38
+ - Gemfile
39
+ - LICENSE.txt
40
+ - README.md
41
+ - Rakefile
42
+ - lib/spiderable.rb
43
+ - lib/spiderable/config.rb
44
+ - lib/spiderable/connect.rb
45
+ - lib/spiderable/railtie/engine.rb
46
+ - lib/spiderable/railtie/middleware.rb
47
+ - lib/spiderable/version.rb
48
+ - spiderable.gemspec
49
+ homepage: http://www.spiderable.org
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project:
69
+ rubygems_version: 1.8.24
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Allows your rails application to be spiderable by crawlers
73
+ test_files: []