hashbang 0.0.1.alpha → 1.0.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -0
- data/Gemfile.lock +28 -0
- data/README.md +17 -13
- data/hashbang.gemspec +4 -3
- data/lib/generators/hashbang_generator.rb +16 -0
- data/lib/generators/templates/config.rb +4 -0
- data/lib/generators/templates/config.ru +11 -0
- data/lib/generators/templates/unicorn.rb +3 -0
- data/lib/hashbang.rb +3 -4
- data/lib/hashbang/config.rb +19 -0
- data/lib/hashbang/crawler.rb +17 -13
- data/lib/hashbang/railtie/engine.rb +18 -0
- data/lib/hashbang/railtie/middleware.rb +21 -0
- data/lib/hashbang/standalone/middleware.rb +33 -0
- data/lib/tasks/hashbang.rake +24 -0
- metadata +33 -14
- data/lib/hashbang/middleware.rb +0 -35
- data/lib/hashbang/pool.rb +0 -40
- data/lib/hashbang/railtie.rb +0 -20
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
hashbang (0.0.1.alpha)
|
5
|
+
headless
|
6
|
+
sunscraper (~> 1.1.0.beta3)
|
7
|
+
unicorn
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
specs:
|
12
|
+
ffi (1.0.11)
|
13
|
+
headless (0.2.2)
|
14
|
+
kgio (2.7.2)
|
15
|
+
rack (1.4.1)
|
16
|
+
raindrops (0.8.0)
|
17
|
+
sunscraper (1.1.0.beta3)
|
18
|
+
ffi (>= 1.0.11)
|
19
|
+
unicorn (4.2.0)
|
20
|
+
kgio (~> 2.6)
|
21
|
+
rack
|
22
|
+
raindrops (~> 0.7)
|
23
|
+
|
24
|
+
PLATFORMS
|
25
|
+
ruby
|
26
|
+
|
27
|
+
DEPENDENCIES
|
28
|
+
hashbang!
|
data/README.md
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
# Bang. Hashbang.
|
2
2
|
|
3
|
-
Hashbang
|
4
|
-
[Google conventions](http://code.google.com/web/ajaxcrawling/). Make your Rails AJAX applications indexable in no time.
|
3
|
+
Hashbang is a tiny Rack proxy serving HTML dumps for your RICH web-applications according to
|
4
|
+
[Google AJAX Crawling conventions](http://code.google.com/web/ajaxcrawling/). Make your Rails AJAX applications indexable in no time.
|
5
5
|
|
6
|
-
Hashbang will
|
6
|
+
Using Rails generators, Hashbang will set up a small inner Rack application that handles all magic requests containing the `_escaped_fragment_` parameter. Each such request triggers a subrequest to the real AJAX URL using a virtual browser. This hidden browser waits for a condition you define using the well-known [Watir](http://watirwebdriver.com/) API and then returns an HTML dump.
|
7
7
|
|
8
8
|
Let's say for example you've got a request to `test.com/?_escaped_fragment_=/my_hidden_page`.
|
9
9
|
|
@@ -11,24 +11,28 @@ Hashbang will convert this URL to `test.com/#!/my_hidden_page` and open it in a
|
|
11
11
|
|
12
12
|
The virtual browser will call your lambda with the `browser` object as a parameter. With the help of this lambda you can set up the wait behavior. Here is a great introduction to the [Watir wait API](http://watirwebdriver.com/waiting/). Note that your lambda will act as a block to `Watir::Wait.until`.
|
13
13
|
|
14
|
-
## Installation
|
14
|
+
## Installation
|
15
15
|
|
16
16
|
Start from your Gemfile:
|
17
17
|
|
18
18
|
```
|
19
|
-
|
19
|
+
gem 'hashbang'
|
20
20
|
```
|
21
|
-
|
22
|
-
Waiter can be defined inside your environments:
|
23
21
|
|
24
|
-
|
25
|
-
|
22
|
+
Then run the basic generator:
|
23
|
+
|
24
|
+
```
|
25
|
+
rails g hashbang
|
26
26
|
```
|
27
27
|
|
28
|
-
This
|
28
|
+
This generator will create an inline Rack application in the `hashbang/` directory. To set the lambda used to check whether your page is ready, refer to `hashbang/config.rb`.
|
29
|
+
|
30
|
+
## Environments are specific
|
31
|
+
|
32
|
+
In the development environment, this gem catches all magic requests directly from Rails using middleware, so it will just work (see the P.S. below :). However, for security and performance reasons, on production servers you are supposed to boot this Rack app separately and manually forward all magic requests to it. We'll describe a typical production nginx/passenger setup later in this README.
|
29
33
|
|
30
|
-
|
34
|
+
P.S. Since a basic development setup uses just one Rails instance in most cases, all requests to magic URLs will lead to a deadlock! To solve this problem we've included the `rake hashbang:rails` command, which runs your Rails project inside [Unicorn](http://unicorn.bogomips.org/) with 2 instances.
|
31
35
|
|
32
|
-
|
36
|
+
## Memory consumption
|
33
37
|
|
34
|
-
|
38
|
+
This part of hashbang is currently in progress. We still bundle Watir chromedev for proof-of-concept reasons. The full version will come with Qt WebKit bindings.
|
data/hashbang.gemspec
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = "hashbang"
|
3
|
-
s.version = "0.0.
|
3
|
+
s.version = "1.0.0.beta2"
|
4
4
|
s.platform = Gem::Platform::RUBY
|
5
5
|
s.summary = "Magic support of Google/Bing/... AJAX search indexing for your Rails apps"
|
6
6
|
s.email = "boris@roundlake.ru"
|
7
7
|
s.homepage = "http://roundlake.github.com/hashbang/"
|
8
|
-
s.description = "Hashbang
|
8
|
+
s.description = "Hashbang is a tiny Rack proxy serving HTML dumps for your RICH web-applications according to Google AJAX Crawling conventions. Make your Rails AJAX applications indexable in no time."
|
9
9
|
s.authors = ['Boris Staal']
|
10
10
|
|
11
11
|
s.files = `git ls-files`.split("\n")
|
12
12
|
s.require_paths = ["lib"]
|
13
13
|
|
14
|
-
s.add_dependency 'watir-webdriver'
|
15
14
|
s.add_dependency 'headless'
|
15
|
+
s.add_dependency 'sunscraper', '~> 1.1.0.beta3'
|
16
|
+
s.add_dependency 'unicorn'
|
16
17
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class HashbangGenerator < Rails::Generators::Base
|
2
|
+
source_root File.join(File.dirname(__FILE__), 'templates')
|
3
|
+
|
4
|
+
def create_files
|
5
|
+
empty_directory "hashbang"
|
6
|
+
template "config.ru", "hashbang/config.ru"
|
7
|
+
template "config.rb", "hashbang/config.rb"
|
8
|
+
template "unicorn.rb", "hashbang/unicorn.rb"
|
9
|
+
|
10
|
+
empty_directory "hashbang/tmp"
|
11
|
+
create_file "hashbang/tmp/.gitkeep"
|
12
|
+
|
13
|
+
empty_directory "hashbang/public"
|
14
|
+
create_file "hashbang/public/.gitkeep"
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
|
6
|
+
$: << Bundler.load.specs.find{|s| s.name == 'hashbang' }.full_gem_path + '/lib'
|
7
|
+
|
8
|
+
require 'hashbang'
|
9
|
+
require 'hashbang/standalone/middleware'
|
10
|
+
|
11
|
+
run Hashbang::Standalone::Middleware.new(File.expand_path('..', __FILE__))
|
data/lib/hashbang.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'headless'
|
2
|
-
require '
|
2
|
+
require 'sunscraper'
|
3
3
|
|
4
|
-
require 'hashbang/pool'
|
5
4
|
require 'hashbang/crawler'
|
6
|
-
require 'hashbang/
|
7
|
-
require 'hashbang/railtie' if defined? Rails
|
5
|
+
require 'hashbang/config'
|
6
|
+
require 'hashbang/railtie/engine' if defined? Rails
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Hashbang
|
2
|
+
module Config
|
3
|
+
attr_accessor :url, :timeout
|
4
|
+
|
5
|
+
extend self
|
6
|
+
|
7
|
+
def map
|
8
|
+
yield self
|
9
|
+
end
|
10
|
+
|
11
|
+
def load(path)
|
12
|
+
if File.exists? path
|
13
|
+
require path
|
14
|
+
end
|
15
|
+
|
16
|
+
self.url = /^#{url}/ unless self.url.is_a? Regexp
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/hashbang/crawler.rb
CHANGED
@@ -1,20 +1,24 @@
|
|
1
1
|
module Hashbang
|
2
|
-
|
3
|
-
|
4
|
-
browser = Pool.grab
|
2
|
+
module Crawler
|
3
|
+
extend self
|
5
4
|
|
6
|
-
|
7
|
-
|
5
|
+
def gimme(url, timeout)
|
6
|
+
Sunscraper.scrape_url url, timeout
|
7
|
+
end
|
8
|
+
|
9
|
+
def urlFromRack(environment)
|
10
|
+
url = []
|
11
|
+
url << environment['rack.url_scheme'] + '://'
|
12
|
+
url << environment['HTTP_HOST']
|
13
|
+
url << environment['REQUEST_PATH']
|
14
|
+
url << '?' unless environment['QUERY_STRING'].starts_with? '_escaped_fragment_'
|
15
|
+
url << environment['QUERY_STRING'].gsub(/(\&)?_escaped_fragment_=/, '#!')
|
8
16
|
|
9
|
-
|
10
|
-
|
11
|
-
end
|
17
|
+
url.join ''
|
18
|
+
end
|
12
19
|
|
13
|
-
|
14
|
-
|
15
|
-
ensure
|
16
|
-
Hashbang::Pool.release(browser)
|
17
|
-
end
|
20
|
+
def urlFromUrl(url)
|
21
|
+
url.gsub(/[\?\&]_escaped_fragment_=/, '#!')
|
18
22
|
end
|
19
23
|
end
|
20
24
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'hashbang/railtie/middleware'
|
2
|
+
|
3
|
+
module Hashbang
|
4
|
+
module Railtie
|
5
|
+
class Engine < Rails::Engine
|
6
|
+
initializer "application_controller.initialize_hashbang" do |app|
|
7
|
+
if Rails.env == 'development'
|
8
|
+
app.config.middleware.use "Hashbang::Railtie::Middleware"
|
9
|
+
Config::load Rails.root.join('hashbang/config.rb').to_s
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
rake_tasks do
|
14
|
+
Dir[File.expand_path('../../tasks/*.rake', __FILE__)].each { |f| load f }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Hashbang
|
2
|
+
module Railtie
|
3
|
+
class Middleware
|
4
|
+
def initialize(application)
|
5
|
+
@application = application
|
6
|
+
Headless.new.start
|
7
|
+
end
|
8
|
+
|
9
|
+
def call(environment)
|
10
|
+
if environment['QUERY_STRING'].include? "_escaped_fragment_"
|
11
|
+
url = Crawler.urlFromRack(environment)
|
12
|
+
html = Crawler.gimme url, 100000
|
13
|
+
|
14
|
+
[200, {"Content-Type" => "text/html; charset=utf-8"}, [html]]
|
15
|
+
else
|
16
|
+
@application.call(environment)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
module Hashbang
|
3
|
+
module Standalone
|
4
|
+
class Middleware
|
5
|
+
def initialize(root)
|
6
|
+
Config.load File.expand_path('config.rb', root)
|
7
|
+
Headless.new.start
|
8
|
+
end
|
9
|
+
|
10
|
+
def call(environment)
|
11
|
+
url = environment['QUERY_STRING'].split('&').find{|x| x[0,4] == 'url='}
|
12
|
+
|
13
|
+
unless url.to_s.length == 0
|
14
|
+
url = url.split('=')[1]
|
15
|
+
url = URI.unescape url
|
16
|
+
url = Crawler.urlFromUrl url
|
17
|
+
end
|
18
|
+
|
19
|
+
if url.to_s.length == 0 || !url.match(Config.url)
|
20
|
+
return [200, {"Content-Type" => "text/html; charset=utf-8"}, ['']]
|
21
|
+
end
|
22
|
+
|
23
|
+
html = Crawler.gimme url, Config.timeout
|
24
|
+
|
25
|
+
if html.respond_to? :force_encoding
|
26
|
+
html.force_encoding "UTF-8"
|
27
|
+
end
|
28
|
+
|
29
|
+
return [200, {"Content-Type" => "text/html; charset=utf-8"}, [html]]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
namespace :hashbang do
|
2
|
+
|
3
|
+
desc "Start development unicorn"
|
4
|
+
task :rails do
|
5
|
+
config = "#{Dir.getwd}/hashbang/unicorn.rb"
|
6
|
+
|
7
|
+
unless File.exists? config
|
8
|
+
raise "Hashbang subdirectory does not seem to exist. Did you run `rails g hashbang`?"
|
9
|
+
end
|
10
|
+
|
11
|
+
sh "bundle exec unicorn --config-file #{config}"
|
12
|
+
end
|
13
|
+
|
14
|
+
desc "Start production standaloner"
|
15
|
+
task :standalone do
|
16
|
+
rackup = "#{Dir.getwd}/hashbang/config.ru"
|
17
|
+
|
18
|
+
unless File.exists? rackup
|
19
|
+
raise "Hashbang subdirectory does not seem to exist. Did you run `rails g hashbang`?"
|
20
|
+
end
|
21
|
+
|
22
|
+
sh "bundle exec rackup #{rackup}"
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hashbang
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 1.0.0.beta2
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-03-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
16
|
-
requirement: &
|
15
|
+
name: headless
|
16
|
+
requirement: &70344278924840 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70344278924840
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
|
-
name:
|
27
|
-
requirement: &
|
26
|
+
name: sunscraper
|
27
|
+
requirement: &70344278922420 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.1.0.beta3
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70344278922420
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: unicorn
|
38
|
+
requirement: &70344278920940 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ! '>='
|
@@ -32,24 +43,32 @@ dependencies:
|
|
32
43
|
version: '0'
|
33
44
|
type: :runtime
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
-
description: Hashbang
|
37
|
-
according to Google conventions. Make your Rails AJAX applications
|
38
|
-
no time.
|
46
|
+
version_requirements: *70344278920940
|
47
|
+
description: Hashbang is a tiny Rack proxy serving HTML dumps for your RICH web-applications
|
48
|
+
according to Google AJAX Crawling conventions. Make your Rails AJAX applications
|
49
|
+
indexable in no time.
|
39
50
|
email: boris@roundlake.ru
|
40
51
|
executables: []
|
41
52
|
extensions: []
|
42
53
|
extra_rdoc_files: []
|
43
54
|
files:
|
55
|
+
- Gemfile
|
56
|
+
- Gemfile.lock
|
44
57
|
- README.md
|
45
58
|
- Rakefile
|
46
59
|
- hashbang.gemspec
|
47
60
|
- init.rb
|
61
|
+
- lib/generators/hashbang_generator.rb
|
62
|
+
- lib/generators/templates/config.rb
|
63
|
+
- lib/generators/templates/config.ru
|
64
|
+
- lib/generators/templates/unicorn.rb
|
48
65
|
- lib/hashbang.rb
|
66
|
+
- lib/hashbang/config.rb
|
49
67
|
- lib/hashbang/crawler.rb
|
50
|
-
- lib/hashbang/
|
51
|
-
- lib/hashbang/
|
52
|
-
- lib/hashbang/
|
68
|
+
- lib/hashbang/railtie/engine.rb
|
69
|
+
- lib/hashbang/railtie/middleware.rb
|
70
|
+
- lib/hashbang/standalone/middleware.rb
|
71
|
+
- lib/tasks/hashbang.rake
|
53
72
|
homepage: http://roundlake.github.com/hashbang/
|
54
73
|
licenses: []
|
55
74
|
post_install_message:
|
data/lib/hashbang/middleware.rb
DELETED
@@ -1,35 +0,0 @@
|
|
1
|
-
module Hashbang
|
2
|
-
class Middleware
|
3
|
-
def initialize(application, config)
|
4
|
-
@application = application
|
5
|
-
@config = config
|
6
|
-
|
7
|
-
Headless.new.start
|
8
|
-
Pool.setup config.pool_size
|
9
|
-
end
|
10
|
-
|
11
|
-
def call(environment)
|
12
|
-
if environment['QUERY_STRING'].include? "_escaped_fragment_"
|
13
|
-
url = []
|
14
|
-
url << environment['rack.url_scheme'] + '://'
|
15
|
-
url << environment['HTTP_HOST']
|
16
|
-
url << environment['REQUEST_PATH']
|
17
|
-
url << '?' unless environment['QUERY_STRING'].starts_with? '_escaped_fragment_'
|
18
|
-
url << environment['QUERY_STRING'].gsub(/(\&)?_escaped_fragment_=/, '#!')
|
19
|
-
url = url.join ''
|
20
|
-
|
21
|
-
if @config.waiter
|
22
|
-
html = Crawler.gimme(url) do |browser|
|
23
|
-
@config.waiter.call(browser)
|
24
|
-
end
|
25
|
-
else
|
26
|
-
html = Crawler.gimme url
|
27
|
-
end
|
28
|
-
|
29
|
-
[200, {"Content-Type" => "text/html"}, [html]]
|
30
|
-
else
|
31
|
-
@application.call(environment)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
data/lib/hashbang/pool.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
module Hashbang
|
2
|
-
class Pool
|
3
|
-
cattr_accessor :inited
|
4
|
-
cattr_accessor :pool_size
|
5
|
-
cattr_accessor :browsers
|
6
|
-
cattr_accessor :taken
|
7
|
-
|
8
|
-
def self.setup(quantity=1)
|
9
|
-
@@pool_size = quantity
|
10
|
-
|
11
|
-
self.init if !Rails || Rails.env != 'development'
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.init
|
15
|
-
@@browsers = []
|
16
|
-
@@taken = []
|
17
|
-
|
18
|
-
@@pool_size.times do
|
19
|
-
@@browsers << browser = Watir::Browser.new
|
20
|
-
at_exit { browser.close if browser.exists? }
|
21
|
-
end
|
22
|
-
|
23
|
-
@@inited = true
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.grab
|
27
|
-
self.init unless @@inited
|
28
|
-
|
29
|
-
raise "Pool is empty" if @@browsers.length == 0
|
30
|
-
|
31
|
-
@@taken << browser = @@browsers.pop
|
32
|
-
browser
|
33
|
-
end
|
34
|
-
|
35
|
-
def self.release(browser)
|
36
|
-
@@taken.delete browser
|
37
|
-
@@browsers << browser
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
data/lib/hashbang/railtie.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
module Hashbang
|
2
|
-
class Railtie < Rails::Railtie
|
3
|
-
config.hashbang = ActiveSupport::OrderedOptions.new
|
4
|
-
|
5
|
-
config.hashbang.pool_size = 1
|
6
|
-
config.hashbang.waiter = false
|
7
|
-
|
8
|
-
waiters = {
|
9
|
-
:joosy => -> b { b.execute_script("return Joosy.Application.loading") == false }
|
10
|
-
}
|
11
|
-
|
12
|
-
initializer "application_controller.initialize_hashbang" do |app|
|
13
|
-
if app.config.hashbang.waiter.is_a? Symbol
|
14
|
-
app.config.hashbang.waiter = waiters[app.config.hashbang.waiter]
|
15
|
-
end
|
16
|
-
|
17
|
-
app.config.middleware.use "Hashbang::Middleware", app.config.hashbang
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|