hashbang 0.0.1.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +34 -0
- data/Rakefile +8 -0
- data/hashbang.gemspec +16 -0
- data/init.rb +1 -0
- data/lib/hashbang.rb +7 -0
- data/lib/hashbang/crawler.rb +20 -0
- data/lib/hashbang/middleware.rb +35 -0
- data/lib/hashbang/pool.rb +40 -0
- data/lib/hashbang/railtie.rb +20 -0
- metadata +77 -0
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Bang. Hashbang.
|
2
|
+
|
3
|
+
Hashbang will automatically enable serving HTML dumps for your AJAX web-applications according to
|
4
|
+
[Google conventions](http://code.google.com/web/ajaxcrawling/). Make your Rails AJAX applications indexable in no time.
|
5
|
+
|
6
|
+
Hashbang intercepts every incoming request that contains the magic `_escaped_fragment_` parameter and serves it in a special way: it issues a subrequest to the real AJAX URL using a virtual browser. This hidden browser waits for a condition that you define with the well-known [Watir](http://watirwebdriver.com/) API and then returns an HTML dump.
|
7
|
+
|
8
|
+
Let's say for example you've got a request to `test.com/?_escaped_fragment_=/my_hidden_page`.
|
9
|
+
|
10
|
+
Hashbang will convert this URL to `test.com/#!/my_hidden_page` and open it in a virtual browser.
|
11
|
+
|
12
|
+
The virtual browser will call your lambda with the `browser` object as its parameter. With the help of this lambda you can set up the wait behavior. Here is a great introduction to the [Watir wait API](http://watirwebdriver.com/waiting/). Note that your lambda will act as the block passed to `Watir::Wait.until`.
|
13
|
+
|
14
|
+
## Installation & Example
|
15
|
+
|
16
|
+
Start from your Gemfile:
|
17
|
+
|
18
|
+
```
|
19
|
+
gem 'hashbang'
|
20
|
+
```
|
21
|
+
|
22
|
+
The waiter can be defined in your environment configuration:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
config.hashbang.waiter = -> b { b.execute_script("return Joosy.Application.loading") == false }
|
26
|
+
```
|
27
|
+
|
28
|
+
This code will wait for the JavaScript variable `Joosy.Application.loading` to be set to false.
|
29
|
+
|
30
|
+
## Development environment
|
31
|
+
|
32
|
+
By its nature, the Hashbang gem requires your server to accept at least 2 concurrent connections. However, WEBrick and Thin (which are the servers most commonly used in development) will only serve one Rails instance (and one connection) at a time. That's why, if you want to debug your AJAX crawling, you'll have to use `unicorn` with at least 2 workers.
|
33
|
+
|
34
|
+
Note also that browser loading behavior differs between the development and production environments. In development Hashbang loads the browser lazily, so the crawler may take some time to respond. In production it starts and caches the browser instance at Rails startup to provide the best possible response speed.
|
data/Rakefile
ADDED
data/hashbang.gemspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Gem specification for hashbang.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "hashbang"
  spec.version  = "0.0.1.alpha"
  spec.platform = Gem::Platform::RUBY

  # Human-readable metadata
  spec.summary     = "Magic support of Google/Bing/... AJAX search indexing for your Rails apps"
  spec.email       = "boris@roundlake.ru"
  spec.homepage    = "http://roundlake.github.com/hashbang/"
  spec.description = "Hashbang will automatically enable serving HTML dumps for your AJAX web-applications according to Google conventions. Make your Rails AJAX applications indexable in no time."
  spec.authors     = ['Boris Staal']

  # Package every file tracked by git; code is loaded from lib/.
  spec.files         = `git ls-files`.split("\n")
  spec.require_paths = ["lib"]

  # Runtime dependencies, declared in the original order and without version constraints.
  ['watir-webdriver', 'headless'].each do |dependency|
    spec.add_dependency dependency
  end
end
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'hashbang'
|
data/lib/hashbang.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
module Hashbang
  # Renders a URL in a pooled browser and returns the resulting HTML dump.
  class Crawler
    # Opens +url+ in a browser grabbed from Pool and returns the page HTML.
    #
    # When a block is given, it is evaluated (with the browser as its only
    # argument) inside Watir::Wait.until, so the HTML is captured only after
    # the block's condition is satisfied.
    #
    # url   - the URL handed to browser.goto.
    # block - optional wait condition; receives the browser.
    #
    # Returns the value of browser.html, or nil when navigation, waiting or
    # dumping raised a StandardError. The failure is reported on stderr via
    # +warn+ instead of being swallowed silently. Errors raised by Pool.grab
    # itself are not rescued and propagate to the caller.
    def self.gimme(url, &block)
      browser = Pool.grab

      begin
        browser.goto url
        Watir::Wait.until { block.call browser } if block
        browser.html
      rescue => error
        # Keep the historical "nil on failure" contract, but leave a trace.
        warn "[hashbang] failed to render #{url}: #{error.class}: #{error.message}"
        nil
      ensure
        # Always hand the browser back, even when rendering failed.
        Pool.release(browser)
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require 'uri'

module Hashbang
  # Rack middleware that answers AJAX-crawling requests (those carrying an
  # +_escaped_fragment_+ query parameter) with an HTML dump rendered by
  # Crawler, and forwards every other request to the wrapped application.
  class Middleware
    # Matches +_escaped_fragment_=+ only at a parameter boundary, so that a
    # parameter such as +my_escaped_fragment_+ is not mistaken for it.
    FRAGMENT_PARAM = /(?:\A|&)_escaped_fragment_=/

    # application - the next Rack application in the chain.
    # config      - object responding to +pool_size+ and +waiter+.
    #
    # Starts a Headless display and sets up the browser pool.
    def initialize(application, config)
      @application = application
      @config = config

      Headless.new.start
      Pool.setup config.pool_size
    end

    # Rack entry point.
    #
    # environment - the Rack environment hash.
    #
    # Returns the wrapped application's response when the query string has no
    # +_escaped_fragment_=+ parameter. Otherwise returns a 200 text/html
    # response with the rendered page, or a 503 text/plain response when the
    # crawler returned nothing (previously a nil body was served with 200).
    def call(environment)
      query = environment['QUERY_STRING'].to_s
      return @application.call(environment) unless query =~ FRAGMENT_PARAM

      url = hashbang_url(environment, query)
      waiter = @config.waiter

      html =
        if waiter
          Crawler.gimme(url) { |browser| waiter.call(browser) }
        else
          Crawler.gimme(url)
        end

      if html
        [200, {"Content-Type" => "text/html"}, [html]]
      else
        [503, {"Content-Type" => "text/plain"}, ["Hashbang failed to render the requested page"]]
      end
    end

    private

    # Rebuilds the "pretty" hashbang URL from an escaped-fragment request:
    # scheme://host/path[?other_params]#!fragment
    def hashbang_url(environment, query)
      params, fragment = query.split(FRAGMENT_PARAM, 2)

      # REQUEST_PATH is only set by some servers; fall back to the keys the
      # Rack specification guarantees.
      path = environment['REQUEST_PATH'] ||
             "#{environment['SCRIPT_NAME']}#{environment['PATH_INFO']}"

      url = "#{environment['rack.url_scheme']}://#{environment['HTTP_HOST']}#{path}"
      url += "?#{params}" unless params.empty?
      url + "#!#{unescape_fragment(fragment)}"
    end

    # Percent-decodes the fragment value. A literal "+" is protected first
    # because form decoding would otherwise turn it into a space. Malformed
    # escapes leave the fragment untouched.
    def unescape_fragment(fragment)
      URI.decode_www_form_component(fragment.gsub('+', '%2B'))
    rescue ArgumentError
      fragment
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module Hashbang
  # Pool of Watir browsers kept in class variables.
  #
  # Browsers are created by +init+, handed out by +grab+ and given back by
  # +release+. No locking is performed around the bookkeeping arrays.
  class Pool
    cattr_accessor :inited     # true once +init+ has created the browsers
    cattr_accessor :pool_size  # number of browsers +init+ creates
    cattr_accessor :browsers   # browsers currently available
    cattr_accessor :taken      # browsers currently handed out

    # Records the pool size and creates the browsers right away, unless
    # running under Rails in the development environment, where creation is
    # deferred until the first +grab+.
    #
    # quantity - number of browsers to keep in the pool (default: 1).
    def self.setup(quantity=1)
      @@pool_size = quantity

      # `defined?` is required: referencing the bare constant raises NameError
      # when Rails is not loaded, so the old `!Rails` check could never pass.
      in_rails_development = defined?(Rails) && Rails.env == 'development'
      init unless in_rails_development
    end

    # Creates +pool_size+ browsers and registers an at_exit hook for each one
    # so that it is closed when the process exits.
    def self.init
      @@browsers = []
      @@taken = []

      @@pool_size.times do
        @@browsers << browser = Watir::Browser.new
        at_exit { browser.close if browser.exists? }
      end

      @@inited = true
    end

    # Takes a browser out of the pool, initializing the pool first when that
    # has not happened yet.
    #
    # Returns the browser.
    # Raises RuntimeError ("Pool is empty") when no browser is available.
    def self.grab
      init unless @@inited

      raise "Pool is empty" if @@browsers.length == 0

      @@taken << browser = @@browsers.pop
      browser
    end

    # Returns +browser+ to the list of available browsers.
    def self.release(browser)
      @@taken.delete browser
      @@browsers << browser
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Hashbang
  # Rails integration: exposes the +config.hashbang+ options and installs
  # Hashbang::Middleware into the application's middleware stack.
  class Railtie < Rails::Railtie
    # Defaults: a single browser and no wait condition.
    config.hashbang = ActiveSupport::OrderedOptions.new
    config.hashbang.pool_size = 1
    config.hashbang.waiter = false

    # Built-in wait conditions that can be selected by name.
    presets = {
      :joosy => -> browser { browser.execute_script("return Joosy.Application.loading") == false }
    }

    initializer "application_controller.initialize_hashbang" do |app|
      settings = app.config.hashbang

      # A Symbol waiter is a preset name; swap it for the matching lambda.
      settings.waiter = presets[settings.waiter] if settings.waiter.is_a?(Symbol)

      app.config.middleware.use "Hashbang::Middleware", settings
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,77 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hashbang
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Boris Staal
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-02-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: watir-webdriver
|
16
|
+
requirement: &70179137315820 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70179137315820
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: headless
|
27
|
+
requirement: &70179137315360 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70179137315360
|
36
|
+
description: Hashbang will automatically enable serving HTML dumps for your AJAX web-applications
|
37
|
+
according to Google conventions. Make your Rails AJAX applications indexable in
|
38
|
+
no time.
|
39
|
+
email: boris@roundlake.ru
|
40
|
+
executables: []
|
41
|
+
extensions: []
|
42
|
+
extra_rdoc_files: []
|
43
|
+
files:
|
44
|
+
- README.md
|
45
|
+
- Rakefile
|
46
|
+
- hashbang.gemspec
|
47
|
+
- init.rb
|
48
|
+
- lib/hashbang.rb
|
49
|
+
- lib/hashbang/crawler.rb
|
50
|
+
- lib/hashbang/middleware.rb
|
51
|
+
- lib/hashbang/pool.rb
|
52
|
+
- lib/hashbang/railtie.rb
|
53
|
+
homepage: http://roundlake.github.com/hashbang/
|
54
|
+
licenses: []
|
55
|
+
post_install_message:
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ! '>='
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: '0'
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ! '>'
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: 1.3.1
|
71
|
+
requirements: []
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.8.15
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: Magic support of Google/Bing/... AJAX search indexing for your Rails apps
|
77
|
+
test_files: []
|