headless-rails 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/headless/ajax_crawler/request_matcher.rb +32 -0
- data/lib/headless/{rails/escaped_fragment_extractor.rb → ajax_crawler/url_extractor.rb} +16 -12
- data/lib/headless/ajax_crawler.rb +8 -0
- data/lib/headless/rails/version.rb +1 -1
- data/lib/headless/rails.rb +4 -6
- data/spec/headless/ajax_crawler/request_matcher_spec.rb +37 -0
- data/spec/headless/ajax_crawler/url_extractor_spec.rb +59 -0
- data/spec/headless/rails_spec.rb +9 -6
- data/spec/spec_helper.rb +5 -0
- data/spec/support/example_requests.rb +48 -0
- metadata +11 -5
- data/spec/headless/rails/escaped_fragment_extractor_spec.rb +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8f528076eccde288b3cec6d1fe33dcbddbea353
|
4
|
+
data.tar.gz: 0ac9c6ce2bb2856dd14b8c24588d4fd546e0408d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4ec9616d598293f33e8876f5649e14b5bbfaee4fa7da824bd6b0990cb701025ed26799c9171c91b476cd57e68bad63b3ece0b2cbc7b63b6737b0df041d9614f9
|
7
|
+
data.tar.gz: ab7267fdb39ab4699200ccc3ca0cb0943697209c2c95eb0ba5d55c7970ce9c38b6bfc5106d1452f48b54dc44e3abc43dd6ba645026e29401cadc7821964e83fb
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Headless::Rails
|
2
2
|
|
3
|
+
[![Build Status](https://travis-ci.org/headlessapp/headless-rails.png?branch=master)](https://travis-ci.org/headlessapp/headless-rails)
|
4
|
+
|
3
5
|
API wrapper for the headlessapp.com service.
|
4
6
|
|
5
7
|
Seamlessly integrate AJAX webcrawler support into your javascript application.
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Headless
|
2
|
+
module AjaxCrawler
|
3
|
+
class RequestMatcher< Struct.new(:request)
|
4
|
+
|
5
|
+
def self.call(request)
|
6
|
+
new(request).match?
|
7
|
+
end
|
8
|
+
|
9
|
+
def match?
|
10
|
+
is_get? && has_escaped_fragment?
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def is_get?
|
16
|
+
request.get?
|
17
|
+
end
|
18
|
+
|
19
|
+
def has_escaped_fragment?
|
20
|
+
get_params.has_key?(escaped_fragment_key)
|
21
|
+
end
|
22
|
+
|
23
|
+
def escaped_fragment_key
|
24
|
+
::Headless::AjaxCrawler::ESCAPED_FRAGMENT_KEY
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_params
|
28
|
+
request.GET
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,24 +1,26 @@
|
|
1
1
|
require 'rack/utils'
|
2
2
|
|
3
3
|
module Headless
|
4
|
-
module
|
5
|
-
class
|
4
|
+
module AjaxCrawler
|
5
|
+
class UrlExtractor < Struct.new(:request)
|
6
6
|
|
7
7
|
def self.call(request)
|
8
8
|
new(request).call
|
9
9
|
end
|
10
10
|
|
11
11
|
def call
|
12
|
-
url = "#{
|
13
|
-
url << "?#{query_string}"
|
14
|
-
url << "##{
|
12
|
+
url = "#{scheme}://#{host_with_port}#{path}"
|
13
|
+
url << "?#{query_string}" unless query_string.empty?
|
14
|
+
url << "##{escaped_fragment}" unless escaped_fragment.nil? || escaped_fragment.empty?
|
15
15
|
url
|
16
16
|
end
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
# delegate to request
|
21
|
+
|
22
|
+
def scheme
|
23
|
+
request.scheme
|
22
24
|
end
|
23
25
|
|
24
26
|
def host_with_port
|
@@ -29,22 +31,24 @@ module Headless
|
|
29
31
|
request.path
|
30
32
|
end
|
31
33
|
|
32
|
-
def
|
33
|
-
|
34
|
+
def get_params
|
35
|
+
request.GET
|
34
36
|
end
|
35
37
|
|
38
|
+
# extract the fragment
|
39
|
+
|
36
40
|
def escaped_fragment
|
37
|
-
|
41
|
+
get_params[escaped_fragment_key]
|
38
42
|
end
|
39
43
|
|
40
44
|
def params_without_fragment
|
41
|
-
without =
|
45
|
+
without = get_params.dup
|
42
46
|
without.delete(escaped_fragment_key)
|
43
47
|
without
|
44
48
|
end
|
45
49
|
|
46
50
|
def escaped_fragment_key
|
47
|
-
::Headless::
|
51
|
+
::Headless::AjaxCrawler::ESCAPED_FRAGMENT_KEY
|
48
52
|
end
|
49
53
|
|
50
54
|
def query_string
|
data/lib/headless/rails.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
require "headless/rails/version"
|
2
|
-
require "headless/
|
2
|
+
require "headless/ajax_crawler"
|
3
3
|
require "headless/api_client"
|
4
4
|
|
5
5
|
module Headless
|
6
6
|
module Rails
|
7
7
|
|
8
|
-
ESCAPED_FRAGMENT_KEY = "_escaped_fragment_".freeze
|
9
|
-
|
10
8
|
def respond_to_ajax_crawlers
|
11
|
-
if
|
12
|
-
url = ::Headless::
|
9
|
+
if ::Headless::AjaxCrawler::RequestMatcher.call(request)
|
10
|
+
url = ::Headless::AjaxCrawler::UrlExtractor.call(request)
|
13
11
|
crawled = ::Headless::APIClient.crawl(url)
|
14
|
-
render text
|
12
|
+
render :text => crawled.content if crawled.success?
|
15
13
|
end
|
16
14
|
end
|
17
15
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require "headless/ajax_crawler/request_matcher"
|
3
|
+
|
4
|
+
RSpec.describe Headless::AjaxCrawler::RequestMatcher do
|
5
|
+
|
6
|
+
describe "call" do
|
7
|
+
|
8
|
+
it "returns false with no fragment" do
|
9
|
+
request = request_for_uri("/user/matthewrudy")
|
10
|
+
expect(call(request)).to be_false
|
11
|
+
end
|
12
|
+
|
13
|
+
it "returns false with no fragment and some params" do
|
14
|
+
request = request_for_uri("/user/matthewrudy?foo=bar")
|
15
|
+
expect(call(request)).to be_false
|
16
|
+
end
|
17
|
+
|
18
|
+
it "returns true with an empty escaped_fragment" do
|
19
|
+
request = request_for_uri("/user/matthewrudy?_escaped_fragment_=")
|
20
|
+
expect(call(request)).to be_true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "returns true with an escaped_fragment" do
|
24
|
+
request = request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter")
|
25
|
+
expect(call(request)).to be_true
|
26
|
+
end
|
27
|
+
|
28
|
+
it "returns false for a POST request, even with an escaped_fragment" do
|
29
|
+
request = post_request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter")
|
30
|
+
expect(call(request)).to be_false
|
31
|
+
end
|
32
|
+
|
33
|
+
def call(request)
|
34
|
+
described_class.call(request)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
RSpec.describe Headless::AjaxCrawler::UrlExtractor do
|
5
|
+
|
6
|
+
describe "call" do
|
7
|
+
|
8
|
+
let(:request) { request_for_uri(uri) }
|
9
|
+
|
10
|
+
subject do
|
11
|
+
described_class.call(request)
|
12
|
+
end
|
13
|
+
|
14
|
+
context "with no escaped_fragment" do
|
15
|
+
|
16
|
+
let(:uri) { "/user/matthewrudy" }
|
17
|
+
|
18
|
+
it "returns a full url" do
|
19
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context "with some params" do
|
24
|
+
|
25
|
+
let(:uri) { "/user/matthewrudy?foo=bar" }
|
26
|
+
|
27
|
+
it "keeps the params intact" do
|
28
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy?foo=bar"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context "with an empty escaped_fragment" do
|
33
|
+
|
34
|
+
let(:uri) { "/user/matthewrudy?_escaped_fragment_=" }
|
35
|
+
|
36
|
+
it "returns a full url with the escaped fragment removed" do
|
37
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "with an escaped_fragment" do
|
42
|
+
|
43
|
+
let(:uri) { "/user/matthewrudy?_escaped_fragment_=/show/twitter" }
|
44
|
+
|
45
|
+
it "returns the full url with an anchor appended" do
|
46
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy#/show/twitter"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "with an escaped_fragment and some params" do
|
51
|
+
|
52
|
+
let(:uri) { "/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter" }
|
53
|
+
|
54
|
+
it "returns the full url with an anchor appended" do
|
55
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy?foo=bar#/show/twitter"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/spec/headless/rails_spec.rb
CHANGED
@@ -9,11 +9,11 @@ RSpec.describe Headless::Rails do
|
|
9
9
|
double(:controller,
|
10
10
|
:request => request,
|
11
11
|
:params => {},
|
12
|
-
:render => nil
|
12
|
+
:render => nil
|
13
13
|
)
|
14
14
|
end
|
15
15
|
|
16
|
-
let(:request) {
|
16
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar") }
|
17
17
|
let(:headless_response) { double(:headless_response, :success? => true, :content => :headless_content) }
|
18
18
|
|
19
19
|
describe "respond_to_ajax_crawlers" do
|
@@ -22,6 +22,9 @@ RSpec.describe Headless::Rails do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
context "with no escaped_fragment" do
|
25
|
+
|
26
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar") }
|
27
|
+
|
25
28
|
before do
|
26
29
|
controller.stub(:params).and_return({})
|
27
30
|
end
|
@@ -39,11 +42,11 @@ RSpec.describe Headless::Rails do
|
|
39
42
|
end
|
40
43
|
|
41
44
|
context "with an escaped fragment" do
|
42
|
-
before do
|
43
|
-
controller.stub(:params).and_return({"_escaped_fragment_" => "something"})
|
44
45
|
|
45
|
-
|
46
|
-
|
46
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter") }
|
47
|
+
|
48
|
+
before do
|
49
|
+
expect(Headless::APIClient).to receive(:crawl).with("http://localhost:3000/user/matthewrudy?foo=bar#/show/twitter").and_return(headless_response)
|
47
50
|
end
|
48
51
|
|
49
52
|
it "extracts the fragment, fetches content from the api, and renders its" do
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,48 @@
|
|
1
|
+
module ExampleRequests
|
2
|
+
|
3
|
+
def request_env_for_uri(uri, http_verb="GET")
|
4
|
+
path, query = uri.split("?")
|
5
|
+
|
6
|
+
{
|
7
|
+
"SERVER_SOFTWARE" => "thin 1.5.1 codename Straight Razor",
|
8
|
+
"HTTP_VERSION" => "HTTP/1.1",
|
9
|
+
"HTTP_CONNECTION" => "keep-alive",
|
10
|
+
"HTTP_ACCEPT" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
11
|
+
"HTTP_USER_AGENT" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
|
12
|
+
"HTTP_ACCEPT_ENCODING" => "gzip,deflate,sdch",
|
13
|
+
"HTTP_ACCEPT_LANGUAGE" => "en-GB,en;q=0.8,en-US;q=0.6,zh;q=0.4",
|
14
|
+
"HTTP_COOKIE" => "foo=bar",
|
15
|
+
"GATEWAY_INTERFACE" => "CGI/1.2",
|
16
|
+
|
17
|
+
"SCRIPT_NAME" => "",
|
18
|
+
"REMOTE_ADDR" => "127.0.0.1",
|
19
|
+
|
20
|
+
# http method
|
21
|
+
"REQUEST_METHOD" => http_verb,
|
22
|
+
|
23
|
+
# protocol
|
24
|
+
"SERVER_PROTOCOL" => "HTTP/1.1",
|
25
|
+
"rack.url_scheme" => "http",
|
26
|
+
|
27
|
+
# host and port
|
28
|
+
"SERVER_NAME" => "localhost",
|
29
|
+
"HTTP_HOST" => "localhost:3000",
|
30
|
+
"SERVER_PORT" => "3000",
|
31
|
+
|
32
|
+
# path and query string
|
33
|
+
"REQUEST_PATH" => path,
|
34
|
+
"PATH_INFO" => path,
|
35
|
+
"REQUEST_URI" => uri,
|
36
|
+
"QUERY_STRING" => query,
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def request_for_uri(uri)
|
41
|
+
::Rack::Request.new(request_env_for_uri(uri))
|
42
|
+
end
|
43
|
+
|
44
|
+
def post_request_for_uri(uri)
|
45
|
+
::Rack::Request.new(request_env_for_uri(uri, "POST"))
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: headless-rails
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Rudy Jacobs
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -109,16 +109,20 @@ files:
|
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
111
|
- headless-rails.gemspec
|
112
|
+
- lib/headless/ajax_crawler.rb
|
113
|
+
- lib/headless/ajax_crawler/request_matcher.rb
|
114
|
+
- lib/headless/ajax_crawler/url_extractor.rb
|
112
115
|
- lib/headless/api_client.rb
|
113
116
|
- lib/headless/api_response.rb
|
114
117
|
- lib/headless/rails.rb
|
115
|
-
- lib/headless/rails/escaped_fragment_extractor.rb
|
116
118
|
- lib/headless/rails/version.rb
|
119
|
+
- spec/headless/ajax_crawler/request_matcher_spec.rb
|
120
|
+
- spec/headless/ajax_crawler/url_extractor_spec.rb
|
117
121
|
- spec/headless/ap_response_spec.rb
|
118
122
|
- spec/headless/api_client_spec.rb
|
119
|
-
- spec/headless/rails/escaped_fragment_extractor_spec.rb
|
120
123
|
- spec/headless/rails_spec.rb
|
121
124
|
- spec/spec_helper.rb
|
125
|
+
- spec/support/example_requests.rb
|
122
126
|
homepage: http://www.headlessapp.com
|
123
127
|
licenses:
|
124
128
|
- MIT
|
@@ -144,8 +148,10 @@ signing_key:
|
|
144
148
|
specification_version: 4
|
145
149
|
summary: API wrapper for the HeadlessApp.com
|
146
150
|
test_files:
|
151
|
+
- spec/headless/ajax_crawler/request_matcher_spec.rb
|
152
|
+
- spec/headless/ajax_crawler/url_extractor_spec.rb
|
147
153
|
- spec/headless/ap_response_spec.rb
|
148
154
|
- spec/headless/api_client_spec.rb
|
149
|
-
- spec/headless/rails/escaped_fragment_extractor_spec.rb
|
150
155
|
- spec/headless/rails_spec.rb
|
151
156
|
- spec/spec_helper.rb
|
157
|
+
- spec/support/example_requests.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe Headless::Rails::EscapedFragmentExtractor do
|
4
|
-
|
5
|
-
def request_with_fragment(escaped_fragment)
|
6
|
-
double(:request,
|
7
|
-
:protocol => "http://",
|
8
|
-
:host_with_port => "localhost:3000",
|
9
|
-
:path => "/user/matthewrudy",
|
10
|
-
:GET => {
|
11
|
-
"_escaped_fragment_" => escaped_fragment,
|
12
|
-
}
|
13
|
-
)
|
14
|
-
end
|
15
|
-
|
16
|
-
describe "call" do
|
17
|
-
|
18
|
-
context "with an empty escaped_fragment" do
|
19
|
-
subject do
|
20
|
-
described_class.call(request_with_fragment(""))
|
21
|
-
end
|
22
|
-
|
23
|
-
it "returns a full url with the escaped fragment removed" do
|
24
|
-
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
context "with an escaped_fragment" do
|
29
|
-
subject do
|
30
|
-
described_class.call(request_with_fragment("/show/twitter"))
|
31
|
-
end
|
32
|
-
|
33
|
-
it "returns the full url with an anchor appended" do
|
34
|
-
expect(subject).to eq "http://localhost:3000/user/matthewrudy#/show/twitter"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|