headless-rails 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/headless/ajax_crawler/request_matcher.rb +32 -0
- data/lib/headless/{rails/escaped_fragment_extractor.rb → ajax_crawler/url_extractor.rb} +16 -12
- data/lib/headless/ajax_crawler.rb +8 -0
- data/lib/headless/rails/version.rb +1 -1
- data/lib/headless/rails.rb +4 -6
- data/spec/headless/ajax_crawler/request_matcher_spec.rb +37 -0
- data/spec/headless/ajax_crawler/url_extractor_spec.rb +59 -0
- data/spec/headless/rails_spec.rb +9 -6
- data/spec/spec_helper.rb +5 -0
- data/spec/support/example_requests.rb +48 -0
- metadata +11 -5
- data/spec/headless/rails/escaped_fragment_extractor_spec.rb +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8f528076eccde288b3cec6d1fe33dcbddbea353
|
4
|
+
data.tar.gz: 0ac9c6ce2bb2856dd14b8c24588d4fd546e0408d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4ec9616d598293f33e8876f5649e14b5bbfaee4fa7da824bd6b0990cb701025ed26799c9171c91b476cd57e68bad63b3ece0b2cbc7b63b6737b0df041d9614f9
|
7
|
+
data.tar.gz: ab7267fdb39ab4699200ccc3ca0cb0943697209c2c95eb0ba5d55c7970ce9c38b6bfc5106d1452f48b54dc44e3abc43dd6ba645026e29401cadc7821964e83fb
|
data/README.md
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Headless::Rails
|
2
2
|
|
3
|
+
[](https://travis-ci.org/headlessapp/headless-rails)
|
4
|
+
|
3
5
|
API wrapper for the headlessapp.com service.
|
4
6
|
|
5
7
|
Seamlessly integrate AJAX webcrawler support into your javascript application.
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Headless
|
2
|
+
module AjaxCrawler
|
3
|
+
class RequestMatcher< Struct.new(:request)
|
4
|
+
|
5
|
+
def self.call(request)
|
6
|
+
new(request).match?
|
7
|
+
end
|
8
|
+
|
9
|
+
def match?
|
10
|
+
is_get? && has_escaped_fragment?
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def is_get?
|
16
|
+
request.get?
|
17
|
+
end
|
18
|
+
|
19
|
+
def has_escaped_fragment?
|
20
|
+
get_params.has_key?(escaped_fragment_key)
|
21
|
+
end
|
22
|
+
|
23
|
+
def escaped_fragment_key
|
24
|
+
::Headless::AjaxCrawler::ESCAPED_FRAGMENT_KEY
|
25
|
+
end
|
26
|
+
|
27
|
+
def get_params
|
28
|
+
request.GET
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -1,24 +1,26 @@
|
|
1
1
|
require 'rack/utils'
|
2
2
|
|
3
3
|
module Headless
|
4
|
-
module
|
5
|
-
class
|
4
|
+
module AjaxCrawler
|
5
|
+
class UrlExtractor < Struct.new(:request)
|
6
6
|
|
7
7
|
def self.call(request)
|
8
8
|
new(request).call
|
9
9
|
end
|
10
10
|
|
11
11
|
def call
|
12
|
-
url = "#{
|
13
|
-
url << "?#{query_string}"
|
14
|
-
url << "##{
|
12
|
+
url = "#{scheme}://#{host_with_port}#{path}"
|
13
|
+
url << "?#{query_string}" unless query_string.empty?
|
14
|
+
url << "##{escaped_fragment}" unless escaped_fragment.nil? || escaped_fragment.empty?
|
15
15
|
url
|
16
16
|
end
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
-
|
21
|
-
|
20
|
+
# delegate to request
|
21
|
+
|
22
|
+
def scheme
|
23
|
+
request.scheme
|
22
24
|
end
|
23
25
|
|
24
26
|
def host_with_port
|
@@ -29,22 +31,24 @@ module Headless
|
|
29
31
|
request.path
|
30
32
|
end
|
31
33
|
|
32
|
-
def
|
33
|
-
|
34
|
+
def get_params
|
35
|
+
request.GET
|
34
36
|
end
|
35
37
|
|
38
|
+
# extract the fragment
|
39
|
+
|
36
40
|
def escaped_fragment
|
37
|
-
|
41
|
+
get_params[escaped_fragment_key]
|
38
42
|
end
|
39
43
|
|
40
44
|
def params_without_fragment
|
41
|
-
without =
|
45
|
+
without = get_params.dup
|
42
46
|
without.delete(escaped_fragment_key)
|
43
47
|
without
|
44
48
|
end
|
45
49
|
|
46
50
|
def escaped_fragment_key
|
47
|
-
::Headless::
|
51
|
+
::Headless::AjaxCrawler::ESCAPED_FRAGMENT_KEY
|
48
52
|
end
|
49
53
|
|
50
54
|
def query_string
|
data/lib/headless/rails.rb
CHANGED
@@ -1,17 +1,15 @@
|
|
1
1
|
require "headless/rails/version"
|
2
|
-
require "headless/
|
2
|
+
require "headless/ajax_crawler"
|
3
3
|
require "headless/api_client"
|
4
4
|
|
5
5
|
module Headless
|
6
6
|
module Rails
|
7
7
|
|
8
|
-
ESCAPED_FRAGMENT_KEY = "_escaped_fragment_".freeze
|
9
|
-
|
10
8
|
def respond_to_ajax_crawlers
|
11
|
-
if
|
12
|
-
url = ::Headless::
|
9
|
+
if ::Headless::AjaxCrawler::RequestMatcher.call(request)
|
10
|
+
url = ::Headless::AjaxCrawler::UrlExtractor.call(request)
|
13
11
|
crawled = ::Headless::APIClient.crawl(url)
|
14
|
-
render text
|
12
|
+
render :text => crawled.content if crawled.success?
|
15
13
|
end
|
16
14
|
end
|
17
15
|
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
require "headless/ajax_crawler/request_matcher"
|
3
|
+
|
4
|
+
RSpec.describe Headless::AjaxCrawler::RequestMatcher do
|
5
|
+
|
6
|
+
describe "call" do
|
7
|
+
|
8
|
+
it "returns false with no fragment" do
|
9
|
+
request = request_for_uri("/user/matthewrudy")
|
10
|
+
expect(call(request)).to be_false
|
11
|
+
end
|
12
|
+
|
13
|
+
it "returns false with no fragment and some params" do
|
14
|
+
request = request_for_uri("/user/matthewrudy?foo=bar")
|
15
|
+
expect(call(request)).to be_false
|
16
|
+
end
|
17
|
+
|
18
|
+
it "returns true with an empty escaped_fragment" do
|
19
|
+
request = request_for_uri("/user/matthewrudy?_escaped_fragment_=")
|
20
|
+
expect(call(request)).to be_true
|
21
|
+
end
|
22
|
+
|
23
|
+
it "returns true with an escaped_fragment" do
|
24
|
+
request = request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter")
|
25
|
+
expect(call(request)).to be_true
|
26
|
+
end
|
27
|
+
|
28
|
+
it "returns false for a POST request, even with an escaped_fragment" do
|
29
|
+
request = post_request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter")
|
30
|
+
expect(call(request)).to be_false
|
31
|
+
end
|
32
|
+
|
33
|
+
def call(request)
|
34
|
+
described_class.call(request)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
RSpec.describe Headless::AjaxCrawler::UrlExtractor do
|
5
|
+
|
6
|
+
describe "call" do
|
7
|
+
|
8
|
+
let(:request) { request_for_uri(uri) }
|
9
|
+
|
10
|
+
subject do
|
11
|
+
described_class.call(request)
|
12
|
+
end
|
13
|
+
|
14
|
+
context "with no escaped_fragment" do
|
15
|
+
|
16
|
+
let(:uri) { "/user/matthewrudy" }
|
17
|
+
|
18
|
+
it "returns a full url" do
|
19
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
context "with some params" do
|
24
|
+
|
25
|
+
let(:uri) { "/user/matthewrudy?foo=bar" }
|
26
|
+
|
27
|
+
it "keeps the params intact" do
|
28
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy?foo=bar"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
context "with an empty escaped_fragment" do
|
33
|
+
|
34
|
+
let(:uri) { "/user/matthewrudy?_escaped_fragment_=" }
|
35
|
+
|
36
|
+
it "returns a full url with the escaped fragment removed" do
|
37
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
context "with an escaped_fragment" do
|
42
|
+
|
43
|
+
let(:uri) { "/user/matthewrudy?_escaped_fragment_=/show/twitter" }
|
44
|
+
|
45
|
+
it "returns the full url with an anchor appended" do
|
46
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy#/show/twitter"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "with an escaped_fragment and some params" do
|
51
|
+
|
52
|
+
let(:uri) { "/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter" }
|
53
|
+
|
54
|
+
it "returns the full url with an anchor appended" do
|
55
|
+
expect(subject).to eq "http://localhost:3000/user/matthewrudy?foo=bar#/show/twitter"
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
data/spec/headless/rails_spec.rb
CHANGED
@@ -9,11 +9,11 @@ RSpec.describe Headless::Rails do
|
|
9
9
|
double(:controller,
|
10
10
|
:request => request,
|
11
11
|
:params => {},
|
12
|
-
:render => nil
|
12
|
+
:render => nil
|
13
13
|
)
|
14
14
|
end
|
15
15
|
|
16
|
-
let(:request) {
|
16
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar") }
|
17
17
|
let(:headless_response) { double(:headless_response, :success? => true, :content => :headless_content) }
|
18
18
|
|
19
19
|
describe "respond_to_ajax_crawlers" do
|
@@ -22,6 +22,9 @@ RSpec.describe Headless::Rails do
|
|
22
22
|
end
|
23
23
|
|
24
24
|
context "with no escaped_fragment" do
|
25
|
+
|
26
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar") }
|
27
|
+
|
25
28
|
before do
|
26
29
|
controller.stub(:params).and_return({})
|
27
30
|
end
|
@@ -39,11 +42,11 @@ RSpec.describe Headless::Rails do
|
|
39
42
|
end
|
40
43
|
|
41
44
|
context "with an escaped fragment" do
|
42
|
-
before do
|
43
|
-
controller.stub(:params).and_return({"_escaped_fragment_" => "something"})
|
44
45
|
|
45
|
-
|
46
|
-
|
46
|
+
let(:request) { request_for_uri("/user/matthewrudy?foo=bar&_escaped_fragment_=/show/twitter") }
|
47
|
+
|
48
|
+
before do
|
49
|
+
expect(Headless::APIClient).to receive(:crawl).with("http://localhost:3000/user/matthewrudy?foo=bar#/show/twitter").and_return(headless_response)
|
47
50
|
end
|
48
51
|
|
49
52
|
it "extracts the fragment, fetches content from the api, and renders its" do
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,48 @@
|
|
1
|
+
module ExampleRequests
|
2
|
+
|
3
|
+
def request_env_for_uri(uri, http_verb="GET")
|
4
|
+
path, query = uri.split("?")
|
5
|
+
|
6
|
+
{
|
7
|
+
"SERVER_SOFTWARE" => "thin 1.5.1 codename Straight Razor",
|
8
|
+
"HTTP_VERSION" => "HTTP/1.1",
|
9
|
+
"HTTP_CONNECTION" => "keep-alive",
|
10
|
+
"HTTP_ACCEPT" => "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
11
|
+
"HTTP_USER_AGENT" => "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.95 Safari/537.36",
|
12
|
+
"HTTP_ACCEPT_ENCODING" => "gzip,deflate,sdch",
|
13
|
+
"HTTP_ACCEPT_LANGUAGE" => "en-GB,en;q=0.8,en-US;q=0.6,zh;q=0.4",
|
14
|
+
"HTTP_COOKIE" => "foo=bar",
|
15
|
+
"GATEWAY_INTERFACE" => "CGI/1.2",
|
16
|
+
|
17
|
+
"SCRIPT_NAME" => "",
|
18
|
+
"REMOTE_ADDR" => "127.0.0.1",
|
19
|
+
|
20
|
+
# http method
|
21
|
+
"REQUEST_METHOD" => http_verb,
|
22
|
+
|
23
|
+
# protocol
|
24
|
+
"SERVER_PROTOCOL" => "HTTP/1.1",
|
25
|
+
"rack.url_scheme" => "http",
|
26
|
+
|
27
|
+
# host and port
|
28
|
+
"SERVER_NAME" => "localhost",
|
29
|
+
"HTTP_HOST" => "localhost:3000",
|
30
|
+
"SERVER_PORT" => "3000",
|
31
|
+
|
32
|
+
# path and query string
|
33
|
+
"REQUEST_PATH" => path,
|
34
|
+
"PATH_INFO" => path,
|
35
|
+
"REQUEST_URI" => uri,
|
36
|
+
"QUERY_STRING" => query,
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
def request_for_uri(uri)
|
41
|
+
::Rack::Request.new(request_env_for_uri(uri))
|
42
|
+
end
|
43
|
+
|
44
|
+
def post_request_for_uri(uri)
|
45
|
+
::Rack::Request.new(request_env_for_uri(uri, "POST"))
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: headless-rails
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthew Rudy Jacobs
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -109,16 +109,20 @@ files:
|
|
109
109
|
- README.md
|
110
110
|
- Rakefile
|
111
111
|
- headless-rails.gemspec
|
112
|
+
- lib/headless/ajax_crawler.rb
|
113
|
+
- lib/headless/ajax_crawler/request_matcher.rb
|
114
|
+
- lib/headless/ajax_crawler/url_extractor.rb
|
112
115
|
- lib/headless/api_client.rb
|
113
116
|
- lib/headless/api_response.rb
|
114
117
|
- lib/headless/rails.rb
|
115
|
-
- lib/headless/rails/escaped_fragment_extractor.rb
|
116
118
|
- lib/headless/rails/version.rb
|
119
|
+
- spec/headless/ajax_crawler/request_matcher_spec.rb
|
120
|
+
- spec/headless/ajax_crawler/url_extractor_spec.rb
|
117
121
|
- spec/headless/ap_response_spec.rb
|
118
122
|
- spec/headless/api_client_spec.rb
|
119
|
-
- spec/headless/rails/escaped_fragment_extractor_spec.rb
|
120
123
|
- spec/headless/rails_spec.rb
|
121
124
|
- spec/spec_helper.rb
|
125
|
+
- spec/support/example_requests.rb
|
122
126
|
homepage: http://www.headlessapp.com
|
123
127
|
licenses:
|
124
128
|
- MIT
|
@@ -144,8 +148,10 @@ signing_key:
|
|
144
148
|
specification_version: 4
|
145
149
|
summary: API wrapper for the HeadlessApp.com
|
146
150
|
test_files:
|
151
|
+
- spec/headless/ajax_crawler/request_matcher_spec.rb
|
152
|
+
- spec/headless/ajax_crawler/url_extractor_spec.rb
|
147
153
|
- spec/headless/ap_response_spec.rb
|
148
154
|
- spec/headless/api_client_spec.rb
|
149
|
-
- spec/headless/rails/escaped_fragment_extractor_spec.rb
|
150
155
|
- spec/headless/rails_spec.rb
|
151
156
|
- spec/spec_helper.rb
|
157
|
+
- spec/support/example_requests.rb
|
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
|
3
|
-
RSpec.describe Headless::Rails::EscapedFragmentExtractor do
|
4
|
-
|
5
|
-
def request_with_fragment(escaped_fragment)
|
6
|
-
double(:request,
|
7
|
-
:protocol => "http://",
|
8
|
-
:host_with_port => "localhost:3000",
|
9
|
-
:path => "/user/matthewrudy",
|
10
|
-
:GET => {
|
11
|
-
"_escaped_fragment_" => escaped_fragment,
|
12
|
-
}
|
13
|
-
)
|
14
|
-
end
|
15
|
-
|
16
|
-
describe "call" do
|
17
|
-
|
18
|
-
context "with an empty escaped_fragment" do
|
19
|
-
subject do
|
20
|
-
described_class.call(request_with_fragment(""))
|
21
|
-
end
|
22
|
-
|
23
|
-
it "returns a full url with the escaped fragment removed" do
|
24
|
-
expect(subject).to eq "http://localhost:3000/user/matthewrudy"
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
context "with an escaped_fragment" do
|
29
|
-
subject do
|
30
|
-
described_class.call(request_with_fragment("/show/twitter"))
|
31
|
-
end
|
32
|
-
|
33
|
-
it "returns the full url with an anchor appended" do
|
34
|
-
expect(subject).to eq "http://localhost:3000/user/matthewrudy#/show/twitter"
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|