diffbot_simple 0.0.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +42 -20
- data/lib/diffbot_simple/symbolize.rb +11 -8
- data/lib/diffbot_simple/v2/analyze.rb +1 -6
- data/lib/diffbot_simple/v2/api_helper.rb +8 -2
- data/lib/diffbot_simple/v2/article.rb +1 -1
- data/lib/diffbot_simple/v2/bulk.rb +11 -0
- data/lib/diffbot_simple/v2/bulk_api.rb +8 -0
- data/lib/diffbot_simple/v2/client.rb +13 -6
- data/lib/diffbot_simple/v2/crawl.rb +53 -0
- data/lib/diffbot_simple/v2/crawlbot_api.rb +22 -0
- data/lib/diffbot_simple/v2/custom.rb +1 -5
- data/lib/diffbot_simple/v2/image.rb +0 -4
- data/lib/diffbot_simple/v2/product.rb +0 -4
- data/lib/diffbot_simple/version.rb +1 -1
- data/lib/diffbot_simple.rb +4 -1
- data/spec/serialize_test_data.json +38 -0
- data/spec/symbolize_spec.rb +17 -0
- data/spec/{analyze_spec.rb → v2/analyze_spec.rb} +4 -4
- data/spec/v2/article_spec.rb +12 -12
- data/spec/v2/bulk_api_spec.rb +54 -0
- data/spec/v2/bulk_spec.rb +56 -0
- data/spec/v2/client_spec.rb +35 -4
- data/spec/v2/crawl_spec.rb +66 -0
- data/spec/v2/crawlbot_api_spec.rb +54 -0
- data/spec/v2/custom_spec.rb +4 -4
- data/spec/{image_spec.rb → v2/image_spec.rb} +4 -4
- data/spec/{product_spec.rb → v2/product_spec.rb} +4 -4
- metadata +24 -11
- data/lib/diffbot_simple/v2/crawlbot.rb +0 -75
- data/spec/v2/crawlbot_spec.rb +0 -113
data/spec/v2/article_spec.rb
CHANGED
@@ -6,7 +6,7 @@ module DiffbotSimple::V2
|
|
6
6
|
let(:url) { "http://foo.bar" }
|
7
7
|
let(:article) { client.article }
|
8
8
|
let(:api_url) { "#{base_url}/article" }
|
9
|
-
let(:
|
9
|
+
let(:request_response) do
|
10
10
|
{
|
11
11
|
body:
|
12
12
|
{ :type=>"article", :icon=>"http://www.diffbot.com/favicon.ico", :title=>"Diffbot's New Product API Teaches Robots to Shop Online", :author=>"John Davi"}.to_json
|
@@ -20,33 +20,33 @@ module DiffbotSimple::V2
|
|
20
20
|
expect(stubbed_request).to have_been_requested
|
21
21
|
end
|
22
22
|
it "should return the response body as an symbolized hash" do
|
23
|
-
expect(subject).to eql JSON.parse(
|
23
|
+
expect(subject).to eql JSON.parse(request_response[:body], symbolize_names: true)
|
24
24
|
end
|
25
|
-
it "should respond and return the apis url in
|
26
|
-
expect(article.
|
25
|
+
it "should respond and return the apis url in to_api_url" do
|
26
|
+
expect(article.to_api_url).to eql api_url
|
27
27
|
end
|
28
28
|
end
|
29
29
|
context "when asking for a single article with no additional options" do
|
30
|
-
let(:subject) { article.
|
31
|
-
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(
|
30
|
+
let(:subject) { article.request url: url }
|
31
|
+
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(request_response) }
|
32
32
|
it_should_behave_like "an article request"
|
33
33
|
end
|
34
34
|
|
35
35
|
context "when asking for a single article with some additional options" do
|
36
|
-
let(:subject) { article.
|
37
|
-
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, fields: "meta,querystring,images(*)" }).to_return(
|
36
|
+
let(:subject) { article.request url: url, fields: "meta,querystring,images(*)" }
|
37
|
+
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, fields: "meta,querystring,images(*)" }).to_return(request_response) }
|
38
38
|
it_should_behave_like "an article request"
|
39
39
|
end
|
40
40
|
context "when asking for a single article with custom headers" do
|
41
|
-
let(:subject) { article.
|
42
|
-
let(:stubbed_request) { stub_request(:get, "#{base_url}/article").with(query: {token: token, url: url }, headers: { "X-Forward-User-Agent" => "I AM CHROME" }).to_return(
|
41
|
+
let(:subject) { article.request url: url, custom_headers: { "X-Forward-User-Agent" => "I AM CHROME" } }
|
42
|
+
let(:stubbed_request) { stub_request(:get, "#{base_url}/article").with(query: {token: token, url: url }, headers: { "X-Forward-User-Agent" => "I AM CHROME" }).to_return(request_response) }
|
43
43
|
it_should_behave_like "an article request"
|
44
44
|
end
|
45
45
|
|
46
46
|
context "when posting a body directly to analyze" do
|
47
47
|
let(:body) { "<html><fake><body>" }
|
48
|
-
let(:subject) { article.
|
49
|
-
let(:stubbed_request) { stub_request(:post, "#{base_url}/article").with(query: {token: token, url: url }, body: body).to_return(
|
48
|
+
let(:subject) { article.request url: url, body: body }
|
49
|
+
let(:stubbed_request) { stub_request(:post, "#{base_url}/article").with(query: {token: token, url: url }, body: body).to_return(request_response) }
|
50
50
|
it_should_behave_like "an article request"
|
51
51
|
end
|
52
52
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
module DiffbotSimple::V2
|
3
|
+
describe BulkApi do
|
4
|
+
before(:each) { stubbed_request }
|
5
|
+
let(:bulk_api) { BulkApi.new token: token, api_client: ApiClient.new }
|
6
|
+
let(:response_body) { {body: MultiJson.dump(response) } }
|
7
|
+
let(:filtered_response) { response[:jobs].select { |e| e[:type] == "bulk" } }
|
8
|
+
let(:response) { {jobs: [{ type: "bulk", foo: "bar" },{ type: "crawl", should_not: "return" }]} }
|
9
|
+
let(:name) { :my_bulk_job }
|
10
|
+
let(:bulk_url) { "#{base_url}/bulk" }
|
11
|
+
shared_examples_for "a correct single request" do
|
12
|
+
it "should make the stubbed_request" do
|
13
|
+
subject
|
14
|
+
expect(stubbed_request).to have_been_requested
|
15
|
+
end
|
16
|
+
it "should return a hashified response" do
|
17
|
+
expect(subject).to eql filtered_response.first
|
18
|
+
end
|
19
|
+
end
|
20
|
+
context "when asking for all bulk jobs" do
|
21
|
+
let(:subject) { bulk_api.all }
|
22
|
+
let(:stubbed_request) { stub_request(:get, bulk_url).with(query: {token: token}).to_return(response_body) }
|
23
|
+
it "should return the jobs-array" do
|
24
|
+
expect(subject).to eql filtered_response
|
25
|
+
end
|
26
|
+
it "should make the stubbed_request" do
|
27
|
+
subject
|
28
|
+
expect(stubbed_request).to have_been_requested
|
29
|
+
end
|
30
|
+
end
|
31
|
+
context "when asking for a named bulk job" do
|
32
|
+
let(:subject) { bulk_api.single name: name }
|
33
|
+
let(:stubbed_request) { stub_request(:get, bulk_url).with(query: {token: token, name: name.to_s}).to_return(response_body) }
|
34
|
+
it_should_behave_like "a correct single request"
|
35
|
+
end
|
36
|
+
context "when supplying more arguments to a named bulk job" do
|
37
|
+
let(:urls) { "http://foo.bar,http://bar.foo" }
|
38
|
+
let(:notifyEmail) { "noreply@foo.bar" }
|
39
|
+
let(:api_url) { "#{base_url}/product" }
|
40
|
+
let(:subject) { bulk_api.single name: name, urls: urls, notifyEmail: notifyEmail, apiUrl: api_url }
|
41
|
+
let(:stubbed_request) { stub_request(:get, bulk_url).with(query: {token: token, name: name.to_s, apiUrl: api_url, urls: urls, notifyEmail: notifyEmail}).to_return(response_body) }
|
42
|
+
it_should_behave_like "a correct single request"
|
43
|
+
end
|
44
|
+
context "when asking for results for a named bulk job" do
|
45
|
+
let(:download_url) { "http://foo.bar" }
|
46
|
+
let(:subject) { bulk_api.results url: download_url }
|
47
|
+
let(:stubbed_request) { stub_request(:get, download_url).to_return([]) }
|
48
|
+
it "should make the stubbed_request" do
|
49
|
+
subject
|
50
|
+
expect(stubbed_request).to have_been_requested
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
module DiffbotSimple::V2
|
3
|
+
describe Bulk do
|
4
|
+
let(:bulk_api) { double("bulk_api") }
|
5
|
+
let(:name) { "test" }
|
6
|
+
let(:subject) { Bulk.new bulk_api: bulk_api, name: name}
|
7
|
+
before(:each) { expect(bulk_api).to receive(:single).with(name: name).and_return({ notifyEmail: nil, downloadJson: "download_url" }) }
|
8
|
+
context "when pausing" do
|
9
|
+
it "should send pause = 1 to bulk api" do
|
10
|
+
expect(bulk_api).to receive(:single).with(name: name, pause: 1).and_return({})
|
11
|
+
subject.pause
|
12
|
+
end
|
13
|
+
end
|
14
|
+
context "when unpausing" do
|
15
|
+
it "should send pause = 0 to bulk api" do
|
16
|
+
expect(bulk_api).to receive(:single).with(name: name, pause: 0).and_return({})
|
17
|
+
subject.unpause
|
18
|
+
end
|
19
|
+
end
|
20
|
+
context "when deleting" do
|
21
|
+
it "should send delete = 1 to bulk api" do
|
22
|
+
expect(bulk_api).to receive(:single).with(name: name, delete: 1).and_return({})
|
23
|
+
subject.delete!
|
24
|
+
end
|
25
|
+
end
|
26
|
+
context "when downloading results" do
|
27
|
+
it "should call for results on bulk api" do
|
28
|
+
expect(bulk_api).to receive(:results).with(url: "download_url").and_return([])
|
29
|
+
subject.results
|
30
|
+
end
|
31
|
+
end
|
32
|
+
context "when updating general parameters" do
|
33
|
+
it "should send these to bulk api" do
|
34
|
+
expect(bulk_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar").and_return({})
|
35
|
+
subject.notifyEmail "foo@b.ar"
|
36
|
+
end
|
37
|
+
it "should even do it as an setter" do
|
38
|
+
expect(bulk_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar").and_return({})
|
39
|
+
subject.notifyEmail = "foo@b.ar"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
context "when updating several properties at once" do
|
43
|
+
it "should send them to bulk api" do
|
44
|
+
expect(bulk_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar", repeat: 7.0).and_return({})
|
45
|
+
subject.update notifyEmail: "foo@b.ar", repeat: 7.0
|
46
|
+
end
|
47
|
+
end
|
48
|
+
context "when adding an array of urls to process" do
|
49
|
+
let(:urls) {["http://foo.bar", "http://bar.foo"]}
|
50
|
+
it "should send them space delimited to bulk api" do
|
51
|
+
expect(bulk_api).to receive(:single).with(name: name, urls: urls.join(" ")).and_return({})
|
52
|
+
subject.process urls
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/spec/v2/client_spec.rb
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
module DiffbotSimple::V2
|
3
3
|
describe Client do
|
4
|
-
let(:
|
4
|
+
let(:fake_bulk_api) { double("bulk_api") }
|
5
|
+
let(:fake_crawlbot_api) { double("crawlbot_api") }
|
6
|
+
let(:subject) { Client.new token: "FOO_TOKEN", bulk_api: fake_bulk_api, crawlbot_api: fake_crawlbot_api }
|
5
7
|
context "when initializing with a token" do
|
6
8
|
it "should not raise an error" do
|
7
9
|
expect{subject}.to_not raise_error
|
8
10
|
end
|
9
|
-
it "should respond to crawlbot" do
|
10
|
-
expect(subject).to respond_to :crawlbot
|
11
|
-
end
|
12
11
|
end
|
13
12
|
context 'when initializing without token' do
|
14
13
|
let(:subject) { Client.new }
|
@@ -16,5 +15,37 @@ module DiffbotSimple::V2
|
|
16
15
|
expect{subject}.to raise_error ArgumentError
|
17
16
|
end
|
18
17
|
end
|
18
|
+
context "when asking for bulk, " do
|
19
|
+
context "all" do
|
20
|
+
let(:all_bulk) { subject.bulk }
|
21
|
+
it "should use bulk api" do
|
22
|
+
expect(fake_bulk_api).to receive(:all).and_return({})
|
23
|
+
all_bulk
|
24
|
+
end
|
25
|
+
end
|
26
|
+
context "named" do
|
27
|
+
let(:named) { subject.bulk name: :foo }
|
28
|
+
it "should use bulk api" do
|
29
|
+
expect(fake_bulk_api).to receive(:single).with({ name: :foo }).and_return({})
|
30
|
+
named
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
context "when asking for crawl, " do
|
35
|
+
context "all" do
|
36
|
+
let(:all_crawls) { subject.crawl }
|
37
|
+
it "should use crawlbot api" do
|
38
|
+
expect(fake_crawlbot_api).to receive(:all).and_return({})
|
39
|
+
all_crawls
|
40
|
+
end
|
41
|
+
end
|
42
|
+
context "named" do
|
43
|
+
let(:named) { subject.crawl name: :foo }
|
44
|
+
it "should use crawlbot api" do
|
45
|
+
expect(fake_crawlbot_api).to receive(:single).with({ name: :foo }).and_return({})
|
46
|
+
named
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
19
50
|
end
|
20
51
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
module DiffbotSimple::V2
|
3
|
+
describe Crawl do
|
4
|
+
let(:crawlbot_api) { double("crawlbot_api") }
|
5
|
+
let(:name) { "test" }
|
6
|
+
let(:subject) { Crawl.new crawlbot_api: crawlbot_api, name: name}
|
7
|
+
before(:each) { expect(crawlbot_api).to receive(:single).with(name: name).and_return({ notifyEmail: nil, downloadJson: "download_url" }) }
|
8
|
+
context "when pausing" do
|
9
|
+
it "should send pause = 1 to crawl api" do
|
10
|
+
expect(crawlbot_api).to receive(:single).with(name: name, pause: 1).and_return({})
|
11
|
+
subject.pause
|
12
|
+
end
|
13
|
+
end
|
14
|
+
context "when unpausing" do
|
15
|
+
it "should send pause = 0 to crawl api" do
|
16
|
+
expect(crawlbot_api).to receive(:single).with(name: name, pause: 0).and_return({})
|
17
|
+
subject.unpause
|
18
|
+
end
|
19
|
+
end
|
20
|
+
context "when deleting" do
|
21
|
+
it "should send delete = 1 to crawl api" do
|
22
|
+
expect(crawlbot_api).to receive(:single).with(name: name, delete: 1).and_return({})
|
23
|
+
subject.delete!
|
24
|
+
end
|
25
|
+
end
|
26
|
+
context "when restarting" do
|
27
|
+
it "should send restart = 1 to crawl api" do
|
28
|
+
expect(crawlbot_api).to receive(:single).with(name: name, restart: 1).and_return({})
|
29
|
+
subject.restart
|
30
|
+
end
|
31
|
+
end
|
32
|
+
context "when downloading results" do
|
33
|
+
it "should call for results on crawl api" do
|
34
|
+
expect(crawlbot_api).to receive(:results).with(url: "download_url").and_return([])
|
35
|
+
subject.results
|
36
|
+
end
|
37
|
+
end
|
38
|
+
context "when updating general parameters" do
|
39
|
+
it "should send these to crawl api" do
|
40
|
+
expect(crawlbot_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar").and_return({})
|
41
|
+
subject.notifyEmail "foo@b.ar"
|
42
|
+
end
|
43
|
+
it "should even do it as an setter" do
|
44
|
+
expect(crawlbot_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar").and_return({})
|
45
|
+
subject.notifyEmail = "foo@b.ar"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
context "when asking for current value as method call" do
|
49
|
+
it "should return an existing parameter" do
|
50
|
+
expect(subject.downloadJson).to eql "download_url"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
context "when asking to refresh" do
|
54
|
+
it "should ask api for parameters" do
|
55
|
+
expect(crawlbot_api).to receive(:single).with(name: name).and_return({})
|
56
|
+
subject.refresh
|
57
|
+
end
|
58
|
+
end
|
59
|
+
context "when updating several properties at once" do
|
60
|
+
it "should send them to crawl api" do
|
61
|
+
expect(crawlbot_api).to receive(:single).with(name: name, notifyEmail: "foo@b.ar", repeat: 7.0).and_return({})
|
62
|
+
subject.update notifyEmail: "foo@b.ar", repeat: 7.0
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
module DiffbotSimple::V2
|
3
|
+
describe CrawlbotApi do
|
4
|
+
before(:each) { stubbed_request }
|
5
|
+
let(:crawlbot_api) { CrawlbotApi.new token: token, api_client: ApiClient.new }
|
6
|
+
let(:response_body) { {body: MultiJson.dump(response) } }
|
7
|
+
let(:filtered_response) { response[:jobs].select { |e| e[:type] == "crawl" } }
|
8
|
+
let(:response) { {jobs: [{ type: "crawl", foo: "bar" },{ type: "bulk", should_not: "return" }]} }
|
9
|
+
let(:name) { :my_crawl }
|
10
|
+
let(:crawl_url) { "#{base_url}/crawl" }
|
11
|
+
shared_examples_for "a correct single request" do
|
12
|
+
it "should make the stubbed_request" do
|
13
|
+
subject
|
14
|
+
expect(stubbed_request).to have_been_requested
|
15
|
+
end
|
16
|
+
it "should return a hashified response" do
|
17
|
+
expect(subject).to eql filtered_response.first
|
18
|
+
end
|
19
|
+
end
|
20
|
+
context "when asking for all crawl jobs" do
|
21
|
+
let(:subject) { crawlbot_api.all }
|
22
|
+
let(:stubbed_request) { stub_request(:get, crawl_url).with(query: {token: token}).to_return(response_body) }
|
23
|
+
it "should return the jobs-array" do
|
24
|
+
expect(subject).to eql filtered_response
|
25
|
+
end
|
26
|
+
it "should make the stubbed_request" do
|
27
|
+
subject
|
28
|
+
expect(stubbed_request).to have_been_requested
|
29
|
+
end
|
30
|
+
end
|
31
|
+
context "when asking for a named crawl job" do
|
32
|
+
let(:subject) { crawlbot_api.single name: name }
|
33
|
+
let(:stubbed_request) { stub_request(:get, crawl_url).with(query: {token: token, name: name.to_s}).to_return(response_body) }
|
34
|
+
it_should_behave_like "a correct single request"
|
35
|
+
end
|
36
|
+
context "when supplying more arguments to a named crawl job" do
|
37
|
+
let(:seeds) { "http://foo.bar,http://bar.foo" }
|
38
|
+
let(:notifyEmail) { "noreply@foo.bar" }
|
39
|
+
let(:api_url) { "#{base_url}/product" }
|
40
|
+
let(:subject) { crawlbot_api.single name: name, seeds: seeds, notifyEmail: notifyEmail, apiUrl: api_url }
|
41
|
+
let(:stubbed_request) { stub_request(:get, crawl_url).with(query: {token: token, name: name.to_s, apiUrl: api_url, seeds: seeds, notifyEmail: notifyEmail}).to_return(response_body) }
|
42
|
+
it_should_behave_like "a correct single request"
|
43
|
+
end
|
44
|
+
context "when asking for results for a named crawl job" do
|
45
|
+
let(:download_url) { "http://foo.bar" }
|
46
|
+
let(:subject) { crawlbot_api.results url: download_url }
|
47
|
+
let(:stubbed_request) { stub_request(:get, download_url).to_return([]) }
|
48
|
+
it "should make the stubbed_request" do
|
49
|
+
subject
|
50
|
+
expect(stubbed_request).to have_been_requested
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/spec/v2/custom_spec.rb
CHANGED
@@ -18,17 +18,17 @@ module DiffbotSimple::V2
|
|
18
18
|
it "should return the response body as an symbolized hash" do
|
19
19
|
expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
|
20
20
|
end
|
21
|
-
it "should respond and return the apis url in
|
22
|
-
expect(custom.
|
21
|
+
it "should respond and return the apis url in to_api_url" do
|
22
|
+
expect(custom.to_api_url).to eql api_url
|
23
23
|
end
|
24
24
|
end
|
25
25
|
context "when asking for a custom api with no options" do
|
26
|
-
let(:subject) { custom.
|
26
|
+
let(:subject) { custom.request url: url }
|
27
27
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(single_response) }
|
28
28
|
it_should_behave_like "a custom request"
|
29
29
|
end
|
30
30
|
context "when asking for a custom api with custom options" do
|
31
|
-
let(:subject) { custom.
|
31
|
+
let(:subject) { custom.request url: url, timeout: 12000, callback: "my_callback" }
|
32
32
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: 12000, callback: "my_callback" }).to_return(single_response) }
|
33
33
|
it_should_behave_like "a custom request"
|
34
34
|
end
|
@@ -16,12 +16,12 @@ module DiffbotSimple::V2
|
|
16
16
|
it "should return the response body as an symbolized hash" do
|
17
17
|
expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
|
18
18
|
end
|
19
|
-
it "should respond and return the apis url in
|
20
|
-
expect(image.
|
19
|
+
it "should respond and return the apis url in to_api_url" do
|
20
|
+
expect(image.to_api_url).to eql api_url
|
21
21
|
end
|
22
22
|
end
|
23
23
|
context "when asking for an image with no options" do
|
24
|
-
let(:subject) { image.
|
24
|
+
let(:subject) { image.request url: url}
|
25
25
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
|
26
26
|
it_should_behave_like "an image request"
|
27
27
|
end
|
@@ -29,7 +29,7 @@ module DiffbotSimple::V2
|
|
29
29
|
let(:fields) {"a,b,c"}
|
30
30
|
let(:callback) { "my_callback" }
|
31
31
|
let(:timeout) { 4200 }
|
32
|
-
let(:subject) { image.
|
32
|
+
let(:subject) { image.request url: url, timeout: timeout, callback: callback, fields: fields }
|
33
33
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
|
34
34
|
it_should_behave_like "an image request"
|
35
35
|
end
|
@@ -16,12 +16,12 @@ module DiffbotSimple::V2
|
|
16
16
|
it "should return the response body as an symbolized hash" do
|
17
17
|
expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
|
18
18
|
end
|
19
|
-
it "should respond and return the apis url in
|
20
|
-
expect(product.
|
19
|
+
it "should respond and return the apis url in to_api_url" do
|
20
|
+
expect(product.to_api_url).to eql api_url
|
21
21
|
end
|
22
22
|
end
|
23
23
|
context "when asking for a product with no options" do
|
24
|
-
let(:subject) { product.
|
24
|
+
let(:subject) { product.request url: url}
|
25
25
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
|
26
26
|
it_should_behave_like "a product request"
|
27
27
|
end
|
@@ -29,7 +29,7 @@ module DiffbotSimple::V2
|
|
29
29
|
let(:fields) {"a,b,c"}
|
30
30
|
let(:callback) { "my_callback" }
|
31
31
|
let(:timeout) { 4200 }
|
32
|
-
let(:subject) { product.
|
32
|
+
let(:subject) { product.request url: url, timeout: timeout, callback: callback, fields: fields }
|
33
33
|
let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
|
34
34
|
it_should_behave_like "a product request"
|
35
35
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: diffbot_simple
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lars Krantz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -130,21 +130,29 @@ files:
|
|
130
130
|
- lib/diffbot_simple/v2/api_client.rb
|
131
131
|
- lib/diffbot_simple/v2/api_helper.rb
|
132
132
|
- lib/diffbot_simple/v2/article.rb
|
133
|
+
- lib/diffbot_simple/v2/bulk.rb
|
134
|
+
- lib/diffbot_simple/v2/bulk_api.rb
|
133
135
|
- lib/diffbot_simple/v2/client.rb
|
134
|
-
- lib/diffbot_simple/v2/crawlbot.rb
|
136
|
+
- lib/diffbot_simple/v2/crawl.rb
|
137
|
+
- lib/diffbot_simple/v2/crawlbot_api.rb
|
135
138
|
- lib/diffbot_simple/v2/custom.rb
|
136
139
|
- lib/diffbot_simple/v2/diffbot_error.rb
|
137
140
|
- lib/diffbot_simple/v2/image.rb
|
138
141
|
- lib/diffbot_simple/v2/product.rb
|
139
142
|
- lib/diffbot_simple/version.rb
|
140
|
-
- spec/analyze_spec.rb
|
141
|
-
- spec/image_spec.rb
|
142
|
-
- spec/product_spec.rb
|
143
|
+
- spec/serialize_test_data.json
|
143
144
|
- spec/spec_helper.rb
|
145
|
+
- spec/symbolize_spec.rb
|
146
|
+
- spec/v2/analyze_spec.rb
|
144
147
|
- spec/v2/article_spec.rb
|
148
|
+
- spec/v2/bulk_api_spec.rb
|
149
|
+
- spec/v2/bulk_spec.rb
|
145
150
|
- spec/v2/client_spec.rb
|
146
|
-
- spec/v2/crawlbot_spec.rb
|
151
|
+
- spec/v2/crawl_spec.rb
|
152
|
+
- spec/v2/crawlbot_api_spec.rb
|
147
153
|
- spec/v2/custom_spec.rb
|
154
|
+
- spec/v2/image_spec.rb
|
155
|
+
- spec/v2/product_spec.rb
|
148
156
|
homepage: ''
|
149
157
|
licenses:
|
150
158
|
- MIT
|
@@ -170,11 +178,16 @@ signing_key:
|
|
170
178
|
specification_version: 4
|
171
179
|
summary: A simple, nothing-fancy, helper for the Diffbot API
|
172
180
|
test_files:
|
173
|
-
- spec/analyze_spec.rb
|
174
|
-
- spec/image_spec.rb
|
175
|
-
- spec/product_spec.rb
|
181
|
+
- spec/serialize_test_data.json
|
176
182
|
- spec/spec_helper.rb
|
183
|
+
- spec/symbolize_spec.rb
|
184
|
+
- spec/v2/analyze_spec.rb
|
177
185
|
- spec/v2/article_spec.rb
|
186
|
+
- spec/v2/bulk_api_spec.rb
|
187
|
+
- spec/v2/bulk_spec.rb
|
178
188
|
- spec/v2/client_spec.rb
|
179
|
-
- spec/v2/crawlbot_spec.rb
|
189
|
+
- spec/v2/crawl_spec.rb
|
190
|
+
- spec/v2/crawlbot_api_spec.rb
|
180
191
|
- spec/v2/custom_spec.rb
|
192
|
+
- spec/v2/image_spec.rb
|
193
|
+
- spec/v2/product_spec.rb
|
@@ -1,75 +0,0 @@
|
|
1
|
-
module DiffbotSimple::V2
|
2
|
-
# Complies to http://www.diffbot.com/dev/docs/crawl/
|
3
|
-
class Crawlbot
|
4
|
-
include ApiHelper
|
5
|
-
def post_initialize
|
6
|
-
@api = :crawl
|
7
|
-
end
|
8
|
-
# Get all your crawls as an array
|
9
|
-
# The "jobs" parameter is stripped and only the array is returned
|
10
|
-
#
|
11
|
-
# @return [Array] your jobs from the "jobs"-array in api response
|
12
|
-
def all
|
13
|
-
execute_call()[:jobs]
|
14
|
-
end
|
15
|
-
|
16
|
-
# Gets, creates or updates a named crawl
|
17
|
-
#
|
18
|
-
# @name [String] name of the crawl to get/create/update
|
19
|
-
# @**options options from http://www.diffbot.com/dev/docs/crawl/ when updating or creating a crawl
|
20
|
-
# @return [Hash] with current parameters for the single crawl
|
21
|
-
def single_crawl name: nil, **options
|
22
|
-
raise ArgumentError.new "Must pass a name for the crawl" unless name
|
23
|
-
response = execute_call options.merge(name: name)
|
24
|
-
jobs = response[:jobs]
|
25
|
-
jobs.first
|
26
|
-
end
|
27
|
-
|
28
|
-
# Deletes a crawl
|
29
|
-
#
|
30
|
-
# @name [String] name of crawl to delete
|
31
|
-
# @return [Hash] statusmessage from diffbot, for example: {response: "Successfully deleted job."}
|
32
|
-
def delete name: nil
|
33
|
-
raise ArgumentError.new "Must pass a name for the crawl to delete" unless name
|
34
|
-
execute_call name: name, delete: 1
|
35
|
-
end
|
36
|
-
|
37
|
-
# Pauses a crawl
|
38
|
-
#
|
39
|
-
# @name [String] name of the crawl to pause
|
40
|
-
# @return [Hash] with current parameters for the single crawl
|
41
|
-
def pause name: nil
|
42
|
-
single_crawl name: name, pause: 1
|
43
|
-
end
|
44
|
-
|
45
|
-
# Unpauses/ resumes a crawl
|
46
|
-
#
|
47
|
-
# @name [String] name of the crawl to unpause
|
48
|
-
# @return [Hash] with current parameters for the single crawl
|
49
|
-
def unpause name: nil
|
50
|
-
single_crawl name: name, pause: 0
|
51
|
-
end
|
52
|
-
|
53
|
-
# Restarts a crawl
|
54
|
-
#
|
55
|
-
# @name [String] name of the crawl to restart
|
56
|
-
# @return [Hash] with current parameters for the crawl in jobs-key (as an array), and a response-text
|
57
|
-
def restart name: nil
|
58
|
-
raise ArgumentError.new "Must pass a name for the crawl to restart" unless name
|
59
|
-
execute_call name: name, restart: 1
|
60
|
-
end
|
61
|
-
|
62
|
-
# Get the crawl-result (downloadJson from diffbot crawl)
|
63
|
-
#
|
64
|
-
# @name [String] name of the crawl to get
|
65
|
-
# @return [Array] of results (hashes)
|
66
|
-
def result name: nil
|
67
|
-
crawl = single_crawl name: name
|
68
|
-
download_url = crawl[:downloadJson]
|
69
|
-
response = api_client.get download_url
|
70
|
-
symbolize response
|
71
|
-
end
|
72
|
-
|
73
|
-
|
74
|
-
end
|
75
|
-
end
|