diffbot_simple 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25486e768597bc81f971624d2c26575f8c4a787e
4
+ data.tar.gz: 57f2933c6f76d5c2e1c61144d86be56f1d4645fd
5
+ SHA512:
6
+ metadata.gz: 6effb80f0a1d27ded4133eb17287335f0a4e0203b63453745bd48cde2ba5c50a9a47805d015a96279cfd280a7b56d9131dfa15a7cc7c23cf7cbcfa39ea972ea7
7
+ data.tar.gz: 205b68c3ef70977c1570c8511324390edfe021d24e2cbcdf723fa65e77298e2055a22e8ad3645a118118cbd9600beb6506c1918d071b44b54ee67cdd8022ff2f
@@ -0,0 +1,3 @@
1
+ Autotest.add_hook :initialize do |at|
2
+ %w{.git coverage}.each {|exception| at.add_exception(exception)}
3
+ end
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+ Gemfile.lock
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
4
+ - 2.0.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+ gem 'coveralls', require: false
3
+ # Specify your gem's dependencies in diffbot_simple.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Lars Krantz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,109 @@
1
+ [![Coverage Status](https://coveralls.io/repos/larskrantz/diffbot_simple/badge.png)](https://coveralls.io/r/larskrantz/diffbot_simple)
2
+ [![Build Status](https://travis-ci.org/larskrantz/diffbot_simple.png?branch=master)](https://travis-ci.org/larskrantz/diffbot_simple)
3
+ [![Code Climate](https://codeclimate.com/github/larskrantz/diffbot_simple.png)](https://codeclimate.com/github/larskrantz/diffbot_simple)
4
+
5
+ DiffbotSimple
6
+ =============
7
+
8
+ A simple, nothing-fancy, helper for the [Diffbot API](http://www.diffbot.com/).
9
+
10
+ Will not objectify any responses, just pass on the json data as hash with symbolized keys.
11
+ One exception to that rule, when using CrawlBot and requesting a single_crawl, it will return the single item in the :jobs-array, and when requesting all, it will return the array in :jobs.
12
+ Send options to the API as named arguments; see the usage example below, which calls the article API with a `fields` argument.
13
+
14
+ ## Installation
15
+ ```ruby
16
+ gem 'diffbot_simple'
17
+ ```
18
+
19
+ ## Dependencies
20
+ * Ruby 2.0 or 2.1
21
+ * [rest-core](https://github.com/cardinalblue/rest-core)
22
+ * [multi_json](https://github.com/intridea/multi_json)
23
+
24
+
25
+ ## Usage
26
+ ```ruby
27
+ require 'diffbot_simple'
28
+
29
+ token = "my_diffbot_assigned_token"
30
+ client = DiffbotSimple::V2::Client.new token: token
31
+
32
+ article = client.article
33
+ url = "http://www.xconomy.com/san-francisco/2012/07/25/diffbot-is-using-computer-vision-to-reinvent-the-semantic-web/"
34
+ # Pass on diffbot parameters as options to the call
35
+ diffbot_response_as_symbolized_hash = article.single_article url: url, fields: "icon,title"
36
+ # =>
37
+ {
38
+ icon: "http://www.xconomy.com/wordpress/wp-content/themes/xconomy/images/favicon.ico",
39
+ author: "Wade Roush",
40
+ date: "7/25/12",
41
+ text: "...",
42
+ # and more, see http://www.diffbot.com/products/automatic/article/
43
+ }
44
+ ```
45
+
46
+ ### Supports these Diffbot apis
47
+ Please see [Diffbot Help and Documentation](http://www.diffbot.com/dev/docs/) for details and arguments.
48
+ Check the spec-directory too.
49
+
50
+ ```ruby
51
+ require 'diffbot_simple'
52
+
53
+ token = "my_diffbot_assigned_token"
54
+ client = DiffbotSimple::V2::Client.new token: token
55
+ url = "http://some_url_to_check"
56
+
57
+ # Custom API
58
+ custom = client.custom name: "my_custom_api_name"
59
+ response = custom.single_custom url: url
60
+
61
+ # Analyze API (beta)
62
+ analyze = client.analyze
63
+ response = analyze.single_analysis url: url
64
+
65
+ # Article API
66
+ article = client.article
67
+ response = article.single_article url: url
68
+
69
+ # Image API
70
+ image = client.image
71
+ response = image.single_image url: url
72
+
73
+ # Product API
74
+ product = client.product
75
+ response = product.single_product url: url
76
+
77
+ # Crawlbot API
78
+ crawlbot = client.crawlbot
79
+ all_my_crawls = crawlbot.all
80
+ current_settings = crawlbot.single_crawl name: "my_crawl"
81
+ # shorthand for using apiUrl, use the api object from client,
82
+ # it will create a correct value for you
83
+ # (custom, image, article, product or analyze for automatic)
84
+ # A call to single_crawl will create if not exists or update settings
85
+ settings = crawlbot.single_crawl name: "my_new_crawl", onlyProcessIfNew: 0, seeds: "http://www.upptec.se", apiUrl: custom
86
+ crawlbot.pause name: "my_new_crawl"
87
+ crawlbot.unpause name: "my_new_crawl"
88
+ crawlbot.restart name: "my_new_crawl"
89
+ result = crawlbot.result name: "my_new_crawl" # shorthand for downloading the JSON that is specified in :downloadJson
90
+ crawlbot.delete name: "my_new_crawl"
91
+ ```
92
+
93
+ ### On error
94
+ If Diffbot returns an error, a `DiffbotSimple::V2::DiffbotError` is raised carrying the information Diffbot passed on, as described at [http://www.diffbot.com/dev/docs/error/](http://www.diffbot.com/dev/docs/error/): `errorCode` is available as `error_code` and `error` as `message`.
95
+
96
+ ## TODO
97
+ * Frontpage API
98
+ * Bulk API
99
+ * Async http fetching
100
+ * Batch API
101
+
102
+ ## Contributing
103
+
104
+ 1. Fork it
105
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
106
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
107
+ 4. Push to the branch (`git push origin my-new-feature`)
108
+ 5. Create new Pull Request
109
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'diffbot_simple/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "diffbot_simple"
8
+ spec.version = DiffbotSimple::VERSION
9
+ spec.authors = ["Lars Krantz"]
10
+ spec.email = ["lars.krantz@alaz.se"]
11
+ spec.summary = %q{A simple, nothing-fancy, helper for the Diffbot API}
12
+ # spec.description = %q{}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.required_ruby_version = "~> 2.0"
17
+
18
+ spec.files = `git ls-files -z`.split("\x0")
19
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
21
+ spec.require_paths = ["lib"]
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.5"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency "autotest-standalone"
27
+ spec.add_development_dependency "webmock", "~> 1.17"
28
+
29
+ spec.add_runtime_dependency "rest-core", "~> 2.1"
30
+ spec.add_runtime_dependency "multi_json"
31
+
32
+ end
@@ -0,0 +1,14 @@
1
+ require 'diffbot_simple/version'
2
+ require 'diffbot_simple/symbolize'
3
+ require 'diffbot_simple/v2/diffbot_error'
4
+ require 'diffbot_simple/v2/api_helper'
5
+ require 'diffbot_simple/v2/client'
6
+ require 'diffbot_simple/v2/crawlbot'
7
+ require 'diffbot_simple/v2/article'
8
+ require 'diffbot_simple/v2/custom'
9
+ require 'diffbot_simple/v2/product'
10
+ require 'diffbot_simple/v2/image'
11
+ require 'diffbot_simple/v2/analyze'
12
+
13
+ module DiffbotSimple
14
+ end
@@ -0,0 +1,29 @@
1
module DiffbotSimple
  # Mixin that converts the keys of parsed JSON data to symbols.
  module Symbolize
    private

    # Recursively converts every Hash key to a Symbol (via +to_sym+).
    #
    # Hashes and Arrays are walked to any depth; every other value
    # (String, Integer, nil, ...) is returned untouched. This includes
    # scalars stored inside arrays, e.g. {"tags" => ["a", "b"]}.
    #
    # @param hash [Object] a Hash, an Array, or any other value
    # @return [Object] a new Hash/Array with symbolized keys, or the
    #   value itself when it is neither a Hash nor an Array
    def symbolize(hash)
      case hash
      when Hash
        hash.each_with_object({}) do |(key, value), memo|
          memo[key.to_sym] = symbolize(value)
        end
      when Array
        hash.map { |element| symbolize(element) }
      else
        hash
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module DiffbotSimple::V2
  # Wrapper for the Diffbot Analyze API.
  # Complies to http://www.diffbot.com/dev/docs/analyze
  class Analyze
    include ApiHelper

    # Sets the API path used for every request made by this object.
    def post_initialize
      @api = :analyze
    end

    # URL of this API for use as a crawl's apiUrl: the default URL built by
    # ApiHelper with "?mode=auto" appended.
    #
    # @return [String]
    def to_crawl_api_url
      default = super
      "#{default}?mode=auto"
    end

    # Requests an analysis of a single url.
    #
    # @param url [String] the url to analyze (required)
    # @param options [Hash] further arguments passed on to the request
    # @return [Hash] the response with symbolized keys
    # @raise [ArgumentError] when no url is given
    def single_analysis(url: nil, **options)
      raise ArgumentError, "Must pass an url to fetch" unless url
      # Splat explicitly: execute_call only accepts keyword arguments, and a
      # positional Hash is no longer converted to keywords in Ruby 3.
      execute_call(**options.merge(url: url))
    end
    alias :single_analyze :single_analysis
  end
end
@@ -0,0 +1,10 @@
1
+ require 'multi_json'
2
+ require 'rest-core'
3
+ require 'rest-client'
4
module DiffbotSimple::V2
  # HTTP client class assembled with the rest-core builder.
  # All request paths are relative to the Diffbot v2 endpoint given to
  # DefaultSite below.
  # NOTE(review): the `true` passed to JsonResponse presumably switches on
  # JSON decoding of responses -- confirm against the rest-core docs.
  ApiClient = RestCore::Builder.client do
    use(RestCore::DefaultSite, 'http://api.diffbot.com/v2/')
    use(RestCore::JsonResponse, true)
    use(RestCore::DefaultHeaders, {})
  end
end
@@ -0,0 +1,44 @@
1
module DiffbotSimple::V2
  # Shared behaviour for the API wrapper classes: stores the client and
  # token, performs the HTTP call, symbolizes the response and raises a
  # DiffbotError when the response carries an :error key.
  #
  # Including classes must define +post_initialize+ and set @api there.
  module ApiHelper
    include DiffbotSimple::Symbolize

    # @param api_client [Object] client used for the get/post calls
    # @param token [String] token merged into every request's arguments
    def initialize(api_client: nil, token: nil)
      @api_client = api_client
      @token = token
      post_initialize
    end

    # Hook called at the end of +initialize+; must be overridden to set @api.
    def post_initialize
      raise "Must overload to set api path"
    end

    # The client's site followed by this object's API path.
    #
    # @return [String]
    def to_crawl_api_url
      "#{api_client.site}#{api}"
    end

    private

    attr_reader :token, :api_client, :api

    # Performs the request and returns the cleaned-up response.
    #
    # @param custom_headers [Hash, nil] sent as the :headers option when given
    # @param method [Symbol] :get or :post
    # @param payload [Object, nil] passed to the client's post call
    # @param options [Hash] request arguments (the token is added automatically)
    # @return [Object] the response with symbolized keys
    # @raise [ArgumentError] when method is neither :get nor :post
    # @raise [DiffbotError] when the response contains an :error key
    def execute_call(custom_headers: nil, method: :get, payload: nil, **options)
      args = create_from_options options
      opts = {}
      opts[:headers] = custom_headers if custom_headers
      response =
        case method
        when :get then api_client.get(api, args, opts)
        when :post then api_client.post(api, payload, args, opts)
        else
          # Fail loudly instead of carrying a nil response further down.
          raise ArgumentError, "Unsupported HTTP method: #{method.inspect}"
        end
      cleanup response
    end

    # Symbolizes the response and raises if it describes an error.
    def cleanup(response)
      result_hash = symbolize response
      raise_if_error_response result_hash
      result_hash
    end

    # Adds the token to the arguments and expands :apiUrl when present.
    def create_from_options(options)
      merged = options.merge({token: token})
      merged[:apiUrl] = expand_api_url merged[:apiUrl] if merged[:apiUrl]
      merged
    end

    # Turns an api object into its url; any other value (for example a
    # plain url String) is passed through unchanged rather than dropped.
    def expand_api_url(api_url)
      return api_url.to_crawl_api_url if api_url.respond_to?(:to_crawl_api_url)
      api_url
    end

    # Raises DiffbotError built from :error and :errorCode when the result
    # is a Hash containing :error; non-Hash results are ignored.
    def raise_if_error_response(result_from_diffbot)
      return unless result_from_diffbot.kind_of?(Hash) && result_from_diffbot[:error]
      raise DiffbotError.new(result_from_diffbot[:error], result_from_diffbot[:errorCode])
    end
  end
end
@@ -0,0 +1,18 @@
1
module DiffbotSimple::V2
  # Wrapper for the Diffbot Article API.
  # Complies to http://www.diffbot.com/dev/docs/article
  class Article
    include ApiHelper

    # Sets the API path used for every request made by this object.
    def post_initialize
      @api = :article
    end

    # Requests a single article.
    #
    # When +body+ is given the request is sent as a POST with the body as
    # payload and a 'Content-Type: text/html' header; otherwise it is a GET.
    #
    # @param url [String] the article url (required)
    # @param custom_headers [Hash, nil] extra request headers
    # @param body [String, nil] content to post instead of letting the API fetch the url
    # @param options [Hash] further arguments passed on to the request
    # @return [Hash] the response with symbolized keys
    # @raise [ArgumentError] when no url is given
    def single_article(url: nil, custom_headers: nil, body: nil, **options)
      raise ArgumentError, "Must pass an url for the article api to fetch" unless url
      if body
        # Build a new hash so the caller's custom_headers is not modified.
        custom_headers = (custom_headers || {}).merge('Content-Type' => 'text/html')
        options[:method] = :post
      end
      # Splat explicitly: execute_call only accepts keyword arguments, and a
      # positional Hash is no longer converted to keywords in Ruby 3.
      execute_call(**options.merge(url: url, custom_headers: custom_headers, payload: body))
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require_relative 'api_client'
2
module DiffbotSimple::V2
  # Entry point of the library: holds the developer token and one shared
  # ApiClient, and hands out a new wrapper object per API on each call.
  class Client
    # @param token [String] developer token (required)
    # @raise [ArgumentError] when the token is nil or empty
    def initialize(token: nil)
      raise ArgumentError, "Must supply developer token" if token.to_s.empty?
      @token = token
      @api_client = ApiClient.new
    end

    # @return [Crawlbot]
    def crawlbot
      build Crawlbot
    end

    # @return [Article]
    def article
      build Article
    end

    # @param name [String] name of the custom api
    # @return [Custom]
    def custom(name: nil)
      build Custom, name: name
    end

    # @return [Product]
    def product
      build Product
    end

    # @return [Image]
    def image
      build Image
    end

    # @return [Analyze]
    def analyze
      build Analyze
    end

    private

    attr_reader :token, :api_client

    # Instantiates an api class with the shared client and token plus any
    # extra keyword arguments.
    def build(api_class, **extras)
      api_class.new(**extras.merge(api_client: api_client, token: token))
    end
  end
end
@@ -0,0 +1,75 @@
1
module DiffbotSimple::V2
  # Wrapper for the Diffbot Crawlbot API.
  # Complies to http://www.diffbot.com/dev/docs/crawl/
  class Crawlbot
    include ApiHelper

    # Sets the API path used for every request made by this object.
    def post_initialize
      @api = :crawl
    end

    # Get all your crawls as an array.
    # The "jobs" parameter is stripped and only the array is returned.
    #
    # @return [Array] your jobs from the "jobs"-array in api response
    def all
      execute_call()[:jobs]
    end

    # Gets, creates or updates a named crawl.
    #
    # @param name [String] name of the crawl to get/create/update
    # @param options [Hash] options from http://www.diffbot.com/dev/docs/crawl/ when updating or creating a crawl
    # @return [Hash] with current parameters for the single crawl
    # @raise [ArgumentError] when no name is given
    def single_crawl(name: nil, **options)
      raise ArgumentError, "Must pass a name for the crawl" unless name
      # Splat explicitly: execute_call only accepts keyword arguments, and a
      # positional Hash is no longer converted to keywords in Ruby 3.
      response = execute_call(**options.merge(name: name))
      jobs = response[:jobs]
      jobs.first
    end

    # Deletes a crawl.
    #
    # @param name [String] name of crawl to delete
    # @return [Hash] statusmessage from diffbot, for example: {response: "Successfully deleted job."}
    # @raise [ArgumentError] when no name is given
    def delete(name: nil)
      raise ArgumentError, "Must pass a name for the crawl to delete" unless name
      execute_call name: name, delete: 1
    end

    # Pauses a crawl.
    #
    # @param name [String] name of the crawl to pause
    # @return [Hash] with current parameters for the single crawl
    def pause(name: nil)
      single_crawl name: name, pause: 1
    end

    # Unpauses/resumes a crawl.
    #
    # @param name [String] name of the crawl to unpause
    # @return [Hash] with current parameters for the single crawl
    def unpause(name: nil)
      single_crawl name: name, pause: 0
    end

    # Restarts a crawl.
    #
    # @param name [String] name of the crawl to restart
    # @return [Hash] with current parameters for the crawl in jobs-key (as an array), and a response-text
    # @raise [ArgumentError] when no name is given
    def restart(name: nil)
      raise ArgumentError, "Must pass a name for the crawl to restart" unless name
      execute_call name: name, restart: 1
    end

    # Get the crawl-result (downloadJson from diffbot crawl).
    #
    # Looks up the crawl, then fetches the url found under :downloadJson.
    #
    # @param name [String] name of the crawl whose result should be fetched
    # @return [Array] of results (hashes)
    # @raise [ArgumentError] when no name is given (raised by single_crawl)
    def result(name: nil)
      # The default used to be the circular `name: name`; nil gives the
      # documented ArgumentError from single_crawl when the name is omitted.
      crawl = single_crawl name: name
      download_url = crawl[:downloadJson]
      response = api_client.get download_url
      symbolize response
    end
  end
end
@@ -0,0 +1,19 @@
1
require 'cgi' # CGI.escape is used to build the api path below

module DiffbotSimple::V2
  # Wrapper for a named Diffbot Custom API.
  # Complies to http://www.diffbot.com/dev/docs/custom
  class Custom
    include ApiHelper

    attr_reader :name

    # @param name [String] name of the custom api (required)
    # @param options [Hash] remaining keyword arguments, forwarded to ApiHelper#initialize
    # @raise [ArgumentError] when no name is given
    def initialize(name: nil, **options)
      raise ArgumentError, "Must pass a name for the custom api" unless name
      # @name must be set before super, because super calls post_initialize.
      @name = name
      # Splat explicitly: the parent initializer only accepts keyword
      # arguments, and a positional Hash is rejected in Ruby 3.
      super(**options)
    end

    # Sets the API path to "api/" followed by the escaped name.
    def post_initialize
      @api = "api/#{CGI::escape(name)}"
    end

    # Requests the custom api for a single url.
    #
    # @param url [String] the url to process (required)
    # @param options [Hash] further arguments passed on to the request
    # @return [Hash] the response with symbolized keys
    # @raise [ArgumentError] when no url is given
    def single_custom(url: nil, **options)
      raise ArgumentError, "Must pass an url for the custom api to fetch" unless url
      execute_call(**options.merge(url: url))
    end
  end
end
@@ -0,0 +1,9 @@
1
module DiffbotSimple::V2
  # Raised when a Diffbot response contains an error.
  # The error text is the exception message; the code is kept separately.
  class DiffbotError < StandardError
    # @return [Object, nil] the error code given at construction
    attr_reader :error_code

    # @param error_message [String] used as the exception message
    # @param error_code [Object, nil] optional code exposed via #error_code
    def initialize(error_message, error_code = nil)
      super(error_message)
      @error_code = error_code
    end
  end
end
@@ -0,0 +1,13 @@
1
module DiffbotSimple::V2
  # Wrapper for the Diffbot Image API.
  # Complies to http://www.diffbot.com/dev/docs/image
  class Image
    include ApiHelper

    # Sets the API path used for every request made by this object.
    def post_initialize
      @api = :image
    end

    # Requests the image api for a single url.
    #
    # @param url [String] the url to process (required)
    # @param options [Hash] further arguments passed on to the request
    # @return [Hash] the response with symbolized keys
    # @raise [ArgumentError] when no url is given
    def single_image(url: nil, **options)
      raise ArgumentError, "Must pass an url to fetch" unless url
      # Splat explicitly: execute_call only accepts keyword arguments, and a
      # positional Hash is no longer converted to keywords in Ruby 3.
      execute_call(**options.merge(url: url))
    end
  end
end
@@ -0,0 +1,13 @@
1
module DiffbotSimple::V2
  # Wrapper for the Diffbot Product API.
  # Complies to http://www.diffbot.com/dev/docs/product
  class Product
    include ApiHelper

    # Sets the API path used for every request made by this object.
    def post_initialize
      @api = :product
    end

    # Requests the product api for a single url.
    #
    # @param url [String] the url to process (required)
    # @param options [Hash] further arguments passed on to the request
    # @return [Hash] the response with symbolized keys
    # @raise [ArgumentError] when no url is given
    def single_product(url: nil, **options)
      raise ArgumentError, "Must pass an url to fetch" unless url
      # Splat explicitly: execute_call only accepts keyword arguments, and a
      # positional Hash is no longer converted to keywords in Ruby 3.
      execute_call(**options.merge(url: url))
    end
  end
end
@@ -0,0 +1,3 @@
1
module DiffbotSimple
  # Gem version; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Analyze do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:analyze) { client.analyze }
9
+ let(:api_url) { "#{base_url}/analyze" }
10
+ shared_examples_for "an analyze request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the analyze api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(analyze.to_crawl_api_url).to eql "#{api_url}?mode=auto"
21
+ end
22
+ end
23
+ context "when asking for an analyze with no options" do
24
+ let(:subject) { analyze.single_analysis url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "an analyze request"
27
+ end
28
+ context "when asking for an analyze with analyze options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:mode) { "article" }
31
+ let(:stats) { true }
32
+ let(:subject) { analyze.single_analyze url: url, stats: stats, mode: mode, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, stats: stats.to_s, mode: mode, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "an analyze request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Image do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:image) { client.image }
9
+ let(:api_url) { "#{base_url}/image" }
10
+ shared_examples_for "an image request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the image api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(image.to_crawl_api_url).to eql api_url
21
+ end
22
+ end
23
+ context "when asking for an image with no options" do
24
+ let(:subject) { image.single_image url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "an image request"
27
+ end
28
+ context "when asking for an image with image options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:callback) { "my_callback" }
31
+ let(:timeout) { 4200 }
32
+ let(:subject) { image.single_image url: url, timeout: timeout, callback: callback, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "an image request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Product do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:product) { client.product }
9
+ let(:api_url) { "#{base_url}/product" }
10
+ shared_examples_for "a product request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the product api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(product.to_crawl_api_url).to eql api_url
21
+ end
22
+ end
23
+ context "when asking for a product with no options" do
24
+ let(:subject) { product.single_product url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "a product request"
27
+ end
28
+ context "when asking for a product with product options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:callback) { "my_callback" }
31
+ let(:timeout) { 4200 }
32
+ let(:subject) { product.single_product url: url, timeout: timeout, callback: callback, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "a product request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,28 @@
1
+ require 'diffbot_simple'
2
+ require 'webmock/rspec'
3
+ require 'coveralls'
4
+ Coveralls.wear!
5
+ # This file was generated by the `rspec --init` command. Conventionally, all
6
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
7
+ # Require this file using `require "spec_helper"` to ensure that it is only
8
+ # loaded once.
9
+ #
10
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
11
+ RSpec.configure do |config|
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.run_all_when_everything_filtered = true
14
+ config.filter_run :focus
15
+
16
+ # Run specs in random order to surface order dependencies. If you find an
17
+ # order dependency and want to debug it, you can fix the order by providing
18
+ # the seed, which is printed after each run.
19
+ # --seed 1234
20
+ config.order = 'random'
21
+ end
22
+
23
+ def base_url
24
+ "http://api.diffbot.com/v2"
25
+ end
26
+ def token
27
+ "TestToken"
28
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Article do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:article) { client.article }
8
+ let(:api_url) { "#{base_url}/article" }
9
+ let(:single_article_response) do
10
+ {
11
+ body:
12
+ { :type=>"article", :icon=>"http://www.diffbot.com/favicon.ico", :title=>"Diffbot's New Product API Teaches Robots to Shop Online", :author=>"John Davi"}.to_json
13
+ }
14
+ end
15
+
16
+ shared_examples_for "an article request" do
17
+ before(:each) { stubbed_request }
18
+ it "should make a valid request to the article api" do
19
+ subject
20
+ expect(stubbed_request).to have_been_requested
21
+ end
22
+ it "should return the response body as an symbolized hash" do
23
+ expect(subject).to eql JSON.parse(single_article_response[:body], symbolize_names: true)
24
+ end
25
+ it "should respond and return the apis url in to_crawl_api_url" do
26
+ expect(article.to_crawl_api_url).to eql api_url
27
+ end
28
+ end
29
+ context "when asking for a single article with no additional options" do
30
+ let(:subject) { article.single_article url: url }
31
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(single_article_response) }
32
+ it_should_behave_like "an article request"
33
+ end
34
+
35
+ context "when asking for a single article with some additional options" do
36
+ let(:subject) { article.single_article url: url, fields: "meta,querystring,images(*)" }
37
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, fields: "meta,querystring,images(*)" }).to_return(single_article_response) }
38
+ it_should_behave_like "an article request"
39
+ end
40
+ context "when asking for a single article with custom headers" do
41
+ let(:subject) { article.single_article url: url, custom_headers: { "X-Forward-User-Agent" => "I AM CHROME" } }
42
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/article").with(query: {token: token, url: url }, headers: { "X-Forward-User-Agent" => "I AM CHROME" }).to_return(single_article_response) }
43
+ it_should_behave_like "an article request"
44
+ end
45
+
46
+ context "when posting a body directly to analyze" do
47
+ let(:body) { "<html><fake><body>" }
48
+ let(:subject) { article.single_article url: url, body: body }
49
+ let(:stubbed_request) { stub_request(:post, "#{base_url}/article").with(query: {token: token, url: url }, body: body).to_return(single_article_response) }
50
+ it_should_behave_like "an article request"
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+ module DiffbotSimple::V2
3
+ describe Client do
4
+ let(:subject) { Client.new token: "FOO_TOKEN" }
5
+ context "when initializing with a token" do
6
+ it "should not raise an error" do
7
+ expect{subject}.to_not raise_error
8
+ end
9
+ it "should respond to crawlbot" do
10
+ expect(subject).to respond_to :crawlbot
11
+ end
12
+ end
13
+ context 'when initializing without token' do
14
+ let(:subject) { Client.new }
15
+ it "should raise an ArgumentError" do
16
+ expect{subject}.to raise_error ArgumentError
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,113 @@
1
+ require 'spec_helper'
2
+ module DiffbotSimple::V2
3
+ describe Crawlbot do
4
+ let(:client) { Client.new token: token }
5
+ let(:custom) { client.custom name: "my_custom_api" }
6
+ let(:single_crawl_response_body) {{body: '{"jobs":[{"foo":"bar"}]}'}}
7
+ let(:name) { "crawl_name"}
8
+ let(:subject) { client.crawlbot }
9
+ context "when retreiving all crawls" do
10
+ let(:all) { stubbed_request;subject.all; }
11
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: {token: token}).to_return( single_crawl_response_body) }
12
+ it "should make a request to /crawl with the token as argument" do
13
+ all
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return an crawl array " do
17
+ expect(all).to eql([{ foo: 'bar' }])
18
+ end
19
+ end
20
+ context "when asking for a named crawl" do
21
+ let(:named_crawl) { stubbed_request; subject.single_crawl name: name, onlyProcessIfNew: 0, apiUrl: custom }
22
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, onlyProcessIfNew: 0, apiUrl: custom.to_crawl_api_url}).to_return single_crawl_response_body() }
23
+ it "should make a request to /crawl with the token and name as arguments" do
24
+ named_crawl
25
+ expect(stubbed_request).to have_been_requested
26
+ end
27
+ it "should return an crawl hash" do
28
+ expect(named_crawl).to eql({ foo: 'bar' })
29
+ end
30
+ end
31
+ context "when deleting a named crawl" do
32
+ let(:delete) { stubbed_request; subject.delete name: name }
33
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, delete: 1 }).to_return(body: '{"response":"Successfully deleted job." }') }
34
+ it "should make the request to delete it" do
35
+ delete
36
+ expect(stubbed_request).to have_been_requested
37
+ end
38
+ it "should return the faked response" do
39
+ expect(delete).to eql({response: "Successfully deleted job."})
40
+ end
41
+ end
42
+ context "when pausing or unpausing a named crawl" do
43
+ let(:pause) { stubbed_pause_request; subject.pause name: name }
44
+ let(:unpause) { stubbed_unpause_request; subject.unpause name: name }
45
+ let(:stubbed_pause_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, pause: 1 }).to_return(single_crawl_response_body) }
46
+ let(:stubbed_unpause_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, pause: 0 }).to_return(single_crawl_response_body) }
47
+ it "should make the request to pause it" do
48
+ pause
49
+ expect(stubbed_pause_request).to have_been_requested
50
+ end
51
+ it "should make the request to unpause it" do
52
+ unpause
53
+ expect(stubbed_unpause_request).to have_been_requested
54
+ end
55
+ end
56
+ context "when restarting a named crawl" do
57
+ let(:restart) { stubbed_request; subject.restart name: name }
58
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, restart: 1 }).to_return(single_crawl_response_body) }
59
+ it "should make the request to restart it" do
60
+ restart
61
+ expect(stubbed_request).to have_been_requested
62
+ end
63
+ end
64
+ context "when requesting a crawls result" do
65
+ let(:result) { stubbed_crawl_request;stubbed_result_request; subject.result name: name }
66
+ let(:test_download_url) { "http://google.com" }
67
+ let(:stubbed_crawl_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token}).to_return(body: "{\"jobs\":[{\"downloadJson\":\"#{test_download_url}\"}]}") }
68
+ let(:stubbed_result_request) { stub_request(:get, test_download_url).to_return(body: "[{'f':'b'}]") }
69
+ it "should make the two requests to get the results" do
70
+ result
71
+ expect(stubbed_crawl_request).to have_been_requested
72
+ expect(stubbed_result_request).to have_been_requested
73
+ end
74
+ end
75
+ describe "if diffbots response is an error" do
76
+ let(:error_from_diffbot) { { error: "Your token has exceeded the allowed number of calls, or has otherwise been throttled for API abuse.", errorCode: 429 }.to_json }
77
+ let(:stubbed_request) { stub_request(:get, /#{base_url}\/crawl*/).to_return(body: error_from_diffbot) }
78
+ shared_examples_for "an error" do
79
+ it "and raise an DiffbotError" do
80
+ expect{raiser}.to raise_error DiffbotError
81
+ end
82
+ end
83
+ context "on all" do
84
+ let(:raiser) { stubbed_request;subject.all; }
85
+ it_should_behave_like "an error"
86
+ end
87
+ context "on single_crawl" do
88
+ let(:raiser) { stubbed_request;subject.single_crawl name: name; }
89
+ it_should_behave_like "an error"
90
+ end
91
+ context "on delete" do
92
+ let(:raiser) { stubbed_request;subject.delete name: name; }
93
+ it_should_behave_like "an error"
94
+ end
95
+ context "on pause" do
96
+ let(:raiser) { stubbed_request;subject.pause name: name; }
97
+ it_should_behave_like "an error"
98
+ end
99
+ context "on unpause" do
100
+ let(:raiser) { stubbed_request;subject.unpause name: name; }
101
+ it_should_behave_like "an error"
102
+ end
103
+ context "on restart" do
104
+ let(:raiser) { stubbed_request;subject.restart name: name; }
105
+ it_should_behave_like "an error"
106
+ end
107
+ context "on result" do
108
+ let(:raiser) { stubbed_request;subject.result name: name; }
109
+ it_should_behave_like "an error"
110
+ end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,36 @@
1
+
2
+ require 'spec_helper'
3
+
4
+ module DiffbotSimple::V2 # Specs for the Custom API object obtained through Client#custom; HTTP traffic is stubbed with stub_request
5
+ describe Custom do
6
+ let(:client) { Client.new token: token } # `token` is not defined in this spec — presumably provided by spec_helper; confirm
7
+ let(:url) { "http://foo.bar" } # page URL handed to the custom API in every example
8
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} } # canned stub response; its JSON body is also the value the examples expect back
9
+ let(:custom_name) { "foobar" }
10
+ let(:api_url) { "#{base_url}/api/#{CGI::escape(custom_name)}" } # <base_url>/api/<CGI-escaped name>; `base_url` is defined outside this spec
11
+ let(:custom) { client.custom name: custom_name }
12
+ shared_examples_for "a custom request" do # the including context must define `subject` and `stubbed_request`
13
+ before(:each) { stubbed_request } # force the lazy let so the stub is registered before the call is made
14
+ it "should make a valid request to the custom api" do
15
+ subject
16
+ expect(stubbed_request).to have_been_requested
17
+ end
18
+ it "should return the response body as an symbolized hash" do
19
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true) # compares against the stub body parsed with symbol keys
20
+ end
21
+ it "should respond and return the apis url in to_crawl_api_url" do
22
+ expect(custom.to_crawl_api_url).to eql api_url
23
+ end
24
+ end
25
+ context "when asking for a custom api with no options" do
26
+ let(:subject) { custom.single_custom url: url }
27
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(single_response) } # stub constrained to exactly token + url in the query
28
+ it_should_behave_like "a custom request"
29
+ end
30
+ context "when asking for a custom api with custom options" do
31
+ let(:subject) { custom.single_custom url: url, timeout: 12000, callback: "my_callback" }
32
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: 12000, callback: "my_callback" }).to_return(single_response) } # the extra options are expected to appear as query parameters
33
+ it_should_behave_like "a custom request"
34
+ end
35
+ end
36
+ end
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diffbot_simple
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Lars Krantz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: autotest-standalone
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.17'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.17'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rest-core
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.1'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: multi_json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description:
112
+ email:
113
+ - lars.krantz@alaz.se
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".autotest"
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE
124
+ - README.md
125
+ - Rakefile
126
+ - diffbot_simple.gemspec
127
+ - lib/diffbot_simple.rb
128
+ - lib/diffbot_simple/symbolize.rb
129
+ - lib/diffbot_simple/v2/analyze.rb
130
+ - lib/diffbot_simple/v2/api_client.rb
131
+ - lib/diffbot_simple/v2/api_helper.rb
132
+ - lib/diffbot_simple/v2/article.rb
133
+ - lib/diffbot_simple/v2/client.rb
134
+ - lib/diffbot_simple/v2/crawlbot.rb
135
+ - lib/diffbot_simple/v2/custom.rb
136
+ - lib/diffbot_simple/v2/diffbot_error.rb
137
+ - lib/diffbot_simple/v2/image.rb
138
+ - lib/diffbot_simple/v2/product.rb
139
+ - lib/diffbot_simple/version.rb
140
+ - spec/analyze_spec.rb
141
+ - spec/image_spec.rb
142
+ - spec/product_spec.rb
143
+ - spec/spec_helper.rb
144
+ - spec/v2/article_spec.rb
145
+ - spec/v2/client_spec.rb
146
+ - spec/v2/crawlbot_spec.rb
147
+ - spec/v2/custom_spec.rb
148
+ homepage: ''
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - "~>"
159
+ - !ruby/object:Gem::Version
160
+ version: '2.0'
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.2.2
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: A simple, nothing-fancy, helper for the Diffbot API
172
+ test_files:
173
+ - spec/analyze_spec.rb
174
+ - spec/image_spec.rb
175
+ - spec/product_spec.rb
176
+ - spec/spec_helper.rb
177
+ - spec/v2/article_spec.rb
178
+ - spec/v2/client_spec.rb
179
+ - spec/v2/crawlbot_spec.rb
180
+ - spec/v2/custom_spec.rb