diffbot_simple 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 25486e768597bc81f971624d2c26575f8c4a787e
4
+ data.tar.gz: 57f2933c6f76d5c2e1c61144d86be56f1d4645fd
5
+ SHA512:
6
+ metadata.gz: 6effb80f0a1d27ded4133eb17287335f0a4e0203b63453745bd48cde2ba5c50a9a47805d015a96279cfd280a7b56d9131dfa15a7cc7c23cf7cbcfa39ea972ea7
7
+ data.tar.gz: 205b68c3ef70977c1570c8511324390edfe021d24e2cbcdf723fa65e77298e2055a22e8ad3645a118118cbd9600beb6506c1918d071b44b54ee67cdd8022ff2f
@@ -0,0 +1,3 @@
1
+ Autotest.add_hook :initialize do |at|
2
+ %w{.git coverage}.each {|exception| at.add_exception(exception)}
3
+ end
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+ Gemfile.lock
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
@@ -0,0 +1,4 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.1.0
4
+ - 2.0.0
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+ gem 'coveralls', require: false
3
+ # Specify your gem's dependencies in diffbot_simple.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 Lars Krantz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,109 @@
1
+ [![Coverage Status](https://coveralls.io/repos/larskrantz/diffbot_simple/badge.png)](https://coveralls.io/r/larskrantz/diffbot_simple)
2
+ [![Build Status](https://travis-ci.org/larskrantz/diffbot_simple.png?branch=master)](https://travis-ci.org/larskrantz/diffbot_simple)
3
+ [![Code Climate](https://codeclimate.com/github/larskrantz/diffbot_simple.png)](https://codeclimate.com/github/larskrantz/diffbot_simple)
4
+
5
+ DiffbotSimple
6
+ =============
7
+
8
+ A simple, nothing-fancy, helper for the [Diffbot API](http://www.diffbot.com/).
9
+
10
+ Will not objectify any responses, just pass on the json data as hash with symbolized keys.
11
+ There is one exception to that rule: when using Crawlbot, `single_crawl` returns the first item of the `:jobs` array, and `all` returns the whole `:jobs` array.
12
+ Send options to the API as named arguments; see the usage example below, which passes `url` and `fields` to the article API.
13
+
14
+ ## Installation
15
+ ```ruby
16
+ gem 'diffbot_simple'
17
+ ```
18
+
19
+ ## Dependencies
20
+ * Ruby 2.0 or 2.1
21
+ * [rest-core](https://github.com/cardinalblue/rest-core)
22
+ * [multi_json](https://github.com/intridea/multi_json)
23
+
24
+
25
+ ## Usage
26
+ ```ruby
27
+ require 'diffbot_simple'
28
+
29
+ token = "my_diffbot_assigned_token"
30
+ client = DiffbotSimple::V2::Client.new token: token
31
+
32
+ article = client.article
33
+ url = "http://www.xconomy.com/san-francisco/2012/07/25/diffbot-is-using-computer-vision-to-reinvent-the-semantic-web/"
34
+ # Pass on diffbot parameters as options to the call
35
+ diffbot_response_as_symbolized_hash = article.single_article url: url, fields: "icon,title"
36
+ # =>
37
+ {
38
+ icon: "http://www.xconomy.com/wordpress/wp-content/themes/xconomy/images/favicon.ico",
39
+ author: "Wade Roush",
40
+ date: "7/25/12",
41
+ text: "...",
42
+ # and more, see http://www.diffbot.com/products/automatic/article/
43
+ }
44
+ ```
45
+
46
+ ### Supports these Diffbot apis
47
+ Please see [Diffbot Help and Documentation](http://www.diffbot.com/dev/docs/) for details and arguments.
48
+ Check the spec-directory too.
49
+
50
+ ```ruby
51
+ require 'diffbot_simple'
52
+
53
+ token = "my_diffbot_assigned_token"
54
+ client = DiffbotSimple::V2::Client.new token: token
55
+ url = "http://some_url_to_check"
56
+
57
+ # Custom API
58
+ custom = client.custom name: "my_custom_api_name"
59
+ response = custom.single_custom url: url
60
+
61
+ # Analyze API (beta)
62
+ analyze = client.analyze
63
+ response = analyze.single_analysis url: url
64
+
65
+ # Article API
66
+ article = client.article
67
+ response = article.single_article url: url
68
+
69
+ # Image API
70
+ image = client.image
71
+ response = image.single_image url: url
72
+
73
+ # Product API
74
+ product = client.product
75
+ response = product.single_product url: url
76
+
77
+ # Crawlbot API
78
+ crawlbot = client.crawlbot
79
+ all_my_crawls = crawlbot.all
80
+ current_settings = crawlbot.single_crawl name: "my_crawl"
81
+ # shorthand for using apiUrl, use the api object from client,
82
+ # it will create a correct value for you
83
+ # (custom, image, article, product or analyze for automatic)
84
+ # A call to single_crawl will create if not exists or update settings
85
+ settings = crawlbot.single_crawl name: "my_new_crawl", onlyProcessIfNew: 0, seeds: "http://www.upptec.se", apiUrl: custom
86
+ crawlbot.pause name: "my_new_crawl"
87
+ crawlbot.unpause name: "my_new_crawl"
88
+ crawlbot.restart name: "my_new_crawl"
89
+ result = crawlbot.result name: "my_new_crawl" # shorthand for downloading the JSON that is specified in :downloadJson
90
+ crawlbot.delete name: "my_new_crawl"
91
+ ```
92
+
93
+ ### On error
94
+ If Diffbot returns an error (see [http://www.diffbot.com/dev/docs/error/](http://www.diffbot.com/dev/docs/error/)), a `DiffbotSimple::V2::DiffbotError` is raised; Diffbot's `errorCode` is available through `error_code` and its `error` text through `message`.
95
+
96
+ ## TODO
97
+ * Frontpage API
98
+ * Bulk API
99
+ * Async http fetching
100
+ * Batch API
101
+
102
+ ## Contributing
103
+
104
+ 1. Fork it
105
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
106
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
107
+ 4. Push to the branch (`git push origin my-new-feature`)
108
+ 5. Create new Pull Request
109
+
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'diffbot_simple/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "diffbot_simple"
8
+ spec.version = DiffbotSimple::VERSION
9
+ spec.authors = ["Lars Krantz"]
10
+ spec.email = ["lars.krantz@alaz.se"]
11
+ spec.summary = %q{A simple, nothing-fancy, helper for the Diffbot API}
12
+ # spec.description = %q{}
13
+ spec.homepage = ""
14
+ spec.license = "MIT"
15
+
16
+ spec.required_ruby_version = "~> 2.0"
17
+
18
+ spec.files = `git ls-files -z`.split("\x0")
19
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
20
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
21
+ spec.require_paths = ["lib"]
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.5"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency "autotest-standalone"
27
+ spec.add_development_dependency "webmock", "~> 1.17"
28
+
29
+ spec.add_runtime_dependency "rest-core", "~> 2.1"
30
+ spec.add_runtime_dependency "multi_json"
31
+
32
+ end
@@ -0,0 +1,14 @@
1
+ require 'diffbot_simple/version'
2
+ require 'diffbot_simple/symbolize'
3
+ require 'diffbot_simple/v2/diffbot_error'
4
+ require 'diffbot_simple/v2/api_helper'
5
+ require 'diffbot_simple/v2/client'
6
+ require 'diffbot_simple/v2/crawlbot'
7
+ require 'diffbot_simple/v2/article'
8
+ require 'diffbot_simple/v2/custom'
9
+ require 'diffbot_simple/v2/product'
10
+ require 'diffbot_simple/v2/image'
11
+ require 'diffbot_simple/v2/analyze'
12
+
13
+ module DiffbotSimple
14
+ end
@@ -0,0 +1,29 @@
1
+ module DiffbotSimple
2
+ module Symbolize
3
+ private
4
+ def y_combinator(&f)
5
+ lambda do |g|
6
+ f.call {|*args| g[g][*args]}
7
+ end.tap {|g| break g[g]}
8
+ end
9
+
10
+ def symbolize hash
11
+ return hash unless hash.kind_of? Hash or hash.kind_of? Array
12
+ sym_hash = y_combinator do |&f|
13
+ lambda do |h|
14
+ if h.kind_of? Array
15
+ h.map {|r| f.call(r)}
16
+ else
17
+ h.reduce({}) do |memo,(k,v)|
18
+ v = f.call(v) if v.kind_of? Hash
19
+ v = v.map {|u| f.call(u)} if v.kind_of? Array
20
+ memo[k.to_sym] = v
21
+ memo
22
+ end
23
+ end
24
+ end
25
+ end
26
+ sym_hash.call hash
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,18 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/analyze
3
+ class Analyze
4
+ include ApiHelper
5
+ def post_initialize
6
+ @api = :analyze
7
+ end
8
+ def to_crawl_api_url
9
+ default = super
10
+ "#{default}?mode=auto"
11
+ end
12
+ def single_analysis url: nil, **options
13
+ raise ArgumentError.new "Must pass an url to fetch" unless url
14
+ execute_call options.merge(url: url)
15
+ end
16
+ alias :single_analyze :single_analysis
17
+ end
18
+ end
@@ -0,0 +1,10 @@
1
+ require 'multi_json'
2
+ require 'rest-core'
3
+ require 'rest-client'
4
+ module DiffbotSimple::V2
5
+ ApiClient = RestCore::Builder.client do
6
+ use RestCore::DefaultSite , 'http://api.diffbot.com/v2/'
7
+ use RestCore::JsonResponse, true
8
+ use RestCore::DefaultHeaders, {}
9
+ end
10
+ end
@@ -0,0 +1,44 @@
1
+ module DiffbotSimple::V2
2
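+ 	# Shared behaviour for the API wrappers: stores the api client and token, builds the request arguments, symbolizes the response and raises DiffbotError on error responses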
3
+ module ApiHelper
4
+ include DiffbotSimple::Symbolize
5
+ def initialize api_client: nil, token: nil
6
+ @api_client = api_client
7
+ @token = token
8
+ post_initialize
9
+ end
10
+ def post_initialize
11
+ raise "Must overload to set api path"
12
+ end
13
+ def to_crawl_api_url
14
+ "#{api_client.site}#{api}"
15
+ end
16
+ private
17
+ attr_reader :token, :api_client, :api
18
+ def execute_call custom_headers: nil, method: :get, payload: nil, **options
19
+ args = create_from_options options
20
+ opts = {}
21
+ opts[:headers] = custom_headers if custom_headers
22
+ response = api_client.get(api, args, opts) if method == :get
23
+ response = api_client.post(api, payload, args, opts) if method == :post
24
+ cleanup response
25
+ end
26
+ def cleanup response
27
+ result_hash = symbolize response
28
+ raise_if_error_response result_hash
29
+ result_hash
30
+ end
31
+ def create_from_options options
32
+ merged = options.merge({token: token})
33
+ merged[:apiUrl] = expand_api_url merged[:apiUrl] if merged[:apiUrl]
34
+ merged
35
+ end
36
+ def expand_api_url api_url
37
+ api_url.to_crawl_api_url if api_url.respond_to?(:to_crawl_api_url)
38
+ end
39
+ def raise_if_error_response result_from_diffbot
40
+ return unless result_from_diffbot[:error]
41
+ raise DiffbotError.new(result_from_diffbot[:error], result_from_diffbot[:errorCode])
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,18 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/article
3
+ class Article
4
+ include ApiHelper
5
+ def post_initialize
6
+ @api = :article
7
+ end
8
+ def single_article url: nil, custom_headers: nil, body: nil, **options
9
+ raise ArgumentError.new "Must pass an url for the article api to fetch" unless url
10
+ if body
11
+ custom_headers ||= {}
12
+ custom_headers['Content-Type'] = 'text/html'
13
+ options[:method] = :post
14
+ end
15
+ execute_call options.merge(url: url, custom_headers: custom_headers, payload: body)
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,30 @@
1
+ require_relative 'api_client'
2
+ module DiffbotSimple::V2
3
+ class Client
4
+ def initialize token: nil
5
+ raise ArgumentError.new("Must supply developer token") if token.to_s.empty?
6
+ @token = token
7
+ @api_client = ApiClient.new
8
+ end
9
+ def crawlbot
10
+ Crawlbot.new api_client: api_client, token: token
11
+ end
12
+ def article
13
+ Article.new api_client: api_client, token: token
14
+ end
15
+ def custom name: nil
16
+ Custom.new api_client: api_client, token: token, name: name
17
+ end
18
+ def product
19
+ Product.new api_client: api_client, token: token
20
+ end
21
+ def image
22
+ Image.new api_client: api_client, token: token
23
+ end
24
+ def analyze
25
+ Analyze.new api_client: api_client, token: token
26
+ end
27
+ private
28
+ attr_reader :token, :api_client
29
+ end
30
+ end
@@ -0,0 +1,75 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/crawl/
3
+ class Crawlbot
4
+ include ApiHelper
5
+ def post_initialize
6
+ @api = :crawl
7
+ end
8
+ # Get all your crawls as an array
9
+ # The "jobs" parameter is stripped and only the array is returned
10
+ #
11
+ # @return [Array] your jobs from the "jobs"-array in api response
12
+ def all
13
+ execute_call()[:jobs]
14
+ end
15
+
16
+ # Gets, creates or updates a named crawl
17
+ #
18
+ # @name [String] name of the crawl to get/create/update
19
+ # @**options options from http://www.diffbot.com/dev/docs/crawl/ when updating or creating a crawl
20
+ # @return [Hash] with current parameters for the single crawl
21
+ def single_crawl name: nil, **options
22
+ raise ArgumentError.new "Must pass a name for the crawl" unless name
23
+ response = execute_call options.merge(name: name)
24
+ jobs = response[:jobs]
25
+ jobs.first
26
+ end
27
+
28
+ # Deletes a crawl
29
+ #
30
+ # @name [String] name of crawl to delete
31
+ # @return [Hash] statusmessage from diffbot, for example: {response: "Successfully deleted job."}
32
+ def delete name: nil
33
+ raise ArgumentError.new "Must pass a name for the crawl to delete" unless name
34
+ execute_call name: name, delete: 1
35
+ end
36
+
37
+ # Pauses a crawl
38
+ #
39
+ # @name [String] name of the crawl to pause
40
+ # @return [Hash] with current parameters for the single crawl
41
+ def pause name: nil
42
+ single_crawl name: name, pause: 1
43
+ end
44
+
45
+ # Unpauses/ resumes a crawl
46
+ #
47
+ # @name [String] name of the crawl to unpause
48
+ # @return [Hash] with current parameters for the single crawl
49
+ def unpause name: nil
50
+ single_crawl name: name, pause: 0
51
+ end
52
+
53
+ # Restarts a crawl
54
+ #
55
+ # @name [String] name of the crawl to restart
56
+ # @return [Hash] with current parameters for the crawl in jobs-key (as an array), and a response-text
57
+ def restart name: nil
58
+ raise ArgumentError.new "Must pass a name for the crawl to restart" unless name
59
+ execute_call name: name, restart: 1
60
+ end
61
+
62
+ # Get the crawl-result (downloadJson from diffbot crawl)
63
+ #
64
+ # @name [String] name of the crawl to restart
65
+ # @return [Array] of results (hashes)
66
+ def result name: name
67
+ crawl = single_crawl name: name
68
+ download_url = crawl[:downloadJson]
69
+ response = api_client.get download_url
70
+ symbolize response
71
+ end
72
+
73
+
74
+ end
75
+ end
@@ -0,0 +1,19 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/custom
3
+ class Custom
4
+ include ApiHelper
5
+ attr_reader :name
6
+ def initialize name: nil, **options
7
+ raise ArgumentError.new "Must pass a name for the custom api" unless name
8
+ @name = name
9
+ super options
10
+ end
11
+ def post_initialize
12
+ @api = "api/#{CGI::escape(name)}"
13
+ end
14
+ def single_custom url: nil, **options
15
+ raise ArgumentError.new "Must pass an url for the custom api to fetch" unless url
16
+ execute_call options.merge(url: url)
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,9 @@
1
+ module DiffbotSimple::V2
2
+ class DiffbotError < StandardError
3
+ attr_reader :error_code
4
+ def initialize error_message, error_code = nil
5
+ @error_code = error_code
6
+ super error_message
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,13 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/image
3
+ class Image
4
+ include ApiHelper
5
+ def post_initialize
6
+ @api = :image
7
+ end
8
+ def single_image url: nil, **options
9
+ raise ArgumentError.new "Must pass an url to fetch" unless url
10
+ execute_call options.merge(url: url)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,13 @@
1
+ module DiffbotSimple::V2
2
+ # Complies to http://www.diffbot.com/dev/docs/product
3
+ class Product
4
+ include ApiHelper
5
+ def post_initialize
6
+ @api = :product
7
+ end
8
+ def single_product url: nil, **options
9
+ raise ArgumentError.new "Must pass an url to fetch" unless url
10
+ execute_call options.merge(url: url)
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,3 @@
1
+ module DiffbotSimple
2
+ VERSION = "0.0.3"
3
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Analyze do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:analyze) { client.analyze }
9
+ let(:api_url) { "#{base_url}/analyze" }
10
+ shared_examples_for "an analyze request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the analyze api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(analyze.to_crawl_api_url).to eql "#{api_url}?mode=auto"
21
+ end
22
+ end
23
+ context "when asking for an analyze with no options" do
24
+ let(:subject) { analyze.single_analysis url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "an analyze request"
27
+ end
28
+ context "when asking for an analyze with analyze options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:mode) { "article" }
31
+ let(:stats) { true }
32
+ let(:subject) { analyze.single_analyze url: url, stats: stats, mode: mode, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, stats: stats.to_s, mode: mode, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "an analyze request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Image do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:image) { client.image }
9
+ let(:api_url) { "#{base_url}/image" }
10
+ shared_examples_for "an image request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the image api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(image.to_crawl_api_url).to eql api_url
21
+ end
22
+ end
23
+ context "when asking for an image with no options" do
24
+ let(:subject) { image.single_image url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "an image request"
27
+ end
28
+ context "when asking for an image with image options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:callback) { "my_callback" }
31
+ let(:timeout) { 4200 }
32
+ let(:subject) { image.single_image url: url, timeout: timeout, callback: callback, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "an image request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Product do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:single_response) { { body: {url: url, foo: "bar"}.to_json} }
8
+ let(:product) { client.product }
9
+ let(:api_url) { "#{base_url}/product" }
10
+ shared_examples_for "a product request" do
11
+ before(:each) { stubbed_request }
12
+ it "should make a valid request to the product api" do
13
+ subject
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return the response body as an symbolized hash" do
17
+ expect(subject).to eql JSON.parse(single_response[:body], symbolize_names: true)
18
+ end
19
+ it "should respond and return the apis url in to_crawl_api_url" do
20
+ expect(product.to_crawl_api_url).to eql api_url
21
+ end
22
+ end
23
+ context "when asking for a product with no options" do
24
+ let(:subject) { product.single_product url: url}
25
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url}).to_return(single_response) }
26
+ it_should_behave_like "a product request"
27
+ end
28
+ context "when asking for a product with product options" do
29
+ let(:fields) {"a,b,c"}
30
+ let(:callback) { "my_callback" }
31
+ let(:timeout) { 4200 }
32
+ let(:subject) { product.single_product url: url, timeout: timeout, callback: callback, fields: fields }
33
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, timeout: timeout, callback: callback, fields: fields}).to_return(single_response) }
34
+ it_should_behave_like "a product request"
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,28 @@
1
+ require 'diffbot_simple'
2
+ require 'webmock/rspec'
3
+ require 'coveralls'
4
+ Coveralls.wear!
5
+ # This file was generated by the `rspec --init` command. Conventionally, all
6
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
7
+ # Require this file using `require "spec_helper"` to ensure that it is only
8
+ # loaded once.
9
+ #
10
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
11
+ RSpec.configure do |config|
12
+ config.treat_symbols_as_metadata_keys_with_true_values = true
13
+ config.run_all_when_everything_filtered = true
14
+ config.filter_run :focus
15
+
16
+ # Run specs in random order to surface order dependencies. If you find an
17
+ # order dependency and want to debug it, you can fix the order by providing
18
+ # the seed, which is printed after each run.
19
+ # --seed 1234
20
+ config.order = 'random'
21
+ end
22
+
23
+ def base_url
24
+ "http://api.diffbot.com/v2"
25
+ end
26
+ def token
27
+ "TestToken"
28
+ end
@@ -0,0 +1,53 @@
1
+ require 'spec_helper'
2
+
3
+ module DiffbotSimple::V2
4
+ describe Article do
5
+ let(:client) { Client.new token: token }
6
+ let(:url) { "http://foo.bar" }
7
+ let(:article) { client.article }
8
+ let(:api_url) { "#{base_url}/article" }
9
+ let(:single_article_response) do
10
+ {
11
+ body:
12
+ { :type=>"article", :icon=>"http://www.diffbot.com/favicon.ico", :title=>"Diffbot's New Product API Teaches Robots to Shop Online", :author=>"John Davi"}.to_json
13
+ }
14
+ end
15
+
16
+ shared_examples_for "an article request" do
17
+ before(:each) { stubbed_request }
18
+ it "should make a valid request to the article api" do
19
+ subject
20
+ expect(stubbed_request).to have_been_requested
21
+ end
22
+ it "should return the response body as an symbolized hash" do
23
+ expect(subject).to eql JSON.parse(single_article_response[:body], symbolize_names: true)
24
+ end
25
+ it "should respond and return the apis url in to_crawl_api_url" do
26
+ expect(article.to_crawl_api_url).to eql api_url
27
+ end
28
+ end
29
+ context "when asking for a single article with no additional options" do
30
+ let(:subject) { article.single_article url: url }
31
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url }).to_return(single_article_response) }
32
+ it_should_behave_like "an article request"
33
+ end
34
+
35
+ context "when asking for a single article with some additional options" do
36
+ let(:subject) { article.single_article url: url, fields: "meta,querystring,images(*)" }
37
+ let(:stubbed_request) { stub_request(:get, api_url).with(query: {token: token, url: url, fields: "meta,querystring,images(*)" }).to_return(single_article_response) }
38
+ it_should_behave_like "an article request"
39
+ end
40
+ context "when asking for a single article with custom headers" do
41
+ let(:subject) { article.single_article url: url, custom_headers: { "X-Forward-User-Agent" => "I AM CHROME" } }
42
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/article").with(query: {token: token, url: url }, headers: { "X-Forward-User-Agent" => "I AM CHROME" }).to_return(single_article_response) }
43
+ it_should_behave_like "an article request"
44
+ end
45
+
46
+ context "when posting a body directly to analyze" do
47
+ let(:body) { "<html><fake><body>" }
48
+ let(:subject) { article.single_article url: url, body: body }
49
+ let(:stubbed_request) { stub_request(:post, "#{base_url}/article").with(query: {token: token, url: url }, body: body).to_return(single_article_response) }
50
+ it_should_behave_like "an article request"
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,20 @@
1
+ require 'spec_helper'
2
+ module DiffbotSimple::V2
3
+ describe Client do
4
+ let(:subject) { Client.new token: "FOO_TOKEN" }
5
+ context "when initializing with a token" do
6
+ it "should not raise an error" do
7
+ expect{subject}.to_not raise_error
8
+ end
9
+ it "should respond to crawlbot" do
10
+ expect(subject).to respond_to :crawlbot
11
+ end
12
+ end
13
+ context 'when initializing without token' do
14
+ let(:subject) { Client.new }
15
+ it "should raise an ArgumentError" do
16
+ expect{subject}.to raise_error ArgumentError
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,113 @@
1
+ require 'spec_helper'
2
+ module DiffbotSimple::V2
3
+ describe Crawlbot do
4
+ let(:client) { Client.new token: token }
5
+ let(:custom) { client.custom name: "my_custom_api" }
6
+ let(:single_crawl_response_body) {{body: '{"jobs":[{"foo":"bar"}]}'}}
7
+ let(:name) { "crawl_name"}
8
+ let(:subject) { client.crawlbot }
9
+ context "when retreiving all crawls" do
10
+ let(:all) { stubbed_request;subject.all; }
11
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: {token: token}).to_return( single_crawl_response_body) }
12
+ it "should make a request to /crawl with the token as argument" do
13
+ all
14
+ expect(stubbed_request).to have_been_requested
15
+ end
16
+ it "should return an crawl array " do
17
+ expect(all).to eql([{ foo: 'bar' }])
18
+ end
19
+ end
20
+ context "when asking for a named crawl" do
21
+ let(:named_crawl) { stubbed_request; subject.single_crawl name: name, onlyProcessIfNew: 0, apiUrl: custom }
22
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, onlyProcessIfNew: 0, apiUrl: custom.to_crawl_api_url}).to_return single_crawl_response_body() }
23
+ it "should make a request to /crawl with the token and name as arguments" do
24
+ named_crawl
25
+ expect(stubbed_request).to have_been_requested
26
+ end
27
+ it "should return an crawl hash" do
28
+ expect(named_crawl).to eql({ foo: 'bar' })
29
+ end
30
+ end
31
+ context "when deleting a named crawl" do
32
+ let(:delete) { stubbed_request; subject.delete name: name }
33
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, delete: 1 }).to_return(body: '{"response":"Successfully deleted job." }') }
34
+ it "should make the request to delete it" do
35
+ delete
36
+ expect(stubbed_request).to have_been_requested
37
+ end
38
+ it "should return the faked response" do
39
+ expect(delete).to eql({response: "Successfully deleted job."})
40
+ end
41
+ end
42
+ context "when pausing or unpausing a named crawl" do
43
+ let(:pause) { stubbed_pause_request; subject.pause name: name }
44
+ let(:unpause) { stubbed_unpause_request; subject.unpause name: name }
45
+ let(:stubbed_pause_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, pause: 1 }).to_return(single_crawl_response_body) }
46
+ let(:stubbed_unpause_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, pause: 0 }).to_return(single_crawl_response_body) }
47
+ it "should make the request to pause it" do
48
+ pause
49
+ expect(stubbed_pause_request).to have_been_requested
50
+ end
51
+ it "should make the request to unpause it" do
52
+ unpause
53
+ expect(stubbed_unpause_request).to have_been_requested
54
+ end
55
+ end
56
+ context "when restarting a named crawl" do
57
+ let(:restart) { stubbed_request; subject.restart name: name }
58
+ let(:stubbed_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token, restart: 1 }).to_return(single_crawl_response_body) }
59
+ it "should make the request to restart it" do
60
+ restart
61
+ expect(stubbed_request).to have_been_requested
62
+ end
63
+ end
64
+ context "when requesting a crawls result" do
65
+ let(:result) { stubbed_crawl_request;stubbed_result_request; subject.result name: name }
66
+ let(:test_download_url) { "http://google.com" }
67
+ let(:stubbed_crawl_request) { stub_request(:get, "#{base_url}/crawl").with(query: { name: name, token: token}).to_return(body: "{\"jobs\":[{\"downloadJson\":\"#{test_download_url}\"}]}") }
68
+ let(:stubbed_result_request) { stub_request(:get, test_download_url).to_return(body: "[{'f':'b'}]") }
69
+ it "should make the two requests to get the results" do
70
+ result
71
+ expect(stubbed_crawl_request).to have_been_requested
72
+ expect(stubbed_result_request).to have_been_requested
73
+ end
74
+ end
75
+ describe "if diffbots response is an error" do
76
+ let(:error_from_diffbot) { { error: "Your token has exceeded the allowed number of calls, or has otherwise been throttled for API abuse.", errorCode: 429 }.to_json }
77
+ let(:stubbed_request) { stub_request(:get, /#{base_url}\/crawl*/).to_return(body: error_from_diffbot) }
78
+ shared_examples_for "an error" do
79
+ it "and raise an DiffbotError" do
80
+ expect{raiser}.to raise_error DiffbotError
81
+ end
82
+ end
83
+ context "on all" do
84
+ let(:raiser) { stubbed_request;subject.all; }
85
+ it_should_behave_like "an error"
86
+ end
87
+ context "on single_crawl" do
88
+ let(:raiser) { stubbed_request;subject.single_crawl name: name; }
89
+ it_should_behave_like "an error"
90
+ end
91
+ context "on delete" do
92
+ let(:raiser) { stubbed_request;subject.delete name: name; }
93
+ it_should_behave_like "an error"
94
+ end
95
+ context "on pause" do
96
+ let(:raiser) { stubbed_request;subject.pause name: name; }
97
+ it_should_behave_like "an error"
98
+ end
99
+ context "on unpause" do
100
+ let(:raiser) { stubbed_request;subject.unpause name: name; }
101
+ it_should_behave_like "an error"
102
+ end
103
+ context "on restart" do
104
+ let(:raiser) { stubbed_request;subject.restart name: name; }
105
+ it_should_behave_like "an error"
106
+ end
107
+ context "on result" do
108
+ let(:raiser) { stubbed_request;subject.result name: name; }
109
+ it_should_behave_like "an error"
110
+ end
111
+ end
112
+ end
113
+ end
@@ -0,0 +1,36 @@
1
+
2
+ require 'spec_helper'
3
+
4
module DiffbotSimple::V2
  # Specs for the object returned by Client#custom: single_custom must hit
  # "<base_url>/api/<escaped name>" with the token, the url and any extra
  # options as query parameters, and return the parsed response body.
  describe Custom do
    let(:client) { Client.new token: token }
    let(:url) { "http://foo.bar" }
    # WebMock response definition shared by all stubs in this file.
    let(:single_response) { { body: { url: url, foo: "bar" }.to_json } }
    let(:custom_name) { "foobar" }
    let(:api_url) { "#{base_url}/api/#{CGI.escape(custom_name)}" }
    let(:custom) { client.custom name: custom_name }

    # Including contexts must define `subject` (the call under test) and
    # `stubbed_request` (the WebMock stub that call is expected to hit).
    shared_examples_for "a custom request" do
      # Register the stub before every example; the let alone is lazy.
      before(:each) { stubbed_request }

      it "should make a valid request to the custom api" do
        subject
        expect(stubbed_request).to have_been_requested
      end

      it "should return the response body as an symbolized hash" do
        expected = JSON.parse(single_response[:body], symbolize_names: true)
        expect(subject).to eql expected
      end

      it "should respond and return the apis url in to_crawl_api_url" do
        expect(custom.to_crawl_api_url).to eql api_url
      end
    end

    context "when asking for a custom api with no options" do
      subject { custom.single_custom url: url }

      let(:stubbed_request) do
        stub_request(:get, api_url)
          .with(query: { token: token, url: url })
          .to_return(single_response)
      end

      it_should_behave_like "a custom request"
    end

    context "when asking for a custom api with custom options" do
      subject { custom.single_custom url: url, timeout: 12000, callback: "my_callback" }

      let(:stubbed_request) do
        stub_request(:get, api_url)
          .with(query: { token: token, url: url, timeout: 12000, callback: "my_callback" })
          .to_return(single_response)
      end

      it_should_behave_like "a custom request"
    end
  end
end
metadata ADDED
@@ -0,0 +1,180 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: diffbot_simple
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Lars Krantz
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-02-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: autotest-standalone
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: webmock
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.17'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.17'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rest-core
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.1'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.1'
97
+ - !ruby/object:Gem::Dependency
98
+ name: multi_json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description:
112
+ email:
113
+ - lars.krantz@alaz.se
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".autotest"
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE
124
+ - README.md
125
+ - Rakefile
126
+ - diffbot_simple.gemspec
127
+ - lib/diffbot_simple.rb
128
+ - lib/diffbot_simple/symbolize.rb
129
+ - lib/diffbot_simple/v2/analyze.rb
130
+ - lib/diffbot_simple/v2/api_client.rb
131
+ - lib/diffbot_simple/v2/api_helper.rb
132
+ - lib/diffbot_simple/v2/article.rb
133
+ - lib/diffbot_simple/v2/client.rb
134
+ - lib/diffbot_simple/v2/crawlbot.rb
135
+ - lib/diffbot_simple/v2/custom.rb
136
+ - lib/diffbot_simple/v2/diffbot_error.rb
137
+ - lib/diffbot_simple/v2/image.rb
138
+ - lib/diffbot_simple/v2/product.rb
139
+ - lib/diffbot_simple/version.rb
140
+ - spec/analyze_spec.rb
141
+ - spec/image_spec.rb
142
+ - spec/product_spec.rb
143
+ - spec/spec_helper.rb
144
+ - spec/v2/article_spec.rb
145
+ - spec/v2/client_spec.rb
146
+ - spec/v2/crawlbot_spec.rb
147
+ - spec/v2/custom_spec.rb
148
+ homepage: ''
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - "~>"
159
+ - !ruby/object:Gem::Version
160
+ version: '2.0'
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubyforge_project:
168
+ rubygems_version: 2.2.2
169
+ signing_key:
170
+ specification_version: 4
171
+ summary: A simple, nothing-fancy, helper for the Diffbot API
172
+ test_files:
173
+ - spec/analyze_spec.rb
174
+ - spec/image_spec.rb
175
+ - spec/product_spec.rb
176
+ - spec/spec_helper.rb
177
+ - spec/v2/article_spec.rb
178
+ - spec/v2/client_spec.rb
179
+ - spec/v2/crawlbot_spec.rb
180
+ - spec/v2/custom_spec.rb