biffbot 0.0.3 → 1.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +33 -0
- data/.travis.yml +25 -0
- data/README.md +31 -9
- data/Rakefile +23 -1
- data/biffbot.gemspec +17 -13
- data/lib/biffbot.rb +9 -5
- data/lib/biffbot/analyze.rb +9 -0
- data/lib/biffbot/article.rb +9 -0
- data/lib/biffbot/base.rb +38 -36
- data/lib/biffbot/bulk.rb +75 -0
- data/lib/biffbot/custom.rb +10 -0
- data/lib/biffbot/image.rb +9 -0
- data/lib/biffbot/product.rb +9 -0
- data/lib/biffbot/version.rb +1 -1
- data/spec/lib/supported/analyze_spec.rb +32 -0
- data/spec/lib/supported/article_spec.rb +28 -0
- data/spec/lib/supported/image_spec.rb +28 -0
- data/spec/lib/supported/product_spec.rb +33 -0
- data/spec/lib/unsupported/bulk_spec.rb +23 -0
- data/spec/lib/unsupported/custom_spec.rb +28 -0
- data/spec/spec_helper.rb +1 -2
- metadata +83 -17
- data/spec/biffbot_spec.rb +0 -21
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MzkzZWQzN2MyODFkMmNiZjU4MTU5NWM5ZTk3ZTI4MWM2ODBkMDU5OA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
Yzg4ZTdhMjI0MzM2NmMxMTVhMjA2MTIxOTUzZTMwYTdkOWRmZTQ0NA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDNmMWQxZWY5YmJhMmQ0YzRiMjZlMTNkMDY1Yjg1OTJkNmYwYzQ4Yzc1MGJl
|
10
|
+
NzYwYjc4MzU4NWFmNjNhOWQ4MDIzNjliZjJkZTJmM2U2Y2Y5ZDI0OThiYTkw
|
11
|
+
MTA2ZjIxNTAxYjcwYzg2Yzc2ZWM4NWQ0ZjQ2ZWFmM2FlYWVhYzE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
Nzg5ZjY0MDFmYmMyYzlhNjhlY2FjM2U0ZmQ2NWFjNTNlZjg1ODczNjEzOTkw
|
14
|
+
MGVkMzE5ODNkYzM4MTFhM2Q3OTNhNGY1YzMzMDI5MGQxYzFmNjU5MzY1YTJl
|
15
|
+
YmE2YTk4ODIxNjg2YjNhYWMzODg4YzBmMjc0MWZkZGYyYTU0NzU=
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
Encoding:
|
2
|
+
Enabled: false
|
3
|
+
|
4
|
+
Style/HashSyntax:
|
5
|
+
Enabled: true
|
6
|
+
|
7
|
+
Style/MethodLength:
|
8
|
+
Enabled: false
|
9
|
+
|
10
|
+
LineLength:
|
11
|
+
Enabled: false
|
12
|
+
|
13
|
+
MethodDefParentheses:
|
14
|
+
EnforcedStyle: require_no_parentheses
|
15
|
+
|
16
|
+
SpaceInsideHashLiteralBraces:
|
17
|
+
EnforcedStyle: no_space
|
18
|
+
|
19
|
+
SignalException:
|
20
|
+
EnforcedStyle: only_raise
|
21
|
+
|
22
|
+
Documentation:
|
23
|
+
Enabled: false
|
24
|
+
|
25
|
+
ClassAndModuleChildren:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
# This rule doesn't work so well with multi argument exception ctors
|
29
|
+
RaiseArgs:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
EachWithObject:
|
33
|
+
Enabled: false
|
data/.travis.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
language: ruby
|
2
|
+
cache: bundler
|
3
|
+
rvm:
|
4
|
+
- ruby-1.9.3
|
5
|
+
- ruby-2.0.0
|
6
|
+
- ruby-2.1
|
7
|
+
- ruby-head
|
8
|
+
script: bundle exec rake test
|
9
|
+
notifications:
|
10
|
+
email:
|
11
|
+
recipients:
|
12
|
+
- anurag@mohanty.io
|
13
|
+
on_success:
|
14
|
+
- always|never|change
|
15
|
+
on_failure:
|
16
|
+
- always|never|change
|
17
|
+
deploy:
|
18
|
+
provider: rubygems
|
19
|
+
api_key:
|
20
|
+
secure: rzBfMCE2wPxagwWN6ttk5IwCdnX5cI+QRDN9gBBjCWOsqn8cZQDzEfBs1S8zM79idbGY9bV62N8Dr5SgLitYc71YN8HnvNFBg9jjAlvu6ytS4o9L3LroS/cPPpDCgerqTli7Ol9XSjvN0KKHAeFyOZds8qSrn1vvqELnMDn9dOc=
|
21
|
+
gem: biffbot
|
22
|
+
on:
|
23
|
+
tags: true
|
24
|
+
all_branches: true
|
25
|
+
repo: tevren/biffbot
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Biffbot
|
2
|
+
[![Build Status](https://travis-ci.org/tevren/biffbot.svg?branch=master)](https://travis-ci.org/tevren/biffbot) [![Gem Version](https://badge.fury.io/rb/biffbot.svg)](http://badge.fury.io/rb/biffbot) [![Code Climate](https://codeclimate.com/github/tevren/biffbot/badges/gpa.svg)](https://codeclimate.com/github/tevren/biffbot)
|
2
3
|
|
3
|
-
Ruby gem to connect to diffbot's
|
4
|
+
Ruby gem to connect to diffbot's APIs
|
4
5
|
|
5
6
|
## Installation
|
6
7
|
|
@@ -19,19 +20,40 @@ Or install it yourself as:
|
|
19
20
|
## Usage
|
20
21
|
|
21
22
|
Usage example:
|
23
|
+
OLD DEPRECATED METHOD:
|
22
24
|
|
23
|
-
|
25
|
+
require 'biffbot'
|
26
|
+
token = YOUR_DEVELOPER_TOKEN
|
27
|
+
biff = Biffbot::Base.new(token)
|
24
28
|
article = biff.parse("the url you want to parse",options)
|
25
29
|
|
26
|
-
|
30
|
+
NEW METHOD:
|
27
31
|
|
28
|
-
|
32
|
+
require 'biffbot'
|
33
|
+
token = YOUR_DEVELOPER_TOKEN
|
34
|
+
analyze = Biffbot::Analyze.new(token, url, {:type => 'article'})
|
35
|
+
article = Biffbot::Article.new(token, url, some_hash_of_options)
|
29
36
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
37
|
+
|
38
|
+
The available classes are:
|
39
|
+
* Biffbot::Analyze
|
40
|
+
* Biffbot::Article
|
41
|
+
* Biffbot::Image
|
42
|
+
* Biffbot::Product
|
43
|
+
* Untested Classes (I don't have a paid account)
|
44
|
+
|
45
|
+
* Biffbot::Bulk
|
46
|
+
* Biffbot::Custom
|
47
|
+
|
48
|
+
## Testing
|
49
|
+
|
50
|
+
1. create a .env file in the following format:
|
51
|
+
|
52
|
+
DIFFBOT_DEV_TOKEN=YOUR_DIFFBOT_TOKEN
|
53
|
+
|
54
|
+
2. run the tests via rake
|
55
|
+
|
56
|
+
rake test
|
35
57
|
|
36
58
|
## Contributing
|
37
59
|
|
data/Rakefile
CHANGED
@@ -1 +1,23 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'dotenv/tasks'
|
4
|
+
|
5
|
+
desc 'Run supported specs'
|
6
|
+
RSpec::Core::RakeTask.new(:supported) do |task|
|
7
|
+
task.pattern = 'spec/lib/supported/*_spec.rb'
|
8
|
+
task.rspec_opts = ['--color', '--format', 'progress']
|
9
|
+
end
|
10
|
+
|
11
|
+
desc 'Run supported specs'
|
12
|
+
RSpec::Core::RakeTask.new(:unsupported) do |task|
|
13
|
+
task.pattern = 'spec/lib/unsupported/*_spec.rb'
|
14
|
+
task.rspec_opts = ['--color', '--format', 'progress']
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Run rubocop tests'
|
18
|
+
task :rubocop do
|
19
|
+
sh "bundle exec rubocop -c #{File.dirname(__FILE__)}/.rubocop.yml #{File.dirname(__FILE__)}"
|
20
|
+
end
|
21
|
+
|
22
|
+
task test: [:dotenv, :supported, :rubocop]
|
23
|
+
task default: [:test, :release]
|
data/biffbot.gemspec
CHANGED
@@ -4,19 +4,23 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'biffbot/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
|
-
gem.name =
|
7
|
+
gem.name = 'biffbot'
|
8
8
|
gem.version = Biffbot::VERSION
|
9
|
-
gem.authors = [
|
10
|
-
gem.email = [
|
11
|
-
gem.description =
|
12
|
-
gem.summary =
|
13
|
-
gem.homepage =
|
9
|
+
gem.authors = ['Anurag Mohanty']
|
10
|
+
gem.email = ['tevren@gmail.com']
|
11
|
+
gem.description = 'Ruby gem to connect to diffbot\'s article api'
|
12
|
+
gem.summary = 'Given a url, pulls article content using diffbot\'s article extractor'
|
13
|
+
gem.homepage = 'https://github.com/tevren/biffbot'
|
14
14
|
|
15
|
-
gem.files = `git ls-files`.split(
|
16
|
-
gem.executables = gem.files.grep(
|
17
|
-
gem.test_files = gem.files.grep(
|
18
|
-
gem.require_paths = [
|
19
|
-
gem.add_dependency(
|
20
|
-
gem.add_dependency(
|
21
|
-
gem.
|
15
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
16
|
+
gem.executables = gem.files.grep(/^bin\//) { |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(/^(test|spec|features)\//)
|
18
|
+
gem.require_paths = ['lib']
|
19
|
+
gem.add_dependency('httparty')
|
20
|
+
gem.add_dependency('json', '~> 1.8')
|
21
|
+
gem.add_dependency('hashie', '~> 3.3')
|
22
|
+
gem.add_development_dependency('rspec')
|
23
|
+
gem.add_development_dependency('rake')
|
24
|
+
gem.add_development_dependency('rubocop')
|
25
|
+
gem.add_development_dependency('dotenv')
|
22
26
|
end
|
data/lib/biffbot.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'biffbot/version'
|
2
|
+
require 'biffbot/analyze'
|
3
|
+
require 'biffbot/article'
|
4
|
+
require 'biffbot/base'
|
5
|
+
require 'biffbot/bulk'
|
6
|
+
require 'biffbot/custom'
|
7
|
+
require 'biffbot/image'
|
8
|
+
require 'biffbot/product'
|
9
|
+
|
3
10
|
module Biffbot
|
4
|
-
# Your code goes here...
|
5
11
|
def self.reset!
|
6
12
|
@token = nil
|
7
13
|
end
|
8
14
|
class << self
|
9
|
-
#Your Diffbot API token.
|
10
|
-
attr_accessor :token
|
11
15
|
end
|
12
16
|
end
|
data/lib/biffbot/base.rb
CHANGED
@@ -1,40 +1,42 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'json'
|
3
|
-
|
3
|
+
require 'hashie'
|
4
|
+
require 'cgi'
|
4
5
|
module Biffbot
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
6
|
+
class Base < Hash
|
7
|
+
include Hashie::Extensions::Coercion
|
8
|
+
def parse token = '', type = 'article', url = '', options = {}
|
9
|
+
url = parse_options(options, generate_url(CGI.escape(url), token, type, options[:version]))
|
10
|
+
JSON.parse(HTTParty.get(url).body).each_pair do |key, value|
|
11
|
+
self[key] = value
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def generate_url url, token, type, version
|
16
|
+
case type
|
17
|
+
when 'analyze'
|
18
|
+
url = "http://api.diffbot.com/v3/#{type}?token=#{token}&url=#{url}"
|
19
|
+
when 'custom'
|
20
|
+
url = "http://api.diffbot.com/v3/#{options[:api_name]}?token=#{token}&url=#{url}"
|
21
|
+
when 'article', 'image', 'product'
|
22
|
+
url = "http://api.diffbot.com/v2/#{type}?token=#{token}&url=#{url}"
|
23
|
+
url = "http://api.diffbot.com/#{version}/#{type}?token=#{token}&url=#{url}" if version == 'v2' || version == 'v3'
|
24
|
+
end
|
25
|
+
url
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse_options options = {}, request = ''
|
29
|
+
options.each do |opt, value|
|
30
|
+
case opt
|
31
|
+
when :timeout, :paging, :mode
|
32
|
+
request += "&#{opt}=#{value}"
|
33
|
+
when :callback, :stats
|
34
|
+
request += "&#{opt}"
|
35
|
+
when :fields
|
36
|
+
request += "&#{opt}=" + value.join(',') if value.is_a?(Array)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
request
|
40
|
+
end
|
41
|
+
end
|
40
42
|
end
|
data/lib/biffbot/bulk.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'json'
|
3
|
+
require 'hashie'
|
4
|
+
require 'cgi'
|
5
|
+
module Biffbot
|
6
|
+
class Bulk < Base
|
7
|
+
include Hashie::Extensions::Coercion
|
8
|
+
def initialize token
|
9
|
+
@token = token
|
10
|
+
end
|
11
|
+
|
12
|
+
def create_job name, api_type, urls = [], options = {}
|
13
|
+
api_url = "http://api.diffbot.com/v2/#{api_type}"
|
14
|
+
api_url = "http://api.diffbot.com/#{options[:version]}/#{api_type}" if options[:version] == 'v2' || options[:version] == 'v3'
|
15
|
+
api_url = parse_options(options, api_url)
|
16
|
+
endpoint = 'http://api.diffbot.com/v3/bulk'
|
17
|
+
post_body = generate_post_body(name, api_url, urls, options)
|
18
|
+
JSON.parse(HTTParty.post(endpoint, body: post_body.to_json, headers: {'Content-Type' => 'application/json'}).body).each_pair do |k, v|
|
19
|
+
self[k] = v
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def generate_post_body name, api_url, urls = [], options = {}
|
24
|
+
post_body = {token: @token, name: name, apiUrl: api_url, urls: urls}
|
25
|
+
options.each do |key, value|
|
26
|
+
next unless %w(notifyEmail maxRounds notifyWebHook pageProcessPattern).include?(key.to_s)
|
27
|
+
post_body[key] = value
|
28
|
+
end
|
29
|
+
post_body
|
30
|
+
end
|
31
|
+
|
32
|
+
def pause name
|
33
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&pause=1"
|
34
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
35
|
+
self[key] = value
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def unpause name
|
40
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&pause=0"
|
41
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
42
|
+
self[key] = value
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def restart name
|
47
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&restart=1"
|
48
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
49
|
+
self[key] = value
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def delete name
|
54
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&delete=1"
|
55
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
56
|
+
self[key] = value
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def retrieve_data jobName, _options = {}
|
61
|
+
# TODO: add support for csv
|
62
|
+
endpoint = "http://api.diffbot.com/v3/bulk/download/#{@token}-#{jobName}_data.json"
|
63
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
64
|
+
self[key] = value
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def view name
|
69
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}"
|
70
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
71
|
+
self[key] = value
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/biffbot/version.rb
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# more tests to come...
|
4
|
+
describe Biffbot::Analyze do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Analyze.new(token, url, type: 'article')
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'analyze an article' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an human language field' do
|
17
|
+
@article['humanLanguage'].should_not be_nil
|
18
|
+
@article['humanLanguage'].should be_a_kind_of(String)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should have an url' do
|
22
|
+
@article['objects'].first['pageUrl'].should_not be_nil
|
23
|
+
@article['objects'].first['pageUrl'].should be_a_kind_of(String)
|
24
|
+
@article['objects'].first['pageUrl'].should include('http')
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have some text' do
|
28
|
+
@article['objects'].first['text'].should_not be_nil
|
29
|
+
@article['objects'].first['text'].should be_a_kind_of(String)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 article api
|
4
|
+
describe Biffbot::Article do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Article.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Article.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@article['url'].should_not be_nil
|
18
|
+
@article['url'].should be_a_kind_of(String)
|
19
|
+
@article['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an article' do
|
23
|
+
%w(icon title text html date author images videos).each do |field|
|
24
|
+
@article[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 image api
|
4
|
+
describe Biffbot::Image do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@image = Biffbot::Image.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Image.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@image['type'].should include('image')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@image['url'].should_not be_nil
|
18
|
+
@image['url'].should be_a_kind_of(String)
|
19
|
+
@image['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an image' do
|
23
|
+
%w(type title images url).each do |field|
|
24
|
+
@image[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 product api
|
4
|
+
describe Biffbot::Product do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.amazon.com/Google-Chromecast-Streaming-Media-Player/dp/B00DR0PDNE/'
|
8
|
+
@product = Biffbot::Product.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Product.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@product['type'].should include('product')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@product['url'].should_not be_nil
|
18
|
+
@product['url'].should be_a_kind_of(String)
|
19
|
+
@product['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have an offer price' do
|
23
|
+
@product['offerPrice'].should_not be_nil
|
24
|
+
@product['offerPrice'].should be_a_kind_of(String)
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have the default fields for an image' do
|
28
|
+
%w(title text leafPage offerPrice images type products url).each do |field|
|
29
|
+
@product[field].should_not be_nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 bulk api
|
4
|
+
describe Biffbot::Bulk do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
@bulk = Biffbot::Bulk.new(token)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'should create a bulk job' do
|
11
|
+
urls = ['http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/18/100000-prisoners-are-in-for-low-level-drug-offenses-obama-just-gave-relief-to-8/',
|
12
|
+
'http://www.wired.com/2013/01/4th-amendment-chest-trial/']
|
13
|
+
@bulk.create_job('test', 'article', urls).should_not be_nil
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should view the bulk job' do
|
17
|
+
@bulk.view('test').should_not be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should delete the bulk job' do
|
21
|
+
@bulk.delete('test').should_not be_nil
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently breaks until i figure out how to create a custom api
|
4
|
+
describe Biffbot::Custom do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Custom.new(token, url, 'article')
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Article.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@article['url'].should_not be_nil
|
18
|
+
@article['url'].should be_a_kind_of(String)
|
19
|
+
@article['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an article' do
|
23
|
+
%w(icon title text html date author images videos).each do |field|
|
24
|
+
@article[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biffbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Anurag Mohanty
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-12-22 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: httparty
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,23 +27,34 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: json
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ~>
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.
|
33
|
+
version: '1.8'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ~>
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version: 1.
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hashie
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.3'
|
46
55
|
- !ruby/object:Gem::Dependency
|
47
56
|
name: rspec
|
48
57
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
58
|
requirements:
|
51
59
|
- - ! '>='
|
52
60
|
- !ruby/object:Gem::Version
|
@@ -54,7 +62,48 @@ dependencies:
|
|
54
62
|
type: :development
|
55
63
|
prerelease: false
|
56
64
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: dotenv
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
107
|
requirements:
|
59
108
|
- - ! '>='
|
60
109
|
- !ruby/object:Gem::Version
|
@@ -67,40 +116,57 @@ extensions: []
|
|
67
116
|
extra_rdoc_files: []
|
68
117
|
files:
|
69
118
|
- .gitignore
|
119
|
+
- .rubocop.yml
|
120
|
+
- .travis.yml
|
70
121
|
- Gemfile
|
71
122
|
- LICENSE.txt
|
72
123
|
- README.md
|
73
124
|
- Rakefile
|
74
125
|
- biffbot.gemspec
|
75
126
|
- lib/biffbot.rb
|
127
|
+
- lib/biffbot/analyze.rb
|
128
|
+
- lib/biffbot/article.rb
|
76
129
|
- lib/biffbot/base.rb
|
130
|
+
- lib/biffbot/bulk.rb
|
131
|
+
- lib/biffbot/custom.rb
|
132
|
+
- lib/biffbot/image.rb
|
133
|
+
- lib/biffbot/product.rb
|
77
134
|
- lib/biffbot/version.rb
|
78
|
-
- spec/
|
135
|
+
- spec/lib/supported/analyze_spec.rb
|
136
|
+
- spec/lib/supported/article_spec.rb
|
137
|
+
- spec/lib/supported/image_spec.rb
|
138
|
+
- spec/lib/supported/product_spec.rb
|
139
|
+
- spec/lib/unsupported/bulk_spec.rb
|
140
|
+
- spec/lib/unsupported/custom_spec.rb
|
79
141
|
- spec/spec_helper.rb
|
80
142
|
homepage: https://github.com/tevren/biffbot
|
81
143
|
licenses: []
|
144
|
+
metadata: {}
|
82
145
|
post_install_message:
|
83
146
|
rdoc_options: []
|
84
147
|
require_paths:
|
85
148
|
- lib
|
86
149
|
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
-
none: false
|
88
150
|
requirements:
|
89
151
|
- - ! '>='
|
90
152
|
- !ruby/object:Gem::Version
|
91
153
|
version: '0'
|
92
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
-
none: false
|
94
155
|
requirements:
|
95
156
|
- - ! '>='
|
96
157
|
- !ruby/object:Gem::Version
|
97
158
|
version: '0'
|
98
159
|
requirements: []
|
99
160
|
rubyforge_project:
|
100
|
-
rubygems_version:
|
161
|
+
rubygems_version: 2.4.5
|
101
162
|
signing_key:
|
102
|
-
specification_version:
|
163
|
+
specification_version: 4
|
103
164
|
summary: Given a url, pulls article content using diffbot's article extractor
|
104
165
|
test_files:
|
105
|
-
- spec/
|
166
|
+
- spec/lib/supported/analyze_spec.rb
|
167
|
+
- spec/lib/supported/article_spec.rb
|
168
|
+
- spec/lib/supported/image_spec.rb
|
169
|
+
- spec/lib/supported/product_spec.rb
|
170
|
+
- spec/lib/unsupported/bulk_spec.rb
|
171
|
+
- spec/lib/unsupported/custom_spec.rb
|
106
172
|
- spec/spec_helper.rb
|
data/spec/biffbot_spec.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
# more tests to come...
|
3
|
-
describe Biffbot do
|
4
|
-
before :each do
|
5
|
-
@token = "YOUR TOKEN HERE"
|
6
|
-
@biffbot = Biffbot::Base.new(@token)
|
7
|
-
end
|
8
|
-
|
9
|
-
describe "#get_huffinton_post_article" do
|
10
|
-
it "not nil" do
|
11
|
-
@biffbot.parse("http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html").size.should_not be_nil
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
describe "#batch" do
|
16
|
-
it "not nil" do
|
17
|
-
articles = ["http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html", "http://www.nytimes.com/2013/02/08/business/asset-sales-help-quarterly-profit-at-times-company.html"]
|
18
|
-
@biffbot.batch(articles).size.should == 2
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|