biffbot 0.0.3 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +33 -0
- data/.travis.yml +25 -0
- data/README.md +31 -9
- data/Rakefile +23 -1
- data/biffbot.gemspec +17 -13
- data/lib/biffbot.rb +9 -5
- data/lib/biffbot/analyze.rb +9 -0
- data/lib/biffbot/article.rb +9 -0
- data/lib/biffbot/base.rb +38 -36
- data/lib/biffbot/bulk.rb +75 -0
- data/lib/biffbot/custom.rb +10 -0
- data/lib/biffbot/image.rb +9 -0
- data/lib/biffbot/product.rb +9 -0
- data/lib/biffbot/version.rb +1 -1
- data/spec/lib/supported/analyze_spec.rb +32 -0
- data/spec/lib/supported/article_spec.rb +28 -0
- data/spec/lib/supported/image_spec.rb +28 -0
- data/spec/lib/supported/product_spec.rb +33 -0
- data/spec/lib/unsupported/bulk_spec.rb +23 -0
- data/spec/lib/unsupported/custom_spec.rb +28 -0
- data/spec/spec_helper.rb +1 -2
- metadata +83 -17
- data/spec/biffbot_spec.rb +0 -21
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MzkzZWQzN2MyODFkMmNiZjU4MTU5NWM5ZTk3ZTI4MWM2ODBkMDU5OA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
Yzg4ZTdhMjI0MzM2NmMxMTVhMjA2MTIxOTUzZTMwYTdkOWRmZTQ0NA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDNmMWQxZWY5YmJhMmQ0YzRiMjZlMTNkMDY1Yjg1OTJkNmYwYzQ4Yzc1MGJl
|
10
|
+
NzYwYjc4MzU4NWFmNjNhOWQ4MDIzNjliZjJkZTJmM2U2Y2Y5ZDI0OThiYTkw
|
11
|
+
MTA2ZjIxNTAxYjcwYzg2Yzc2ZWM4NWQ0ZjQ2ZWFmM2FlYWVhYzE=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
Nzg5ZjY0MDFmYmMyYzlhNjhlY2FjM2U0ZmQ2NWFjNTNlZjg1ODczNjEzOTkw
|
14
|
+
MGVkMzE5ODNkYzM4MTFhM2Q3OTNhNGY1YzMzMDI5MGQxYzFmNjU5MzY1YTJl
|
15
|
+
YmE2YTk4ODIxNjg2YjNhYWMzODg4YzBmMjc0MWZkZGYyYTU0NzU=
|
data/.gitignore
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
Encoding:
|
2
|
+
Enabled: false
|
3
|
+
|
4
|
+
Style/HashSyntax:
|
5
|
+
Enabled: true
|
6
|
+
|
7
|
+
Style/MethodLength:
|
8
|
+
Enabled: false
|
9
|
+
|
10
|
+
LineLength:
|
11
|
+
Enabled: false
|
12
|
+
|
13
|
+
MethodDefParentheses:
|
14
|
+
EnforcedStyle: require_no_parentheses
|
15
|
+
|
16
|
+
SpaceInsideHashLiteralBraces:
|
17
|
+
EnforcedStyle: no_space
|
18
|
+
|
19
|
+
SignalException:
|
20
|
+
EnforcedStyle: only_raise
|
21
|
+
|
22
|
+
Documentation:
|
23
|
+
Enabled: false
|
24
|
+
|
25
|
+
ClassAndModuleChildren:
|
26
|
+
Enabled: false
|
27
|
+
|
28
|
+
# This rule doesn't work so well with multi argument exception ctors
|
29
|
+
RaiseArgs:
|
30
|
+
Enabled: false
|
31
|
+
|
32
|
+
EachWithObject:
|
33
|
+
Enabled: false
|
data/.travis.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
language: ruby
|
2
|
+
cache: bundler
|
3
|
+
rvm:
|
4
|
+
- ruby-1.9.3
|
5
|
+
- ruby-2.0.0
|
6
|
+
- ruby-2.1
|
7
|
+
- ruby-head
|
8
|
+
script: bundle exec rake test
|
9
|
+
notifications:
|
10
|
+
email:
|
11
|
+
recipients:
|
12
|
+
- anurag@mohanty.io
|
13
|
+
on_success:
|
14
|
+
- always|never|change
|
15
|
+
on_failure:
|
16
|
+
- always|never|change
|
17
|
+
deploy:
|
18
|
+
provider: rubygems
|
19
|
+
api_key:
|
20
|
+
secure: rzBfMCE2wPxagwWN6ttk5IwCdnX5cI+QRDN9gBBjCWOsqn8cZQDzEfBs1S8zM79idbGY9bV62N8Dr5SgLitYc71YN8HnvNFBg9jjAlvu6ytS4o9L3LroS/cPPpDCgerqTli7Ol9XSjvN0KKHAeFyOZds8qSrn1vvqELnMDn9dOc=
|
21
|
+
gem: biffbot
|
22
|
+
on:
|
23
|
+
tags: true
|
24
|
+
all_branches: true
|
25
|
+
repo: tevren/biffbot
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# Biffbot
|
2
|
+
[Build Status](https://travis-ci.org/tevren/biffbot) [Gem Version](http://badge.fury.io/rb/biffbot) [Code Climate](https://codeclimate.com/github/tevren/biffbot)
|
2
3
|
|
3
|
-
Ruby gem to connect to diffbot's
|
4
|
+
Ruby gem to connect to diffbot's APIs
|
4
5
|
|
5
6
|
## Installation
|
6
7
|
|
@@ -19,19 +20,40 @@ Or install it yourself as:
|
|
19
20
|
## Usage
|
20
21
|
|
21
22
|
Usage example:
|
23
|
+
OLD DEPRECATED METHOD:
|
22
24
|
|
23
|
-
|
25
|
+
require 'biffbot'
|
26
|
+
token = YOUR_DEVELOPER_TOKEN
|
27
|
+
biff = Biffbot::Base.new(token)
|
24
28
|
article = biff.parse("the url you want to parse",options)
|
25
29
|
|
26
|
-
|
30
|
+
NEW METHOD:
|
27
31
|
|
28
|
-
|
32
|
+
require 'biffbot'
|
33
|
+
token = YOUR_DEVELOPER_TOKEN
|
34
|
+
analyze = Biffbot::Analyze.new(token, url, {:type => 'article'})
|
35
|
+
article = Biffbot::Article.new(token, url, some_hash_of_options)
|
29
36
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
37
|
+
|
38
|
+
The available classes are:
|
39
|
+
* Biffbot::Analyze
|
40
|
+
* Biffbot::Article
|
41
|
+
* Biffbot::Image
|
42
|
+
* Biffbot::Product
|
43
|
+
* Untested Classes (I don't have a paid account)
|
44
|
+
|
45
|
+
* Biffbot::Bulk
|
46
|
+
* Biffbot::Custom
|
47
|
+
|
48
|
+
## Testing
|
49
|
+
|
50
|
+
1. create a .env file in the following format:
|
51
|
+
|
52
|
+
DIFFBOT_DEV_TOKEN=YOUR_DIFFBOT_TOKEN
|
53
|
+
|
54
|
+
2. run the tests via rake
|
55
|
+
|
56
|
+
rake test
|
35
57
|
|
36
58
|
## Contributing
|
37
59
|
|
data/Rakefile
CHANGED
@@ -1 +1,23 @@
|
|
1
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rspec/core/rake_task'
|
3
|
+
require 'dotenv/tasks'
|
4
|
+
|
5
|
+
desc 'Run supported specs'
|
6
|
+
RSpec::Core::RakeTask.new(:supported) do |task|
|
7
|
+
task.pattern = 'spec/lib/supported/*_spec.rb'
|
8
|
+
task.rspec_opts = ['--color', '--format', 'progress']
|
9
|
+
end
|
10
|
+
|
11
|
+
desc 'Run supported specs'
|
12
|
+
RSpec::Core::RakeTask.new(:unsupported) do |task|
|
13
|
+
task.pattern = 'spec/lib/unsupported/*_spec.rb'
|
14
|
+
task.rspec_opts = ['--color', '--format', 'progress']
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Run rubocop tests'
|
18
|
+
task :rubocop do
|
19
|
+
sh "bundle exec rubocop -c #{File.dirname(__FILE__)}/.rubocop.yml #{File.dirname(__FILE__)}"
|
20
|
+
end
|
21
|
+
|
22
|
+
task test: [:dotenv, :supported, :rubocop]
|
23
|
+
task default: [:test, :release]
|
data/biffbot.gemspec
CHANGED
@@ -4,19 +4,23 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'biffbot/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |gem|
|
7
|
-
gem.name =
|
7
|
+
gem.name = 'biffbot'
|
8
8
|
gem.version = Biffbot::VERSION
|
9
|
-
gem.authors = [
|
10
|
-
gem.email = [
|
11
|
-
gem.description =
|
12
|
-
gem.summary =
|
13
|
-
gem.homepage =
|
9
|
+
gem.authors = ['Anurag Mohanty']
|
10
|
+
gem.email = ['tevren@gmail.com']
|
11
|
+
gem.description = 'Ruby gem to connect to diffbot\'s article api'
|
12
|
+
gem.summary = 'Given a url, pulls article content using diffbot\'s article extractor'
|
13
|
+
gem.homepage = 'https://github.com/tevren/biffbot'
|
14
14
|
|
15
|
-
gem.files = `git ls-files`.split(
|
16
|
-
gem.executables = gem.files.grep(
|
17
|
-
gem.test_files = gem.files.grep(
|
18
|
-
gem.require_paths = [
|
19
|
-
gem.add_dependency(
|
20
|
-
gem.add_dependency(
|
21
|
-
gem.
|
15
|
+
gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
|
16
|
+
gem.executables = gem.files.grep(/^bin\//) { |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(/^(test|spec|features)\//)
|
18
|
+
gem.require_paths = ['lib']
|
19
|
+
gem.add_dependency('httparty')
|
20
|
+
gem.add_dependency('json', '~> 1.8')
|
21
|
+
gem.add_dependency('hashie', '~> 3.3')
|
22
|
+
gem.add_development_dependency('rspec')
|
23
|
+
gem.add_development_dependency('rake')
|
24
|
+
gem.add_development_dependency('rubocop')
|
25
|
+
gem.add_development_dependency('dotenv')
|
22
26
|
end
|
data/lib/biffbot.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'biffbot/version'
|
2
|
+
require 'biffbot/analyze'
|
3
|
+
require 'biffbot/article'
|
4
|
+
require 'biffbot/base'
|
5
|
+
require 'biffbot/bulk'
|
6
|
+
require 'biffbot/custom'
|
7
|
+
require 'biffbot/image'
|
8
|
+
require 'biffbot/product'
|
9
|
+
|
3
10
|
module Biffbot
|
4
|
-
# Your code goes here...
|
5
11
|
def self.reset!
|
6
12
|
@token = nil
|
7
13
|
end
|
8
14
|
class << self
|
9
|
-
#Your Diffbot API token.
|
10
|
-
attr_accessor :token
|
11
15
|
end
|
12
16
|
end
|
data/lib/biffbot/base.rb
CHANGED
@@ -1,40 +1,42 @@
|
|
1
1
|
require 'httparty'
|
2
2
|
require 'json'
|
3
|
-
|
3
|
+
require 'hashie'
|
4
|
+
require 'cgi'
|
4
5
|
module Biffbot
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
6
|
+
class Base < Hash
|
7
|
+
include Hashie::Extensions::Coercion
|
8
|
+
def parse token = '', type = 'article', url = '', options = {}
|
9
|
+
url = parse_options(options, generate_url(CGI.escape(url), token, type, options[:version]))
|
10
|
+
JSON.parse(HTTParty.get(url).body).each_pair do |key, value|
|
11
|
+
self[key] = value
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def generate_url url, token, type, version
|
16
|
+
case type
|
17
|
+
when 'analyze'
|
18
|
+
url = "http://api.diffbot.com/v3/#{type}?token=#{token}&url=#{url}"
|
19
|
+
when 'custom'
|
20
|
+
url = "http://api.diffbot.com/v3/#{options[:api_name]}?token=#{token}&url=#{url}"
|
21
|
+
when 'article', 'image', 'product'
|
22
|
+
url = "http://api.diffbot.com/v2/#{type}?token=#{token}&url=#{url}"
|
23
|
+
url = "http://api.diffbot.com/#{version}/#{type}?token=#{token}&url=#{url}" if version == 'v2' || version == 'v3'
|
24
|
+
end
|
25
|
+
url
|
26
|
+
end
|
27
|
+
|
28
|
+
def parse_options options = {}, request = ''
|
29
|
+
options.each do |opt, value|
|
30
|
+
case opt
|
31
|
+
when :timeout, :paging, :mode
|
32
|
+
request += "&#{opt}=#{value}"
|
33
|
+
when :callback, :stats
|
34
|
+
request += "&#{opt}"
|
35
|
+
when :fields
|
36
|
+
request += "&#{opt}=" + value.join(',') if value.is_a?(Array)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
request
|
40
|
+
end
|
41
|
+
end
|
40
42
|
end
|
data/lib/biffbot/bulk.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'json'
|
3
|
+
require 'hashie'
|
4
|
+
require 'cgi'
|
5
|
+
module Biffbot
|
6
|
+
class Bulk < Base
|
7
|
+
include Hashie::Extensions::Coercion
|
8
|
+
def initialize token
|
9
|
+
@token = token
|
10
|
+
end
|
11
|
+
|
12
|
+
def create_job name, api_type, urls = [], options = {}
|
13
|
+
api_url = "http://api.diffbot.com/v2/#{api_type}"
|
14
|
+
api_url = "http://api.diffbot.com/#{options[:version]}/#{api_type}" if options[:version] == 'v2' || options[:version] == 'v3'
|
15
|
+
api_url = parse_options(options, api_url)
|
16
|
+
endpoint = 'http://api.diffbot.com/v3/bulk'
|
17
|
+
post_body = generate_post_body(name, api_url, urls, options)
|
18
|
+
JSON.parse(HTTParty.post(endpoint, body: post_body.to_json, headers: {'Content-Type' => 'application/json'}).body).each_pair do |k, v|
|
19
|
+
self[k] = v
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def generate_post_body name, api_url, urls = [], options = {}
|
24
|
+
post_body = {token: @token, name: name, apiUrl: api_url, urls: urls}
|
25
|
+
options.each do |key, value|
|
26
|
+
next unless %w(notifyEmail maxRounds notifyWebHook pageProcessPattern).include?(key.to_s)
|
27
|
+
post_body[key] = value
|
28
|
+
end
|
29
|
+
post_body
|
30
|
+
end
|
31
|
+
|
32
|
+
def pause name
|
33
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&pause=1"
|
34
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
35
|
+
self[key] = value
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def unpause name
|
40
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&pause=0"
|
41
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
42
|
+
self[key] = value
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def restart name
|
47
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&restart=1"
|
48
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
49
|
+
self[key] = value
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def delete name
|
54
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}&delete=1"
|
55
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
56
|
+
self[key] = value
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def retrieve_data jobName, _options = {}
|
61
|
+
# TODO: add support for csv
|
62
|
+
endpoint = "http://api.diffbot.com/v3/bulk/download/#{@token}-#{jobName}_data.json"
|
63
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
64
|
+
self[key] = value
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def view name
|
69
|
+
endpoint = "http://api.diffbot.com/v3/bulk/?token=#{@token}&name=#{name}"
|
70
|
+
JSON.parse(HTTParty.get(endpoint).body).each_pair do |key, value|
|
71
|
+
self[key] = value
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/biffbot/version.rb
CHANGED
data/spec/lib/supported/analyze_spec.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# more tests to come...
|
4
|
+
describe Biffbot::Analyze do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Analyze.new(token, url, type: 'article')
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'analyze an article' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an human language field' do
|
17
|
+
@article['humanLanguage'].should_not be_nil
|
18
|
+
@article['humanLanguage'].should be_a_kind_of(String)
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should have an url' do
|
22
|
+
@article['objects'].first['pageUrl'].should_not be_nil
|
23
|
+
@article['objects'].first['pageUrl'].should be_a_kind_of(String)
|
24
|
+
@article['objects'].first['pageUrl'].should include('http')
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have some text' do
|
28
|
+
@article['objects'].first['text'].should_not be_nil
|
29
|
+
@article['objects'].first['text'].should be_a_kind_of(String)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/spec/lib/supported/article_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 article api
|
4
|
+
describe Biffbot::Article do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Article.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Article.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@article['url'].should_not be_nil
|
18
|
+
@article['url'].should be_a_kind_of(String)
|
19
|
+
@article['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an article' do
|
23
|
+
%w(icon title text html date author images videos).each do |field|
|
24
|
+
@article[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/spec/lib/supported/image_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 image api
|
4
|
+
describe Biffbot::Image do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@image = Biffbot::Image.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Image.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@image['type'].should include('image')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@image['url'].should_not be_nil
|
18
|
+
@image['url'].should be_a_kind_of(String)
|
19
|
+
@image['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an image' do
|
23
|
+
%w(type title images url).each do |field|
|
24
|
+
@image[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/spec/lib/supported/product_spec.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 product api
|
4
|
+
describe Biffbot::Product do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.amazon.com/Google-Chromecast-Streaming-Media-Player/dp/B00DR0PDNE/'
|
8
|
+
@product = Biffbot::Product.new(token, url)
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Product.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@product['type'].should include('product')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@product['url'].should_not be_nil
|
18
|
+
@product['url'].should be_a_kind_of(String)
|
19
|
+
@product['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have an offer price' do
|
23
|
+
@product['offerPrice'].should_not be_nil
|
24
|
+
@product['offerPrice'].should be_a_kind_of(String)
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should have the default fields for an image' do
|
28
|
+
%w(title text leafPage offerPrice images type products url).each do |field|
|
29
|
+
@product[field].should_not be_nil
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
data/spec/lib/unsupported/bulk_spec.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently tests v2 bulk api
|
4
|
+
describe Biffbot::Bulk do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
@bulk = Biffbot::Bulk.new(token)
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'should create a bulk job' do
|
11
|
+
urls = ['http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/18/100000-prisoners-are-in-for-low-level-drug-offenses-obama-just-gave-relief-to-8/',
|
12
|
+
'http://www.wired.com/2013/01/4th-amendment-chest-trial/']
|
13
|
+
@bulk.create_job('test', 'article', urls).should_not be_nil
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should view the bulk job' do
|
17
|
+
@bulk.view('test').should_not be_nil
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'should delete the bulk job' do
|
21
|
+
@bulk.delete('test').should_not be_nil
|
22
|
+
end
|
23
|
+
end
|
data/spec/lib/unsupported/custom_spec.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# currently breaks until i figure out how to create a custom api
|
4
|
+
describe Biffbot::Custom do
|
5
|
+
before :each do
|
6
|
+
token = ENV['DIFFBOT_DEV_TOKEN']
|
7
|
+
url = 'http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html'
|
8
|
+
@article = Biffbot::Custom.new(token, url, 'article')
|
9
|
+
end
|
10
|
+
|
11
|
+
describe 'Biffbot::Article.parse' do
|
12
|
+
it 'should have a type of article' do
|
13
|
+
@article['type'].should include('article')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should have an url' do
|
17
|
+
@article['url'].should_not be_nil
|
18
|
+
@article['url'].should be_a_kind_of(String)
|
19
|
+
@article['url'].should include('http')
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'should have the default fields for an article' do
|
23
|
+
%w(icon title text html date author images videos).each do |field|
|
24
|
+
@article[field].should_not be_nil
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: biffbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
5
|
-
prerelease:
|
4
|
+
version: 1.2.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Anurag Mohanty
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2014-12-22 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: httparty
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,23 +27,34 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: json
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ~>
|
36
32
|
- !ruby/object:Gem::Version
|
37
|
-
version: 1.
|
33
|
+
version: '1.8'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ~>
|
44
39
|
- !ruby/object:Gem::Version
|
45
|
-
version: 1.
|
40
|
+
version: '1.8'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hashie
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.3'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.3'
|
46
55
|
- !ruby/object:Gem::Dependency
|
47
56
|
name: rspec
|
48
57
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
58
|
requirements:
|
51
59
|
- - ! '>='
|
52
60
|
- !ruby/object:Gem::Version
|
@@ -54,7 +62,48 @@ dependencies:
|
|
54
62
|
type: :development
|
55
63
|
prerelease: false
|
56
64
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rake
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ! '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rubocop
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ! '>='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: dotenv
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
58
107
|
requirements:
|
59
108
|
- - ! '>='
|
60
109
|
- !ruby/object:Gem::Version
|
@@ -67,40 +116,57 @@ extensions: []
|
|
67
116
|
extra_rdoc_files: []
|
68
117
|
files:
|
69
118
|
- .gitignore
|
119
|
+
- .rubocop.yml
|
120
|
+
- .travis.yml
|
70
121
|
- Gemfile
|
71
122
|
- LICENSE.txt
|
72
123
|
- README.md
|
73
124
|
- Rakefile
|
74
125
|
- biffbot.gemspec
|
75
126
|
- lib/biffbot.rb
|
127
|
+
- lib/biffbot/analyze.rb
|
128
|
+
- lib/biffbot/article.rb
|
76
129
|
- lib/biffbot/base.rb
|
130
|
+
- lib/biffbot/bulk.rb
|
131
|
+
- lib/biffbot/custom.rb
|
132
|
+
- lib/biffbot/image.rb
|
133
|
+
- lib/biffbot/product.rb
|
77
134
|
- lib/biffbot/version.rb
|
78
|
-
- spec/
|
135
|
+
- spec/lib/supported/analyze_spec.rb
|
136
|
+
- spec/lib/supported/article_spec.rb
|
137
|
+
- spec/lib/supported/image_spec.rb
|
138
|
+
- spec/lib/supported/product_spec.rb
|
139
|
+
- spec/lib/unsupported/bulk_spec.rb
|
140
|
+
- spec/lib/unsupported/custom_spec.rb
|
79
141
|
- spec/spec_helper.rb
|
80
142
|
homepage: https://github.com/tevren/biffbot
|
81
143
|
licenses: []
|
144
|
+
metadata: {}
|
82
145
|
post_install_message:
|
83
146
|
rdoc_options: []
|
84
147
|
require_paths:
|
85
148
|
- lib
|
86
149
|
required_ruby_version: !ruby/object:Gem::Requirement
|
87
|
-
none: false
|
88
150
|
requirements:
|
89
151
|
- - ! '>='
|
90
152
|
- !ruby/object:Gem::Version
|
91
153
|
version: '0'
|
92
154
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
-
none: false
|
94
155
|
requirements:
|
95
156
|
- - ! '>='
|
96
157
|
- !ruby/object:Gem::Version
|
97
158
|
version: '0'
|
98
159
|
requirements: []
|
99
160
|
rubyforge_project:
|
100
|
-
rubygems_version:
|
161
|
+
rubygems_version: 2.4.5
|
101
162
|
signing_key:
|
102
|
-
specification_version:
|
163
|
+
specification_version: 4
|
103
164
|
summary: Given a url, pulls article content using diffbot's article extractor
|
104
165
|
test_files:
|
105
|
-
- spec/
|
166
|
+
- spec/lib/supported/analyze_spec.rb
|
167
|
+
- spec/lib/supported/article_spec.rb
|
168
|
+
- spec/lib/supported/image_spec.rb
|
169
|
+
- spec/lib/supported/product_spec.rb
|
170
|
+
- spec/lib/unsupported/bulk_spec.rb
|
171
|
+
- spec/lib/unsupported/custom_spec.rb
|
106
172
|
- spec/spec_helper.rb
|
data/spec/biffbot_spec.rb
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
require 'spec_helper'
|
2
|
-
# more tests to come...
|
3
|
-
describe Biffbot do
|
4
|
-
before :each do
|
5
|
-
@token = "YOUR TOKEN HERE"
|
6
|
-
@biffbot = Biffbot::Base.new(@token)
|
7
|
-
end
|
8
|
-
|
9
|
-
describe "#get_huffinton_post_article" do
|
10
|
-
it "not nil" do
|
11
|
-
@biffbot.parse("http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html").size.should_not be_nil
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
describe "#batch" do
|
16
|
-
it "not nil" do
|
17
|
-
articles = ["http://www.huffingtonpost.ca/2012/10/11/amanda-todd-teen-bullying-suicide-youtube_n_1959668.html", "http://www.nytimes.com/2013/02/08/business/asset-sales-help-quarterly-profit-at-times-company.html"]
|
18
|
-
@biffbot.batch(articles).size.should == 2
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|