copyscape 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +3 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +28 -0
- data/README.md +47 -0
- data/Rakefile +13 -0
- data/copyscape.gemspec +31 -0
- data/lib/copyscape.rb +22 -0
- data/lib/copyscape/request_base.rb +38 -0
- data/lib/copyscape/response.rb +70 -0
- data/lib/copyscape/text_search.rb +14 -0
- data/lib/copyscape/url_search.rb +14 -0
- data/lib/copyscape/version.rb +4 -0
- data/test/error_response.xml +5 -0
- data/test/response_test.rb +103 -0
- data/test/test_helper.rb +11 -0
- data/test/text_search_test.rb +29 -0
- data/test/url_search_response_duplicate.xml +654 -0
- data/test/url_search_response_not_duplicate.xml +6 -0
- data/test/url_search_test.rb +28 -0
- metadata +138 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
copyscape (0.0.1)
|
5
|
+
httparty (= 0.7.7)
|
6
|
+
nokogiri (= 1.4.1)
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: http://rubygems.org/
|
10
|
+
specs:
|
11
|
+
crack (0.1.8)
|
12
|
+
httparty (0.7.7)
|
13
|
+
crack (= 0.1.8)
|
14
|
+
mocha (0.9.8)
|
15
|
+
rake
|
16
|
+
nokogiri (1.4.1)
|
17
|
+
rake (0.8.7)
|
18
|
+
shoulda-context (1.0.0.beta1)
|
19
|
+
|
20
|
+
PLATFORMS
|
21
|
+
ruby
|
22
|
+
|
23
|
+
DEPENDENCIES
|
24
|
+
bundler (>= 1.0.0)
|
25
|
+
copyscape!
|
26
|
+
mocha
|
27
|
+
rake (= 0.8.7)
|
28
|
+
shoulda-context
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
copyscape
|
2
|
+
=========
|
3
|
+
|
4
|
+
Ruby wrapper for the Copyscape API.
|
5
|
+
|
6
|
+
|
7
|
+
Usage
|
8
|
+
-----
|
9
|
+
|
10
|
+
First, you'll need to set up your [Copyscape](http://www.copyscape.com/) account and whatnot. You can do
|
11
|
+
that [here](http://www.copyscape.com/signup.php?pro=1&o=f).
|
12
|
+
|
13
|
+
Next, install the gem.
|
14
|
+
|
15
|
+
$ gem install copyscape
|
16
|
+
|
17
|
+
And then you can do things like this...
|
18
|
+
|
19
|
+
Copyscape.username = 'yourname'
|
20
|
+
Copyscape.api_key = 'abc123'
|
21
|
+
|
22
|
+
# URL search
|
23
|
+
search = Copyscape.url_search("http://www.copyscape.com/example.html")
|
24
|
+
search.duplicate? # => true
|
25
|
+
search.count # => 81
|
26
|
+
search.duplicates.each do |duplicate|
|
27
|
+
puts duplicate['title']
|
28
|
+
puts duplicate['url']
|
29
|
+
puts duplicate['textsnippet']
|
30
|
+
puts duplicate['htmlsnippet']
|
31
|
+
puts duplicate['minwordsmatched']
|
32
|
+
end
|
33
|
+
|
34
|
+
# Text search
|
35
|
+
search = Copyscape.text_search('This is some text I want to check for plagiarism')
|
36
|
+
search.duplicate? # => false
|
37
|
+
|
38
|
+
|
39
|
+
Currently, there is no support in the gem for "private index" searching, though
|
40
|
+
it would be pretty easy to add.
|
41
|
+
|
42
|
+
|
43
|
+
More...
|
44
|
+
-------
|
45
|
+
|
46
|
+
I'm in no way associated with Copyscape.
|
47
|
+
|
data/Rakefile
ADDED
data/copyscape.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path("../lib/copyscape/version", __FILE__)
|
3
|
+
|
4
|
+
# Nasty work around to keep the dependencies in Syck.
# YAML::ENGINE is not defined on every Ruby, so only switch the engine when
# the constant is present instead of aborting the gemspec with a NameError.
YAML::ENGINE.yamler = 'syck' if defined?(YAML::ENGINE)

Gem::Specification.new do |s|
  s.name        = "copyscape"
  s.version     = Copyscape::Version
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Ben VandenBos']
  s.email       = ['bvandenbos@gmail.com']
  s.homepage    = "http://github.com/bvandenbos/copyscape-rb"
  s.summary     = "Ruby wrapper for Copyscape API"
  s.description = "Ruby wrapper for Copyscape API"

  s.required_rubygems_version = ">= 1.3.6"
  s.add_development_dependency "bundler", ">= 1.0.0"

  # Ask git for the tracked files once and derive both lists from it.
  tracked_files = `git ls-files`.split("\n")

  s.files        = tracked_files
  # Executables are the tracked paths under bin/, with the "bin/" prefix removed.
  s.executables  = tracked_files.map { |f| f =~ /^bin\/(.*)/ ? $1 : nil }.compact
  s.require_path = 'lib'

  s.add_dependency("nokogiri", "1.4.1")
  s.add_dependency("httparty", "0.7.7")
  s.add_development_dependency("rake", "0.8.7")
  s.add_development_dependency("shoulda-context")
  s.add_development_dependency("mocha")

end
|
31
|
+
|
data/lib/copyscape.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'copyscape/response'
|
2
|
+
require 'copyscape/url_search'
|
3
|
+
require 'copyscape/text_search'
|
4
|
+
require 'copyscape/version'
|
5
|
+
|
6
|
+
# Top-level namespace for the gem. Holds the API credentials and offers
# shortcut constructors for the two kinds of search.
module Copyscape

  class << self
    # Credentials used for every API request.
    attr_accessor :username, :api_key
  end

  # Builds a UrlSearch for the given url.
  def self.url_search(url)
    UrlSearch.new(url)
  end

  # Builds a TextSearch, forwarding all arguments to TextSearch.new.
  def self.text_search(*prms)
    TextSearch.new(*prms)
  end

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
require 'httparty'
|
3
|
+
|
4
|
+
module Copyscape

  # Common base for API requests: mixes in HTTParty, builds the credential
  # parameters and forwards the result readers to the stored response.
  class RequestBase

    include HTTParty
    extend Forwardable

    base_uri 'http://www.copyscape.com/api'

    # Readers forwarded to whatever #response returns.
    def_delegators :response,
                   :duplicate_count, :duplicates, :duplicate?, :count,
                   :error, :error?, :query, :query_words, :raw_response

    # Credential parameters sent with every request.
    # Raises if Copyscape.username or Copyscape.api_key has not been set.
    def base_params
      username = Copyscape.username
      raise "You must set Copyscape.username" unless username

      api_key = Copyscape.api_key
      raise "You must set Copyscape.api_key" unless api_key

      { :u => username, :k => api_key }
    end

    # The response stored in @response; raises when it has not been set.
    def response
      return @response if @response
      raise "@response must be set"
    end

    # Issues a GET to '/' with the credentials merged with +params+ as the
    # query string and returns the body of the HTTP response.
    def get_response(params)
      query = base_params.merge(params)
      self.class.get('/', :query => query).body
    end

    # Issues a POST to '/' with the credentials merged with +params+ as the
    # request body and returns the body of the HTTP response.
    def post_response(params)
      payload = base_params.merge(params)
      self.class.post('/', :body => payload).body
    end

  end

end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Copyscape

  # Wraps the XML body of an API response and exposes its fields.
  class Response

    # The unparsed body, exactly as it was handed to the constructor.
    attr_reader :raw_response

    # buffer - the raw XML response body to parse.
    def initialize(buffer)
      @raw_response = buffer
      @document = Nokogiri(buffer)
    end

    # Returns the text of the first 'query' node, or nil when there is none.
    def query
      field('query')
    end

    # Returns the 'querywords' value as an Integer, or nil when it is absent.
    def query_words
      words = field('querywords')
      words.to_i if words
    end

    # Returns the number of duplicates (0 when the response has no 'count').
    def count
      field('count').to_i
    end

    # RequestBase delegates duplicate_count to the response, so it has to
    # exist here; it is simply another name for #count.
    alias_method :duplicate_count, :count

    # Returns true if the response was an error
    def error?
      !!error
    end

    # Returns the error message, or nil when the response is not an error.
    def error
      field('error')
    end

    # Returns true if there are one or more duplicates
    def duplicate?
      count > 0
    end

    # Returns an array of all the results in the form of a hash.
    # The array is built once and memoized.
    def duplicates
      @duplicates ||= @document.search('result').map do |result|
        result_to_hash(result)
      end
    end

    private

    # Given a result xml element, return a hash of the values we're interested in.
    # Keys are the child element names. Values made up solely of digits are
    # converted to Integers; everything else stays a String.
    def result_to_hash(result)
      # Only child elements carry data; skipping other nodes keeps the
      # whitespace between elements from showing up under a 'text' key.
      elements = result.children.select { |node| node.element? }

      elements.inject({}) do |hash, node|
        text = node.text
        # \A and \z anchor the whole string. ^ and $ match per line, which
        # would turn multi-line text with a digit-only line into an Integer.
        hash[node.name] = (text && text =~ /\A\d+\z/) ? text.to_i : text
        hash
      end
    end

    # Returns the text of the first node matching +name+, or nil if none match.
    def field(name)
      node = @document.search(name).first
      node && node.text
    end

  end

end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require 'copyscape/request_base'
|
2
|
+
|
3
|
+
module Copyscape

  # Runs a text search against the API as soon as it is constructed.
  class TextSearch < RequestBase

    # text     - the text to search for.
    # encoding - the encoding name sent along with the text (default 'UTF-8').
    #
    # POSTs the search and stores the parsed result in @response.
    def initialize(text, encoding = 'UTF-8')
      search_params = { :o => 'csearch', :t => text, :e => encoding }
      @response = Response.new(post_response(search_params))
    end

  end

end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require 'copyscape/response'
|
3
|
+
|
4
|
+
class ResponseTest < Test::Unit::TestCase
|
5
|
+
|
6
|
+
context "response" do
|
7
|
+
setup do
|
8
|
+
@response = Copyscape::Response.new(url_search_response_duplicate)
|
9
|
+
end
|
10
|
+
|
11
|
+
should "include query" do
|
12
|
+
assert_equal "http://www.copyscape.com/example.html", @response.query
|
13
|
+
end
|
14
|
+
|
15
|
+
should "include query words" do
|
16
|
+
assert_equal 1340, @response.query_words
|
17
|
+
end
|
18
|
+
|
19
|
+
should "not be an error" do
|
20
|
+
assert !@response.error?
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
context "duplicate" do
|
25
|
+
|
26
|
+
setup do
|
27
|
+
@response = Copyscape::Response.new(url_search_response_duplicate)
|
28
|
+
end
|
29
|
+
|
30
|
+
should "include count" do
|
31
|
+
assert_equal 81, @response.count
|
32
|
+
end
|
33
|
+
|
34
|
+
should "be a duplicate" do
|
35
|
+
assert @response.duplicate?
|
36
|
+
end
|
37
|
+
|
38
|
+
should "include duplicates" do
|
39
|
+
dup = @response.duplicates.first
|
40
|
+
assert_equal 'http://www.archives.gov/exhibits/charters/declaration_transcript.html', dup['url']
|
41
|
+
assert_equal "Declaration of Independence - Transcript", dup['title']
|
42
|
+
assert_equal "... We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... But when a long train of abuses and usurpations, pursuing invariably the same ... to pass Laws of immediate and pressing importance, unless suspended in their ... for opposing with manly firmness his invasions on the rights of the people. ... English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... He has plundered our seas, ravaged our Coasts, burnt our towns, ... of Cruelty & perfidy scarcely paralleled in the most barbarous ages, ... He has constrained our fellow Citizens taken Captive on the high Seas to bear ... the merciless Indian Savages, whose known rule of warfare, ... which, would inevitably interrupt our connections and correspondence. ... on the protection of divine Providence, we mutually pledge to each other our ... ",
|
43
|
+
dup['textsnippet']
|
44
|
+
assert_equal "<font color=\"#777777\">... </font><font color=\"#000000\">We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... </font><font color=\"#777777\"></font><font color=\"#000000\">But when a long train of abuses and usurpations, pursuing invariably the same ... </font><font color=\"#777777\"></font><font color=\"#000000\">to pass Laws of immediate and pressing importance, unless suspended in their ... </font><font color=\"#777777\"></font><font color=\"#000000\">for opposing with manly firmness his invasions on the rights of the people. ... </font><font color=\"#777777\"></font><font color=\"#000000\">English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has plundered our seas, ravaged our Coasts, burnt our towns, ... </font><font color=\"#777777\"></font><font color=\"#000000\">of Cruelty & perfidy scarcely paralleled in the most barbarous ages, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has constrained our fellow Citizens taken Captive on the high Seas to bear ... </font><font color=\"#777777\"></font><font color=\"#000000\">the merciless Indian Savages, whose known rule of warfare, ... </font><font color=\"#777777\"></font><font color=\"#000000\">which, would inevitably interrupt our connections and correspondence. ... </font><font color=\"#777777\"></font><font color=\"#000000\">on the protection of divine Providence, we mutually pledge to each other our ... </font>",
|
45
|
+
dup['htmlsnippet']
|
46
|
+
assert_equal 134, dup['minwordsmatched']
|
47
|
+
end
|
48
|
+
|
49
|
+
should "have the right amount of duplicates" do
|
50
|
+
assert_equal @response.count, @response.duplicates.length
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
# Response parsed from the "no duplicates found" fixture.
context "not duplicate" do
  setup do
    @response = Copyscape::Response.new(url_search_response_not_duplicate)
  end

  should "include count" do
    assert_equal 0, @response.count
  end

  # The description now matches the negative assertion it makes.
  should "not be a duplicate" do
    assert !@response.duplicate?
  end

  # The description now matches the empty-list assertion it makes.
  should "not include duplicates" do
    assert @response.duplicates.empty?
  end

end
|
73
|
+
|
74
|
+
context "error" do
|
75
|
+
setup do
|
76
|
+
@response = Copyscape::Response.new(error_response)
|
77
|
+
end
|
78
|
+
|
79
|
+
should "be an error" do
|
80
|
+
assert @response.error?
|
81
|
+
end
|
82
|
+
|
83
|
+
should "return error message" do
|
84
|
+
assert_equal "connection failed (2) - please ensure you entered the URL correctly", @response.error
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def url_search_response_duplicate
|
92
|
+
File.read(File.expand_path('../url_search_response_duplicate.xml', __FILE__))
|
93
|
+
end
|
94
|
+
|
95
|
+
def url_search_response_not_duplicate
|
96
|
+
File.read(File.expand_path('../url_search_response_not_duplicate.xml', __FILE__))
|
97
|
+
end
|
98
|
+
|
99
|
+
def error_response
|
100
|
+
File.read(File.expand_path('../error_response.xml', __FILE__))
|
101
|
+
end
|
102
|
+
|
103
|
+
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class TextSearchTest < Test::Unit::TestCase
|
4
|
+
|
5
|
+
context "request_url" do
|
6
|
+
|
7
|
+
should "have the correct url" do
|
8
|
+
Copyscape::TextSearch.expects(:post).once.returns(url_search_response_duplicate).with do |path, options|
|
9
|
+
params = options[:body]
|
10
|
+
assert_equal '/', path
|
11
|
+
assert_equal 'joe', params[:u]
|
12
|
+
assert_equal '123abc', params[:k]
|
13
|
+
assert_equal 'this is some text', params[:t]
|
14
|
+
assert_equal 'csearch', params[:o]
|
15
|
+
assert_equal 'UTF-8', params[:e]
|
16
|
+
end
|
17
|
+
@search = Copyscape::TextSearch.new('this is some text')
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
private
|
22
|
+
|
23
|
+
def url_search_response_duplicate
|
24
|
+
o = Object.new
|
25
|
+
o.stubs(:body).returns(File.read(File.expand_path('../url_search_response_duplicate.xml', __FILE__)))
|
26
|
+
o
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|