copyscape 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/
2
+ .DS_Store
3
+
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the official RubyGems index over HTTPS. The `:rubygems`
# symbol shorthand is deprecated by Bundler and points at the plain-HTTP index.
source 'https://rubygems.org'

# Take the runtime and development dependencies from copyscape.gemspec.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ copyscape (0.0.1)
5
+ httparty (= 0.7.7)
6
+ nokogiri (= 1.4.1)
7
+
8
+ GEM
9
+ remote: http://rubygems.org/
10
+ specs:
11
+ crack (0.1.8)
12
+ httparty (0.7.7)
13
+ crack (= 0.1.8)
14
+ mocha (0.9.8)
15
+ rake
16
+ nokogiri (1.4.1)
17
+ rake (0.8.7)
18
+ shoulda-context (1.0.0.beta1)
19
+
20
+ PLATFORMS
21
+ ruby
22
+
23
+ DEPENDENCIES
24
+ bundler (>= 1.0.0)
25
+ copyscape!
26
+ mocha
27
+ rake (= 0.8.7)
28
+ shoulda-context
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ copyscape
2
+ =========
3
+
4
+ Ruby wrapper for the Copyscape API.
5
+
6
+
7
+ Usage
8
+ -----
9
+
10
+ First, you'll need to set up your [Copyscape](http://www.copyscape.com/) account and whatnot. You can do
11
+ that [here](http://www.copyscape.com/signup.php?pro=1&o=f).
12
+
13
+ Next, install the gem.
14
+
15
+ $ gem install copyscape
16
+
17
+ And then you can do things like this...
18
+
19
+ Copyscape.username = 'yourname'
20
+ Copyscape.api_key = 'abc123'
21
+
22
+ # URL search
23
+ search = Copyscape.url_search("http://www.copyscape.com/example.html")
24
+ search.duplicate? # => true
25
+ search.count # => 81
26
+ search.duplicates.each do |duplicate|
27
+ puts duplicate['title']
28
+ puts duplicate['url']
29
+ puts duplicate['textsnippet']
30
+ puts duplicate['htmlsnippet']
31
+ puts duplicate['minwordsmatched']
32
+ end
33
+
34
+ # Text search
35
+ search = Copyscape.text_search('This is some text I want to check for plagiarism')
36
+ search.duplicate? # => false
37
+
38
+
39
+ Currently, there is no support in the gem for "private index" searching, though
40
+ it would be pretty easy to add.
41
+
42
+
43
+ More...
44
+ -------
45
+
46
+ I'm in no way associated with Copyscape.
47
+
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
# Gem packaging tasks are installed by Bundler's gem helper.
require 'bundler'
Bundler::GemHelper.install_tasks

# Make lib/ requirable from the test files.
$LOAD_PATH.unshift 'lib'

task :default => :test

desc "Run tests"
task :test do
  # Load every file matching test/*_test.rb by absolute path.
  Dir.glob('test/*_test.rb').each { |test_file| require File.expand_path(test_file) }
end
data/copyscape.gemspec ADDED
@@ -0,0 +1,31 @@
1
# -*- encoding: utf-8 -*-
require File.expand_path("../lib/copyscape/version", __FILE__)

# Nasty work around to keep the dependencies in Syck.
# YAML::ENGINE is not defined on every Ruby, so only switch the engine where
# the constant exists; otherwise merely loading this gemspec raises NameError.
YAML::ENGINE.yamler = 'syck' if defined?(YAML::ENGINE)

Gem::Specification.new do |s|
  s.name        = "copyscape"
  s.version     = Copyscape::Version
  s.platform    = Gem::Platform::RUBY
  s.authors     = ['Ben VandenBos']
  s.email       = ['bvandenbos@gmail.com']
  s.homepage    = "http://github.com/bvandenbos/copyscape-rb"
  s.summary     = "Ruby wrapper for Copyscape API"
  s.description = "Ruby wrapper for Copyscape API"

  s.required_rubygems_version = ">= 1.3.6"
  s.add_development_dependency "bundler", ">= 1.0.0"

  # Ask git for the tracked files once and reuse the list for both fields.
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  # Executables are the tracked files under bin/, listed without the prefix.
  s.executables  = tracked_files.map { |f| f[%r{\Abin/(.*)}, 1] }.compact
  s.require_path = 'lib'

  s.add_dependency("nokogiri", "1.4.1")
  s.add_dependency("httparty", "0.7.7")
  s.add_development_dependency("rake", "0.8.7")
  s.add_development_dependency("shoulda-context")
  s.add_development_dependency("mocha")

end
31
+
data/lib/copyscape.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'copyscape/response'
2
+ require 'copyscape/url_search'
3
+ require 'copyscape/text_search'
4
+ require 'copyscape/version'
5
+
6
# Namespace and entry point for the Copyscape API wrapper.
#
# Credentials are stored on the module itself; the two search helpers build
# the matching request object.
module Copyscape

  class << self
    # Account name and API key, set once by the application.
    attr_accessor :username, :api_key
  end

  # Builds a UrlSearch for the given url and returns it.
  def self.url_search(url)
    UrlSearch.new(url)
  end

  # Builds a TextSearch, passing every argument straight through to
  # TextSearch.new, and returns it.
  def self.text_search(*prms)
    TextSearch.new(*prms)
  end

end
@@ -0,0 +1,38 @@
1
+ require 'forwardable'
2
+ require 'httparty'
3
+
4
module Copyscape

  # Common behaviour for Copyscape API requests: credential parameters, the
  # HTTP GET/POST helpers, and read accessors forwarded to the parsed response.
  # Subclasses are expected to assign @response.
  class RequestBase

    include HTTParty
    # Use HTTPS: every request carries the username and API key as parameters.
    base_uri 'https://www.copyscape.com/api'

    extend Forwardable

    def_delegators :response, :duplicates, :duplicate?,
      :count, :error, :error?, :query, :query_words, :raw_response

    # Response exposes the number of matches as #count only, so forward
    # #duplicate_count there instead of to a method that does not exist.
    def_delegator :response, :count, :duplicate_count

    # Parameters sent with every request.
    #
    # Returns a Hash with :u (username) and :k (API key).
    # Raises RuntimeError when either credential has not been set on Copyscape.
    def base_params
      raise "You must set Copyscape.username" unless Copyscape.username
      raise "You must set Copyscape.api_key" unless Copyscape.api_key
      {:u => Copyscape.username,
       :k => Copyscape.api_key}
    end

    # The response object stored in @response.
    #
    # Raises RuntimeError when @response has not been assigned.
    def response
      raise "@response must be set" unless @response
      @response
    end

    # Issues a GET to the API root with the credentials merged into params
    # as the query string, and returns the body of the HTTP response.
    def get_response(params)
      self.class.get('/', :query => base_params.merge(params)).body
    end

    # Issues a POST to the API root with the credentials merged into params
    # as the request body, and returns the body of the HTTP response.
    def post_response(params)
      self.class.post('/', :body => base_params.merge(params)).body
    end

  end

end
@@ -0,0 +1,70 @@
1
+ require 'nokogiri'
2
+
3
module Copyscape

  # Read-only view over the XML document returned by the Copyscape API.
  class Response

    # The unparsed buffer this response was built from.
    attr_reader :raw_response

    # buffer - the XML text to parse.
    def initialize(buffer)
      @raw_response = buffer
      @document = Nokogiri(buffer)
    end

    # Text of the first <query> element, or nil when there is none.
    def query
      field('query')
    end

    # Integer value of the first <querywords> element, or nil when absent.
    def query_words
      words = field('querywords')
      words.to_i if words
    end

    # Returns the number of duplicates (0 when there is no <count> element).
    def count
      field('count').to_i
    end

    # RequestBase delegates #duplicate_count to its response; keep it as a
    # synonym for #count so that delegation does not raise NoMethodError.
    alias_method :duplicate_count, :count

    # Returns true if the response was an error
    def error?
      !!error
    end

    # Text of the first <error> element, or nil when there is none.
    def error
      field('error')
    end

    # Returns true if there are one or more duplicates
    def duplicate?
      count > 0
    end

    # Returns an array of all the results, one hash per <result> element.
    # The array is built once and memoized.
    def duplicates
      @duplicates ||= @document.search('result').map { |result| result_to_hash(result) }
    end

    private

    # Given a result xml element, return a hash of the values we're interested in.
    # Keys are the child element names; values that consist solely of digits
    # are converted to Integer, everything else stays a String.
    def result_to_hash(result)
      result.children.inject({}) do |hash, node|
        # Skip the whitespace text nodes between child elements; they would
        # otherwise all be stored under a meaningless 'text' key.
        next hash unless node.element?

        text = node.text
        # \A and \z anchor the whole value; ^ and $ would also accept a
        # multi-line value that merely contains a numeric line.
        hash[node.name] = (text && text =~ /\A\d+\z/) ? text.to_i : text
        hash
      end
    end

    # Text of the first element called name, or nil when none exists.
    def field(name)
      node = @document.search(name).first
      node.text if node
    end

  end

end
@@ -0,0 +1,14 @@
1
+ require 'copyscape/request_base'
2
+
3
module Copyscape

  # Request that submits a piece of text to the API via POST.
  class TextSearch < RequestBase

    # text     - the text to send as the :t parameter.
    # encoding - value sent as the :e parameter (defaults to 'UTF-8').
    #
    # The request is performed immediately; its body is wrapped in a Response
    # and stored in @response.
    def initialize(text, encoding = 'UTF-8')
      payload = { :o => 'csearch', :t => text, :e => encoding }
      @response = Response.new(post_response(payload))
    end

  end

end
@@ -0,0 +1,14 @@
1
+ require 'copyscape/request_base'
2
+
3
module Copyscape

  # Request that submits a URL to the API via GET.
  class UrlSearch < RequestBase

    # url - the address to send as the :q parameter.
    #
    # The request is performed immediately; its body is wrapped in a Response
    # and stored in @response.
    def initialize(url)
      query = { :o => 'csearch', :q => url }
      @response = Response.new(get_response(query))
    end

  end

end
@@ -0,0 +1,4 @@
1
+
2
module Copyscape
  # Version string of the gem; copyscape.gemspec reads this constant.
  Version = '0.0.4'
end
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <response>
3
+ <error>connection failed (2) - please ensure you entered the URL correctly</error>
4
+ <query>http://www.somethingthatdoesntexist.com/</query>
5
+ </response>
@@ -0,0 +1,103 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'copyscape/response'
3
+
4
# Exercises Copyscape::Response against the XML fixtures stored next to this
# file (duplicate, not-duplicate and error responses).
class ResponseTest < Test::Unit::TestCase

  context "response" do
    setup do
      @response = Copyscape::Response.new(url_search_response_duplicate)
    end

    should "include query" do
      assert_equal "http://www.copyscape.com/example.html", @response.query
    end

    should "include query words" do
      assert_equal 1340, @response.query_words
    end

    should "not be an error" do
      assert !@response.error?
    end
  end

  context "duplicate" do

    setup do
      @response = Copyscape::Response.new(url_search_response_duplicate)
    end

    should "include count" do
      assert_equal 81, @response.count
    end

    should "be a duplicate" do
      assert @response.duplicate?
    end

    should "include duplicates" do
      dup = @response.duplicates.first
      assert_equal 'http://www.archives.gov/exhibits/charters/declaration_transcript.html', dup['url']
      assert_equal "Declaration of Independence - Transcript", dup['title']
      assert_equal "... We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... But when a long train of abuses and usurpations, pursuing invariably the same ... to pass Laws of immediate and pressing importance, unless suspended in their ... for opposing with manly firmness his invasions on the rights of the people. ... English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... He has plundered our seas, ravaged our Coasts, burnt our towns, ... of Cruelty & perfidy scarcely paralleled in the most barbarous ages, ... He has constrained our fellow Citizens taken Captive on the high Seas to bear ... the merciless Indian Savages, whose known rule of warfare, ... which, would inevitably interrupt our connections and correspondence. ... on the protection of divine Providence, we mutually pledge to each other our ... ",
        dup['textsnippet']
      assert_equal "<font color=\"#777777\">... </font><font color=\"#000000\">We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... </font><font color=\"#777777\"></font><font color=\"#000000\">But when a long train of abuses and usurpations, pursuing invariably the same ... </font><font color=\"#777777\"></font><font color=\"#000000\">to pass Laws of immediate and pressing importance, unless suspended in their ... </font><font color=\"#777777\"></font><font color=\"#000000\">for opposing with manly firmness his invasions on the rights of the people. ... </font><font color=\"#777777\"></font><font color=\"#000000\">English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has plundered our seas, ravaged our Coasts, burnt our towns, ... </font><font color=\"#777777\"></font><font color=\"#000000\">of Cruelty &amp; perfidy scarcely paralleled in the most barbarous ages, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has constrained our fellow Citizens taken Captive on the high Seas to bear ... </font><font color=\"#777777\"></font><font color=\"#000000\">the merciless Indian Savages, whose known rule of warfare, ... </font><font color=\"#777777\"></font><font color=\"#000000\">which, would inevitably interrupt our connections and correspondence. ... </font><font color=\"#777777\"></font><font color=\"#000000\">on the protection of divine Providence, we mutually pledge to each other our ... </font>",
        dup['htmlsnippet']
      # Digit-only fields come back as Integer, not String.
      assert_equal 134, dup['minwordsmatched']
    end

    should "have the right amount of duplicates" do
      assert_equal @response.count, @response.duplicates.length
    end

  end

  context "not duplicate" do
    setup do
      @response = Copyscape::Response.new(url_search_response_not_duplicate)
    end

    should "include count" do
      assert_equal 0, @response.count
    end

    # Titles describe what is asserted: the negative case.
    should "not be a duplicate" do
      assert !@response.duplicate?
    end

    should "have no duplicates" do
      assert @response.duplicates.empty?
    end

  end

  context "error" do
    setup do
      @response = Copyscape::Response.new(error_response)
    end

    should "be an error" do
      assert @response.error?
    end

    should "return error message" do
      assert_equal "connection failed (2) - please ensure you entered the URL correctly", @response.error
    end

  end

  private

  # Fixture: XML for a URL search that found duplicates.
  def url_search_response_duplicate
    File.read(File.expand_path('../url_search_response_duplicate.xml', __FILE__))
  end

  # Fixture: XML for a URL search that found no duplicates.
  def url_search_response_not_duplicate
    File.read(File.expand_path('../url_search_response_not_duplicate.xml', __FILE__))
  end

  # Fixture: XML for an API error response.
  def error_response
    File.read(File.expand_path('../error_response.xml', __FILE__))
  end

end
@@ -0,0 +1,11 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda-context'
4
+ require 'mocha'
5
+
6
# Put this checkout's lib/ directory at the front of the load path so that
# `require 'copyscape'` below resolves to it.
lib_dir = File.dirname(File.expand_path(__FILE__)) + '/../lib'
$LOAD_PATH.unshift lib_dir

require 'copyscape'

# Fixed dummy credentials; the request tests assert on these exact values.
Copyscape.username = 'joe'
Copyscape.api_key = '123abc'
@@ -0,0 +1,29 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Verifies the parameters Copyscape::TextSearch posts, with the HTTP call
# replaced by a mocha expectation.
class TextSearchTest < Test::Unit::TestCase

  context "request_url" do

    should "have the correct url" do
      expectation = Copyscape::TextSearch.expects(:post).once.returns(url_search_response_duplicate)
      # The block receives the arguments passed to .post and checks each one.
      expectation.with do |path, options|
        body = options[:body]
        assert_equal '/', path
        assert_equal 'joe', body[:u]
        assert_equal '123abc', body[:k]
        assert_equal 'this is some text', body[:t]
        assert_equal 'csearch', body[:o]
        assert_equal 'UTF-8', body[:e]
      end
      @search = Copyscape::TextSearch.new('this is some text')
    end
  end

  private

  # Stand-in for an HTTP response: an object whose #body returns the
  # duplicate-search XML fixture.
  def url_search_response_duplicate
    fixture_path = File.expand_path('../url_search_response_duplicate.xml', __FILE__)
    fake_http_response = Object.new
    fake_http_response.stubs(:body).returns(File.read(fixture_path))
    fake_http_response
  end

end