galetahub-copyscape 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 7668fcb74ce8f1b89373abc290d5787823ab6200
4
+ data.tar.gz: 6990772cee3f7d5d70d5dc5c1698b5a701cf6043
5
+ SHA512:
6
+ metadata.gz: ac1bbbb27fbd06c5feaad9bb34c4eb308d56c02dc1e0dfa95680ca703d9da2a9bb03cf2b886a2c18938c3a6684b9e25e67ceccad22e11d21e1b76bca97cb8e5c
7
+ data.tar.gz: fa8a45afffed617caff217e7c65b0d608a78a886a9bd000f124a74c30df7af06d5826b24718b7f916581129d3206587eedbac552bb0c7c69ba1d3ff5a0675569
@@ -0,0 +1,20 @@
1
+ Copyright 2018 Fodojo LLC
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,51 @@
1
+ copyscape
2
+ =========
3
+
4
+ Ruby wrapper for the Copyscape API.
5
+
6
+
7
+ Usage
8
+ -----
9
+
10
+ First, you'll need to set up your [Copyscape](http://www.copyscape.com/) account and whatnot. You can do
11
+ that [here](http://www.copyscape.com/signup.php?pro=1&o=f).
12
+ API documentation [here](http://www.copyscape.com/apiconfigure.php).
13
+
14
+ Next, install the gem.
15
+
16
+ $ gem install galetahub-copyscape
17
+
18
+ And then you can do things like this...
19
+
20
+ Copyscape.username = 'yourname'
21
+ Copyscape.api_key = 'abc123'
22
+
23
+ # URL search
24
+ search = Copyscape.url_search("http://www.copyscape.com/example.html")
25
+ search.duplicate? # => true
26
+ search.count # => 81
27
+ search.duplicates.each do |duplicate|
28
+ puts duplicate['title']
29
+ puts duplicate['url']
30
+ puts duplicate['textsnippet']
31
+ puts duplicate['htmlsnippet']
32
+ puts duplicate['minwordsmatched']
33
+ end
34
+
35
+ # Text search
36
+ search = Copyscape.text_search('This is some text I want to check for plagiarism')
37
+ search.duplicate? # => false
38
+
39
+ # Search with additional options
40
+ Copyscape.text_search('This is some text I want to check for plagiarism', :x => 1, :c => 5)
41
+
42
+
43
+ Currently, there is no support in the gem for "private index" searching, though
44
+ it would be pretty easy to add.
45
+
46
+
47
+ More...
48
+ -------
49
+
50
+ I'm in no way associated with Copyscape.
51
+
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rake'
4
+ require 'rake/testtask'
5
+
6
+ require File.join(File.dirname(__FILE__), 'lib', 'copyscape', 'version')
7
+
8
# Running a bare `rake` runs the unit tests.
desc 'Default: run unit tests.'
task :default => :test

# Defines the `test` task: every file matching test/**/*_test.rb is run with
# both lib/ and test/ added to the load path.
desc 'Test the copyscape gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
@@ -0,0 +1,23 @@
1
+ require 'copyscape/response'
2
+ require 'copyscape/url_search'
3
+ require 'copyscape/text_search'
4
+ require 'copyscape/balance'
5
+ require 'copyscape/version'
6
+
7
# Entry point of the gem: holds the API credentials and offers one shortcut
# constructor per request type.
module Copyscape
  class << self
    # Credentials that the request classes read when building their parameters.
    attr_accessor :username, :api_key
  end

  # Shortcut for UrlSearch.new; all arguments are forwarded unchanged.
  def self.url_search(*args)
    UrlSearch.new(*args)
  end

  # Shortcut for TextSearch.new; all arguments are forwarded unchanged.
  def self.text_search(*args)
    TextSearch.new(*args)
  end

  # Shortcut for Balance.new; all arguments are forwarded unchanged.
  def self.balance(*args)
    Balance.new(*args)
  end
end
@@ -0,0 +1,15 @@
1
+ require 'copyscape/request_base'
2
+
3
module Copyscape
  # Account balance request (API operation 'balance').
  class Balance < RequestBase
    # Sends the request immediately and stores the parsed Response.
    #
    # format  - response format, sent as the `f` parameter after to_s.
    # options - extra request parameters; on a key clash they override
    #           the defaults built here.
    def initialize(format = :xml, options = {})
      defaults = { f: format.to_s, o: 'balance' }
      body = get_response(defaults.merge(options))
      @response = Response.new(body)
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'forwardable'
2
+ require 'httparty'
3
+
4
module Copyscape
  # Base class of all API requests. Subclasses perform the HTTP call in their
  # constructor and store a Response in @response; the result accessors below
  # are then forwarded to that Response.
  class RequestBase
    include HTTParty
    extend Forwardable

    # Every request carries the username and API key (see #base_params), so the
    # endpoint is addressed over HTTPS to keep them out of clear text.
    base_uri 'https://www.copyscape.com/api'

    def_delegators :response, :duplicate_count, :duplicates, :duplicate?,
                   :count, :error, :error?, :query, :query_words, :raw_response,
                   :allwordsmatched, :allpercentmatched, :alltextmatched, :allviewurl

    # API operation (o): Use csearch to search against the public Internet or psearch to search against your private index.
    #   You can also use cpsearch to search against both the Internet and your private index, for the cost of two search credits.
    # Text encoding (e): Use an IANA name, such as UTF-8 (Unicode), ISO-8859-1 (Latin-1) or WINDOWS-1251 (Cyrillic).
    # Text to be searched (t): If you are using the Raw POST method, the raw text should be supplied
    #   in the POST payload without a parameter name and without any urlencoding.
    # Full comparisons (c): Set to a value between 1 and 10 to request a full text-on-text comparison
    #   (with an exact count of matching words) between the query text and the top (one to ten) results found.
    #   Note that full comparisons may add a delay of a few seconds.
    # Response format (f): If omitted or set to xml, the API will respond in XML.
    #   If set to html, the API will respond in basic HTML.
    # Example test (x): If set to 1, the API will search the Internet for copies of the text on this page and
    #   you will not be charged.
    #
    # Returns the credential parameters (u, k) that are merged into every request.
    # Raises RuntimeError when Copyscape.username or Copyscape.api_key is not set.
    def base_params
      raise 'You must set Copyscape.username' unless Copyscape.username
      raise 'You must set Copyscape.api_key' unless Copyscape.api_key

      { u: Copyscape.username, k: Copyscape.api_key }
    end

    # Returns the Response stored by the subclass constructor.
    # Raises RuntimeError when no response has been stored.
    def response
      raise '@response must be set' unless @response

      @response
    end

    # Performs a GET with the credentials plus +params+ as the query string
    # and returns the response body.
    def get_response(params)
      self.class.get('/', query: base_params.merge(params)).body
    end

    # Performs a POST with the credentials plus +params+ as the request body
    # and returns the response body.
    def post_response(params)
      self.class.post('/', body: base_params.merge(params)).body
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # More info here http://www.copyscape.com/api-guide.php
2
+ #
3
+ require 'nokogiri'
4
+
5
module Copyscape
  # Parsed view of an API response body. Values are looked up by element name
  # in the document produced by Nokogiri().
  class Response
    # The response body exactly as it was passed to the constructor.
    attr_reader :raw_response

    # buffer - the response body to parse.
    def initialize(buffer)
      @raw_response = buffer
      @document = Nokogiri(buffer)
    end

    # URL searched
    # Present: if a URL search
    # The <query> value may differ from the original URL you supplied if there was a frameset or redirection.
    #
    def query
      field('query')
    end

    # Number of words checked (0 when the element is absent)
    def query_words
      field('querywords').to_i # nil.to_i is 0
    end

    # Returns the number of duplicates (0 when the element is absent)
    def count
      field('count').to_i
    end

    # RequestBase delegates :duplicate_count to the response, so the name must
    # exist here; it is the same value as #count.
    alias_method :duplicate_count, :count

    # Returns true if the response was an error
    def error?
      !!error
    end

    # Reason for API request failure
    # Present: if request failed
    #
    def error
      field('error')
    end

    # Number of source words matched
    # Present: if succeeded and c>=3 and o is not cpsearch
    #
    def allwordsmatched
      field('allwordsmatched').to_i
    end

    # Percentage of source words matched
    # Present: if succeeded and c>=3 and o is not cpsearch
    #
    def allpercentmatched
      field('allpercentmatched').to_i
    end

    # Full extract of source text matched
    # Present: if succeeded and c>=3 and o is not cpsearch
    #
    def alltextmatched
      field('alltextmatched')
    end

    # URL for viewing found results
    # Present: if succeeded and o is csearch
    # The <allviewurl> value can be used to display the list of results in an iframe or window.
    # If used, the contents of this page must be displayed in full, without modification.
    #
    def allviewurl
      field('allviewurl')
    end

    # Returns true if there are one or more duplicates
    def duplicate?
      count > 0
    end

    # Returns an array with one hash per <result> element (memoized).
    def duplicates
      @duplicates ||= @document.search('result').map { |result| result_to_hash(result) }
    end

    private

    # Given a result xml element, return a hash of child element name => value.
    # Values made up solely of digits are converted to Integer.
    def result_to_hash(result)
      result.children.each_with_object({}) do |node, hash|
        # Skip the whitespace text nodes between elements; they would otherwise
        # be stored under a meaningless 'text' key.
        next unless node.element?

        text = node.text
        # \A and \z anchor the whole string. With ^ and $ a multi-line snippet
        # containing a digits-only line would be mangled by to_i.
        hash[node.name] = text =~ /\A\d+\z/ ? text.to_i : text
      end
    end

    # Text of the first element called +name+, or nil when there is none.
    def field(name)
      node = @document.search(name).first
      node.text if node
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'copyscape/request_base'
2
+
3
module Copyscape
  # Text search request: posts a piece of text to the API and stores the
  # parsed Response.
  class TextSearch < RequestBase
    # The parameters are as follows:
    # Parameter  Explanation          Value                                     Required?  Default
    # u          Your username        [your username]                           Yes        -
    # k          Your API key         [your API key]                            Yes        -
    # o          API operation        csearch (or psearch or
    #                                 cpsearch if you create a private index)   Yes        -
    # e          Text encoding        [encoding name]                           Yes        -
    # t          Text to be searched  [the text]                                Yes        -
    # c          Full comparisons     0 to 10                                   No         0
    # f          Response format      xml or html                               No         xml
    # x          Example test         1 or omitted                              No         -
    #
    # text     - the text to search for.
    # encoding - encoding name sent as `e`. A Hash given in this position is
    #            treated as +options+ and the encoding falls back to 'UTF-8'.
    # options  - extra request parameters; on a key clash they override the
    #            defaults built here.
    def initialize(text, encoding = 'UTF-8', options = {})
      # Copyscape.text_search('text', :x => 1) forwards the hash positionally,
      # so it arrives in the encoding slot. Without this, the hash itself
      # would be sent as the `e` parameter and the options would be lost.
      if encoding.is_a?(Hash)
        options = encoding.merge(options)
        encoding = 'UTF-8'
      end

      params = { t: text, e: encoding, o: 'csearch', f: 'xml' }.merge(options)
      @response = Response.new(post_response(params))
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'copyscape/request_base'
2
+
3
module Copyscape
  # URL search request: asks the API to look for copies of a page and stores
  # the parsed Response.
  class UrlSearch < RequestBase
    # Parameters built here:
    #   q - the URL passed in
    #   o - API operation, 'csearch' unless overridden
    #   f - response format, 'xml' unless overridden
    #
    # url     - the URL to search for.
    # options - extra request parameters; on a key clash they override the
    #           defaults built here.
    def initialize(url, options = {})
      defaults = { q: url, o: 'csearch', f: 'xml' }
      body = get_response(defaults.merge(options))
      @response = Response.new(body)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Copyscape
  # Version string of the gem (frozen).
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1 @@
1
+ <html><head><title>Copyscape Premium balance for joe</title></head><body><B>$9.25</B> remaining, <B>185</B> searches remaining, <B>185</B> available today.</body></html>
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <remaining>
3
+ <value>9.25</value>
4
+ <total>185</total>
5
+ <today>185</today>
6
+ </remaining>
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Checks that Copyscape::Balance exposes the body returned by the HTTP layer.
# The class-level `get` is replaced by an expectation, so no request is made.
class BalanceTest < Test::Unit::TestCase
  context "request balance in xml" do
    setup do
      Copyscape::Balance.expects(:get).once.returns(stubbed_http_response(:xml))
      @balance = Copyscape::Balance.new(:xml)
    end

    should "expose the raw response body" do
      assert_equal balance_fixture(:xml), @balance.raw_response
    end
  end

  context "request balance in html" do
    setup do
      Copyscape::Balance.expects(:get).once.returns(stubbed_http_response(:html))
      @balance = Copyscape::Balance.new(:html)
    end

    should "expose the raw response body" do
      assert_equal balance_fixture(:html), @balance.raw_response
    end
  end

  private

  # Contents of the balance.<format> fixture stored next to this file.
  def balance_fixture(format)
    File.read(File.expand_path("../balance.#{format}", __FILE__))
  end

  # Stand-in for the HTTP response: only #body is stubbed, returning the fixture.
  def stubbed_http_response(format)
    http_response = Object.new
    http_response.stubs(:body).returns(balance_fixture(format))
    http_response
  end
end
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <response>
3
+ <error>connection failed (2) - please ensure you entered the URL correctly</error>
4
+ <query>http://www.somethingthatdoesntexist.com/</query>
5
+ </response>
@@ -0,0 +1,98 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+ require 'copyscape/response'
3
+
4
# Exercises Copyscape::Response against the XML fixtures stored next to this
# file: a search with duplicates, a search without duplicates, and an error.
class ResponseTest < Test::Unit::TestCase
  context "response" do
    setup do
      @response = Copyscape::Response.new(url_search_response_duplicate)
    end

    should "include query" do
      assert_equal "http://www.copyscape.com/example.html", @response.query
    end

    should "include query words" do
      assert_equal 1340, @response.query_words
    end

    should "not be an error" do
      assert !@response.error?
    end
  end

  context "duplicate" do
    setup do
      @response = Copyscape::Response.new(url_search_response_duplicate)
    end

    should "include count" do
      assert_equal 81, @response.count
    end

    should "be a duplicate" do
      assert @response.duplicate?
    end

    should "include duplicates" do
      first_duplicate = @response.duplicates.first
      assert_equal 'http://www.archives.gov/exhibits/charters/declaration_transcript.html', first_duplicate['url']
      assert_equal "Declaration of Independence - Transcript", first_duplicate['title']
      assert_equal "... We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... But when a long train of abuses and usurpations, pursuing invariably the same ... to pass Laws of immediate and pressing importance, unless suspended in their ... for opposing with manly firmness his invasions on the rights of the people. ... English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... He has plundered our seas, ravaged our Coasts, burnt our towns, ... of Cruelty & perfidy scarcely paralleled in the most barbarous ages, ... He has constrained our fellow Citizens taken Captive on the high Seas to bear ... the merciless Indian Savages, whose known rule of warfare, ... which, would inevitably interrupt our connections and correspondence. ... on the protection of divine Providence, we mutually pledge to each other our ... ",
                   first_duplicate['textsnippet']
      assert_equal "<font color=\"#777777\">... </font><font color=\"#000000\">We hold these truths to be self-evident, that all men are created equal, that they are endowed by ... </font><font color=\"#777777\"></font><font color=\"#000000\">But when a long train of abuses and usurpations, pursuing invariably the same ... </font><font color=\"#777777\"></font><font color=\"#000000\">to pass Laws of immediate and pressing importance, unless suspended in their ... </font><font color=\"#777777\"></font><font color=\"#000000\">for opposing with manly firmness his invasions on the rights of the people. ... </font><font color=\"#777777\"></font><font color=\"#000000\">English Laws in a neighbouring Province, establishing therein an Arbitrary government, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has plundered our seas, ravaged our Coasts, burnt our towns, ... </font><font color=\"#777777\"></font><font color=\"#000000\">of Cruelty &amp; perfidy scarcely paralleled in the most barbarous ages, ... </font><font color=\"#777777\"></font><font color=\"#000000\">He has constrained our fellow Citizens taken Captive on the high Seas to bear ... </font><font color=\"#777777\"></font><font color=\"#000000\">the merciless Indian Savages, whose known rule of warfare, ... </font><font color=\"#777777\"></font><font color=\"#000000\">which, would inevitably interrupt our connections and correspondence. ... </font><font color=\"#777777\"></font><font color=\"#000000\">on the protection of divine Providence, we mutually pledge to each other our ... </font>",
                   first_duplicate['htmlsnippet']
      assert_equal 134, first_duplicate['minwordsmatched']
    end

    should "have the right amount of duplicates" do
      assert_equal @response.count, @response.duplicates.length
    end
  end

  context "not duplicate" do
    setup do
      @response = Copyscape::Response.new(url_search_response_not_duplicate)
    end

    should "have a zero count" do
      assert_equal 0, @response.count
    end

    should "not be a duplicate" do
      assert !@response.duplicate?
    end

    should "have no duplicates" do
      assert @response.duplicates.empty?
    end
  end

  context "error" do
    setup do
      @response = Copyscape::Response.new(error_response)
    end

    should "be an error" do
      assert @response.error?
    end

    should "return error message" do
      assert_equal "connection failed (2) - please ensure you entered the URL correctly", @response.error
    end
  end

  private

  # Fixture: URL search response that reports duplicates.
  def url_search_response_duplicate
    File.read(File.expand_path('../url_search_response_duplicate.xml', __FILE__))
  end

  # Fixture: URL search response that reports no duplicates.
  def url_search_response_not_duplicate
    File.read(File.expand_path('../url_search_response_not_duplicate.xml', __FILE__))
  end

  # Fixture: response carrying an <error> element.
  def error_response
    File.read(File.expand_path('../error_response.xml', __FILE__))
  end
end