si-copyscape 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +64 -0
- data/Guardfile +42 -0
- data/README.md +112 -0
- data/lib/copyscape.rb +70 -0
- data/lib/copyscape_api.rb +39 -0
- data/lib/copyscape_matches.rb +52 -0
- data/lib/copyscape_response.rb +47 -0
- data/lib/si-copyscape.rb +8 -0
- data/lib/version.rb +5 -0
- data/si-copyscape.gemspec +28 -0
- data/test/cassettes/api_add_to_private_index.yml +42 -0
- data/test/cassettes/api_balance.yml +43 -0
- data/test/cassettes/api_error.yml +42 -0
- data/test/cassettes/api_internet_and_private_matches.yml +163 -0
- data/test/cassettes/api_internet_matches.yml +170 -0
- data/test/cassettes/api_private_matches.yml +50 -0
- data/test/test_copyscape.rb +89 -0
- data/test/test_copyscape_api.rb +15 -0
- data/test/test_copyscape_matches.rb +71 -0
- data/test/test_copyscape_response.rb +47 -0
- data/test/test_helper.rb +16 -0
- metadata +163 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6aa16fb6129dc878e9954870974c18f3e1e6f6ee
|
4
|
+
data.tar.gz: 94caa08afc29b45beeabb904e83ad3b3b9261e5e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d9aa789ecab6eba516c76ca4da9ca91eb5d85a05eefb1611513aab46ecdee978d5d94054b82b27b845e5dd1f7c53c626bc673e4a45a3b460ce920a4ef72a102c
|
7
|
+
data.tar.gz: 49abd6235ea1b75ff648a4c82de9b6c29fe4513ef56e461a4052dca808be675e8cc8d12cb3c9a8cadecaeb021f0d87ebe2e676f08d90e2dc9a9cd4ebf62d4213
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.gem
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
si-copyscape (0.1.0)
|
5
|
+
crack
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
addressable (2.3.8)
|
11
|
+
coderay (1.1.0)
|
12
|
+
crack (0.4.2)
|
13
|
+
safe_yaml (~> 1.0.0)
|
14
|
+
ffi (1.9.10)
|
15
|
+
formatador (0.2.5)
|
16
|
+
guard (2.13.0)
|
17
|
+
formatador (>= 0.2.4)
|
18
|
+
listen (>= 2.7, <= 4.0)
|
19
|
+
lumberjack (~> 1.0)
|
20
|
+
nenv (~> 0.1)
|
21
|
+
notiffany (~> 0.0)
|
22
|
+
pry (>= 0.9.12)
|
23
|
+
shellany (~> 0.0)
|
24
|
+
thor (>= 0.18.1)
|
25
|
+
guard-compat (1.2.1)
|
26
|
+
guard-minitest (2.4.4)
|
27
|
+
guard-compat (~> 1.2)
|
28
|
+
minitest (>= 3.0)
|
29
|
+
listen (3.0.3)
|
30
|
+
rb-fsevent (>= 0.9.3)
|
31
|
+
rb-inotify (>= 0.9)
|
32
|
+
lumberjack (1.0.9)
|
33
|
+
method_source (0.8.2)
|
34
|
+
minitest (5.8.0)
|
35
|
+
nenv (0.2.0)
|
36
|
+
notiffany (0.0.7)
|
37
|
+
nenv (~> 0.1)
|
38
|
+
shellany (~> 0.0)
|
39
|
+
pry (0.10.1)
|
40
|
+
coderay (~> 1.1.0)
|
41
|
+
method_source (~> 0.8.1)
|
42
|
+
slop (~> 3.4)
|
43
|
+
rb-fsevent (0.9.5)
|
44
|
+
rb-inotify (0.9.5)
|
45
|
+
ffi (>= 0.5.0)
|
46
|
+
safe_yaml (1.0.4)
|
47
|
+
shellany (0.0.1)
|
48
|
+
slop (3.6.0)
|
49
|
+
thor (0.19.1)
|
50
|
+
vcr (2.9.3)
|
51
|
+
webmock (1.21.0)
|
52
|
+
addressable (>= 2.3.6)
|
53
|
+
crack (>= 0.3.2)
|
54
|
+
|
55
|
+
PLATFORMS
|
56
|
+
ruby
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
guard
|
60
|
+
guard-minitest
|
61
|
+
minitest
|
62
|
+
si-copyscape!
|
63
|
+
vcr
|
64
|
+
webmock
|
data/Guardfile
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exists?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
guard :minitest do
|
19
|
+
# with Minitest::Unit
|
20
|
+
watch(%r{^test/(.*)\/?test_(.*)\.rb$})
|
21
|
+
watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "test/#{m[1]}test_#{m[2]}.rb" }
|
22
|
+
watch(%r{^test/test_helper\.rb$}) { 'test' }
|
23
|
+
|
24
|
+
# with Minitest::Spec
|
25
|
+
# watch(%r{^spec/(.*)_spec\.rb$})
|
26
|
+
# watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
27
|
+
# watch(%r{^spec/spec_helper\.rb$}) { 'spec' }
|
28
|
+
|
29
|
+
# Rails 4
|
30
|
+
# watch(%r{^app/(.+)\.rb$}) { |m| "test/#{m[1]}_test.rb" }
|
31
|
+
# watch(%r{^app/controllers/application_controller\.rb$}) { 'test/controllers' }
|
32
|
+
# watch(%r{^app/controllers/(.+)_controller\.rb$}) { |m| "test/integration/#{m[1]}_test.rb" }
|
33
|
+
# watch(%r{^app/views/(.+)_mailer/.+}) { |m| "test/mailers/#{m[1]}_mailer_test.rb" }
|
34
|
+
# watch(%r{^lib/(.+)\.rb$}) { |m| "test/lib/#{m[1]}_test.rb" }
|
35
|
+
# watch(%r{^test/.+_test\.rb$})
|
36
|
+
# watch(%r{^test/test_helper\.rb$}) { 'test' }
|
37
|
+
|
38
|
+
# Rails < 4
|
39
|
+
# watch(%r{^app/controllers/(.*)\.rb$}) { |m| "test/functional/#{m[1]}_test.rb" }
|
40
|
+
# watch(%r{^app/helpers/(.*)\.rb$}) { |m| "test/helpers/#{m[1]}_test.rb" }
|
41
|
+
# watch(%r{^app/models/(.*)\.rb$}) { |m| "test/unit/#{m[1]}_test.rb" }
|
42
|
+
end
|
data/README.md
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# SI::CopyScape Gem
|
2
|
+
A gem to provide the communication layer with the Copyscape.com Premium API and your application.
|
3
|
+
|
4
|
+
Wiki: (requires login) http://wiki.searchinfluence.com/index.php?title=CopyScape
|
5
|
+
|
6
|
+
API Documentation: (requires login) http://copyscape.com/apiconfigure.php
|
7
|
+
|
8
|
+
## Usage:
|
9
|
+
Set the following environment variables (or pass the values in during initialization):
|
10
|
+
- ENV['COPYSCAPE_USERNAME']
|
11
|
+
- ENV['COPYSCAPE_API_KEY']
|
12
|
+
|
13
|
+
Find the most recent version of this gem at http://gems.searchinfluence.com/gems/si-copyscape and then specify the version number when adding this gem to your application's Gemfile.
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
# Add the following to your Gemfile
|
17
|
+
source 'http://user:pass@gems.searchinfluence.com' do
|
18
|
+
gem 'si-copyscape', '0.0.0'
|
19
|
+
end
|
20
|
+
```
|
21
|
+
|
22
|
+
Instantiate the copyscape object with the following optional parameters
|
23
|
+
```ruby
|
24
|
+
copyscape = SI::CopyScape.new(
|
25
|
+
username: "foo", # not required if ENV['COPYSCAPE_USERNAME'] is set
|
26
|
+
api_key: "bar", # not required if ENV['COPYSCAPE_API_KEY'] is set
|
27
|
+
uri: "http://foo", # defaults to http://www.copyscape.com/api/
|
28
|
+
match_percent: 10 # defaults to 5
|
29
|
+
)
|
30
|
+
```
|
31
|
+
|
32
|
+
Get Remaining Credit Information
|
33
|
+
```ruby
|
34
|
+
copyscape.credit_balance
|
35
|
+
#=><struct SI::CopyScape::Balance value=112.62, total=2252, today=2252>
|
36
|
+
```
|
37
|
+
|
38
|
+
Perform text searches for copy matches
|
39
|
+
```ruby
|
40
|
+
text = "A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers."
|
41
|
+
|
42
|
+
# Performs a public internet search (CREDIT COST: 1)
|
43
|
+
copyscape.internet_matches! text
|
44
|
+
|
45
|
+
# Performs a private index search (CREDIT COST: 1)
|
46
|
+
copyscape.private_matches! text
|
47
|
+
|
48
|
+
# Performs a public internet & private index search (CREDIT COST: 2)
|
49
|
+
copyscape.internet_and_private_matches! text
|
50
|
+
|
51
|
+
# All these methods return an array of data structs ex:
|
52
|
+
#=>
|
53
|
+
# [
|
54
|
+
# <struct SI::CopyScape::Match
|
55
|
+
# words_matched = 20,
|
56
|
+
# percent_matched = 100,
|
57
|
+
# title = "Search Influence | Website Promotion Company",
|
58
|
+
# url = "http://www.searchinfluence.com/",
|
59
|
+
# copyscape_url = "http://view.copyscape.com/compare/wpbdhatumu/1",
|
60
|
+
# text_snippet = "... Trusted, Scalable Search, Social and Online Advertising. A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers.",
|
61
|
+
# html_snippet = "<font color=\"#777777\">... Trusted, Scalable Search, Social and Online Advertising. </font><font color=\"#000000\">A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers.</font>"
|
62
|
+
# >
|
63
|
+
# ]
|
64
|
+
```
|
65
|
+
|
66
|
+
Add text to our private index on Copyscape.com
|
67
|
+
```ruby
|
68
|
+
copyscape.add_to_private_index(
|
69
|
+
text: 'Text to add to index',
|
70
|
+
title: 'Title', # not required
|
71
|
+
id: 420 # not required
|
72
|
+
)
|
73
|
+
#=><struct SI::CopyScape::PrivateIndex words=5, handle="SIA_2_E00JOQ0A2W_T1Q2J78LA1", id="420", title="Title">
|
74
|
+
```
|
75
|
+
|
76
|
+
If there is an error, you can get a string describing the error (returns nil if there is no error)
|
77
|
+
```ruby
|
78
|
+
copyscape.internet_matches! "test"
|
79
|
+
copyscape.error
|
80
|
+
#=>"At least 15 words are required to perform a search"
|
81
|
+
```
|
82
|
+
|
83
|
+
The error method is also available on the SI::CopyscapeMatches collection returned after a search
|
84
|
+
```ruby
|
85
|
+
matches = copyscape.internet_matches! "test"
|
86
|
+
matches.error
|
87
|
+
#=>"At least 15 words are required to perform a search"
|
88
|
+
```
|
89
|
+
|
90
|
+
## Flog Score
|
91
|
+
```
|
92
|
+
177.7: flog total
|
93
|
+
5.2: flog/method average
|
94
|
+
|
95
|
+
35.0: SI::CopyscapeMatches total
|
96
|
+
16.5: SI::CopyscapeMatches#_build_collection lib/copyscape_matches.rb:28
|
97
|
+
11.0: SI::CopyscapeMatches#_without_rejects lib/copyscape_matches.rb:47
|
98
|
+
7.5: SI::CopyscapeMatches#initialize lib/copyscape_matches.rb:7
|
99
|
+
|
100
|
+
31.6: SI::CopyScape total
|
101
|
+
11.0: SI::CopyScape#credit_balance lib/copyscape.rb:23
|
102
|
+
9.6: SI::CopyScape#add_to_private_index lib/copyscape.rb:52
|
103
|
+
6.0: SI::CopyScape#initialize lib/copyscape.rb:11
|
104
|
+
5.0: SI::CopyScape#none
|
105
|
+
|
106
|
+
25.8: SI::CopyscapeAPI total
|
107
|
+
14.8: SI::CopyscapeAPI#request lib/copyscape_api.rb:12
|
108
|
+
11.0: SI::CopyscapeAPI#_call_api lib/copyscape_api.rb:27
|
109
|
+
|
110
|
+
7.1: SI::CopyscapeResponse total
|
111
|
+
7.1: SI::CopyscapeResponse#results lib/copyscape_response.rb:27
|
112
|
+
```
|
data/lib/copyscape.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module SI
  # Public interface of the gem: wraps an SI::CopyscapeAPI instance and turns
  # its responses into simple structs (Balance, PrivateIndex) or an
  # SI::CopyscapeMatches collection.
  class CopyScape

    # Data Structures used for responses to the public interface
    Match = Struct.new(:words_matched, :percent_matched, :title, :url, :copyscape_url, :text_snippet, :html_snippet)
    Balance = Struct.new(:value, :total, :today)
    PrivateIndex = Struct.new(:words, :handle, :id, :title)

    attr_reader :api, :match_percent

    # username      - falls back to ENV['COPYSCAPE_USERNAME'] when nil
    # api_key       - falls back to ENV['COPYSCAPE_API_KEY'] when nil
    # uri           - falls back to 'http://www.copyscape.com/api/' when nil
    # match_percent - handed to every SI::CopyscapeMatches built by the
    #                 search methods (default 5)
    def initialize username: nil, api_key: nil, uri: nil, match_percent: 5
      username ||= ENV['COPYSCAPE_USERNAME']
      api_key ||= ENV['COPYSCAPE_API_KEY']
      uri ||= 'http://www.copyscape.com/api/'
      @match_percent = match_percent
      @api = SI::CopyscapeAPI.new(username: username, api_key: api_key, api_url: uri)
    end

    # Error reported by the most recent API response.
    # Returns nil when no response is stored on the API object (for example
    # before the first request) instead of raising NoMethodError.
    def error
      last_response = api.response
      last_response && last_response.error
    end

    # Requests the 'balance' operation.
    # Returns a Balance struct, or nil when the response carries no
    # 'remaining' section (check #error in that case).
    def credit_balance
      response = _request(operation: 'balance')
      remaining = response && response.remaining
      return unless remaining.is_a?(Hash)

      Balance.new(remaining['value'].to_f, remaining['total'].to_i, remaining['today'].to_i)
    end

    # cost 1 credit
    def internet_matches! text
      _matches_for text, 'csearch'
    end

    # cost 1 credit
    def private_matches! text
      _matches_for text, 'psearch'
    end

    # cost 2 credits
    def internet_and_private_matches! text
      _matches_for text, 'cpsearch'
    end

    # Requests the 'pindexadd' operation with +text+ as the POST body.
    # title, id and encoding are sent as the a, i and e query parameters.
    # Returns a PrivateIndex struct, or nil when no response hash is
    # available (check #error in that case).
    def add_to_private_index text:, title: nil, id: nil, encoding: 'UTF-8'
      params = { e: encoding, a: title, i: id }
      response = _request(operation: 'pindexadd', params: params, postdata: text)
      res = response && response.response
      return unless res.is_a?(Hash)

      PrivateIndex.new(res['words'].to_i, res['handle'], res['id'], res['title'])
    end

    private

    # Runs the given search operation and wraps the raw response in an
    # SI::CopyscapeMatches using this instance's match_percent.
    def _matches_for text, operation
      SI::CopyscapeMatches.new(
        response: _text_search(text: text, operation: operation),
        match_percent: match_percent
      )
    end

    # Sends +text+ as the POST body of a search operation; encoding and full
    # become the e and c query parameters.
    def _text_search text:, operation:, encoding: 'UTF-8', full: 3
      params = { e: encoding, c: full.to_s }
      _request(operation: operation, params: params, postdata: text)
    end

    # Single point through which every call reaches the API object.
    def _request operation:, params: {}, postdata: nil
      api.request(operation: operation, params: params, postdata: postdata)
    end

  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module SI
  # HTTP layer: builds the request URI from the credentials, the operation and
  # any extra parameters, performs the call, and stores the wrapped response.
  class CopyscapeAPI

    attr_reader :api_url, :api_key, :username, :response

    def initialize username:, api_key:, api_url:
      @username = username
      @api_key = api_key
      @api_url = api_url
    end

    # Performs +operation+ against the API.
    #
    # params   - extra query parameters; the u/k/o entries built here win over
    #            same-named keys in params
    # postdata - when nil the call is a GET, otherwise a POST with this body
    #
    # Stores and returns an SI::CopyscapeResponse, or nil when the HTTP
    # response is neither a success nor a redirection.
    def request operation:, params: {}, postdata: nil
      @response = _respond_to _call_api(_build_uri(operation, params), postdata)
    end

    private

    # Assembles the request URI. Values are form-encoded so characters such as
    # '&', '=' or spaces inside a value cannot corrupt the query string; nil
    # values are sent as empty strings ("key=").
    def _build_uri operation, params
      credentials = { u: username, k: api_key, o: operation }
      pairs = params.merge(credentials).map { |key, value| [key, value.to_s] }
      URI.parse("#{api_url}?#{URI.encode_www_form(pairs)}")
    end

    # Wraps a successful (or redirection) HTTP response; returns nil otherwise.
    def _respond_to request
      return unless request.is_a?(Net::HTTPSuccess) || request.is_a?(Net::HTTPRedirection)

      SI::CopyscapeResponse.new(raw_response: request)
    end

    # GET when there is no body to send, POST otherwise.
    def _call_api uri, postdata=nil
      return Net::HTTP.get_response(uri) if postdata.nil?

      http = Net::HTTP.new(uri.host, uri.port)
      # Net::HTTP.new does not enable TLS on its own, unlike get_response(uri).
      http.use_ssl = uri.scheme == 'https'
      post = Net::HTTP::Post.new(uri.request_uri)
      post.body = postdata
      http.request(post)
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module SI
  # Enumerable collection of SI::CopyScape::Match structs built from a search
  # response. Results below match_percent, or carrying a 'urlerror' entry, are
  # dropped; the rest are ordered by percent matched, highest first.
  class CopyscapeMatches

    include Enumerable
    attr_reader :collection, :query_word_count, :match_percent, :error

    # response      - object answering query_words, results and error
    # match_percent - minimum 'percentmatched' a result needs to be kept
    def initialize response:, match_percent:
      @match_percent = match_percent.to_i
      @query_word_count = response.query_words.to_i
      @collection = _build_collection response.results
      @error = response.error
    end

    # Text snippet of every match, in collection order.
    def all_text_snippets
      collection.map(&:text_snippet)
    end

    # HTML snippet of every match, in collection order.
    def all_html_snippets
      collection.map(&:html_snippet)
    end

    # Required by Enumerable; delegates to the underlying array.
    def each(&block)
      collection.each(&block)
    end

    private

    # Filters the raw result hashes, converts them to Match structs, and sorts.
    def _build_collection results
      accepted = _without_rejects(results)
      matches = accepted.map { |row| _to_match(row) }
      _sort matches
    end

    # Maps one raw result hash onto the positional Match struct fields.
    def _to_match row
      SI::CopyScape::Match.new(
        row['wordsmatched'].to_i,
        row['percentmatched'].to_i,
        *row.values_at('title', 'url', 'viewurl', 'textsnippet', 'htmlsnippet')
      )
    end

    # Descending by percent matched.
    def _sort results
      results.sort { |left, right| right.percent_matched <=> left.percent_matched }
    end

    # Keeps results that reach match_percent and have no 'urlerror' entry.
    def _without_rejects results
      results.select do |row|
        row['percentmatched'].to_i >= match_percent && row['urlerror'].nil?
      end
    end

  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SI
  # Wraps a raw HTTP response whose body is XML and exposes the parsed
  # sections used by the rest of the gem.
  class CopyscapeResponse

    attr_reader :raw_response, :error

    # raw_response - object answering #body with the XML payload
    def initialize raw_response:
      @raw_response = raw_response
      @error = _error_msg
    end

    # Unparsed response body.
    def raw_xml
      raw_response.body
    end

    # Parsed body. Memoized so that the accessors below share a single parse
    # instead of re-parsing the XML on every call.
    def raw_hash
      @raw_hash ||= _to_hash
    end

    # The 'remaining' section of the parsed body (nil when absent).
    def remaining
      raw_hash['remaining']
    end

    # The 'response' section of the parsed body (nil when absent).
    def response
      raw_hash['response']
    end

    # Always an Array: a lone result is wrapped and a missing one yields [].
    # Array(result) is deliberately not used, as it would explode a single
    # result Hash into key/value pairs.
    def results
      section = response
      result = section['result'] if section.is_a?(Hash)
      result.is_a?(Array) ? result : [result].compact
    end

    # Integer 'querywords' value, or nil when there is no response hash.
    def query_words
      section = response
      section['querywords'].to_i if section.is_a?(Hash)
    end

    private

    def _to_hash
      Crack::XML.parse(raw_xml)
    end

    # 'error' entry of the response hash, or nil.
    def _error_msg
      section = response
      section['error'] if section.is_a?(Hash)
    end

  end
end
|