si-copyscape 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +64 -0
- data/Guardfile +42 -0
- data/README.md +112 -0
- data/lib/copyscape.rb +70 -0
- data/lib/copyscape_api.rb +39 -0
- data/lib/copyscape_matches.rb +52 -0
- data/lib/copyscape_response.rb +47 -0
- data/lib/si-copyscape.rb +8 -0
- data/lib/version.rb +5 -0
- data/si-copyscape.gemspec +28 -0
- data/test/cassettes/api_add_to_private_index.yml +42 -0
- data/test/cassettes/api_balance.yml +43 -0
- data/test/cassettes/api_error.yml +42 -0
- data/test/cassettes/api_internet_and_private_matches.yml +163 -0
- data/test/cassettes/api_internet_matches.yml +170 -0
- data/test/cassettes/api_private_matches.yml +50 -0
- data/test/test_copyscape.rb +89 -0
- data/test/test_copyscape_api.rb +15 -0
- data/test/test_copyscape_matches.rb +71 -0
- data/test/test_copyscape_response.rb +47 -0
- data/test/test_helper.rb +16 -0
- metadata +163 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 6aa16fb6129dc878e9954870974c18f3e1e6f6ee
|
4
|
+
data.tar.gz: 94caa08afc29b45beeabb904e83ad3b3b9261e5e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: d9aa789ecab6eba516c76ca4da9ca91eb5d85a05eefb1611513aab46ecdee978d5d94054b82b27b845e5dd1f7c53c626bc673e4a45a3b460ce920a4ef72a102c
|
7
|
+
data.tar.gz: 49abd6235ea1b75ff648a4c82de9b6c29fe4513ef56e461a4052dca808be675e8cc8d12cb3c9a8cadecaeb021f0d87ebe2e676f08d90e2dc9a9cd4ebf62d4213
|
data/.gitignore
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
*.gem
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
si-copyscape (0.1.0)
|
5
|
+
crack
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
addressable (2.3.8)
|
11
|
+
coderay (1.1.0)
|
12
|
+
crack (0.4.2)
|
13
|
+
safe_yaml (~> 1.0.0)
|
14
|
+
ffi (1.9.10)
|
15
|
+
formatador (0.2.5)
|
16
|
+
guard (2.13.0)
|
17
|
+
formatador (>= 0.2.4)
|
18
|
+
listen (>= 2.7, <= 4.0)
|
19
|
+
lumberjack (~> 1.0)
|
20
|
+
nenv (~> 0.1)
|
21
|
+
notiffany (~> 0.0)
|
22
|
+
pry (>= 0.9.12)
|
23
|
+
shellany (~> 0.0)
|
24
|
+
thor (>= 0.18.1)
|
25
|
+
guard-compat (1.2.1)
|
26
|
+
guard-minitest (2.4.4)
|
27
|
+
guard-compat (~> 1.2)
|
28
|
+
minitest (>= 3.0)
|
29
|
+
listen (3.0.3)
|
30
|
+
rb-fsevent (>= 0.9.3)
|
31
|
+
rb-inotify (>= 0.9)
|
32
|
+
lumberjack (1.0.9)
|
33
|
+
method_source (0.8.2)
|
34
|
+
minitest (5.8.0)
|
35
|
+
nenv (0.2.0)
|
36
|
+
notiffany (0.0.7)
|
37
|
+
nenv (~> 0.1)
|
38
|
+
shellany (~> 0.0)
|
39
|
+
pry (0.10.1)
|
40
|
+
coderay (~> 1.1.0)
|
41
|
+
method_source (~> 0.8.1)
|
42
|
+
slop (~> 3.4)
|
43
|
+
rb-fsevent (0.9.5)
|
44
|
+
rb-inotify (0.9.5)
|
45
|
+
ffi (>= 0.5.0)
|
46
|
+
safe_yaml (1.0.4)
|
47
|
+
shellany (0.0.1)
|
48
|
+
slop (3.6.0)
|
49
|
+
thor (0.19.1)
|
50
|
+
vcr (2.9.3)
|
51
|
+
webmock (1.21.0)
|
52
|
+
addressable (>= 2.3.6)
|
53
|
+
crack (>= 0.3.2)
|
54
|
+
|
55
|
+
PLATFORMS
|
56
|
+
ruby
|
57
|
+
|
58
|
+
DEPENDENCIES
|
59
|
+
guard
|
60
|
+
guard-minitest
|
61
|
+
minitest
|
62
|
+
si-copyscape!
|
63
|
+
vcr
|
64
|
+
webmock
|
data/Guardfile
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# A sample Guardfile
|
2
|
+
# More info at https://github.com/guard/guard#readme
|
3
|
+
|
4
|
+
## Uncomment and set this to only include directories you want to watch
|
5
|
+
# directories %w(app lib config test spec features) \
|
6
|
+
# .select{|d| Dir.exist?(d) ? d : UI.warning("Directory #{d} does not exist")}
|
7
|
+
|
8
|
+
## Note: if you are using the `directories` clause above and you are not
|
9
|
+
## watching the project directory ('.'), then you will want to move
|
10
|
+
## the Guardfile to a watched dir and symlink it back, e.g.
|
11
|
+
#
|
12
|
+
# $ mkdir config
|
13
|
+
# $ mv Guardfile config/
|
14
|
+
# $ ln -s config/Guardfile .
|
15
|
+
#
|
16
|
+
# and, you'll have to watch "config/Guardfile" instead of "Guardfile"
|
17
|
+
|
18
|
+
guard :minitest do
|
19
|
+
# with Minitest::Unit
|
20
|
+
watch(%r{^test/(.*)\/?test_(.*)\.rb$})
|
21
|
+
watch(%r{^lib/(.*/)?([^/]+)\.rb$}) { |m| "test/#{m[1]}test_#{m[2]}.rb" }
|
22
|
+
watch(%r{^test/test_helper\.rb$}) { 'test' }
|
23
|
+
|
24
|
+
# with Minitest::Spec
|
25
|
+
# watch(%r{^spec/(.*)_spec\.rb$})
|
26
|
+
# watch(%r{^lib/(.+)\.rb$}) { |m| "spec/#{m[1]}_spec.rb" }
|
27
|
+
# watch(%r{^spec/spec_helper\.rb$}) { 'spec' }
|
28
|
+
|
29
|
+
# Rails 4
|
30
|
+
# watch(%r{^app/(.+)\.rb$}) { |m| "test/#{m[1]}_test.rb" }
|
31
|
+
# watch(%r{^app/controllers/application_controller\.rb$}) { 'test/controllers' }
|
32
|
+
# watch(%r{^app/controllers/(.+)_controller\.rb$}) { |m| "test/integration/#{m[1]}_test.rb" }
|
33
|
+
# watch(%r{^app/views/(.+)_mailer/.+}) { |m| "test/mailers/#{m[1]}_mailer_test.rb" }
|
34
|
+
# watch(%r{^lib/(.+)\.rb$}) { |m| "test/lib/#{m[1]}_test.rb" }
|
35
|
+
# watch(%r{^test/.+_test\.rb$})
|
36
|
+
# watch(%r{^test/test_helper\.rb$}) { 'test' }
|
37
|
+
|
38
|
+
# Rails < 4
|
39
|
+
# watch(%r{^app/controllers/(.*)\.rb$}) { |m| "test/functional/#{m[1]}_test.rb" }
|
40
|
+
# watch(%r{^app/helpers/(.*)\.rb$}) { |m| "test/helpers/#{m[1]}_test.rb" }
|
41
|
+
# watch(%r{^app/models/(.*)\.rb$}) { |m| "test/unit/#{m[1]}_test.rb" }
|
42
|
+
end
|
data/README.md
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# SI::CopyScape Gem
|
2
|
+
A gem that provides the communication layer between the Copyscape.com Premium API and your application.
|
3
|
+
|
4
|
+
Wiki: (requires login) http://wiki.searchinfluence.com/index.php?title=CopyScape
|
5
|
+
|
6
|
+
API Documentation: (requires login) http://copyscape.com/apiconfigure.php
|
7
|
+
|
8
|
+
## Usage:
|
9
|
+
Set the following environment variables (or pass the values in during initialization):
|
10
|
+
- ENV['COPYSCAPE_USERNAME']
|
11
|
+
- ENV['COPYSCAPE_API_KEY']
|
12
|
+
|
13
|
+
Find the most recent version of this gem at http://gems.searchinfluence.com/gems/si-copyscape and then specify the version number when adding this gem to your application's Gemfile.
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
# Add the following to your Gemfile
|
17
|
+
source 'http://user:pass@gems.searchinfluence.com' do
|
18
|
+
gem 'si-copyscape', '0.0.0'
|
19
|
+
end
|
20
|
+
```
|
21
|
+
|
22
|
+
Instantiate the copyscape object with the following optional parameters:
|
23
|
+
```ruby
|
24
|
+
copyscape = SI::CopyScape.new(
|
25
|
+
username: "foo", # not required if ENV['COPYSCAPE_USERNAME'] is set
|
26
|
+
api_key: "bar", # not required if ENV['COPYSCAPE_API_KEY'] is set
|
27
|
+
uri: "http://foo", # defaults to http://www.copyscape.com/api/
|
28
|
+
match_percent: 10 # defaults to 5
|
29
|
+
)
|
30
|
+
```
|
31
|
+
|
32
|
+
Get Remaining Credit Information
|
33
|
+
```ruby
|
34
|
+
copyscape.credit_balance
|
35
|
+
#=><struct SI::CopyScape::Balance value=112.62, total=2252, today=2252>
|
36
|
+
```
|
37
|
+
|
38
|
+
Perform text searches for copy matches
|
39
|
+
```ruby
|
40
|
+
text = "A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers."
|
41
|
+
|
42
|
+
# Performs a public internet search (CREDIT COST: 1)
|
43
|
+
copyscape.internet_matches! text
|
44
|
+
|
45
|
+
# Performs a private index search (CREDIT COST: 1)
|
46
|
+
copyscape.private_matches! text
|
47
|
+
|
48
|
+
# Performs a public internet & private index search (CREDIT COST: 2)
|
49
|
+
copyscape.internet_and_private_matches! text
|
50
|
+
|
51
|
+
# All these methods return an enumerable SI::CopyscapeMatches collection of data structs, e.g.:
|
52
|
+
#=>
|
53
|
+
# [
|
54
|
+
# <struct SI::CopyScape::Match
|
55
|
+
# words_matched = 20,
|
56
|
+
# percent_matched = 100,
|
57
|
+
# title = "Search Influence | Website Promotion Company",
|
58
|
+
# url = "http://www.searchinfluence.com/",
|
59
|
+
# copyscape_url = "http://view.copyscape.com/compare/wpbdhatumu/1",
|
60
|
+
# text_snippet = "... Trusted, Scalable Search, Social and Online Advertising. A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers.",
|
61
|
+
# html_snippet = "<font color=\"#777777\">... Trusted, Scalable Search, Social and Online Advertising. </font><font color=\"#000000\">A national website promotion company, Search Influence routinely delivers a 10:1 return on investment, or better, for our customers.</font>"
|
62
|
+
# >
|
63
|
+
# ]
|
64
|
+
```
|
65
|
+
|
66
|
+
Add text to our private index on Copyscape.com
|
67
|
+
```ruby
|
68
|
+
copyscape.add_to_private_index(
|
69
|
+
text: 'Text to add to index',
|
70
|
+
title: 'Title', # not required
|
71
|
+
id: 420 # not required
|
72
|
+
)
|
73
|
+
#=><struct SI::CopyScape::PrivateIndex words=5, handle="SIA_2_E00JOQ0A2W_T1Q2J78LA1", id="420", title="Title">
|
74
|
+
```
|
75
|
+
|
76
|
+
If there is an error, you can get a string describing the error (returns nil if there is no error)
|
77
|
+
```ruby
|
78
|
+
copyscape.internet_matches! "test"
|
79
|
+
copyscape.error
|
80
|
+
#=>"At least 15 words are required to perform a search"
|
81
|
+
```
|
82
|
+
|
83
|
+
The error method is also available on the SI::CopyscapeMatches collection returned after a search
|
84
|
+
```ruby
|
85
|
+
matches = copyscape.internet_matches! "test"
|
86
|
+
matches.error
|
87
|
+
#=>"At least 15 words are required to perform a search"
|
88
|
+
```
|
89
|
+
|
90
|
+
## Flog Score
|
91
|
+
```
|
92
|
+
177.7: flog total
|
93
|
+
5.2: flog/method average
|
94
|
+
|
95
|
+
35.0: SI::CopyscapeMatches total
|
96
|
+
16.5: SI::CopyscapeMatches#_build_collection lib/copyscape_matches.rb:28
|
97
|
+
11.0: SI::CopyscapeMatches#_without_rejects lib/copyscape_matches.rb:47
|
98
|
+
7.5: SI::CopyscapeMatches#initialize lib/copyscape_matches.rb:7
|
99
|
+
|
100
|
+
31.6: SI::CopyScape total
|
101
|
+
11.0: SI::CopyScape#credit_balance lib/copyscape.rb:23
|
102
|
+
9.6: SI::CopyScape#add_to_private_index lib/copyscape.rb:52
|
103
|
+
6.0: SI::CopyScape#initialize lib/copyscape.rb:11
|
104
|
+
5.0: SI::CopyScape#none
|
105
|
+
|
106
|
+
25.8: SI::CopyscapeAPI total
|
107
|
+
14.8: SI::CopyscapeAPI#request lib/copyscape_api.rb:12
|
108
|
+
11.0: SI::CopyscapeAPI#_call_api lib/copyscape_api.rb:27
|
109
|
+
|
110
|
+
7.1: SI::CopyscapeResponse total
|
111
|
+
7.1: SI::CopyscapeResponse#results lib/copyscape_response.rb:27
|
112
|
+
```
|
data/lib/copyscape.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
module SI
  # Public interface of the gem: wraps an SI::CopyscapeAPI client and turns
  # its responses into plain data structs / SI::CopyscapeMatches collections.
  class CopyScape

    # Data structures used for responses to the public interface
    Match = Struct.new(:words_matched, :percent_matched, :title, :url, :copyscape_url, :text_snippet, :html_snippet)
    Balance = Struct.new(:value, :total, :today)
    PrivateIndex = Struct.new(:words, :handle, :id, :title)

    attr_reader :api, :match_percent

    # username      - falls back to ENV['COPYSCAPE_USERNAME'] when nil
    # api_key       - falls back to ENV['COPYSCAPE_API_KEY'] when nil
    # uri           - API endpoint, defaults to 'http://www.copyscape.com/api/'
    # match_percent - handed to every SI::CopyscapeMatches built by the
    #                 search methods (default 5)
    def initialize username: nil, api_key: nil, uri: nil, match_percent: 5
      username ||= ENV['COPYSCAPE_USERNAME']
      api_key ||= ENV['COPYSCAPE_API_KEY']
      uri ||= 'http://www.copyscape.com/api/'
      @match_percent = match_percent
      @api = SI::CopyscapeAPI.new(username: username, api_key: api_key, api_url: uri)
    end

    # Error string of the most recent API response, or nil when there is no
    # error -- including when no response has been stored on the client yet
    # (previously that case raised NoMethodError on nil).
    def error
      last_response = api.response
      last_response && last_response.error
    end

    # Issues the 'balance' operation and returns a Balance struct.
    # Returns nil when the response carries no `remaining` data (for example
    # an API-level error); the message is then available through #error.
    def credit_balance
      remaining = _request(operation: 'balance').remaining
      return unless remaining.is_a?(Hash)

      Balance.new(remaining['value'].to_f, remaining['total'].to_i, remaining['today'].to_i)
    end

    # 'csearch' operation on the given text.
    def internet_matches! text
      # cost 1 credit
      _matches_for(text, 'csearch')
    end

    # 'psearch' operation on the given text.
    def private_matches! text
      # cost 1 credit
      _matches_for(text, 'psearch')
    end

    # 'cpsearch' operation on the given text.
    def internet_and_private_matches! text
      # cost 2 credits
      _matches_for(text, 'cpsearch')
    end

    # Issues the 'pindexadd' operation with `text` as the POST body and
    # returns a PrivateIndex struct built from the response.
    # title and id are optional and are sent as the `a` and `i` parameters.
    def add_to_private_index text:, title: nil, id: nil, encoding: 'UTF-8'
      params = { e: encoding, a: title, i: id }
      res = _request(operation: 'pindexadd', params: params, postdata: text).response
      PrivateIndex.new(res['words'].to_i, res['handle'], res['id'], res['title'])
    end

    private

    # Runs a text search and wraps the response in an SI::CopyscapeMatches
    # configured with this instance's match_percent.
    def _matches_for text, operation
      SI::CopyscapeMatches.new(
        response: _text_search(text: text, operation: operation),
        match_percent: match_percent
      )
    end

    # Posts `text` for the given search operation; `encoding` and `full` are
    # sent as the `e` and `c` request parameters.
    def _text_search text:, operation:, encoding: 'UTF-8', full: 3
      params = { e: encoding, c: full.to_s }
      _request(operation: operation, params: params, postdata: text)
    end

    # Delegates to the API client and returns whatever it returns.
    def _request operation:, params: {}, postdata: nil
      api.request(operation: operation, params: params, postdata: postdata)
    end

  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module SI
  # Minimal HTTP client for the Copyscape API: builds the request URL,
  # performs a GET (or a POST when there is a body) and wraps successful
  # replies in an SI::CopyscapeResponse.
  class CopyscapeAPI

    attr_reader :api_url, :api_key, :username, :response

    def initialize username:, api_key:, api_url:
      @username = username
      @api_key = api_key
      @api_url = api_url
    end

    # Performs one API call and stores the outcome in #response.
    #
    # operation - sent as the `o` parameter
    # params    - extra query parameters; `u`, `k` and `o` always win on clash
    # postdata  - when non-nil the call is a POST with this string as the body
    #
    # Returns the SI::CopyscapeResponse, or nil when the HTTP reply is neither
    # a success nor a redirection.
    def request operation:, params: {}, postdata: nil
      query = params.merge(u: username, k: api_key, o: operation)
      uri = URI.parse("#{api_url}?#{_encode_query(query)}")
      @response = _respond_to _call_api(uri, postdata)
    end

    private

    # Form-encodes the query so that reserved characters inside values
    # (`&`, `=`, spaces, ...) cannot corrupt the query string. nil values are
    # sent as empty strings, i.e. `key=`.
    def _encode_query query
      URI.encode_www_form(query.map { |key, value| [key.to_s, value.to_s] })
    end

    # Wraps a Net::HTTP reply; nil for anything other than 2xx/3xx.
    def _respond_to http_response
      return unless http_response.is_a?(Net::HTTPSuccess) || http_response.is_a?(Net::HTTPRedirection)

      SI::CopyscapeResponse.new(raw_response: http_response)
    end

    # GET when there is no body, POST otherwise.
    def _call_api uri, postdata=nil
      return Net::HTTP.get_response(uri) if postdata.nil?

      http = Net::HTTP.new(uri.host, uri.port)
      # Net::HTTP.new does not turn TLS on by itself for https endpoints.
      http.use_ssl = uri.scheme == 'https'
      post = Net::HTTP::Post.new(uri.request_uri)
      post.body = postdata
      http.request(post)
    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module SI
  # Enumerable collection of SI::CopyScape::Match structs built from the
  # results of a search response.
  class CopyscapeMatches

    include Enumerable
    attr_reader :collection, :query_word_count, :match_percent, :error

    # response      - object responding to #query_words, #results and #error
    # match_percent - results whose 'percentmatched' is below this value are
    #                 dropped (coerced with #to_i)
    def initialize response:, match_percent:
      @query_word_count = response.query_words.to_i
      @match_percent = match_percent.to_i
      @collection = _build_collection response.results
      @error = response.error
    end

    def all_text_snippets
      collection.map(&:text_snippet)
    end

    def all_html_snippets
      collection.map(&:html_snippet)
    end

    # Required by Enumerable; iterates the kept matches in sorted order.
    def each(&block)
      collection.each(&block)
    end

    private

    # Filters the raw result hashes, converts them to Match structs and
    # orders them by percent matched.
    def _build_collection results
      matches = _without_rejects(results).map { |result| _to_match(result) }
      _sort matches
    end

    # Maps one raw result hash onto the Match struct fields.
    def _to_match result
      SI::CopyScape::Match.new(
        result['wordsmatched'].to_i,
        result['percentmatched'].to_i,
        result['title'],
        result['url'],
        result['viewurl'],
        result['textsnippet'],
        result['htmlsnippet']
      )
    end

    # Highest percent_matched first. The index is used as a tie-breaker so
    # equal scores keep the order they arrived in; a bare sort block gives no
    # ordering guarantee for ties.
    def _sort results
      results.each_with_index
             .sort_by { |match, index| [-match.percent_matched, index] }
             .map(&:first)
    end

    # Drops results below the match_percent threshold and results that carry
    # a 'urlerror' entry.
    def _without_rejects results
      results.reject { |r| r['percentmatched'].to_i < match_percent || !r['urlerror'].nil? }
    end

  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module SI
  # Wraps a raw HTTP reply from the API and exposes the parsed XML body.
  class CopyscapeResponse

    attr_reader :raw_response, :error

    # raw_response - object whose #body is the XML document to parse
    def initialize raw_response:
      @raw_response = raw_response
      @error = _error_msg
    end

    def raw_xml
      raw_response.body
    end

    # Parsed body. Memoized: the body is parsed once per instance instead of
    # on every accessor call.
    def raw_hash
      @raw_hash ||= _to_hash
    end

    # The top-level 'remaining' entry of the parsed body.
    def remaining
      raw_hash['remaining']
    end

    # The top-level 'response' entry of the parsed body.
    def response
      raw_hash['response']
    end

    # Always an Array: the 'result' entries of the response, or [] when
    # there are none.
    def results
      payload = response
      result = payload['result'] if payload.is_a?(Hash)
      # A lone result is wrapped by hand; Kernel#Array would explode a Hash
      # into key/value pairs.
      result.is_a?(Array) ? result : [result].compact
    end

    # Integer 'querywords' of the response, or nil when 'response' is not a
    # Hash.
    def query_words
      payload = response
      payload['querywords'].to_i if payload.is_a?(Hash)
    end

    private

    def _to_hash
      Crack::XML.parse(raw_xml)
    end

    # The 'error' entry of the response, or nil when absent.
    def _error_msg
      payload = response
      payload['error'] if payload.is_a?(Hash)
    end

  end
end
|