plagiarism-checker 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 62f540b75e8fce69177d645e6c1b0b0c0b081c54
4
+ data.tar.gz: 7cddcf56deb0f2a66bf2af731e7f72ae4c3ce906
5
+ SHA512:
6
+ metadata.gz: 93cd5f553fa669824c0cda767f14536e0e78724e9e9d7eb4eb53a41b8e9091f01f9a8cb7d76ef91edf97ad7343cc03662ebee66920afb7a1aafcdc32c562ca52
7
+ data.tar.gz: c679d5ecf82865dd8c627e1d84bcdab6f0abca6acd7cca1910b13ccb280bd7dd03f76be04d965e3f9f246966b0cf973d7b6e0aa249f9378d152b9deab8cd4d72
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .idea
11
+ /README.md
12
+ /examples/test.rb
data/.rubocop.yml ADDED
@@ -0,0 +1,2 @@
1
+ Metrics/LineLength:
2
+ Max: 120
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in copyleaks_api.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License(MIT)
2
+
3
+ Copyright(c) 2016 Copyleaks LTD (https://copyleaks.com)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'copyleaks_api'
5
+
6
+ require 'irb'
7
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'copyleaks_api/version'
5
+
6
# Gem specification for the Copyleaks plagiarism-checker SDK.
Gem::Specification.new do |s|
  # Identity and ownership
  s.name     = 'plagiarism-checker'
  s.version  = CopyleaksApi::VERSION
  s.authors  = ['Copyleaks ltd']
  s.email    = ['sales@copyleaks.com']
  s.homepage = 'https://api.copyleaks.com'
  s.license  = 'MIT'

  # Human-readable descriptions
  s.summary     = 'Detects plagiarism and checks content distribution online.'
  s.description = 'Copyleaks detects plagiarism and checks content distribution online. Use Copyleaks to find out if textual content is original and if it has been used before. With Copyleaks cloud publishers, academics, and more can scan files (pdf, doc, docx, ocr...), URLs and free text for plagiarism check.'

  # Package every git-tracked file except test/spec/features directories
  tracked_files = `git ls-files -z`.split("\x0")
  s.files         = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }
  s.bindir        = 'exe'
  s.executables   = s.files.grep(%r{^exe/}) { |path| File.basename(path) }
  s.require_paths = ['lib']

  # Development-only dependencies: [name, optional version constraint]
  [
    ['webmock'],
    ['pry'],
    ['rspec'],
    ['bundler', '~> 1.7'],
    ['rake', '~> 10.0']
  ].each do |dependency|
    s.add_development_dependency(*dependency)
  end

  s.add_runtime_dependency 'mimemagic'
end
data/examples/main.rb ADDED
@@ -0,0 +1,60 @@
1
+ require 'bundler/setup'
2
+ require 'copyleaks_api'
3
+
4
+ # first, create a new Cloud entity
5
+ email = '<YOUR-EMAIL>'
6
+ api_key = '<YOUR-API-KEY>'
7
+ cloud = CopyleaksApi::CopyleaksCloud.new(email, api_key, :publisher)
8
+
9
+ # to check your balance just call balance
10
+
11
+ puts "Your balance is #{cloud.balance} credits"
12
+
13
+ # next, switch the SDK to sandbox mode
14
+
15
+ CopyleaksApi::Config.sandbox_mode = true
16
+
17
+ # now we can create new process by some url and custom callback
18
+ process = cloud.create_by_url('http://exmaple.com', http_callback: 'http://requestb.in/')
19
+
20
+ # Other scanning options
21
+ # Text scan:
22
+ #process = cloud.create_by_text("-Your text here-")
23
+
24
+ # Textual file scan:
25
+ #path = File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'files', 'lorem.txt')
26
+ #process = cloud.create_by_file(path)
27
+
28
+ # Ocr scan:
29
+ #path = File.join(File.dirname(__FILE__), '..', 'spec', 'fixtures', 'files', 'lorem.jpg')
30
+ #process = cloud.create_by_ocr(path, language: CopyleaksApi::Language.latin)
31
+
32
+ puts "Now process has state '#{process.status}'"
33
+
34
+ # to update process information we can just do this
35
+
36
+ # process.reload
37
+
38
+ # it automatically calls cloud.status with the process id
39
+ # the server needs some time to process your request, so we have to wait
40
+
41
+ while process.processing?
42
+ sleep(1)
43
+ process.reload
44
+ end
45
+
46
+ puts "And after sleep - #{process.status}"
47
+ # to get the results of the processing, just call the corresponding method
48
+
49
+ puts 'And its results are:'
50
+ puts process.result.inspect
51
+
52
+ # all results are returned as an array of hashes with the same keys the Copyleaks API provides
53
+ # to get list of all existing processes we can call list method
54
+
55
+ processes = cloud.list
56
+ puts "Overall you have #{processes.size} processes"
57
+
58
+ # Delete finished process by PID:
59
+ #PID = '00000000-0000-0000-0000-000000000000'
60
+ #cloud.delete(PID)
@@ -0,0 +1,23 @@
1
+ unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
3
+ end
4
+ require 'copyleaks_api/version'
5
+
6
+ require 'copyleaks_api/language'
7
+ require 'copyleaks_api/errors'
8
+
9
+ require 'copyleaks_api/validators/custom_fields_validator'
10
+ require 'copyleaks_api/validators/email_validator'
11
+ require 'copyleaks_api/validators/file_validator'
12
+ require 'copyleaks_api/validators/response_validator'
13
+ require 'copyleaks_api/validators/url_validator'
14
+ require 'copyleaks_api/validators/language_validator'
15
+
16
+ require 'copyleaks_api/copyleaks_cloud'
17
+ require 'copyleaks_api/access_token'
18
+ require 'copyleaks_api/api'
19
+ require 'copyleaks_api/config'
20
+ require 'copyleaks_api/copyleaks_process'
21
+
22
+ module CopyleaksApi
23
+ end
@@ -0,0 +1,36 @@
1
+ require 'time'
2
+ require 'json'
3
+
4
module CopyleaksApi
  # Wraps the access token returned by the 'account/login-api' endpoint and
  # transparently logs in again once the token has expired.
  class AccessToken
    attr_reader :created_at, :expire_at

    # Stores the credentials and performs the first login immediately.
    #
    # cloud   - object exposing #api, whose #post performs the login request
    # email   - account email sent as "Email"
    # api_key - account key sent as "ApiKey"
    def initialize(cloud, email, api_key)
      @cloud, @email, @api_key = cloud, email, api_key
      login
    end

    # True while the current UTC time is still before the expiry timestamp.
    def fresh?
      now_utc = DateTime.now.new_offset(0)
      now_utc < @expire_at
    end

    # Returns the token string, logging in again first when it is no longer fresh.
    def token
      fresh? ? @token : login
    end

    # Requests a new token for the stored email/api_key pair, records its
    # issue and expiry timestamps and returns the token string.
    def login
      credentials = { Email: @email, ApiKey: @api_key }.to_json
      payload = @cloud.api.post('account/login-api', credentials)
      @token = payload['access_token']
      @created_at = DateTime.parse(payload['.issued'])
      @expire_at = DateTime.parse(payload['.expires'])
      @token
    end
  end
end
@@ -0,0 +1,153 @@
1
+ require 'json'
2
+ require 'net/http'
3
+ require 'mimemagic'
4
+ require 'mimemagic/overlay'
5
+ require 'openssl'
6
+
7
# SecureRandom is used to build multipart boundaries in #post_file.
require 'securerandom'

module CopyleaksApi
  # Low-level HTTPS client for the Copyleaks REST API. It builds the request,
  # attaches the Copyleaks-specific headers, validates the response and returns
  # the parsed JSON body.
  class Api
    BASE_URL = 'https://api.copyleaks.com'.freeze
    API_VERSION = 'v1'.freeze

    # Prepares the HTTPS connection object for BASE_URL.
    def initialize
      uri = URI(BASE_URL)
      @http = Net::HTTP.new(uri.host, uri.port)
      @http.use_ssl = true
      # The bearer token and API key travel over this connection, so the
      # server certificate must be verified.
      @http.verify_mode = ::OpenSSL::SSL::VERIFY_PEER
    end

    # Makes a GET request; callback headers are never sent.
    def get(path, options = {})
      request = Net::HTTP::Get.new(request_uri(path))
      make_request(request, options.merge(no_callbacks: true))
    end

    # Makes a POST request with the given (already serialized) body and options.
    def post(path, body = nil, options = {})
      request = Net::HTTP::Post.new(request_uri(path))
      request.body = body
      make_request(request, options)
    end

    # Makes a DELETE request; callback headers are never sent.
    def delete(path, options = {})
      request = Net::HTTP::Delete.new(request_uri(path))
      make_request(request, options.merge(no_callbacks: true))
    end

    # Makes a multipart POST request carrying the file at file_path.
    # The caller's options hash is left untouched.
    def post_file(path, file_path, options = {})
      request = Net::HTTP::Post.new(request_uri(path))
      boundary = "copyleaks_sdk_#{SecureRandom.hex(4)}"
      request.body = file_body(file_path, boundary)
      make_request(request, options.merge(boundary: boundary))
    end

    private

    # Detects the MIME type of the file from its content, defaulting to
    # 'text/plain'. The block form closes the file handle after detection.
    def extract_mime_type(path)
      mime = File.open(path, 'rb') { |io| MimeMagic.by_magic(io) }
      mime ? mime.type : 'text/plain'
    end

    # Builds the multipart/form-data body containing the file.
    def file_body(path, boundary)
      [
        "\r\n--#{boundary}\r\n",
        "content-disposition: form-data; name=\"file\"",
        "; filename=\"#{File.basename(path)}\"\r\n",
        "Content-Type: #{extract_mime_type(path)}\r\n",
        "Content-Transfer-Encoding: binary\r\n",
        "\r\n",
        File.open(path, 'rb') { |io| io.read },
        "\r\n--#{boundary}--\r\n"
      ].join('')
    end

    # Prefixes the endpoint path with the API version.
    def request_uri(path)
      "/#{API_VERSION}/#{path}"
    end

    # Applies headers, performs the request, validates the response and
    # returns the parsed JSON body (nil when the body is empty).
    def make_request(request, options)
      gather_headers(request, options)
      response = @http.request(request)
      Validators::ResponseValidator.validate!(response)
      body = response.body
      # An empty body is not valid JSON and would raise JSON::ParserError.
      return nil if body.nil? || body.strip.empty?
      JSON.parse(body)
    end

    # Merges every header group and writes the result onto the request.
    # NOTE(review): custom_field_headers is defined below but is not part of
    # this list, so custom fields are currently never sent - confirm whether
    # that is intended.
    def gather_headers(request, options)
      [
        http_callbacks_header(options),
        email_callback_header(options),
        authentication_header(options),
        sandbox_header,
        content_type_header(options),
        partial_scan_header(options),
        'User-Agent' => "RUBYSDK/#{CopyleaksApi::VERSION}"
      ].reduce({}, :merge).each do |header, value|
        request[header] = value
      end
    end

    # Header enabling sandbox mode, when configured.
    def sandbox_header
      return {} unless Config.sandbox_mode
      { 'copyleaks-sandbox-mode' => '' }
    end

    # Content type: multipart when a boundary is given, JSON otherwise.
    def content_type_header(options)
      boundary = options[:boundary]
      content_type = boundary ? "multipart/form-data; boundary=\"#{boundary}\"" : 'application/json'
      { 'Content-Type' => content_type }
    end

    # Header allowing partial scans. A per-request :allow_partial_scan value
    # (including an explicit false) takes precedence over the global config.
    def partial_scan_header(options)
      allowed = options[:allow_partial_scan]
      allowed = Config.allow_partial_scan if allowed.nil?
      allowed ? { 'copyleaks-allow-partial-scan' => '' } : {}
    end

    # Bearer authentication header, when a token is supplied.
    def authentication_header(options)
      return {} unless options[:token]
      { 'Authorization' => "Bearer #{options[:token]}" }
    end

    # Header with the HTTP callback URL (per-request value or global config).
    def http_callbacks_header(options)
      return {} if options[:no_http_callback] || options[:no_callbacks]
      value = options[:http_callback] || CopyleaksApi::Config.http_callback
      return {} unless value
      Validators::UrlValidator.validate!(value)
      { 'copyleaks-http-callback' => value }
    end

    # Header with the email callback address (per-request value or global config).
    def email_callback_header(options)
      return {} if options[:no_email_callback] || options[:no_callbacks]
      value = options[:email_callback] || CopyleaksApi::Config.email_callback
      return {} unless value
      Validators::EmailValidator.validate!(value)
      { 'copyleaks-email-callback' => value }
    end

    # Headers for custom fields: global config merged with per-request fields.
    def custom_field_headers(options)
      return {} if options[:no_custom_fields]
      value = CopyleaksApi::Config.custom_fields.merge(options[:custom_fields] || {})
      Validators::CustomFieldsValidator.validate!(value)
      prepare_custom_fields(value)
    end

    # Turns { key => value } into { "copyleaks-client-custom-<key>" => value }.
    def prepare_custom_fields(fields)
      fields.each_with_object({}) { |(key, value), headers| headers["copyleaks-client-custom-#{key}"] = value }
    end
  end
end
@@ -0,0 +1,34 @@
1
module CopyleaksApi
  # Process-wide SDK settings, stored as instance variables on the class itself.
  class Config
    # Default value for every option. The custom_fields default is frozen and
    # is handed out as a copy, so callers can never mutate the defaults.
    DEFAULTS = {
      sandbox_mode: false,
      allow_partial_scan: false,
      http_callback: nil,
      email_callback: nil,
      custom_fields: {}.freeze
    }.freeze

    class << self
      attr_writer :sandbox_mode, :http_callback, :email_callback, :custom_fields, :allow_partial_scan

      DEFAULTS.each_key do |attr|
        # Getter for each option: returns the stored value, or stores and
        # returns the default when the stored value is nil or false.
        define_method(attr) do
          current = instance_variable_get("@#{attr}")
          return current if current
          instance_variable_set("@#{attr}", default_for(attr))
        end
      end

      # Yields the class itself so options can be set inside a block.
      def config
        yield(self)
      end

      # Resets every option to its default value.
      def reset
        DEFAULTS.each_key { |attr| instance_variable_set("@#{attr}", default_for(attr)) }
      end

      private

      # Returns the default for attr; hash defaults are duplicated so that
      # in-place changes (Config.custom_fields[:k] = v) never leak into DEFAULTS.
      def default_for(attr)
        value = DEFAULTS[attr]
        value.is_a?(Hash) ? value.dup : value
      end
    end
  end
end
@@ -0,0 +1,92 @@
1
+ require 'copyleaks_api/errors'
2
+ require 'copyleaks_api/api'
3
+
4
module CopyleaksApi
  # Entry point of the SDK: authenticates against one endpoint type and
  # exposes the scan-related API calls.
  class CopyleaksCloud
    ALLOWED_ENDPOINTS = [:publisher, :academic]
    attr_accessor :access_token
    attr_reader :endpoint_type

    # Logs in with the given credentials.
    # Raises ArgumentError when type is not one of ALLOWED_ENDPOINTS.
    def initialize(email, api_key, type)
      unless ALLOWED_ENDPOINTS.include?(type.to_sym)
        raise ArgumentError, "Endpoint type '#{type}' is invalid"
      end
      @access_token = AccessToken.new(self, email, api_key)
      @endpoint_type = type
    end

    # Lazily created HTTP client shared by every call.
    def api
      @api ||= CopyleaksApi::Api.new
    end

    # Returns the account balance as an integer.
    def balance
      credits = api.get(url('count-credits'), token: @access_token.token)
      credits['Amount'].to_i
    end

    # Creates a process through the create-by-url endpoint.
    def create_by_url(url, options = {})
      payload = { 'Url' => url }.to_json
      response = api.post(url('create-by-url'), payload, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-file endpoint.
    def create_by_file(file_path, options = {})
      response = api.post_file(url('create-by-file'), file_path, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-file-ocr endpoint.
    def create_by_ocr(ocr_file_path, options = {})
      endpoint = url_with_language('create-by-file-ocr', options)
      response = api.post_file(endpoint, ocr_file_path, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-text endpoint.
    def create_by_text(text, options = {})
      response = api.post(url('create-by-text'), text, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Deletes the process with the given id; always returns true.
    def delete(id)
      api.delete(url(:delete, id), token: @access_token.token)
      true
    end

    # Retrieves all processes as CopyleaksProcess objects.
    def list
      entries = api.get(url(:list), token: @access_token.token)
      entries.map { |entry| CopyleaksProcess.create_from_list(self, entry) }
    end

    # Retrieves status information for the process with the given id.
    # Returns the raw response when options[:raw] is set.
    def status(id, options = {})
      response = api.get(url(:status, id), no_callbacks: true, token: @access_token.token)
      if options[:raw]
        response
      else
        CopyleaksApi::CopyleaksProcess.create_from_status(self, id, response)
      end
    end

    # Retrieves result information for the process with the given id.
    # Returns the raw response when options[:raw] is set.
    def result(id, options = {})
      response = api.get(url(:result, id), no_callbacks: true, token: @access_token.token)
      if options[:raw]
        response
      else
        CopyleaksApi::CopyleaksProcess.create_from_result(self, id, response)
      end
    end

    private

    # Builds the path for endpoints taking the language as a query parameter
    # (English unless options[:language] is given).
    def url_with_language(action, options)
      language = options[:language] || Language.english
      Validators::LanguageValidator.validate!(language)
      url("#{action}?language=#{language}")
    end

    # Builds "<endpoint>/<action>" or, with an id, "<endpoint>/<id>/<action>".
    def url(action, id = nil)
      id.nil? ? "#{@endpoint_type}/#{action}" : "#{@endpoint_type}/#{id}/#{action}"
    end
  end
end
@@ -0,0 +1,82 @@
1
+ require 'time'
2
+
3
module CopyleaksApi
  # A single scan process on the Copyleaks side. Status and result are loaded
  # lazily through the owning cloud object.
  class CopyleaksProcess
    STATUSES = ['processing', 'ready', 'allocated', 'finished', 'error', 'deleted'].freeze

    attr_accessor :process_id, :progress, :custom_fields, :created_at
    # The reader is the lazy #status method defined below.
    attr_writer :status

    # Builds a process from an options hash. Recognized keys: :cloud,
    # :process_id, :custom_fields, :result, :status, :progress, :created_at
    # (a string parsed with DateTime.parse) and :status_code.
    def initialize(options)
      [:cloud, :process_id, :custom_fields, :result, :status, :progress].each do |attr|
        instance_variable_set("@#{attr}", options[attr]) if options[attr]
      end

      @created_at = DateTime.parse(options[:created_at]) if options[:created_at]
      # A numeric status code is translated through STATUSES, offset by one.
      @status = STATUSES[options[:status_code].to_i + 1] if options[:status_code]
    end

    STATUSES[1..-1].each do |name|
      # Predicate methods (ready?, allocated?, finished?, error?, deleted?);
      # the status is fetched first when it is not known yet.
      define_method("#{name}?") do
        status == name
      end
    end

    # Returns true while the server is still working on the process. Like the
    # other predicates, it fetches the status first when it is not known yet,
    # so it is reliable right after a create_by_* call.
    def processing?
      ['ready', 'allocated', 'processing'].include?(status)
    end

    # Returns the result data, retrieving it from the result endpoint on first use.
    def result
      @result ||= @cloud.result(process_id, raw: true)
    end

    # Returns the status, reloading from the API when none is known.
    def status
      reload if @status.nil?
      @status
    end

    # Deletes the process through the API and marks it as deleted locally.
    def delete
      @cloud.delete(process_id)
      @status = 'deleted'
    end

    # Refreshes status and progress from the status endpoint and drops any
    # cached result. Returns self.
    def reload
      response = @cloud.status(process_id, raw: true)
      @status = response['Status'].downcase
      @progress = response['ProgressPercents'].to_i
      @result = nil
      self
    end

    class << self
      # Builds a process from the response of any create endpoint.
      def create(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'])
      end

      # Builds a process from the response of the status endpoint.
      def create_from_status(cloud, id, hash)
        new(cloud: cloud, process_id: id, status: hash['Status'].downcase, progress: hash['ProgressPercents'])
      end

      # Builds a process from the response of the result endpoint.
      def create_from_result(cloud, id, result)
        new(cloud: cloud, process_id: id, result: result)
      end

      # Builds a process from one entry of the list endpoint.
      def create_from_list(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'],
            status: hash['Status'].downcase, custom_fields: hash['CustomFields'])
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module CopyleaksApi
  # Root of every error raised by the SDK.
  class BasicError < StandardError; end

  # Validation errors raised before a request is sent.
  class BadCustomFieldError < BasicError; end
  class BadFileError < BasicError; end
  class BadEmailError < BasicError; end
  class BadUrlError < BasicError; end
  class UnknownLanguageError < BasicError; end

  # Error built from a response: carries a numeric code and a message.
  class BadResponseError < BasicError
    attr_accessor :code

    # code is converted with to_i; message is kept as given.
    def initialize(code, message)
      @message = message
      @code = code.to_i
    end

    # Human-readable form combining the code and the message.
    def to_s
      'Error code: ' + code.to_s + '. ' + @message.to_s
    end
  end

  # Error carrying a Copyleaks-specific error code.
  class ManagedError < BadResponseError
    # True when the code is 16, which the SDK treats as an internal server error.
    def internal_error?
      code == 16
    end
  end
end
@@ -0,0 +1,61 @@
1
module CopyleaksApi
  # Names of the languages accepted by the SDK, plus one module-level helper
  # per language (Language.english, Language.chinese_simplified, ...).
  module Language
    ALLOWED = %w[
      Afrikaans Albanian Basque Brazilian Bulgarian Byelorussian Catalan
      Chinese_Simplified Chinese_Traditional Croatian Czech Danish Dutch
      English Esperanto Estonian Finnish French Galician German Greek
      Hungarian Icelandic Indonesian Italian Japanese Korean Latin Latvian
      Lithuanian Macedonian Malay Moldavian Norwegian Polish Portuguese
      Romanian Russian Serbian Slovak Slovenian Spanish Swedish Tagalog
      Turkish Ukrainian
    ].freeze

    ALLOWED.each do |language_name|
      helper = language_name.downcase

      # Each helper returns the matching entry of ALLOWED.
      define_method(helper) { language_name }
      module_function helper
    end
  end
end
@@ -0,0 +1,35 @@
1
module CopyleaksApi
  module Validators
    # Enforces the length limits for custom fields.
    class CustomFieldsValidator
      KEY_MAX_LENGTH = 128
      VALUE_MAX_LENGTH = 512
      OVERALL_MAX_LENGTH = 8192

      class << self
        # Raises BadCustomFieldError when a key, a value or the combined size
        # of all keys and values exceeds its limit; returns nil otherwise.
        def validate!(fields)
          raise BadCustomFieldError, 'Key is too long' unless within_limit?(fields.keys, KEY_MAX_LENGTH)
          raise BadCustomFieldError, 'Value is too long' unless within_limit?(fields.values, VALUE_MAX_LENGTH)
          raise BadCustomFieldError, 'Overall size is too large' unless total_size(fields) <= OVERALL_MAX_LENGTH
        end

        private

        # True when the string form of every item is at most limit characters long.
        def within_limit?(items, limit)
          items.map(&:to_s).all? { |text| text.size <= limit }
        end

        # Sum of the string lengths of every key and every value.
        def total_size(fields)
          fields.inject(0) { |total, (key, value)| total + key.to_s.size + value.to_s.size }
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module CopyleaksApi
  module Validators
    # Validates the address used for the email callback header.
    class EmailValidator
      # \A and \z anchor the whole string. With ^ and $ a value such as
      # "a@b.com\nX-Header: 1" passes because its first line matches, and the
      # validated value is then placed in an HTTP header.
      # Matching is case-insensitive so "John.Doe@Example.com" is accepted.
      EMAIL_PATTERN = /\A([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})\z/i

      class << self
        # Raises BadEmailError unless the given email is well formed;
        # returns nil otherwise. nil and non-string values are rejected.
        def validate!(email)
          return if email.to_s =~ EMAIL_PATTERN
          raise BadEmailError, "Email #{email} is invalid"
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module CopyleaksApi
  module Validators
    # Checks the extension and size of a file before it is uploaded.
    class FileValidator
      SUPPORTED_FILE_TYPES = [:html, :htm, :txt, :pdf, :doc, :docx, :rtf].freeze
      SUPPORTED_IMAGE_TYPES = [:gif, :png, :bmp, :jpg, :jpeg].freeze
      # 1024 * 1024 bytes.
      BYTES_IN_MB = 1_048_576.0

      class << self
        # Raises BadFileError unless path is a supported image of allowed size.
        def validate_ocr!(path)
          validate_file(path, SUPPORTED_IMAGE_TYPES)
        end

        # Raises BadFileError unless path is a supported text document of allowed size.
        def validate_text_file!(path)
          validate_file(path, SUPPORTED_FILE_TYPES)
        end

        private

        # Returns nil when the extension is in types and the file is within the
        # size limit for that extension; raises BadFileError otherwise.
        # The size is only read once the extension has been accepted.
        def validate_file(path, types)
          ext = file_extension(path)
          return if types.include?(ext) && file_size(path) <= allowed_file_size(ext)
          raise BadFileError, "#{path} file has unsupported extension or to large"
        end

        # Maximum file size in MB for the given type (0 for unknown types).
        def allowed_file_size(type)
          case type.to_sym
          when :html, :htm
            5
          when :txt
            3
          when :pdf, :doc, :docx, :rtf
            # :rtf is listed in SUPPORTED_FILE_TYPES, so it needs a non-zero limit.
            # NOTE(review): 25 assumes rtf shares the limit of the other
            # document formats - confirm against the API limits.
            25
          when *SUPPORTED_IMAGE_TYPES
            25
          else
            0
          end
        end

        # Lower-cased extension as a symbol (:"" when the file name has none).
        # File.extname only looks at the last path component, so dots in
        # directory names are ignored.
        def file_extension(path)
          File.extname(path.to_s).delete('.').downcase.to_sym
        end

        # File size in MB.
        def file_size(path)
          File.size(path) / BYTES_IN_MB
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module CopyleaksApi
  module Validators
    # Checks a language name against Language::ALLOWED.
    class LanguageValidator
      class << self
        # Returns nil for an allowed language and raises UnknownLanguageError otherwise.
        def validate!(language)
          return if Language::ALLOWED.include?(language)
          raise UnknownLanguageError, "#{language} is unknown"
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'json'
2
+
3
module CopyleaksApi
  module Validators
    # Turns unsuccessful API responses into exceptions.
    class ResponseValidator
      ERROR_HEADER = 'Copyleaks-Error-Code'.freeze
      GOOD_STATUS_CODE = 200

      class << self
        # Raises ManagedError when the response carries the Copyleaks error
        # header, BadResponseError when the status code is not 200; returns
        # nil otherwise.
        def validate!(response)
          raise ManagedError.new(response[ERROR_HEADER], extract_message(response.body)) if response[ERROR_HEADER]
          raise BadResponseError.new(response.code, response.body) if response.code.to_i != GOOD_STATUS_CODE
        end

        private

        # Returns the 'Message' entry of a JSON object body. Falls back to the
        # raw body when it is not JSON, is not a JSON object, or has no
        # 'Message', so building the error never raises an unrelated
        # exception or loses the server's text.
        def extract_message(string)
          parsed = JSON.parse(string)
          message = parsed['Message'] if parsed.is_a?(Hash)
          message || string
        rescue JSON::ParserError, TypeError
          # TypeError covers a nil body, which JSON.parse cannot convert.
          string
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module CopyleaksApi
  module Validators
    # Validates the URL used for the HTTP callback header.
    class UrlValidator
      # \A and \z anchor the whole string. With ^ and $ a multi-line value
      # passes when any single line looks like a URL, and the validated value
      # is then placed in an HTTP header.
      # The path part is a single character class: it accepts the same
      # strings as the former nested group "([/\w \.-]*)*/?" without the
      # nested quantifier. Matching is case-insensitive.
      URL_PATTERN = %r{\A(https?://)?([\da-z\.-]+)\.([a-z\.]{2,6})[/\w \.-]*\z}i

      class << self
        # Raises BadUrlError (with the url as its message) unless the given
        # callback url is well formed; returns nil otherwise. nil and
        # non-string values are rejected.
        def validate!(url)
          raise BadUrlError.new(url) unless url.to_s =~ URL_PATTERN
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module CopyleaksApi
  # Gem version; also reported in the User-Agent header of every request.
  VERSION = '1.0.0'.freeze
end
metadata ADDED
@@ -0,0 +1,156 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: plagiarism-checker
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Copyleaks ltd
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-06-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: webmock
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pry
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.7'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.7'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: mimemagic
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Copyleaks detects plagiarism and checks content distribution online.
98
+ Use Copyleaks to find out if textual content is original and if it has been used
99
+ before. With Copyleaks cloud publishers, academics, and more can scan files (pdf,
100
+ doc, docx, ocr...), URLs and free text for plagiarism check.
101
+ email:
102
+ - sales@copyleaks.com
103
+ executables: []
104
+ extensions: []
105
+ extra_rdoc_files: []
106
+ files:
107
+ - ".gitignore"
108
+ - ".rubocop.yml"
109
+ - Gemfile
110
+ - LICENSE.txt
111
+ - README.md
112
+ - Rakefile
113
+ - bin/console
114
+ - bin/setup
115
+ - copyleaks_api.gemspec
116
+ - examples/main.rb
117
+ - lib/copyleaks_api.rb
118
+ - lib/copyleaks_api/access_token.rb
119
+ - lib/copyleaks_api/api.rb
120
+ - lib/copyleaks_api/config.rb
121
+ - lib/copyleaks_api/copyleaks_cloud.rb
122
+ - lib/copyleaks_api/copyleaks_process.rb
123
+ - lib/copyleaks_api/errors.rb
124
+ - lib/copyleaks_api/language.rb
125
+ - lib/copyleaks_api/validators/custom_fields_validator.rb
126
+ - lib/copyleaks_api/validators/email_validator.rb
127
+ - lib/copyleaks_api/validators/file_validator.rb
128
+ - lib/copyleaks_api/validators/language_validator.rb
129
+ - lib/copyleaks_api/validators/response_validator.rb
130
+ - lib/copyleaks_api/validators/url_validator.rb
131
+ - lib/copyleaks_api/version.rb
132
+ homepage: https://api.copyleaks.com
133
+ licenses:
134
+ - MIT
135
+ metadata: {}
136
+ post_install_message:
137
+ rdoc_options: []
138
+ require_paths:
139
+ - lib
140
+ required_ruby_version: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - ">="
143
+ - !ruby/object:Gem::Version
144
+ version: '0'
145
+ required_rubygems_version: !ruby/object:Gem::Requirement
146
+ requirements:
147
+ - - ">="
148
+ - !ruby/object:Gem::Version
149
+ version: '0'
150
+ requirements: []
151
+ rubyforge_project:
152
+ rubygems_version: 2.6.4
153
+ signing_key:
154
+ specification_version: 4
155
+ summary: Detects plagiarism and checks content distribution online.
156
+ test_files: []