plagiarism-checker 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +8 -12
- data/Gemfile +3 -1
- data/LICENSE.txt +1 -1
- data/README.md +39 -0
- data/Rakefile +4 -2
- data/bin/console +15 -7
- data/bin/setup +8 -8
- data/lib/copyleaks/api.rb +493 -0
- data/lib/copyleaks/app.config.rb +47 -0
- data/lib/copyleaks/models/auth_token.rb +54 -0
- data/lib/copyleaks/models/delete_request_model.rb +64 -0
- data/lib/copyleaks/models/exceptions/auth_exipred_exception.rb +32 -0
- data/lib/copyleaks/models/exceptions/command_exception.rb +32 -0
- data/lib/copyleaks/models/exceptions/index.rb +32 -0
- data/lib/copyleaks/models/exceptions/rate_limit_exception.rb +32 -0
- data/lib/copyleaks/models/exceptions/under_maintenance_exception.rb +32 -0
- data/lib/copyleaks/models/exports/export_crawled_version.rb +54 -0
- data/lib/copyleaks/models/exports/export_model.rb +84 -0
- data/lib/copyleaks/models/exports/export_pdf_report.rb +54 -0
- data/lib/copyleaks/models/exports/export_results.rb +56 -0
- data/lib/copyleaks/models/exports/index.rb +32 -0
- data/lib/copyleaks/models/id_object.rb +44 -0
- data/lib/copyleaks/models/index.rb +35 -0
- data/lib/copyleaks/models/start_request_model.rb +63 -0
- data/lib/copyleaks/models/submissions/file_ocr_submission_model.rb +61 -0
- data/lib/copyleaks/models/submissions/file_submission_model.rb +57 -0
- data/lib/copyleaks/models/submissions/index.rb +33 -0
- data/lib/copyleaks/models/submissions/properties/actions.rb +33 -0
- data/lib/copyleaks/models/submissions/properties/author.rb +41 -0
- data/lib/copyleaks/models/submissions/properties/copyleaks_db.rb +44 -0
- data/lib/copyleaks/models/submissions/properties/domains_mode.rb +31 -0
- data/lib/copyleaks/models/submissions/properties/exclude.rb +59 -0
- data/lib/copyleaks/models/submissions/properties/filter.rb +67 -0
- data/lib/copyleaks/models/submissions/properties/index.rb +45 -0
- data/lib/copyleaks/models/submissions/properties/indexing.rb +41 -0
- data/lib/copyleaks/models/submissions/properties/pdf_properties.rb +55 -0
- data/lib/copyleaks/models/submissions/properties/repository.rb +41 -0
- data/lib/copyleaks/models/submissions/properties/scanning.rb +55 -0
- data/lib/copyleaks/models/submissions/properties/scanning_exclude.rb +44 -0
- data/lib/copyleaks/models/submissions/properties/scanning_repository.rb +46 -0
- data/lib/copyleaks/models/submissions/properties/sensitive_data_protection.rb +71 -0
- data/lib/copyleaks/models/submissions/properties/submission_properties.rb +136 -0
- data/lib/copyleaks/models/submissions/properties/webhooks.rb +44 -0
- data/lib/copyleaks/models/submissions/submission_model.rb +47 -0
- data/lib/copyleaks/models/submissions/url_submission_model.rb +51 -0
- data/lib/copyleaks/utils/status-code.utils.rb +38 -0
- data/lib/copyleaks/version.rb +3 -0
- data/lib/index.rb +35 -0
- data/plagiarism-checker.gemspec +29 -0
- metadata +50 -107
- data/.rubocop.yml +0 -2
- data/copyleaks_api.gemspec +0 -29
- data/examples/main.rb +0 -60
- data/lib/copyleaks_api.rb +0 -23
- data/lib/copyleaks_api/access_token.rb +0 -36
- data/lib/copyleaks_api/api.rb +0 -153
- data/lib/copyleaks_api/config.rb +0 -34
- data/lib/copyleaks_api/copyleaks_cloud.rb +0 -92
- data/lib/copyleaks_api/copyleaks_process.rb +0 -82
- data/lib/copyleaks_api/errors.rb +0 -29
- data/lib/copyleaks_api/language.rb +0 -61
- data/lib/copyleaks_api/validators/custom_fields_validator.rb +0 -35
- data/lib/copyleaks_api/validators/email_validator.rb +0 -13
- data/lib/copyleaks_api/validators/file_validator.rb +0 -56
- data/lib/copyleaks_api/validators/language_validator.rb +0 -12
- data/lib/copyleaks_api/validators/response_validator.rb +0 -27
- data/lib/copyleaks_api/validators/url_validator.rb +0 -12
- data/lib/copyleaks_api/version.rb +0 -3
data/lib/copyleaks_api/config.rb
DELETED
@@ -1,34 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  # Class-level store for the gem's global options.
  #
  # Every key of DEFAULTS gets a reader and a writer on the class itself,
  # e.g. Config.sandbox_mode and Config.sandbox_mode = true.
  class Config
    # Option name => value used until a caller overrides it.
    # The custom_fields template is frozen so it can never be mutated through
    # a reader; readers hand out an unfrozen copy instead (see default_for).
    DEFAULTS = {
      sandbox_mode: false,
      allow_partial_scan: false,
      http_callback: nil,
      email_callback: nil,
      custom_fields: {}.freeze
    }.freeze

    class << self
      attr_writer :sandbox_mode, :http_callback, :email_callback, :custom_fields, :allow_partial_scan

      DEFAULTS.each_key do |attr|
        # Reader for one option: returns the stored value, or stores and
        # returns the default when nothing (or nil) has been assigned.
        define_method(attr) do
          current = instance_variable_get("@#{attr}")
          # Test for nil rather than truthiness so an explicit false is kept.
          return current unless current.nil?

          instance_variable_set("@#{attr}", default_for(attr))
        end
      end

      # Block syntax for setting several options at once:
      #   Config.config { |c| c.sandbox_mode = true }
      # Yields the Config class and returns the block's value.
      def config
        yield(self)
      end

      # Puts every option back to its default value.
      def reset
        DEFAULTS.each_key { |attr| instance_variable_set("@#{attr}", default_for(attr)) }
      end

      private

      # Default for +attr+. Hash defaults are copied so that callers mutating
      # the returned hash cannot alter DEFAULTS or leak state across reset.
      def default_for(attr)
        value = DEFAULTS[attr]
        value.is_a?(Hash) ? value.dup : value
      end
    end
  end
end
|
@@ -1,92 +0,0 @@
|
|
1
|
-
require 'copyleaks_api/errors'
|
2
|
-
require 'copyleaks_api/api'
|
3
|
-
|
4
|
-
module CopyleaksApi
  # Entry point for talking to the Copyleaks endpoints: holds the access token
  # and the endpoint type, and exposes one method per remote action.
  class CopyleaksCloud
    # Endpoint types accepted by the constructor.
    ALLOWED_ENDPOINTS = [:publisher, :academic].freeze

    attr_accessor :access_token
    attr_reader :endpoint_type

    # @param email [String] account email, forwarded to AccessToken
    # @param api_key [String] account API key, forwarded to AccessToken
    # @param type [Symbol, String] one of ALLOWED_ENDPOINTS
    # @raise [ArgumentError] when type is nil, not symbolizable or not allowed
    def initialize(email, api_key, type)
      # respond_to? keeps nil / numeric input on the ArgumentError path
      # instead of failing with NoMethodError on to_sym.
      valid = type.respond_to?(:to_sym) && ALLOWED_ENDPOINTS.include?(type.to_sym)
      raise ArgumentError, "Endpoint type '#{type}' is invalid" unless valid

      # Set before building the token so the instance handed to AccessToken
      # already knows its endpoint type.
      @endpoint_type = type
      @access_token = AccessToken.new(self, email, api_key)
    end

    # Lazily built API client shared by all calls of this instance.
    def api
      @api ||= CopyleaksApi::Api.new
    end

    # Returns the 'Amount' field of the count-credits endpoint as an Integer.
    def balance
      api.get(url('count-credits'), token: @access_token.token)['Amount'].to_i
    end

    # Uses the create-by-url endpoint to create a process.
    # Note: url(...) with arguments resolves to the private helper even though
    # the parameter is also named url.
    def create_by_url(url, options = {})
      response = api.post(url('create-by-url'), { 'Url' => url }.to_json, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Uses the create-by-file endpoint to create a process.
    def create_by_file(file_path, options = {})
      response = api.post_file(url('create-by-file'), file_path, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Uses the create-by-file-ocr endpoint to create a process.
    # options[:language] selects the OCR language (see url_with_language).
    def create_by_ocr(ocr_file_path, options = {})
      response = api.post_file(url_with_language('create-by-file-ocr', options), ocr_file_path,
                               options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Uses the create-by-text endpoint to create a process.
    def create_by_text(text, options = {})
      response = api.post(url('create-by-text'), text, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Deletes the process with the given id; returns true when the call
    # completes without raising.
    def delete(id)
      api.delete(url(:delete, id), token: @access_token.token)
      true
    end

    # Retrieves all processes as CopyleaksProcess objects.
    def list
      response = api.get(url(:list), token: @access_token.token)
      response.map { |hash| CopyleaksProcess.create_from_list(self, hash) }
    end

    # Retrieves status information of the process with the given id.
    # Returns the raw response when options[:raw] is truthy, otherwise a
    # CopyleaksProcess built from it.
    def status(id, options = {})
      response = api.get(url(:status, id), no_callbacks: true, token: @access_token.token)
      return response if options[:raw]

      CopyleaksApi::CopyleaksProcess.create_from_status(self, id, response)
    end

    # Retrieves result information of the process with the given id.
    # Returns the raw response when options[:raw] is truthy, otherwise a
    # CopyleaksProcess built from it.
    def result(id, options = {})
      response = api.get(url(:result, id), no_callbacks: true, token: @access_token.token)
      return response if options[:raw]

      CopyleaksApi::CopyleaksProcess.create_from_result(self, id, response)
    end

    private

    # Builds the path for endpoints that take the language as a query
    # parameter. Falls back to Language.english and validates the value first.
    def url_with_language(action, options)
      language = options[:language] || Language.english
      Validators::LanguageValidator.validate!(language)
      url("#{action}?language=#{language}")
    end

    # Builds an endpoint path: "<type>/<action>", or "<type>/<id>/<action>"
    # when an id is given.
    def url(action, id = nil)
      return "#{@endpoint_type}/#{action}" if id.nil?

      "#{@endpoint_type}/#{id}/#{action}"
    end
  end
end
|
@@ -1,82 +0,0 @@
|
|
1
|
-
require 'time'
|
2
|
-
|
3
|
-
module CopyleaksApi
  # One scan process on the Copyleaks side. Instances are built from API
  # responses (see the class-level create* factories) and fetch status or
  # result data through the owning cloud object when they do not have it yet.
  class CopyleaksProcess
    STATUSES = ['processing', 'ready', 'allocated', 'finished', 'error', 'deleted'].freeze

    attr_accessor :process_id, :progress, :custom_fields, :created_at
    # The reader is defined explicitly below because it may trigger a reload.
    attr_writer :status

    # @param options [Hash] recognised keys: :cloud, :process_id,
    #   :custom_fields, :result, :status, :progress, :created_at (a string
    #   parsed with DateTime.parse) and :status_code
    def initialize(options)
      @cloud = options[:cloud]
      [:process_id, :custom_fields, :result, :status, :progress].each do |attr|
        instance_variable_set("@#{attr}", options[attr]) if options[attr]
      end

      @created_at = DateTime.parse(options[:created_at]) if options[:created_at]
      # Looks the status up in STATUSES at status_code + 1.
      # NOTE(review): the offset presumably mirrors the API's numeric codes - confirm.
      @status = STATUSES[options[:status_code].to_i + 1] if options[:status_code]
    end

    STATUSES[1..-1].each do |name|
      # Predicates ready?, allocated?, finished?, error? and deleted?;
      # each goes through #status, which reloads when the status is unknown.
      define_method("#{name}?") { status == name }
    end

    # True while the status is one of ready, allocated or processing.
    # Goes through #status so an unknown status is fetched first, in line
    # with the other predicates.
    def processing?
      ['ready', 'allocated', 'processing'].include?(status)
    end

    # Returns the cached result data, fetching it from the cloud's result
    # call (raw form) on first use.
    def result
      @result ||= @cloud.result(process_id, raw: true)
    end

    # Returns the status, reloading from the status endpoint when unknown.
    def status
      reload if @status.nil?
      @status
    end

    # Deletes the process through the cloud object and marks it 'deleted'.
    def delete
      @cloud.delete(process_id)
      @status = 'deleted'
    end

    # Refreshes status and progress from the status endpoint and drops any
    # cached result. Returns self.
    def reload
      response = @cloud.status(process_id, raw: true)
      @status = response['Status'].downcase
      @progress = response['ProgressPercents'].to_i
      @result = nil
      self
    end

    class << self
      # Builds a process from the response of any create endpoint.
      def create(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'])
      end

      # Builds a process from the response of the status endpoint.
      def create_from_status(cloud, id, hash)
        new(cloud: cloud, process_id: id, status: hash['Status'].downcase, progress: hash['ProgressPercents'])
      end

      # Builds a process from the response of the result endpoint.
      def create_from_result(cloud, id, result)
        new(cloud: cloud, process_id: id, result: result)
      end

      # Builds a process from one entry of the list endpoint.
      def create_from_list(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'],
            status: hash['Status'].downcase, custom_fields: hash['CustomFields'])
      end
    end
  end
end
|
data/lib/copyleaks_api/errors.rb
DELETED
@@ -1,29 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  # Root of every error class in this module; rescue it to catch them all.
  BasicError = Class.new(StandardError)
  BadCustomFieldError = Class.new(BasicError)
  BadFileError = Class.new(BasicError)
  BadEmailError = Class.new(BasicError)
  BadUrlError = Class.new(BasicError)
  UnknownLanguageError = Class.new(BasicError)

  # Error carrying a numeric code alongside its message.
  class BadResponseError < BasicError
    attr_accessor :code

    # @param code [#to_i] error or status code; stored as an Integer
    # @param message [String] text appended after the code in #to_s
    def initialize(code, message)
      # Hand the message to StandardError so the exception is initialised
      # the standard way; #to_s below still controls the rendered text.
      super(message)
      @code = code.to_i
      @message = message
    end

    # Rendered form, also what Exception#message returns.
    def to_s
      "Error code: #{code}. #{@message}"
    end
  end

  # BadResponseError whose code can be tested for the internal-error value.
  class ManagedError < BadResponseError
    # Returns true when the code equals 16.
    def internal_error?
      code == 16
    end
  end
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  # Language names accepted by the language validator, plus one module
  # function per name (Language.english, Language.chinese_simplified, ...).
  module Language
    # Both the list and every name in it are frozen: the accessors below hand
    # out these very String objects, so a mutable entry would let any caller
    # corrupt the allow-list in place.
    ALLOWED = %w[
      Afrikaans
      Albanian
      Basque
      Brazilian
      Bulgarian
      Byelorussian
      Catalan
      Chinese_Simplified
      Chinese_Traditional
      Croatian
      Czech
      Danish
      Dutch
      English
      Esperanto
      Estonian
      Finnish
      French
      Galician
      German
      Greek
      Hungarian
      Icelandic
      Indonesian
      Italian
      Japanese
      Korean
      Latin
      Latvian
      Lithuanian
      Macedonian
      Malay
      Moldavian
      Norwegian
      Polish
      Portuguese
      Romanian
      Russian
      Serbian
      Slovak
      Slovenian
      Spanish
      Swedish
      Tagalog
      Turkish
      Ukrainian
    ].map(&:freeze).freeze

    ALLOWED.each do |lang|
      reader = lang.downcase

      # Returns the language name matching the method name,
      # e.g. Language.english => 'English'.
      define_method(reader) { lang }

      module_function reader
    end
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  module Validators
    # Length checks for the custom fields hash. Sizes are measured with
    # String#size on the to_s form of each key and value.
    class CustomFieldsValidator
      KEY_MAX_LENGTH = 128
      VALUE_MAX_LENGTH = 512
      OVERALL_MAX_LENGTH = 8192

      class << self
        # Raises when any key, any value or the combined size is too long.
        #
        # @param fields [Hash, nil] custom fields; nil or empty is accepted
        # @return [nil] when every limit is respected
        # @raise [BadCustomFieldError] naming the first limit that is exceeded
        def validate!(fields)
          # Nothing to measure; also avoids NoMethodError on nil.
          return if fields.nil? || fields.empty?

          raise BadCustomFieldError, 'Key is too long' unless keys_valid?(fields)
          raise BadCustomFieldError, 'Value is too long' unless values_valid?(fields)
          raise BadCustomFieldError, 'Overall size is too large' unless overall_valid?(fields)
        end

        private

        # True when every key fits into KEY_MAX_LENGTH.
        def keys_valid?(hash)
          hash.each_key.all? { |key| key.to_s.size <= KEY_MAX_LENGTH }
        end

        # True when every value fits into VALUE_MAX_LENGTH.
        def values_valid?(hash)
          hash.each_value.all? { |value| value.to_s.size <= VALUE_MAX_LENGTH }
        end

        # True when the sizes of all keys and values together fit into
        # OVERALL_MAX_LENGTH.
        def overall_valid?(hash)
          total = hash.inject(0) { |sum, (key, value)| sum + key.to_s.size + value.to_s.size }
          total <= OVERALL_MAX_LENGTH
        end
      end
    end
  end
end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  module Validators
    # Format check for account email addresses.
    class EmailValidator
      # Anchored with \A and \z so the WHOLE string has to match. With ^ and $
      # a multi-line value passes as soon as any single line looks like an
      # email address.
      # NOTE(review): the pattern accepts lowercase letters only and a last
      # label of 2-6 characters; widen it if such addresses must be accepted.
      PATTERN = /\A([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})\z/

      class << self
        # Raises an error if the given email is invalid.
        #
        # @param email [#to_s] address to check; nil is treated as invalid
        # @return [nil] when the address matches PATTERN
        # @raise [BadEmailError] otherwise
        def validate!(email)
          return if email.to_s =~ PATTERN

          raise BadEmailError, "Email #{email} is invalid"
        end
      end
    end
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  module Validators
    # Checks a file's extension and size before it is uploaded.
    class FileValidator
      SUPPORTED_FILE_TYPES = [:html, :htm, :txt, :pdf, :doc, :docx, :rtf].freeze
      SUPPORTED_IMAGE_TYPES = [:gif, :png, :bmp, :jpg, :jpeg].freeze
      # Divisor used to express a byte count in the unit of the size limits.
      # NOTE(review): 1_024_000 is neither 10**6 nor 2**20; kept unchanged so
      # existing thresholds do not move - confirm the intended unit.
      BYTES_IN_MB = 1_024_000.0

      class << self
        # Checks a file destined for OCR: image extension and size.
        #
        # @param path [String] path of the file
        # @return [nil] when the file is acceptable
        # @raise [BadFileError] on unsupported extension or excessive size
        def validate_ocr!(path)
          validate_file(path, SUPPORTED_IMAGE_TYPES)
        end

        # Checks a text document: document extension and size.
        #
        # @param path [String] path of the file
        # @return [nil] when the file is acceptable
        # @raise [BadFileError] on unsupported extension or excessive size
        def validate_text_file!(path)
          validate_file(path, SUPPORTED_FILE_TYPES)
        end

        private

        # Passes when the extension is in +types+ and the size is within the
        # limit for that extension. The size is only read for accepted
        # extensions, so the file need not exist to be rejected by extension.
        def validate_file(path, types)
          ext = file_extension(path)
          return if types.include?(ext) && file_size(path) <= allowed_file_size(ext)

          raise BadFileError, "#{path} file has unsupported extension or to large"
        end

        # Size limit for the given extension, in the unit defined by
        # BYTES_IN_MB; 0 for unknown extensions.
        def allowed_file_size(type)
          case type.to_sym
          when :html, :htm
            5
          when :txt
            3
          when :pdf, :doc, :docx, :rtf
            # :rtf is listed in SUPPORTED_FILE_TYPES but used to fall through
            # to 0, so every non-empty RTF was rejected.
            # NOTE(review): assumed to share the document limit - confirm.
            25
          when *SUPPORTED_IMAGE_TYPES
            25
          else
            0
          end
        end

        # Lower-cased extension of +path+ as a Symbol, without the dot;
        # :"" when the file name has no extension. File.extname only looks at
        # the last path component, so dots in directory names are ignored.
        def file_extension(path)
          File.extname(path.to_s).delete('.').downcase.to_sym
        end

        # Size of the file divided by BYTES_IN_MB.
        def file_size(path)
          File.size(path) / BYTES_IN_MB
        end
      end
    end
  end
end
|
@@ -1,12 +0,0 @@
|
|
1
|
-
module CopyleaksApi
  module Validators
    # Checks a language name against Language::ALLOWED.
    class LanguageValidator
      # Returns nil when +language+ is listed in Language::ALLOWED and
      # raises UnknownLanguageError otherwise.
      def self.validate!(language)
        return if Language::ALLOWED.include?(language)

        raise UnknownLanguageError, "#{language} is unknown"
      end
    end
  end
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
|
3
|
-
module CopyleaksApi
  module Validators
    # Turns an unsuccessful HTTP response into an exception.
    class ResponseValidator
      ERROR_HEADER = 'Copyleaks-Error-Code'.freeze
      GOOD_STATUS_CODE = 200

      class << self
        # Raises when the response carries the API error header or a status
        # code other than GOOD_STATUS_CODE.
        #
        # @param response [#[], #code, #body] response object; [] is used to
        #   read the ERROR_HEADER header
        # @return [nil] for a good response
        # @raise [ManagedError] when the error header is present
        # @raise [BadResponseError] when only the status code is bad
        def validate!(response)
          error_code = response[ERROR_HEADER]
          raise ManagedError.new(error_code, extract_message(response.body)) if error_code
          return if response.code.to_i == GOOD_STATUS_CODE

          raise BadResponseError.new(response.code, response.body)
        end

        private

        # Returns the 'Message' entry of a JSON object body. Falls back to the
        # raw body when it is not JSON, is not a JSON object, has no
        # 'Message', or is nil - so building the error never fails itself.
        def extract_message(string)
          parsed = JSON.parse(string)
          message = parsed.is_a?(Hash) ? parsed['Message'] : nil
          message.nil? ? string : message
        rescue JSON::ParserError, TypeError
          string
        end
      end
    end
  end
end
|