plagiarism-checker 1.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +8 -12
  3. data/Gemfile +3 -1
  4. data/LICENSE.txt +1 -1
  5. data/README.md +39 -0
  6. data/Rakefile +4 -2
  7. data/bin/console +15 -7
  8. data/bin/setup +8 -8
  9. data/lib/copyleaks/api.rb +493 -0
  10. data/lib/copyleaks/app.config.rb +47 -0
  11. data/lib/copyleaks/models/auth_token.rb +54 -0
  12. data/lib/copyleaks/models/delete_request_model.rb +64 -0
  13. data/lib/copyleaks/models/exceptions/auth_exipred_exception.rb +32 -0
  14. data/lib/copyleaks/models/exceptions/command_exception.rb +32 -0
  15. data/lib/copyleaks/models/exceptions/index.rb +32 -0
  16. data/lib/copyleaks/models/exceptions/rate_limit_exception.rb +32 -0
  17. data/lib/copyleaks/models/exceptions/under_maintenance_exception.rb +32 -0
  18. data/lib/copyleaks/models/exports/export_crawled_version.rb +54 -0
  19. data/lib/copyleaks/models/exports/export_model.rb +84 -0
  20. data/lib/copyleaks/models/exports/export_pdf_report.rb +54 -0
  21. data/lib/copyleaks/models/exports/export_results.rb +56 -0
  22. data/lib/copyleaks/models/exports/index.rb +32 -0
  23. data/lib/copyleaks/models/id_object.rb +44 -0
  24. data/lib/copyleaks/models/index.rb +35 -0
  25. data/lib/copyleaks/models/start_request_model.rb +63 -0
  26. data/lib/copyleaks/models/submissions/file_ocr_submission_model.rb +61 -0
  27. data/lib/copyleaks/models/submissions/file_submission_model.rb +57 -0
  28. data/lib/copyleaks/models/submissions/index.rb +33 -0
  29. data/lib/copyleaks/models/submissions/properties/actions.rb +33 -0
  30. data/lib/copyleaks/models/submissions/properties/author.rb +41 -0
  31. data/lib/copyleaks/models/submissions/properties/copyleaks_db.rb +44 -0
  32. data/lib/copyleaks/models/submissions/properties/domains_mode.rb +31 -0
  33. data/lib/copyleaks/models/submissions/properties/exclude.rb +59 -0
  34. data/lib/copyleaks/models/submissions/properties/filter.rb +67 -0
  35. data/lib/copyleaks/models/submissions/properties/index.rb +45 -0
  36. data/lib/copyleaks/models/submissions/properties/indexing.rb +41 -0
  37. data/lib/copyleaks/models/submissions/properties/pdf_properties.rb +55 -0
  38. data/lib/copyleaks/models/submissions/properties/repository.rb +41 -0
  39. data/lib/copyleaks/models/submissions/properties/scanning.rb +55 -0
  40. data/lib/copyleaks/models/submissions/properties/scanning_exclude.rb +44 -0
  41. data/lib/copyleaks/models/submissions/properties/scanning_repository.rb +46 -0
  42. data/lib/copyleaks/models/submissions/properties/sensitive_data_protection.rb +71 -0
  43. data/lib/copyleaks/models/submissions/properties/submission_properties.rb +136 -0
  44. data/lib/copyleaks/models/submissions/properties/webhooks.rb +44 -0
  45. data/lib/copyleaks/models/submissions/submission_model.rb +47 -0
  46. data/lib/copyleaks/models/submissions/url_submission_model.rb +51 -0
  47. data/lib/copyleaks/utils/status-code.utils.rb +38 -0
  48. data/lib/copyleaks/version.rb +3 -0
  49. data/lib/index.rb +35 -0
  50. data/plagiarism-checker.gemspec +29 -0
  51. metadata +50 -107
  52. data/.rubocop.yml +0 -2
  53. data/copyleaks_api.gemspec +0 -29
  54. data/examples/main.rb +0 -60
  55. data/lib/copyleaks_api.rb +0 -23
  56. data/lib/copyleaks_api/access_token.rb +0 -36
  57. data/lib/copyleaks_api/api.rb +0 -153
  58. data/lib/copyleaks_api/config.rb +0 -34
  59. data/lib/copyleaks_api/copyleaks_cloud.rb +0 -92
  60. data/lib/copyleaks_api/copyleaks_process.rb +0 -82
  61. data/lib/copyleaks_api/errors.rb +0 -29
  62. data/lib/copyleaks_api/language.rb +0 -61
  63. data/lib/copyleaks_api/validators/custom_fields_validator.rb +0 -35
  64. data/lib/copyleaks_api/validators/email_validator.rb +0 -13
  65. data/lib/copyleaks_api/validators/file_validator.rb +0 -56
  66. data/lib/copyleaks_api/validators/language_validator.rb +0 -12
  67. data/lib/copyleaks_api/validators/response_validator.rb +0 -27
  68. data/lib/copyleaks_api/validators/url_validator.rb +0 -12
  69. data/lib/copyleaks_api/version.rb +0 -3
@@ -1,34 +0,0 @@
1
module CopyleaksApi
  # Class-level store for request options. Values live in instance variables
  # of the Config class itself, so they are shared by every caller in the
  # process.
  class Config
    # Default value for every supported option. The nested hash is frozen so
    # the defaults can never be altered through a value handed out by a getter.
    DEFAULTS = {
      sandbox_mode: false,
      allow_partial_scan: false,
      http_callback: nil,
      email_callback: nil,
      custom_fields: {}.freeze
    }.freeze

    class << self
      attr_writer(*DEFAULTS.keys)

      DEFAULTS.each_key do |attr|
        # Getter for the option. A nil/false stored value is treated as
        # "unset": the default is (re)assigned and returned.
        define_method(attr) do
          current = instance_variable_get("@#{attr}")
          return current if current

          instance_variable_set("@#{attr}", default_for(attr))
        end
      end

      # Block syntax for setting several options at once.
      #
      # @yieldparam config [Class] the Config class itself
      # @return [Object] whatever the block returns
      def config
        yield(self)
      end

      # Resets every option to its default.
      #
      # @return [Hash] DEFAULTS
      def reset
        DEFAULTS.each_key { |attr| instance_variable_set("@#{attr}", default_for(attr)) }
      end

      private

      # Returns the default for +attr+. Hash defaults are copied so that
      # callers mutating e.g. +custom_fields+ in place cannot leak their
      # changes into DEFAULTS (and therefore past a later +reset+).
      def default_for(attr)
        value = DEFAULTS[attr]
        value.is_a?(Hash) ? value.dup : value
      end
    end
  end
end
@@ -1,92 +0,0 @@
1
- require 'copyleaks_api/errors'
2
- require 'copyleaks_api/api'
3
-
4
module CopyleaksApi
  # Entry point for one endpoint family (+publisher+ or +academic+). Every
  # request goes through #api and carries the token held by #access_token.
  class CopyleaksCloud
    ALLOWED_ENDPOINTS = [:publisher, :academic].freeze

    attr_accessor :access_token
    attr_reader :endpoint_type

    # @param email [String] account e-mail, forwarded to AccessToken
    # @param api_key [String] account API key, forwarded to AccessToken
    # @param type [Symbol, String] one of ALLOWED_ENDPOINTS
    # @raise [ArgumentError] if +type+ is not an allowed endpoint (including nil)
    def initialize(email, api_key, type)
      # Compare as strings so nil or other non-symbolizable input yields the
      # ArgumentError below instead of a NoMethodError from #to_sym.
      unless ALLOWED_ENDPOINTS.any? { |endpoint| endpoint.to_s == type.to_s }
        raise ArgumentError, "Endpoint type '#{type}' is invalid"
      end

      @access_token = AccessToken.new(self, email, api_key)
      @endpoint_type = type
    end

    # Lazily built low-level API client.
    def api
      @api ||= CopyleaksApi::Api.new
    end

    # Returns the 'Amount' field of the count-credits response as an Integer.
    def balance
      api.get(url('count-credits'), token: @access_token.token)['Amount'].to_i
    end

    # Creates a process through the create-by-url endpoint.
    #
    # @param url [String] address to scan (sent as the 'Url' JSON field)
    # @param options [Hash] extra options merged with the access token
    def create_by_url(url, options = {})
      # NOTE: `url(...)` with parentheses still calls the private #url helper,
      # even though a local named `url` is in scope.
      response = api.post(url('create-by-url'), { 'Url' => url }.to_json, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-file endpoint.
    def create_by_file(file_path, options = {})
      response = api.post_file(url('create-by-file'), file_path, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-file-ocr endpoint. The language
    # comes from options[:language] and defaults to Language.english.
    def create_by_ocr(ocr_file_path, options = {})
      response = api.post_file(url_with_language('create-by-file-ocr', options), ocr_file_path,
                               options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Creates a process through the create-by-text endpoint.
    def create_by_text(text, options = {})
      response = api.post(url('create-by-text'), text, options.merge(token: @access_token.token))
      CopyleaksProcess.create(self, response)
    end

    # Deletes the process with the given id.
    #
    # @return [true]
    def delete(id)
      api.delete(url(:delete, id), token: @access_token.token)
      true
    end

    # Retrieves all processes as CopyleaksProcess objects.
    def list
      response = api.get(url(:list), token: @access_token.token)
      response.map { |hash| CopyleaksProcess.create_from_list(self, hash) }
    end

    # Retrieves status information for the process with the given id.
    # Returns the raw response when options[:raw] is truthy.
    def status(id, options = {})
      response = api.get(url(:status, id), no_callbacks: true, token: @access_token.token)
      return response if options[:raw]

      CopyleaksApi::CopyleaksProcess.create_from_status(self, id, response)
    end

    # Retrieves result information for the process with the given id.
    # Returns the raw response when options[:raw] is truthy.
    def result(id, options = {})
      response = api.get(url(:result, id), no_callbacks: true, token: @access_token.token)
      return response if options[:raw]

      CopyleaksApi::CopyleaksProcess.create_from_result(self, id, response)
    end

    private

    # Builds the path for endpoints that take the language as a query
    # parameter; the language is validated before it is interpolated.
    def url_with_language(action, options)
      language = options[:language] || Language.english
      Validators::LanguageValidator.validate!(language)
      url("#{action}?language=#{language}")
    end

    # Builds "<endpoint>/<action>" or, with an id, "<endpoint>/<id>/<action>".
    def url(action, id = nil)
      return "#{@endpoint_type}/#{action}" if id.nil?

      "#{@endpoint_type}/#{id}/#{action}"
    end
  end
end
@@ -1,82 +0,0 @@
1
- require 'time'
2
-
3
module CopyleaksApi
  # One scan process on the server, bound to the CopyleaksCloud that created
  # it. Status and result are fetched lazily from the cloud when not known.
  class CopyleaksProcess
    STATUSES = ['processing', 'ready', 'allocated', 'finished', 'error', 'deleted'].freeze
    # Statuses that #processing? treats as "still being worked on".
    IN_PROGRESS_STATUSES = ['ready', 'allocated', 'processing'].freeze

    attr_accessor :process_id, :progress, :custom_fields, :created_at
    # The reader for status is defined explicitly below (it reloads lazily).
    attr_writer :status

    # @param options [Hash] recognised keys: :cloud, :process_id,
    #   :custom_fields, :result, :status, :progress, :created_at (String
    #   parsed with DateTime.parse) and :status_code
    def initialize(options)
      @cloud = options[:cloud]
      [:process_id, :custom_fields, :result, :status, :progress].each do |attr|
        instance_variable_set("@#{attr}", options[attr]) if options[attr]
      end

      @created_at = DateTime.parse(options[:created_at]) if options[:created_at]
      # NOTE(review): status codes are mapped with a +1 offset into STATUSES;
      # presumably the codes start at -1 -- confirm against the API.
      @status = STATUSES[options[:status_code].to_i + 1] if options[:status_code]
    end

    STATUSES[1..-1].each do |name|
      # Predicate for each status; reloads first when the status is unknown.
      define_method("#{name}?") { status == name }
    end

    # True while the status is one of IN_PROGRESS_STATUSES. Like the other
    # predicates it goes through #status, so an unknown status is fetched
    # first instead of being reported as "not processing".
    def processing?
      IN_PROGRESS_STATUSES.include?(status)
    end

    # Returns the cached result, fetching the raw result from the cloud on
    # first use.
    def result
      @result ||= @cloud.result(process_id, raw: true)
    end

    # Returns the status, reloading from the cloud when it is not known yet.
    def status
      reload if @status.nil?
      @status
    end

    # Deletes the process through the cloud and marks it as 'deleted'.
    def delete
      @cloud.delete(process_id)
      @status = 'deleted'
    end

    # Refreshes status and progress from the status endpoint and drops the
    # cached result.
    #
    # @return [CopyleaksProcess] self
    def reload
      response = @cloud.status(process_id, raw: true)
      @status = response['Status'].downcase
      @progress = response['ProgressPercents'].to_i
      @result = nil
      self
    end

    class << self
      # Builds a process from the response of any create endpoint.
      def create(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'])
      end

      # Builds a process from the response of the status endpoint.
      def create_from_status(cloud, id, hash)
        new(cloud: cloud, process_id: id, status: hash['Status'].downcase, progress: hash['ProgressPercents'])
      end

      # Builds a process from the response of the result endpoint.
      def create_from_result(cloud, id, result)
        new(cloud: cloud, process_id: id, result: result)
      end

      # Builds a process from one entry of the list endpoint.
      def create_from_list(cloud, hash)
        new(cloud: cloud, process_id: hash['ProcessId'], created_at: hash['CreationTimeUTC'],
            status: hash['Status'].downcase, custom_fields: hash['CustomFields'])
      end
    end
  end
end
@@ -1,29 +0,0 @@
1
module CopyleaksApi
  # Root of every error raised by this library.
  BasicError = Class.new(StandardError)
  BadCustomFieldError = Class.new(BasicError)
  BadFileError = Class.new(BasicError)
  BadEmailError = Class.new(BasicError)
  BadUrlError = Class.new(BasicError)
  UnknownLanguageError = Class.new(BasicError)

  # Raised for a response that carries an error; keeps the numeric code next
  # to the message text.
  class BadResponseError < BasicError
    attr_accessor :code

    # @param code [#to_i] error or status code
    # @param message [String, nil] text describing the failure
    def initialize(code, message)
      # Hand the text to StandardError as well so the exception is fully
      # initialised, not just this subclass's own state.
      super(message)
      @code = code.to_i
      @message = message
    end

    # @return [String] "Error code: <code>. <message>"
    def to_s
      "Error code: #{code}. #{@message}"
    end
  end

  # A BadResponseError whose code can be checked for an internal server error.
  class ManagedError < BadResponseError
    # Code that #internal_error? treats as an internal server error.
    INTERNAL_ERROR_CODE = 16

    # Returns true if this error is an internal server error.
    def internal_error?
      code == INTERNAL_ERROR_CODE
    end
  end
end
@@ -1,61 +0,0 @@
1
module CopyleaksApi
  # Whitelist of language names plus one accessor per language, e.g.
  # Language.english => 'English', Language.chinese_simplified =>
  # 'Chinese_Simplified'.
  module Language
    # Each name is frozen as well as the list: the accessors below return
    # these very objects, so an in-place edit by a caller would otherwise
    # corrupt the whitelist.
    ALLOWED = %w[
      Afrikaans
      Albanian
      Basque
      Brazilian
      Bulgarian
      Byelorussian
      Catalan
      Chinese_Simplified
      Chinese_Traditional
      Croatian
      Czech
      Danish
      Dutch
      English
      Esperanto
      Estonian
      Finnish
      French
      Galician
      German
      Greek
      Hungarian
      Icelandic
      Indonesian
      Italian
      Japanese
      Korean
      Latin
      Latvian
      Lithuanian
      Macedonian
      Malay
      Moldavian
      Norwegian
      Polish
      Portuguese
      Romanian
      Russian
      Serbian
      Slovak
      Slovenian
      Spanish
      Swedish
      Tagalog
      Turkish
      Ukrainian
    ].each(&:freeze).freeze

    ALLOWED.each do |language|
      accessor = language.downcase

      # Returns the language name matching the (lower-cased) method name.
      define_method(accessor) { language }

      # Expose the accessor on the module itself (Language.english).
      module_function accessor
    end
  end
end
@@ -1,35 +0,0 @@
1
module CopyleaksApi
  module Validators
    # Checks the sizes of a custom-fields hash before it is sent to the API.
    # Lengths are measured on the String form (#to_s) of every key and value.
    class CustomFieldsValidator
      KEY_MAX_LENGTH = 128
      VALUE_MAX_LENGTH = 512
      OVERALL_MAX_LENGTH = 8192

      class << self
        # Raises the appropriate error if any length is too large.
        #
        # @param fields [Hash, nil] custom fields; nil is treated as "no fields"
        # @return [nil] when everything is within limits
        # @raise [BadCustomFieldError] for an oversized key, an oversized
        #   value, or an oversized total (checked in that order)
        def validate!(fields)
          # Nothing to check; previously nil crashed with NoMethodError.
          return if fields.nil?

          raise BadCustomFieldError, 'Key is too long' unless keys_valid?(fields)
          raise BadCustomFieldError, 'Value is too long' unless values_valid?(fields)
          raise BadCustomFieldError, 'Overall size is too large' unless overall_valid?(fields)
        end

        private

        # True when every key fits into KEY_MAX_LENGTH.
        def keys_valid?(hash)
          hash.keys.all? { |key| key.to_s.size <= KEY_MAX_LENGTH }
        end

        # True when every value fits into VALUE_MAX_LENGTH.
        def values_valid?(hash)
          hash.values.all? { |value| value.to_s.size <= VALUE_MAX_LENGTH }
        end

        # True when all keys and values together fit into OVERALL_MAX_LENGTH.
        def overall_valid?(hash)
          total = hash.reduce(0) { |sum, (key, value)| sum + key.to_s.size + value.to_s.size }
          total <= OVERALL_MAX_LENGTH
        end
      end
    end
  end
end
@@ -1,13 +0,0 @@
1
module CopyleaksApi
  module Validators
    # Format check for account e-mail addresses.
    class EmailValidator
      # Anchored with \A and \z so the WHOLE string has to match. With ^ and $
      # a multi-line value such as "junk\nuser@example.com" passes, because
      # those anchors match at every line boundary.
      EMAIL_FORMAT = /\A([a-z0-9_\.-]+)@([\da-z\.-]+)\.([a-z\.]{2,6})\z/

      class << self
        # Raises an error if the given email is invalid.
        #
        # @param email [String, nil] address to check
        # @return [nil] when the address matches EMAIL_FORMAT
        # @raise [BadEmailError] otherwise (including nil)
        def validate!(email)
          return if email =~ EMAIL_FORMAT

          raise BadEmailError, "Email #{email} is invalid"
        end
      end
    end
  end
end
@@ -1,56 +0,0 @@
1
module CopyleaksApi
  module Validators
    # Checks a local file's extension and size before upload.
    class FileValidator
      SUPPORTED_FILE_TYPES = [:html, :htm, :txt, :pdf, :doc, :docx, :rtf].freeze
      SUPPORTED_IMAGE_TYPES = [:gif, :png, :bmp, :jpg, :jpeg].freeze
      # 1024 * 1024. The previous value, 1_024_000, mixed decimal and binary
      # units and matched neither definition of a megabyte.
      BYTES_IN_MB = 1_048_576.0

      class << self
        # Checks that a file meant for OCR has a supported image type and size.
        #
        # @param path [String] path to the image
        # @return [nil] when the file is acceptable
        # @raise [BadFileError] for an unsupported extension or oversized file
        def validate_ocr!(path)
          validate_file(path, SUPPORTED_IMAGE_TYPES)
        end

        # Checks that a text document has a supported type and size.
        #
        # @param path [String] path to the document
        # @return [nil] when the file is acceptable
        # @raise [BadFileError] for an unsupported extension or oversized file
        def validate_text_file!(path)
          validate_file(path, SUPPORTED_FILE_TYPES)
        end

        private

        # Checks the file against the given list of types. The size is only
        # read once the extension has been accepted.
        def validate_file(path, types)
          ext = file_extension(path)
          return if types.include?(ext) && file_size(path) <= allowed_file_size(ext)

          raise BadFileError, "#{path} file has unsupported extension or too large"
        end

        # Returns the size limit in MB for the given type (0 when unknown).
        def allowed_file_size(type)
          case type.to_sym
          when :html, :htm
            5
          when :txt
            3
          # :rtf is listed in SUPPORTED_FILE_TYPES but used to fall through to
          # the 0 MB branch, so every non-empty RTF file was rejected.
          # NOTE(review): assumes RTF shares the limit of the other document
          # formats -- confirm against the API documentation.
          when :pdf, :doc, :docx, :rtf
            25
          when *SUPPORTED_IMAGE_TYPES
            25
          else
            0
          end
        end

        # Extracts the lower-cased extension as a Symbol (:"" when the file
        # name has none). File.extname only looks at the last path component,
        # so a dot in a directory name is not mistaken for an extension.
        def file_extension(path)
          File.extname(path.to_s).delete('.').downcase.to_sym
        end

        # Returns the file size in MB.
        def file_size(path)
          File.size(path) / BYTES_IN_MB
        end
      end
    end
  end
end
@@ -1,12 +0,0 @@
1
module CopyleaksApi
  module Validators
    # Guards endpoints that take a language against names outside the
    # Language::ALLOWED whitelist.
    class LanguageValidator
      # Raises an error if the given language is not allowed.
      #
      # @param language [String] language name, compared as-is with the
      #   entries of Language::ALLOWED
      # @return [nil] when the language is allowed
      # @raise [UnknownLanguageError] otherwise
      def self.validate!(language)
        return if Language::ALLOWED.include?(language)

        raise UnknownLanguageError, "#{language} is unknown"
      end
    end
  end
end
@@ -1,27 +0,0 @@
1
- require 'json'
2
-
3
module CopyleaksApi
  module Validators
    # Turns an unsuccessful HTTP response into an exception.
    class ResponseValidator
      ERROR_HEADER = 'Copyleaks-Error-Code'.freeze
      GOOD_STATUS_CODE = 200

      class << self
        # Raises an error if the response has an API error code or a bad
        # status code.
        #
        # @param response [#[], #code, #body] response object; the error
        #   header is read with response[ERROR_HEADER]
        # @return [nil] when the response is fine
        # @raise [ManagedError] when the error header is present
        # @raise [BadResponseError] when the status code is not GOOD_STATUS_CODE
        def validate!(response)
          error_code = response[ERROR_HEADER]
          raise ManagedError.new(error_code, extract_message(response.body)) if error_code
          raise BadResponseError.new(response.code, response.body) if response.code.to_i != GOOD_STATUS_CODE
        end

        private

        # Extracts the 'Message' field from a JSON body. Falls back to the raw
        # body when it is not valid JSON, is nil (JSON.parse raises TypeError),
        # is JSON but not an object, or has no 'Message' field -- so building
        # the error never fails on an unexpected body.
        def extract_message(string)
          parsed = JSON.parse(string)
          message = parsed['Message'] if parsed.is_a?(Hash)
          message || string
        rescue JSON::ParserError, TypeError
          string
        end
      end
    end
  end
end