ocrsdk 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/README.md +2 -0
- data/Rakefile +25 -0
- data/VERSION +1 -0
- data/coverage/index.css +56 -0
- data/coverage/index.html +244 -0
- data/coverage/jquery.js +154 -0
- data/coverage/jquery.tablesorter.js +2 -0
- data/coverage/lib/ocrsdk/abstract_entity.rb.html +207 -0
- data/coverage/lib/ocrsdk/document.rb.html +159 -0
- data/coverage/lib/ocrsdk/errors.rb.html +199 -0
- data/coverage/lib/ocrsdk/image.rb.html +543 -0
- data/coverage/lib/ocrsdk/pdf.rb.html +311 -0
- data/coverage/lib/ocrsdk/promise.rb.html +791 -0
- data/coverage/lib/ocrsdk/verifiers/format.rb.html +303 -0
- data/coverage/lib/ocrsdk/verifiers/language.rb.html +439 -0
- data/coverage/lib/ocrsdk/verifiers/profile.rb.html +215 -0
- data/coverage/lib/ocrsdk/verifiers/status.rb.html +247 -0
- data/coverage/lib/ocrsdk/verifiers.rb.html +167 -0
- data/coverage/lib/ocrsdk.rb.html +263 -0
- data/coverage/report.css +97 -0
- data/lib/ocrsdk/abstract_entity.rb +14 -0
- data/lib/ocrsdk/document.rb +8 -0
- data/lib/ocrsdk/errors.rb +13 -0
- data/lib/ocrsdk/image.rb +56 -0
- data/lib/ocrsdk/pdf.rb +27 -0
- data/lib/ocrsdk/promise.rb +87 -0
- data/lib/ocrsdk/verifiers/format.rb +26 -0
- data/lib/ocrsdk/verifiers/language.rb +43 -0
- data/lib/ocrsdk/verifiers/profile.rb +15 -0
- data/lib/ocrsdk/verifiers/status.rb +19 -0
- data/lib/ocrsdk/verifiers.rb +9 -0
- data/lib/ocrsdk.rb +21 -0
- data/ocrsdk.gemspec +28 -0
- data/spec/fixtures/files/lorem.complex.pdf +0 -0
- data/spec/fixtures/files/lorem.pdf +0 -0
- data/spec/fixtures/files/malformed.pdf +0 -0
- data/spec/fixtures/files/recognizeable.pdf +0 -0
- data/spec/fixtures/files/russian.jpg +0 -0
- data/spec/fixtures/files/searchable.malformed.pdf +0 -0
- data/spec/helpers/ocrsdk_helpers.rb +106 -0
- data/spec/ocrsdk/image_spec.rb +93 -0
- data/spec/ocrsdk/pdf_spec.rb +26 -0
- data/spec/ocrsdk/promise_spec.rb +165 -0
- data/spec/ocrsdk/verifiers/format_spec.rb +51 -0
- data/spec/ocrsdk/verifiers/language_spec.rb +55 -0
- data/spec/ocrsdk/verifiers/profile_spec.rb +33 -0
- data/spec/ocrsdk/verifiers/status_spec.rb +43 -0
- data/spec/ocrsdk_spec.rb +8 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/test_files.rb +36 -0
- data/travis.yml +4 -0
- metadata +247 -0
data/coverage/report.css
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
/* Styles for the HTML coverage report (page chrome, source listing, tables). */

body {
  font-family: Verdana, Helvetica, Arial, Sans-Serif;
  font-size: 12px;
  color: #4C4C4C;
  background-color: #F4F2ED;
  padding: 1em;
}

/* Source listings: monospace, tight spacing, long lines wrap. */
pre, code {
  color: #000000;
  font-family: "Bitstream Vera Sans Mono","Monaco","Courier New",monospace;
  font-size: 95%;
  line-height: 1.3em;
  margin-top: 0;
  margin-bottom: 0;
  padding: 0;
  word-wrap: break-word;
}

h1, h2, h3, h4, h5, h6 {
  margin: 0em 0em 1em 0em;
  color: #666666;
}

h1 {
  display: block;
  font-size: 2em;
  letter-spacing: -1px;
}

/* Hex colours need the leading '#'; without it the declaration is invalid
   and visited heading links fall back to the browser default colour. */
h1 a:visited {
  color: #666666;
}

h2 {
  margin-top: -1em;
}

/* Per-line coverage states. */
.hit {
  background-color: #BED2BE;
  color: black;
}

.never {
  background-color: #E0DEDB;
  color: black;
}

.miss {
  background-color: #CE8B8C;
  color: black;
}

.line_number {
  color: black;
  font-weight: normal;
  width: 1px;
}

.code_line {
  padding-left: 5px;
}

table {
  width: 100%;
  border: 2px solid #999;
}

td {
  padding: 2px;
}

tr {
  font-size: 14px;
}

th {
  padding: 2px;
  background-color: #999;
  color: white;
  font-size: 12px;
  text-align: left;
}

a:hover {
  text-decoration: none;
}

#content .side_by_side {
  width: 50%;
  overflow: auto;
  float: left;
}

#toggle_test_file {
  color: #666666;
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Base class for objects that talk to the OCR service. It stores the
# credentials and the service URL (with the credentials embedded) that
# subclasses use to build their requests.
class OCRSDK::AbstractEntity
  # application_id - service application id; a missing value becomes ''
  # password       - service password; a missing value becomes ''
  def initialize(application_id=nil, password=nil)
    # Rails.configuration.ocrsdk.application_id / .password were the
    # originally sketched fallbacks; for now an empty string is used.
    @application_id, @password = [application_id, password].map do |credential|
      credential || ''
    end

    @url = prepare_url(@application_id, @password)
  end

  private

  # Builds the base service URI with both credentials CGI-escaped into
  # the user-info part.
  def prepare_url(app_id, pass)
    user   = CGI.escape(app_id)
    secret = CGI.escape(pass)

    URI(format('http://%s:%s@%s', user, secret, OCRSDK::SERVICE_URL))
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Exception hierarchy of the library. Everything derives from OCRSDKError,
# so callers can rescue that single class to catch any library error.
module OCRSDK
  # Root of all errors raised by this library.
  class OCRSDKError < RuntimeError
  end

  # A request to the service failed.
  class NetworkError < OCRSDKError
  end

  # The task status reported by the service was "not enough credits".
  class NotEnoughCredits < OCRSDKError
  end

  # The result of a failed task was requested.
  class ProcessingFailed < OCRSDKError
  end

  # Parent of all "the requested option is not supported" errors.
  class UnsupportedFeature < OCRSDKError
  end

  class UnsupportedLanguage < UnsupportedFeature
  end

  class UnsupportedProfile < UnsupportedFeature
  end

  class UnsupportedInputFormat < UnsupportedFeature
  end

  class UnsupportedOutputFormat < UnsupportedFeature
  end
end
|
data/lib/ocrsdk/image.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# An image file that can be submitted to the OCR service for recognition.
# The asynchronous methods return an OCRSDK::Promise; the *_sync variants
# block until the task has left the processing states.
class OCRSDK::Image < OCRSDK::AbstractEntity
  include OCRSDK::Verifiers::Language
  include OCRSDK::Verifiers::Format
  include OCRSDK::Verifiers::Profile

  # image_path     - path of the file to upload
  # application_id - passed on to OCRSDK::AbstractEntity
  # password       - passed on to OCRSDK::AbstractEntity
  def initialize(image_path, application_id=nil, password=nil)
    super(application_id, password)
    @image_path = image_path
  end

  # Submits the image for plain-text extraction.
  #
  # languages - list of recognition languages (see Verifiers::Language)
  #
  # Returns an OCRSDK::Promise built from the service response.
  def as_text(languages)
    xml_string = api_process_image @image_path, languages, :txt, :text_extraction

    OCRSDK::Promise.from_response xml_string, @application_id, @password
  end

  # Like #as_text, but waits for the task and returns the recognized text
  # tagged as UTF-8.
  def as_text_sync(languages, wait_interval=OCRSDK::DEFAULT_POLL_TIME)
    as_text(languages).wait(wait_interval).result.force_encoding('utf-8')
  end

  # Submits the image for conversion to PDF.
  #
  # The export format must be one of Verifiers::Format::OUTPUT_FORMATS.
  # The previously used :pdf is not in that list, so every real call was
  # rejected with UnsupportedOutputFormat before a request was made.
  # :pdf_searchable is used instead.
  #
  # Returns an OCRSDK::Promise built from the service response.
  def as_pdf(languages)
    xml_string = api_process_image @image_path, languages, :pdf_searchable, :document_conversion

    OCRSDK::Promise.from_response xml_string, @application_id, @password
  end

  # Like #as_pdf, but waits for the task to finish.
  #
  # out_path - when nil the PDF data is returned; otherwise the data is
  #            written to this path and the result of File#write is returned
  def as_pdf_sync(languages, out_path=nil, wait_interval=OCRSDK::DEFAULT_POLL_TIME)
    result = as_pdf(languages).wait(wait_interval).result

    if out_path.nil?
      result
    else
      File.open(out_path, 'wb+') { |f| f.write result }
    end
  end

  private

  # TODO handle 4xx and 5xx responses and errors, file not found error
  # http://ocrsdk.com/documentation/apireference/processImage/
  #
  # Validates the requested options, then uploads the file.
  #
  # Raises OCRSDK::UnsupportedInputFormat, OCRSDK::UnsupportedOutputFormat,
  # OCRSDK::UnsupportedProfile or OCRSDK::UnsupportedLanguage when an option
  # is rejected locally, and OCRSDK::NetworkError when RestClient raises an
  # exception that carries a response.
  def api_process_image(image_path, languages, format=:txt, profile=:document_conversion)
    raise OCRSDK::UnsupportedInputFormat  unless supported_input_format? File.extname(image_path)[1..-1]
    raise OCRSDK::UnsupportedOutputFormat unless supported_output_format? format
    raise OCRSDK::UnsupportedProfile      unless supported_profile?(profile)

    params = URI.encode_www_form(
        language: languages_to_s(languages).join(','),
        exportFormat: format_to_s(format),
        profile: profile_to_s(profile))
    uri = URI.join @url, '/processImage', "?#{params}"

    # Block form so the file handle is closed once the upload has finished,
    # including when the request raises.
    File.open(image_path, 'rb') do |file|
      RestClient.post uri.to_s, upload: { file: file }
    end
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end
end
|
data/lib/ocrsdk/pdf.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
class OCRSDK::PDF < OCRSDK::Image
  # Heuristic check whether this PDF probably still needs OCR.
  #
  # It is not clear-cut which PDFs should be recognized and which should
  # not. The current rule: when (number of images * 20) exceeds the total
  # text length, the document might need recognition, the assumption being
  # that a scanned document may still carry a title, page numbers or
  # credits as real text next to its images.
  #
  # Returns false when the file is malformed or uses an unsupported
  # PDF feature.
  def recognizeable?
    image_count    = 0
    text_length    = 0
    distinct_chars = Set.new

    PDF::Reader.new(@image_path).pages.each do |page|
      page_text = page.text

      text_length += page_text.length
      distinct_chars.merge page_text.split('').map(&:ord)
      image_count += page.xobjects.count { |_name, stream| stream.hash[:Subtype] == :Image }
    end

    # The distinct-character count catches documents that are "searchable"
    # but whose text layer was recognized incorrectly.
    text_length < image_count * 20 || distinct_chars.length < 10
  rescue PDF::Reader::MalformedPDFError, PDF::Reader::UnsupportedFeatureError
    false
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Handle for a task submitted to the OCR service. It keeps the task state
# parsed from the service's XML responses and can poll for updates and
# download the result.
class OCRSDK::Promise < OCRSDK::AbstractEntity
  include OCRSDK::Verifiers::Status

  attr_reader :task_id, :status, :result_url, :estimate_processing_time

  # Builds a promise directly from a service XML response.
  def self.from_response(xml_string, application_id=nil, password=nil)
    OCRSDK::Promise.new(nil, application_id, password).parse_response xml_string
  end

  def initialize(task_id, application_id=nil, password=nil)
    super(application_id, password)
    @task_id = task_id
  end

  # Registration time plus the estimated processing time (in seconds).
  def estimate_completion
    @registration_time + @estimate_processing_time.seconds
  end

  # Updates this promise from the first /response/task element of xml_string.
  #
  # Returns self.
  # Raises OCRSDK::OCRSDKError when the XML has no /response/task element.
  # Raises OCRSDK::NotEnoughCredits when the reported status says so.
  def parse_response(xml_string)
    xml  = Nokogiri::XML.parse xml_string
    task = xml.xpath('/response/task').first

    # Explicit check instead of rescuing the NoMethodError that a nil task
    # would otherwise trigger on the next line.
    if task.nil?
      raise OCRSDK::OCRSDKError, "Problem parsing provided xml string: #{xml_string}"
    end

    @task_id    = task['id']
    @status     = status_to_sym task['status']
    @result_url = task['resultUrl']
    @registration_time = DateTime.parse task['registrationTime']
    @estimate_processing_time = task['estimatedProcessingTime'].to_i

    # admin should be notified in this case
    raise OCRSDK::NotEnoughCredits if @status == :not_enough_credits

    self
  end

  # Fetches the current task status from the service and re-parses it.
  def update
    parse_response api_update_status
  end

  def completed?
    @status == :completed
  end

  def failed?
    [:processing_failed, :deleted, :not_enough_credits].include? @status
  end

  def processing?
    [:submitted, :queued, :in_progress].include? @status
  end

  # Downloads the task result.
  #
  # Raises OCRSDK::ProcessingFailed when the task is in a failed state.
  def result
    raise OCRSDK::ProcessingFailed if failed?
    api_get_result
  end

  # Polls the service every `seconds` seconds for as long as the task is in
  # a processing state. There is no upper bound on the number of polls.
  #
  # Returns self.
  def wait(seconds=OCRSDK::DEFAULT_POLL_TIME)
    while processing? do
      sleep seconds
      update
    end

    self
  end

private

  # http://ocrsdk.com/documentation/apireference/getTaskStatus/
  def api_update_status
    params = URI.encode_www_form taskId: @task_id
    uri    = URI.join @url, '/getTaskStatus', "?#{params}"

    RestClient.get uri.to_s
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end

  def api_get_result
    RestClient.get @result_url.to_s
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end

end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Validation and conversion of input and export file formats.
module OCRSDK::Verifiers::Format
  # http://ocrsdk.com/documentation/specifications/image-formats/
  INPUT_FORMATS = [
    :bmp, :dcx, :pcx, :png,
    :jp2, :jpc, :jpg, :jpeg, :jfif,
    :pdf,
    :tif, :tiff, :gif,
    :djvu, :djv, :jb2
  ]

  # http://ocrsdk.com/documentation/apireference/processImage/
  OUTPUT_FORMATS = [
    :txt, :rtf, :docx, :xlsx, :pptx,
    :pdf_searchable, :pdf_text_and_images,
    :xml, :alto
  ]

  # Internal symbol -> API spelling (lower camel case).
  def format_to_s(format)
    format.to_s.camelize(:lower)
  end

  # True when `format` (a symbol, or a string in any letter case) is a
  # known input format.
  def supported_input_format?(format)
    candidate = format.is_a?(String) ? format.downcase.to_sym : format

    INPUT_FORMATS.include?(candidate)
  end

  # True when `format` (a symbol, or a string that is underscored first)
  # is a known export format.
  def supported_output_format?(format)
    candidate = format.is_a?(String) ? format.underscore.to_sym : format

    OUTPUT_FORMATS.include?(candidate)
  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Validation and conversion of recognition language names.
module OCRSDK::Verifiers::Language
  # http://ocrsdk.com/documentation/specifications/recognition-languages/
  LANGUAGES = [
    :afrikaans, :albanian, :aymara, :azeri_latin,
    :basque, :bemba, :blackfoot, :breton, :bugotu, :bulgarian, :buryat,
    :chamorro, :corsican, :crimean_tatar, :croatian, :crow, :czech,
    :dutch, :dutch_belgian,
    :english, :eskimo_latin, :esperanto, :estonian, :evenki,
    :faeroese, :fijian, :finnish, :french, :frisian,
    :gaelic_scottish, :gagauz, :galician, :ganda, :german, :german_law,
    :german_luxembourg, :german_medical, :german_new_spelling_law, :greek,
    :hani, :hausa, :hebrew, :hungarian,
    :icelandic, :interlingua, :italian,
    :japanese,
    :kabardian, :kasub, :kawa, :kikuyu, :kirgiz, :kongo, :korean_hangul,
    :koryak, :kpelle,
    :lak, :lappish, :latvian, :lezgin,
    :macedonian, :malay, :malinke, :maltese, :mansi, :maori, :mari, :maya,
    :miao, :minankabaw, :mohawk,
    :nenets, :nogay, :norwegian_bokmal, :norwegian_nynorsk, :nyanja,
    :occidental, :old_english, :old_french, :old_german,
    :papiamento, :pidgin_english, :polish, :portuguese_brazilian,
    :portuguese_standard, :provencal,
    :quechua,
    :romanian, :romanian_moldavia, :romany, :rundi, :russian,
    :samoan, :selkup, :serbian_cyrillic, :shona, :sioux, :slovenian,
    :somali, :spanish, :sunda,
    :tabassaran, :tagalog, :tahitian, :tajik, :tatar, :tinpo, :tun, :turkish,
    :uighur_cyrillic, :ukrainian, :uzbek_cyrillic,
    :visayan
  ]

  # Internal symbol -> API spelling (camel case).
  def language_to_s(language)
    language.to_s.camelize
  end

  # API spelling -> internal symbol.
  def language_to_sym(language)
    language.underscore.to_sym
  end

  # True when `language` (a symbol, or a string in API spelling) is known.
  def supported_language?(language)
    candidate = language.is_a?(String) ? language_to_sym(language) : language

    LANGUAGES.include?(candidate)
  end

  # Converts a list of languages to their API spellings.
  #
  # Raises OCRSDK::UnsupportedLanguage when any entry is unknown.
  def languages_to_s(languages)
    names = languages.map { |language| language_to_s(language) }

    raise OCRSDK::UnsupportedLanguage unless names.all? { |name| supported_language?(name) }

    names
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Validation and conversion of processing profile names.
module OCRSDK::Verifiers::Profile
  # http://ocrsdk.com/documentation/specifications/processing-profiles/
  PROFILES = [
    :document_conversion,
    :document_archiving,
    :text_extraction,
    :field_level_recognition,
    :barcode_recognition
  ]

  # Internal symbol -> API spelling (lower camel case).
  def profile_to_s(profile)
    profile.to_s.camelize(:lower)
  end

  # True when `profile` (a symbol, or a string that is underscored first)
  # is a known processing profile.
  def supported_profile?(profile)
    candidate = profile.is_a?(String) ? profile.underscore.to_sym : profile

    PROFILES.include?(candidate)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Validation and conversion of task status names.
module OCRSDK::Verifiers::Status
  # http://ocrsdk.com/documentation/specifications/task-statuses/
  STATUSES = [
    :submitted,
    :queued,
    :in_progress,
    :completed,
    :processing_failed,
    :deleted,
    :not_enough_credits
  ]

  # Internal symbol -> API spelling (camel case).
  def status_to_s(status)
    status.to_s.camelize
  end

  # API spelling -> internal symbol.
  def status_to_sym(status)
    status.underscore.to_sym
  end

  # True when `status` (a symbol, or a string in API spelling) is known.
  def supported_status?(status)
    candidate = status.is_a?(String) ? status_to_sym(status) : status

    STATUSES.include?(candidate)
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Namespace for the verifier mixins. Each one is meant to be included into
# a target class, where it validates symbols and converts them between the
# internal representation and the one used by the outside API.
module OCRSDK::Verifiers
end

# Load every verifier that lives in the verifiers/ directory next to this file.
verifier_glob = File.join(File.dirname(__FILE__), 'verifiers', '*.rb')
Dir[verifier_glob].each { |verifier| require verifier }
|
data/lib/ocrsdk.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'pdf-reader'
|
5
|
+
require 'rest-client'
|
6
|
+
require 'active_support/inflector'
|
7
|
+
require 'active_support/time'
|
8
|
+
|
9
|
+
# Library-wide settings.
# http://ocrsdk.com/documentation/apireference/
module OCRSDK
  # Default number of seconds between two task status polls.
  DEFAULT_POLL_TIME = 3

  # Host name of the OCR service. Frozen so the shared constant cannot be
  # modified in place by accident.
  SERVICE_URL = 'cloud.ocrsdk.com'.freeze
end
|
14
|
+
|
15
|
+
require 'ocrsdk/errors'
|
16
|
+
require 'ocrsdk/verifiers'
|
17
|
+
require 'ocrsdk/abstract_entity'
|
18
|
+
require 'ocrsdk/image'
|
19
|
+
require 'ocrsdk/pdf'
|
20
|
+
require 'ocrsdk/document'
|
21
|
+
require 'ocrsdk/promise'
|
data/ocrsdk.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Gem specification. It reads VERSION and shells out to `git ls-files`, so
# it is expected to be evaluated from the root of the git checkout.
Gem::Specification.new do |spec|
  # Identity
  spec.name     = "ocrsdk"
  spec.version  = File.read("VERSION").delete("\n\r")
  spec.authors  = ["Andrew Korzhuev"]
  spec.email    = "andrew@korzhuev.com"
  spec.homepage = "http://github.com/andrusha/ocrsdk"

  # Descriptions and documentation
  spec.description      = %q{Abbyy's OCR (ocrsdk.com) API wrapper in Ruby.}
  spec.summary          = %q{Abbyy's OCR (ocrsdk.com) API wrapper in Ruby.}
  spec.extra_rdoc_files = ["LICENSE", "README.md"]
  spec.rdoc_options     = ["--charset=UTF-8"]

  # Packaged files, as tracked by git
  spec.require_paths = ["lib"]
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")

  # Runtime dependencies
  # (rest-client fork once considered: git://github.com/babatakao/rest-client.git)
  ["rest-client", "nokogiri", "pdf-reader", "activesupport"].each do |gem_name|
    spec.add_runtime_dependency gem_name
  end

  # Development dependencies; the coverage tool depends on the Ruby version.
  coverage_tool = RUBY_VERSION >= '1.9' ? ["cover_me", ">= 1.2.0"] : ["rcov", ">= 0.9"]

  spec.add_development_dependency "rake", ">= 0.8"
  spec.add_development_dependency(*coverage_tool)
  spec.add_development_dependency "rspec", ">= 2"
  spec.add_development_dependency "webmock"
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# Spec helpers: canned service responses for the task states plus a helper
# that stubs the network-facing methods of OCRSDK::Image and OCRSDK::Promise.
module OCRSDKHelpers

  # Response to a freshly submitted task.
  def process_image_response
    task_response_xml "22345200-abe8-4f60-90c8-0d43c5f6c0f6", "2001-01-01T13:18:22Z", "Submitted", "10"
  end

  # Response for a task rejected because the account has no credits left.
  def process_image_response_credits
    task_response_xml "22345200-abe8-4f60-90c8-0d43c5f6c0f6", "2001-01-01T13:18:22Z", "NotEnoughCredits", "0"
  end

  # Status update for a task that is still being processed.
  def process_image_updated_response
    task_response_xml "update-task-id", "2001-02-01T13:18:22Z", "InProgress", "10"
  end

  # Status update for a task that finished successfully.
  def process_image_completed_response
    task_response_xml "update-task-id", "2001-03-01T13:18:22Z", "Completed", "10"
  end

  # Status update for a task whose processing failed.
  def process_image_failed_response
    task_response_xml "update-task-id", "2001-03-01T13:18:22Z", "ProcessingFailed", "10"
  end

  # Replaces every network call with a canned answer: submitting an image
  # yields the "Submitted" response, a status update yields the "Completed"
  # response, and the result download yields "meow".
  def mock_ocrsdk
    OCRSDK::Image.any_instance.stub(:api_process_image) { |_path, _languages, _format, _profile| process_image_response }
    OCRSDK::Promise.any_instance.stub(:api_update_status) { process_image_completed_response }
    OCRSDK::Promise.any_instance.stub(:api_get_result) { "meow" }
  end

  private

  # Renders the response document shared by all fixtures; only the task id,
  # status change time, status and credits differ between them. The trailing
  # empty <task/> element is part of the fixture and is kept as is.
  def task_response_xml(task_id, status_change_time, status, credits)
    <<-XML
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<response>
<task id="#{task_id}"
registrationTime="2001-01-01T13:18:22Z"
statusChangeTime="#{status_change_time}"
status="#{status}"
error="{An error message.}"
filesCount="10"
credits="#{credits}"
estimatedProcessingTime="3600"
resultUrl="http://domain/blob ID"
description="My first OCR task"/>
<task/>
</response>
    XML
  end

end
|
105
|
+
|
106
|
+
RSpec.configuration.include OCRSDKHelpers
|