ocrsdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +5 -0
- data/.rspec +2 -0
- data/Gemfile +3 -0
- data/LICENSE +20 -0
- data/README.md +2 -0
- data/Rakefile +25 -0
- data/VERSION +1 -0
- data/coverage/index.css +56 -0
- data/coverage/index.html +244 -0
- data/coverage/jquery.js +154 -0
- data/coverage/jquery.tablesorter.js +2 -0
- data/coverage/lib/ocrsdk/abstract_entity.rb.html +207 -0
- data/coverage/lib/ocrsdk/document.rb.html +159 -0
- data/coverage/lib/ocrsdk/errors.rb.html +199 -0
- data/coverage/lib/ocrsdk/image.rb.html +543 -0
- data/coverage/lib/ocrsdk/pdf.rb.html +311 -0
- data/coverage/lib/ocrsdk/promise.rb.html +791 -0
- data/coverage/lib/ocrsdk/verifiers/format.rb.html +303 -0
- data/coverage/lib/ocrsdk/verifiers/language.rb.html +439 -0
- data/coverage/lib/ocrsdk/verifiers/profile.rb.html +215 -0
- data/coverage/lib/ocrsdk/verifiers/status.rb.html +247 -0
- data/coverage/lib/ocrsdk/verifiers.rb.html +167 -0
- data/coverage/lib/ocrsdk.rb.html +263 -0
- data/coverage/report.css +97 -0
- data/lib/ocrsdk/abstract_entity.rb +14 -0
- data/lib/ocrsdk/document.rb +8 -0
- data/lib/ocrsdk/errors.rb +13 -0
- data/lib/ocrsdk/image.rb +56 -0
- data/lib/ocrsdk/pdf.rb +27 -0
- data/lib/ocrsdk/promise.rb +87 -0
- data/lib/ocrsdk/verifiers/format.rb +26 -0
- data/lib/ocrsdk/verifiers/language.rb +43 -0
- data/lib/ocrsdk/verifiers/profile.rb +15 -0
- data/lib/ocrsdk/verifiers/status.rb +19 -0
- data/lib/ocrsdk/verifiers.rb +9 -0
- data/lib/ocrsdk.rb +21 -0
- data/ocrsdk.gemspec +28 -0
- data/spec/fixtures/files/lorem.complex.pdf +0 -0
- data/spec/fixtures/files/lorem.pdf +0 -0
- data/spec/fixtures/files/malformed.pdf +0 -0
- data/spec/fixtures/files/recognizeable.pdf +0 -0
- data/spec/fixtures/files/russian.jpg +0 -0
- data/spec/fixtures/files/searchable.malformed.pdf +0 -0
- data/spec/helpers/ocrsdk_helpers.rb +106 -0
- data/spec/ocrsdk/image_spec.rb +93 -0
- data/spec/ocrsdk/pdf_spec.rb +26 -0
- data/spec/ocrsdk/promise_spec.rb +165 -0
- data/spec/ocrsdk/verifiers/format_spec.rb +51 -0
- data/spec/ocrsdk/verifiers/language_spec.rb +55 -0
- data/spec/ocrsdk/verifiers/profile_spec.rb +33 -0
- data/spec/ocrsdk/verifiers/status_spec.rb +43 -0
- data/spec/ocrsdk_spec.rb +8 -0
- data/spec/spec_helper.rb +8 -0
- data/spec/support/test_files.rb +36 -0
- data/travis.yml +4 -0
- metadata +247 -0
data/coverage/report.css
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
/* Stylesheet for the generated HTML coverage report. */

/* Page defaults. */
body {
  font-family: Verdana, Helvetica, Arial, Sans-Serif;
  font-size: 12px;
  color: #4C4C4C;
  background-color: #F4F2ED;
  padding: 1em;
}

/* Source listings: monospace, no extra spacing, long lines wrap. */
pre, code {
  color: #000000;
  font-family: "Bitstream Vera Sans Mono","Monaco","Courier New",monospace;
  font-size: 95%;
  line-height: 1.3em;
  margin-top: 0;
  margin-bottom: 0;
  padding: 0;
  word-wrap: break-word;
}

/* Headings. */
h1, h2, h3, h4, h5, h6 {
  margin: 0em 0em 1em 0em;
  color: #666666;
}

h1 {
  display: block;
  font-size: 2em;
  letter-spacing: -1px;
}

/* The colour needs its leading '#'; a bare "666666" is not a valid colour
   value and the declaration would be ignored. */
h1 a:visited {
  color: #666666;
}

h2 {
  margin-top: -1em;
}

/* Per-line coverage states. */
.hit {
  background-color: #BED2BE;
  color: black;
}

.never {
  background-color: #E0DEDB;
  color: black;
}

.miss {
  background-color: #CE8B8C;
  color: black;
}

/* Source table columns. */
.line_number {
  color: black;
  font-weight: normal;
  width: 1px;
}

.code_line {
  padding-left: 5px;
}

/* Tables. */
table {
  width: 100%;
  border: 2px solid #999;
}

td {
  padding: 2px;
}

tr {
  font-size: 14px;
}

th {
  padding: 2px;
  background-color: #999;
  color: white;
  font-size: 12px;
  text-align: left;
}

a:hover {
  text-decoration: none;
}

/* Two-column layout inside #content. */
#content .side_by_side {
  width: 50%;
  overflow: auto;
  float: left;
}

#toggle_test_file {
  color: #666666;
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Common base for objects that talk to the OCR service: stores the
# credentials and derives the service URL that embeds them.
class OCRSDK::AbstractEntity
  # application_id / password fall back to an empty string when not given
  # (the original notes hint they could come from
  # Rails.configuration.ocrsdk.application_id / .password).
  def initialize(application_id=nil, password=nil)
    @application_id = application_id ? application_id : ''
    @password       = password ? password : ''

    @url = prepare_url(@application_id, @password)
  end

  private

  # Builds an http URI for OCRSDK::SERVICE_URL with the CGI-escaped
  # application id and password in the userinfo part.
  def prepare_url(app_id, pass)
    userinfo = [app_id, pass].map { |part| CGI.escape(part) }.join(':')

    URI("http://#{userinfo}@#{OCRSDK::SERVICE_URL}")
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Exception hierarchy of the library. Everything derives from
# OCRSDK::OCRSDKError, so callers can rescue that single class.
module OCRSDK
  # Root of all library errors.
  class OCRSDKError < RuntimeError
  end

  # Raised by the API wrappers when an HTTP request fails.
  class NetworkError < OCRSDKError
  end

  # Raised when a task reports the not-enough-credits status.
  class NotEnoughCredits < OCRSDKError
  end

  # Raised when the result of a failed task is requested.
  class ProcessingFailed < OCRSDKError
  end

  # Parent of the "value not supported" errors below.
  class UnsupportedFeature < OCRSDKError
  end

  class UnsupportedLanguage < UnsupportedFeature
  end

  class UnsupportedProfile < UnsupportedFeature
  end

  class UnsupportedInputFormat < UnsupportedFeature
  end

  class UnsupportedOutputFormat < UnsupportedFeature
  end
end
|
data/lib/ocrsdk/image.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# An image file to be recognized by the OCR service.
#
# The async methods (as_text, as_pdf) submit the image and return an
# OCRSDK::Promise; the *_sync variants block until the promise stops
# processing and then return (or store) the result.
class OCRSDK::Image < OCRSDK::AbstractEntity
  include OCRSDK::Verifiers::Language
  include OCRSDK::Verifiers::Format
  include OCRSDK::Verifiers::Profile

  # image_path - path of the file to upload.
  # application_id, password - service credentials, passed to the parent.
  def initialize(image_path, application_id=nil, password=nil)
    super(application_id, password)
    @image_path = image_path
  end

  # Submits the image for plain-text extraction.
  # Returns an OCRSDK::Promise built from the service response.
  def as_text(languages)
    xml_string = api_process_image @image_path, languages, :txt, :text_extraction

    OCRSDK::Promise.from_response xml_string, @application_id, @password
  end

  # Blocking variant of as_text; polls every wait_interval seconds and
  # returns the recognized text tagged as UTF-8.
  def as_text_sync(languages, wait_interval=OCRSDK::DEFAULT_POLL_TIME)
    as_text(languages).wait(wait_interval).result.force_encoding('utf-8')
  end

  # Submits the image for conversion to PDF.
  # Returns an OCRSDK::Promise built from the service response.
  #
  # The export format must be one of Verifiers::Format::OUTPUT_FORMATS.
  # The previously used :pdf is not in that list, so the format check in
  # api_process_image rejected every call with UnsupportedOutputFormat.
  def as_pdf(languages)
    xml_string = api_process_image @image_path, languages, :pdf_text_and_images, :document_conversion

    OCRSDK::Promise.from_response xml_string, @application_id, @password
  end

  # Blocking variant of as_pdf. Without out_path the PDF data is returned;
  # with out_path the data is written to that file (binary mode) and the
  # value of the write call is returned.
  def as_pdf_sync(languages, out_path=nil, wait_interval=OCRSDK::DEFAULT_POLL_TIME)
    result = as_pdf(languages).wait(wait_interval).result

    if out_path.nil?
      result
    else
      File.open(out_path, 'wb+') {|f| f.write result }
    end
  end

private

  # TODO handle 4xx and 5xx responses and errors, file not found error
  # http://ocrsdk.com/documentation/apireference/processImage/
  #
  # Validates the arguments and uploads the file to /processImage.
  # Returns the response of the POST request.
  #
  # Raises OCRSDK::UnsupportedInputFormat, OCRSDK::UnsupportedOutputFormat,
  # OCRSDK::UnsupportedProfile or OCRSDK::UnsupportedLanguage for values
  # the verifiers reject, and OCRSDK::NetworkError when RestClient raises
  # an exception carrying a response.
  def api_process_image(image_path, languages, format=:txt, profile=:document_conversion)
    raise OCRSDK::UnsupportedInputFormat  unless supported_input_format? File.extname(image_path)[1..-1]
    raise OCRSDK::UnsupportedOutputFormat unless supported_output_format? format
    raise OCRSDK::UnsupportedProfile      unless supported_profile?(profile)

    params = URI.encode_www_form(
        language: languages_to_s(languages).join(','),
        exportFormat: format_to_s(format),
        profile: profile_to_s(profile))
    uri = URI.join @url, '/processImage', "?#{params}"

    # Block form closes the upload handle once the request is done, also
    # when the request raises; File.new left it open.
    File.open(image_path, 'rb') do |file|
      RestClient.post uri.to_s, upload: { file: file }
    end
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end
end
|
data/lib/ocrsdk/pdf.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# A PDF document that may or may not need OCR.
class OCRSDK::PDF < OCRSDK::Image
  # We're on a shaky ground regarding what kind of pdfs
  # should be recognized and what shouldn't.
  # Currently we count that if there are
  #   images * 20 > length of text
  # then this document might need recognition.
  # Assumption is that there might be a title,
  # page numbers or credits along with images.
  #
  # Returns true when the heuristic above holds or when the document
  # contains fewer than 10 distinct characters; returns false when
  # pdf-reader reports the file as malformed or unsupported.
  def recognizeable?
    # Leading :: so the lookup can never resolve inside OCRSDK::PDF.
    reader = ::PDF::Reader.new @image_path

    images = 0
    text   = 0
    chars  = Set.new
    reader.pages.each do |page|
      # Extract the page text once and reuse it for both measurements.
      page_text = page.text

      text += page_text.length
      chars.merge page_text.each_char.map(&:ord)
      images += page.xobjects.count { |_name, stream| stream.hash[:Subtype] == :Image }
    end

    # count number of distinct characters
    # in case of "searchable", but incorrectly recognized document
    images * 20 > text || chars.length < 10
  rescue ::PDF::Reader::MalformedPDFError, ::PDF::Reader::UnsupportedFeatureError
    false
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Handle for a task submitted to the OCR service. It keeps the task
# attributes parsed from the service XML, can refresh them and fetch
# the result once the task has finished.
class OCRSDK::Promise < OCRSDK::AbstractEntity
  include OCRSDK::Verifiers::Status

  attr_reader :task_id, :status, :result_url, :estimate_processing_time

  # Builds a promise from a raw XML response of the service.
  def self.from_response(xml_string, application_id=nil, password=nil)
    OCRSDK::Promise.new(nil, application_id, password).parse_response xml_string
  end

  def initialize(task_id, application_id=nil, password=nil)
    super(application_id, password)
    @task_id = task_id
  end

  # Registration time plus the estimated processing time in seconds.
  def estimate_completion
    @registration_time + @estimate_processing_time.seconds
  end

  # Reads the first /response/task element of xml_string into this object.
  #
  # Returns self.
  # Raises OCRSDK::OCRSDKError when the document has no such element and
  # OCRSDK::NotEnoughCredits when the task reports that status.
  def parse_response(xml_string)
    xml  = Nokogiri::XML.parse xml_string
    task = xml.xpath('/response/task').first

    # Explicit check instead of rescuing NoMethodError on nil, so that
    # unrelated NoMethodErrors are not reported as a parsing problem.
    if task.nil?
      raise OCRSDK::OCRSDKError, "Problem parsing provided xml string: #{xml_string}"
    end

    @task_id    = task['id']
    @status     = status_to_sym task['status']
    @result_url = task['resultUrl']
    @registration_time = DateTime.parse task['registrationTime']
    @estimate_processing_time = task['estimatedProcessingTime'].to_i

    # admin should be notified in this case
    raise OCRSDK::NotEnoughCredits if @status == :not_enough_credits

    self
  end

  # Re-fetches the task status from the service and re-parses it.
  def update
    parse_response api_update_status
  end

  def completed?
    @status == :completed
  end

  def failed?
    [:processing_failed, :deleted, :not_enough_credits].include? @status
  end

  def processing?
    [:submitted, :queued, :in_progress].include? @status
  end

  # Downloads the task result from result_url.
  # Raises OCRSDK::ProcessingFailed when the task is in a failed state.
  def result
    raise OCRSDK::ProcessingFailed if failed?
    api_get_result
  end

  # Sleeps `seconds` between status updates for as long as the task is
  # processing. Returns self so calls can be chained.
  def wait(seconds=OCRSDK::DEFAULT_POLL_TIME)
    while processing? do
      sleep seconds
      update
    end

    self
  end

private

  # http://ocrsdk.com/documentation/apireference/getTaskStatus/
  def api_update_status
    params = URI.encode_www_form taskId: @task_id
    uri = URI.join @url, '/getTaskStatus', "?#{params}"

    RestClient.get uri.to_s
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end

  def api_get_result
    RestClient.get @result_url.to_s
  rescue RestClient::ExceptionWithResponse
    raise OCRSDK::NetworkError
  end

end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Validation and conversion of input (file extension) and output
# (export) formats.
module OCRSDK::Verifiers::Format
  # http://ocrsdk.com/documentation/specifications/image-formats/
  INPUT_FORMATS = %w[
    bmp dcx pcx png jp2 jpc jpg jpeg jfif pdf
    tif tiff gif djvu djv jb2
  ].map(&:to_sym)

  # http://ocrsdk.com/documentation/apireference/processImage/
  OUTPUT_FORMATS = %w[
    txt rtf docx xlsx pptx pdf_searchable
    pdf_text_and_images xml alto
  ].map(&:to_sym)

  # Internal symbol -> lower camel case string for the API.
  def format_to_s(format)
    format.to_s.camelize(:lower)
  end

  # Strings are downcased and symbolized first; other values are
  # looked up as given.
  def supported_input_format?(format)
    candidate = format.is_a?(String) ? format.downcase.to_sym : format

    INPUT_FORMATS.include?(candidate)
  end

  # Strings are underscored and symbolized first; other values are
  # looked up as given.
  def supported_output_format?(format)
    candidate = format.is_a?(String) ? format.underscore.to_sym : format

    OUTPUT_FORMATS.include?(candidate)
  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Validation and conversion of recognition languages.
module OCRSDK::Verifiers::Language
  # http://ocrsdk.com/documentation/specifications/recognition-languages/
  LANGUAGES = %w[
    afrikaans albanian aymara azeri_latin basque bemba
    blackfoot breton bugotu bulgarian buryat chamorro corsican
    crimean_tatar croatian crow czech dutch dutch_belgian english
    eskimo_latin esperanto estonian evenki faeroese fijian finnish
    french frisian gaelic_scottish gagauz galician ganda german
    german_law german_luxembourg german_medical german_new_spelling_law
    greek hani hausa hebrew hungarian icelandic interlingua italian
    japanese kabardian kasub kawa kikuyu kirgiz kongo korean_hangul
    koryak kpelle lak lappish latvian lezgin macedonian malay malinke
    maltese mansi maori mari maya miao minankabaw mohawk nenets nogay
    norwegian_bokmal norwegian_nynorsk nyanja occidental old_english old_french
    old_german papiamento pidgin_english polish portuguese_brazilian
    portuguese_standard provencal quechua romanian romanian_moldavia romany
    rundi russian samoan selkup serbian_cyrillic shona sioux slovenian
    somali spanish sunda tabassaran tagalog tahitian tajik tatar tinpo
    tun turkish uighur_cyrillic ukrainian uzbek_cyrillic visayan
  ].map(&:to_sym)

  # Internal symbol -> camel case string for the API.
  def language_to_s(language)
    language.to_s.camelize
  end

  # Camel case string -> internal symbol.
  def language_to_sym(language)
    language.underscore.to_sym
  end

  # Strings are converted with language_to_sym first; other values are
  # looked up as given.
  def supported_language?(language)
    candidate = language.is_a?(String) ? language_to_sym(language) : language

    LANGUAGES.include?(candidate)
  end

  # Converts every language to its API string.
  # Raises OCRSDK::UnsupportedLanguage if any of them is not supported.
  def languages_to_s(languages)
    names = languages.map { |language| language_to_s(language) }

    raise OCRSDK::UnsupportedLanguage unless names.all? { |name| supported_language?(name) }

    names
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Validation and conversion of processing profiles.
module OCRSDK::Verifiers::Profile
  # http://ocrsdk.com/documentation/specifications/processing-profiles/
  PROFILES = %w[
    document_conversion document_archiving text_extraction
    field_level_recognition barcode_recognition
  ].map(&:to_sym)

  # Internal symbol -> lower camel case string for the API.
  def profile_to_s(profile)
    profile.to_s.camelize(:lower)
  end

  # Strings are underscored and symbolized first; other values are
  # looked up as given.
  def supported_profile?(profile)
    candidate = profile.is_a?(String) ? profile.underscore.to_sym : profile

    PROFILES.include?(candidate)
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Validation and conversion of task statuses.
module OCRSDK::Verifiers::Status
  # http://ocrsdk.com/documentation/specifications/task-statuses/
  STATUSES = %w[
    submitted queued in_progress completed
    processing_failed deleted not_enough_credits
  ].map(&:to_sym)

  # Internal symbol -> camel case string used by the API.
  def status_to_s(status)
    status.to_s.camelize
  end

  # Camel case string -> internal symbol.
  def status_to_sym(status)
    status.underscore.to_sym
  end

  # Strings are converted with status_to_sym first; other values are
  # looked up as given.
  def supported_status?(status)
    candidate = status.is_a?(String) ? status_to_sym(status) : status

    STATUSES.include?(candidate)
  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# Namespace for mixins that are meant to be included into target classes.
# They validate symbols and convert them between the internal and the
# outside API representations.
module OCRSDK::Verifiers; end

# Load every verifier that sits in the verifiers/ directory next to this file.
Dir.glob(File.join(File.dirname(__FILE__), 'verifiers', '*.rb')).each { |verifier| require verifier }
|
data/lib/ocrsdk.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'uri'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'pdf-reader'
|
5
|
+
require 'rest-client'
|
6
|
+
require 'active_support/inflector'
|
7
|
+
require 'active_support/time'
|
8
|
+
|
9
|
+
# http://ocrsdk.com/documentation/apireference/
|
10
|
+
# Top-level namespace of the library together with its defaults.
module OCRSDK
  # Default interval, in seconds, between two task status polls.
  DEFAULT_POLL_TIME = 3

  # Host name of the OCR service. Frozen so the shared constant cannot be
  # modified in place.
  SERVICE_URL = 'cloud.ocrsdk.com'.freeze
end
|
14
|
+
|
15
|
+
require 'ocrsdk/errors'
|
16
|
+
require 'ocrsdk/verifiers'
|
17
|
+
require 'ocrsdk/abstract_entity'
|
18
|
+
require 'ocrsdk/image'
|
19
|
+
require 'ocrsdk/pdf'
|
20
|
+
require 'ocrsdk/document'
|
21
|
+
require 'ocrsdk/promise'
|
data/ocrsdk.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Gem specification for ocrsdk.
Gem::Specification.new do |s|
  s.name             = "ocrsdk"
  # Read VERSION from the directory of this gemspec rather than from the
  # current working directory, so the lookup does not depend on where
  # the spec is evaluated from.
  s.version          = File.read(File.expand_path("../VERSION", __FILE__)).delete("\n\r")
  s.authors          = ["Andrew Korzhuev"]
  s.description      = %q{Abbyy's OCR (ocrsdk.com) API wrapper in Ruby.}
  s.summary          = %q{Abbyy's OCR (ocrsdk.com) API wrapper in Ruby.}
  s.email            = "andrew@korzhuev.com"
  s.extra_rdoc_files = ["LICENSE", "README.md"]
  s.rdoc_options     = ["--charset=UTF-8"]
  s.homepage         = "http://github.com/andrusha/ocrsdk"
  s.require_paths    = ["lib"]

  # File lists come from git, so the spec has to be evaluated inside the
  # repository checkout.
  s.executables      = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.files            = `git ls-files`.split("\n")
  s.test_files       = `git ls-files -- {test,spec,features}/*`.split("\n")

  s.add_runtime_dependency "rest-client" #git://github.com/babatakao/rest-client.git
  s.add_runtime_dependency "nokogiri"
  s.add_runtime_dependency "pdf-reader"
  s.add_runtime_dependency "activesupport"

  s.add_development_dependency "rake", ">= 0.8"
  # Coverage tool depends on the interpreter generation.
  if RUBY_VERSION >= '1.9'
    s.add_development_dependency "cover_me", ">= 1.2.0"
  else
    s.add_development_dependency "rcov", ">= 0.9"
  end
  s.add_development_dependency "rspec", ">= 2"
  s.add_development_dependency "webmock"
end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# Spec helpers: canned XML responses of the OCR service and a shortcut
# that stubs out all network calls of the library.
#
# All fixtures share one template (see task_response_xml); each public
# method only states what differs from the "Submitted" default.
module OCRSDKHelpers

  # Freshly submitted task.
  def process_image_response
    task_response_xml({})
  end

  # Task rejected because the account has no credits left.
  def process_image_response_credits
    task_response_xml(status: 'NotEnoughCredits', credits: '0')
  end

  # Task that is still being processed.
  def process_image_updated_response
    task_response_xml(id: 'update-task-id',
                      status_change_time: '2001-02-01T13:18:22Z',
                      status: 'InProgress')
  end

  # Successfully finished task.
  def process_image_completed_response
    task_response_xml(id: 'update-task-id',
                      status_change_time: '2001-03-01T13:18:22Z',
                      status: 'Completed')
  end

  # Task whose processing failed.
  def process_image_failed_response
    task_response_xml(id: 'update-task-id',
                      status_change_time: '2001-03-01T13:18:22Z',
                      status: 'ProcessingFailed')
  end

  # Replaces the three API calls with canned answers so that no request
  # leaves the test process.
  def mock_ocrsdk
    OCRSDK::Image.any_instance.stub(:api_process_image) { |x,y,i,j| process_image_response }
    OCRSDK::Promise.any_instance.stub(:api_update_status) { process_image_completed_response }
    OCRSDK::Promise.any_instance.stub(:api_get_result) { "meow" }
  end

  private

  # Renders the response document shared by all fixtures.
  #
  # overrides - Hash with any of :id, :status_change_time, :status and
  #             :credits; omitted keys keep the "Submitted" defaults.
  #
  # Returns the XML document as a String.
  def task_response_xml(overrides)
    task = {
      id:                 '22345200-abe8-4f60-90c8-0d43c5f6c0f6',
      status_change_time: '2001-01-01T13:18:22Z',
      status:             'Submitted',
      credits:            '10'
    }.merge(overrides)

    [
      '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>',
      '<response>',
      %(  <task id="#{task[:id]}"),
      '    registrationTime="2001-01-01T13:18:22Z"',
      %(    statusChangeTime="#{task[:status_change_time]}"),
      %(    status="#{task[:status]}"),
      '    error="{An error message.}"',
      '    filesCount="10"',
      %(    credits="#{task[:credits]}"),
      '    estimatedProcessingTime="3600"',
      '    resultUrl="http://domain/blob ID"',
      '    description="My first OCR task"/>',
      '  <task/>',
      '</response>',
      ''
    ].join("\n")
  end

end
|
105
|
+
|
106
|
+
# Make the helpers available inside every example.
RSpec.configure { |config| config.include OCRSDKHelpers }
|