ocrsdk 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. data/.gitignore +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE +20 -0
  5. data/README.md +2 -0
  6. data/Rakefile +25 -0
  7. data/VERSION +1 -0
  8. data/coverage/index.css +56 -0
  9. data/coverage/index.html +244 -0
  10. data/coverage/jquery.js +154 -0
  11. data/coverage/jquery.tablesorter.js +2 -0
  12. data/coverage/lib/ocrsdk/abstract_entity.rb.html +207 -0
  13. data/coverage/lib/ocrsdk/document.rb.html +159 -0
  14. data/coverage/lib/ocrsdk/errors.rb.html +199 -0
  15. data/coverage/lib/ocrsdk/image.rb.html +543 -0
  16. data/coverage/lib/ocrsdk/pdf.rb.html +311 -0
  17. data/coverage/lib/ocrsdk/promise.rb.html +791 -0
  18. data/coverage/lib/ocrsdk/verifiers/format.rb.html +303 -0
  19. data/coverage/lib/ocrsdk/verifiers/language.rb.html +439 -0
  20. data/coverage/lib/ocrsdk/verifiers/profile.rb.html +215 -0
  21. data/coverage/lib/ocrsdk/verifiers/status.rb.html +247 -0
  22. data/coverage/lib/ocrsdk/verifiers.rb.html +167 -0
  23. data/coverage/lib/ocrsdk.rb.html +263 -0
  24. data/coverage/report.css +97 -0
  25. data/lib/ocrsdk/abstract_entity.rb +14 -0
  26. data/lib/ocrsdk/document.rb +8 -0
  27. data/lib/ocrsdk/errors.rb +13 -0
  28. data/lib/ocrsdk/image.rb +56 -0
  29. data/lib/ocrsdk/pdf.rb +27 -0
  30. data/lib/ocrsdk/promise.rb +87 -0
  31. data/lib/ocrsdk/verifiers/format.rb +26 -0
  32. data/lib/ocrsdk/verifiers/language.rb +43 -0
  33. data/lib/ocrsdk/verifiers/profile.rb +15 -0
  34. data/lib/ocrsdk/verifiers/status.rb +19 -0
  35. data/lib/ocrsdk/verifiers.rb +9 -0
  36. data/lib/ocrsdk.rb +21 -0
  37. data/ocrsdk.gemspec +28 -0
  38. data/spec/fixtures/files/lorem.complex.pdf +0 -0
  39. data/spec/fixtures/files/lorem.pdf +0 -0
  40. data/spec/fixtures/files/malformed.pdf +0 -0
  41. data/spec/fixtures/files/recognizeable.pdf +0 -0
  42. data/spec/fixtures/files/russian.jpg +0 -0
  43. data/spec/fixtures/files/searchable.malformed.pdf +0 -0
  44. data/spec/helpers/ocrsdk_helpers.rb +106 -0
  45. data/spec/ocrsdk/image_spec.rb +93 -0
  46. data/spec/ocrsdk/pdf_spec.rb +26 -0
  47. data/spec/ocrsdk/promise_spec.rb +165 -0
  48. data/spec/ocrsdk/verifiers/format_spec.rb +51 -0
  49. data/spec/ocrsdk/verifiers/language_spec.rb +55 -0
  50. data/spec/ocrsdk/verifiers/profile_spec.rb +33 -0
  51. data/spec/ocrsdk/verifiers/status_spec.rb +43 -0
  52. data/spec/ocrsdk_spec.rb +8 -0
  53. data/spec/spec_helper.rb +8 -0
  54. data/spec/support/test_files.rb +36 -0
  55. data/travis.yml +4 -0
  56. metadata +247 -0
@@ -0,0 +1,93 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for OCRSDK::Image: the conversion helpers (as_text, as_pdf and their
# *_sync variants) and the api_process_image request builder.
#
# NOTE(review): mock_ocrsdk is defined outside this file (presumably in
# spec/helpers/ocrsdk_helpers.rb). The expectations below assume it stubs the
# remote service so that a finished task yields 'meow' -- confirm there.
describe OCRSDK::Image do
  describe ".as_text" do
    subject { OCRSDK::Image.new 'image.jpg' }
    before { mock_ocrsdk }

    it "should call api and return Promise" do
      subject.as_text([:russian]).should be_kind_of(OCRSDK::Promise)
    end
  end

  describe ".as_text_sync" do
    subject { OCRSDK::Image.new 'image.jpg' }
    before { mock_ocrsdk }

    it "should wait till Promise is done and return result" do
      # The second argument (0) is presumably the polling interval; zero keeps the spec fast.
      subject.as_text_sync([:russian], 0).should == 'meow'
    end
  end

  describe ".as_pdf" do
    subject { OCRSDK::Image.new 'image.jpg' }
    before { mock_ocrsdk }

    it "should call api and return Promise" do
      subject.as_pdf([:russian]).should be_kind_of(OCRSDK::Promise)
    end
  end

  describe ".as_pdf_sync" do
    subject { OCRSDK::Image.new 'image.jpg' }
    before { mock_ocrsdk }

    it "should wait till Promise is done and return result if output file isn't specified" do
      subject.as_pdf_sync([:russian], nil, 0).should == 'meow'
    end

    it "should wait till Promise is done and write result in file" do
      outpath = File.join(File.dirname(File.expand_path(__FILE__)), 'output.pdf')
      begin
        subject.as_pdf_sync([:russian], outpath, 0)
        # File.exist? instead of the deprecated File.exists? alias,
        # which was removed in Ruby 3.2.
        File.exist?(outpath).should be_true
      ensure
        # Remove the artefact even when the expectation above fails, so a
        # red run does not leave output.pdf behind in the spec directory.
        File.delete(outpath) if File.exist?(outpath)
      end
    end
  end

  # api_process_image is invoked through instance_eval -- presumably because
  # it is not part of the public interface of OCRSDK::Image.
  describe ".api_process_image" do
    subject { OCRSDK::Image.new 'image.jpg', 'app_id', 'pass' }

    it "should raise UnsupportedLanguage on unsupported language" do
      expect {
        subject.instance_eval { api_process_image 'image.jpg', [:meow] }
      }.to raise_error(OCRSDK::UnsupportedLanguage)
    end

    it "should raise UnsupportedInputFormat on unsupported input format" do
      expect {
        subject.instance_eval { api_process_image 'image.meow', [:russian] }
      }.to raise_error(OCRSDK::UnsupportedInputFormat)
    end

    it "should raise UnsupportedOutputFormat on unsupported output format" do
      expect {
        subject.instance_eval { api_process_image 'image.jpg', [:russian], :meow }
      }.to raise_error(OCRSDK::UnsupportedOutputFormat)
    end

    it "should raise UnsupportedProfile on unsupported profile" do
      expect {
        subject.instance_eval { api_process_image 'image.jpg', [:russian], :txt, :meow }
      }.to raise_error(OCRSDK::UnsupportedProfile)
    end

    it "should raise NetworkError on problems with REST request" do
      # Simulate a failing HTTP call; the library is expected to wrap it.
      RestClient.stub(:post) {|url, params| raise RestClient::ExceptionWithResponse }
      RestClient.should_receive(:post).once

      expect {
        subject.instance_eval { api_process_image TestFiles.russian_jpg_path, [:russian] }
      }.to raise_error(OCRSDK::NetworkError)
    end

    it "should do a post request with correct url and file attached" do
      # The stub doubles as the assertion: it inspects the arguments the
      # library hands to RestClient.post.
      RestClient.stub(:post) do |uri, params|
        uri.to_s.should == "http://app_id:pass@#{OCRSDK::SERVICE_URL}/processImage?language=Russian%2CEnglish&exportFormat=txt&profile=documentConversion"
        params[:upload][:file].should be_kind_of(File)
      end
      RestClient.should_receive(:post).once
      subject.instance_eval { api_process_image TestFiles.russian_jpg_path, [:russian, :english], :txt, :document_conversion }
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for OCRSDK::PDF#recognizeable?, run against the PDF fixtures that
# TestFiles exposes.
describe OCRSDK::PDF do
  describe ".recognizeable?" do
    # Wraps the fixture in an OCRSDK::PDF and asks whether it is recognizeable.
    def verdict_for(fixture)
      OCRSDK::PDF.new(fixture).recognizeable?
    end

    it "should return false for a document with text only" do
      verdict_for(TestFiles.lorem_pdf).should be_false
    end

    it "should return false for document with a lot of text and some images" do
      verdict_for(TestFiles.lorem_complex_pdf).should be_false
    end

    it "should return true for document with images only" do
      verdict_for(TestFiles.recognizeable_pdf).should be_true
    end

    it "should return false for malformed document" do
      verdict_for(TestFiles.malformed_pdf).should be_false
    end

    it "should return true for 'searchable' document with malformed text underneath the pic" do
      verdict_for(TestFiles.searchable_malformed_pdf).should be_true
    end
  end
end
@@ -0,0 +1,165 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for OCRSDK::Promise: response parsing, status updates, result
# retrieval, the status predicates and the blocking wait loop.
#
# NOTE(review): the process_image_*_response helpers are defined outside this
# file (presumably in spec/helpers/ocrsdk_helpers.rb) and are assumed to return
# canned service responses -- confirm there.
describe OCRSDK::Promise do

  # Expectations shared by both ways of building a Promise from
  # process_image_response (instance #parse_response and .from_response).
  shared_examples_for "a promise for the submitted task" do
    its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
    its(:status) { should == :submitted }
    its(:result_url) { should == 'http://domain/blob ID' }
    its(:estimate_processing_time) { should == 3600 }
    # Integer#seconds is not core Ruby -- presumably provided by ActiveSupport
    # loaded through the library; confirm.
    its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
  end

  describe ".parse_response" do
    context "correct response" do
      subject { OCRSDK::Promise.new(nil).parse_response process_image_response }

      it_behaves_like "a promise for the submitted task"
    end

    context "incorrect response" do
      subject { OCRSDK::Promise.new nil }

      it "should raise OCRSDKError" do
        expect {
          subject.parse_response ''
        }.to raise_error(OCRSDK::OCRSDKError)
      end
    end

    context "insufficient credits" do
      subject { OCRSDK::Promise.new nil }

      it "should raise an OCRSDK::NotEnoughCredits error" do
        expect {
          subject.parse_response process_image_response_credits
        }.to raise_error(OCRSDK::NotEnoughCredits)
      end
    end
  end

  describe "self.from_response" do
    subject { OCRSDK::Promise.from_response process_image_response }

    it_behaves_like "a promise for the submitted task"
  end

  describe ".update" do
    subject { OCRSDK::Promise.new 'update-task-id' }
    before do
      # Replace the network call with a canned "updated" response.
      subject.stub(:api_update_status) { process_image_updated_response }
      subject.update
    end

    its(:task_id) { should == 'update-task-id' }
    its(:status) { should == :in_progress }
  end

  # api_update_status is invoked through instance_eval -- presumably because
  # it is not part of the public interface of OCRSDK::Promise.
  describe ".api_update_status" do
    subject { OCRSDK::Promise.new 'test', 'app_id', 'pass' }

    it "should make an api call with correct url" do
      RestClient.stub(:get) do |url|
        url.to_s.should == "http://app_id:pass@#{OCRSDK::SERVICE_URL}/getTaskStatus?taskId=test"
      end
      RestClient.should_receive(:get).once
      subject.instance_eval { api_update_status }
    end

    it "should raise a NetworkError in case REST request fails" do
      RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }
      RestClient.should_receive(:get).once

      expect {
        subject.instance_eval { api_update_status }
      }.to raise_error(OCRSDK::NetworkError)
    end
  end

  describe ".result" do
    context "processing completed without errors" do
      subject { OCRSDK::Promise.from_response process_image_completed_response }

      it "should call api method" do
        subject.should_receive(:api_get_result).once
        subject.result
      end

      it "should get file with correct url and return its contents" do
        # The stub both checks the requested URL and supplies the body.
        RestClient.stub(:get) do |url|
          url.to_s.should == "http://domain/blob ID"
          "meow"
        end
        RestClient.should_receive(:get).once
        subject.result.should == 'meow'
      end

      it "should raise NetworkError in case getting file fails" do
        RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }
        RestClient.should_receive(:get).once

        expect {
          subject.result
        }.to raise_error(OCRSDK::NetworkError)
      end
    end

    context "processing failed" do
      subject { OCRSDK::Promise.from_response process_image_failed_response }

      it "should raise ProcessingFailed" do
        expect {
          subject.result
        }.to raise_error(OCRSDK::ProcessingFailed)
      end
    end
  end

  describe ".processing?, .completed? and .failed?" do
    # process_image_updated_response yields status :in_progress (see the
    # ".update" group above), so this is a job that is still running.
    context "job in progress" do
      subject { OCRSDK::Promise.from_response process_image_updated_response }

      its(:processing?) { should be_true }
      its(:completed?) { should be_false }
      its(:failed?) { should be_false }
    end

    context "completed job" do
      subject { OCRSDK::Promise.from_response process_image_completed_response }

      its(:processing?) { should be_false }
      its(:completed?) { should be_true }
      its(:failed?) { should be_false }
    end

    context "failed job" do
      subject { OCRSDK::Promise.from_response process_image_failed_response }

      its(:processing?) { should be_false }
      its(:completed?) { should be_false }
      its(:failed?) { should be_true }
    end
  end

  describe ".wait" do
    subject { OCRSDK::Promise.from_response process_image_response }

    it "should check the status as many times as needed waiting while ocr is completed" do
      # First update leaves the promise untouched; the second one feeds it a
      # "completed" response, which should end the wait loop.
      called_once = false
      subject.stub(:update) do
        if called_once
          subject.parse_response process_image_completed_response
        else
          called_once = true
        end
      end
      subject.should_receive(:update).twice

      # Two polls at a 0.1 interval are expected to take at least 0.2 seconds
      # (assumes wait pauses for the interval before each update -- confirm).
      start = Time.now
      subject.wait 0.1
      (Time.now - start).should >= 0.2
    end
  end

end
@@ -0,0 +1,51 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for the OCRSDK::Verifiers::Format mixin, exercised through an
# anonymous class that includes it.
describe OCRSDK::Verifiers::Format do
  let(:class_with_module) { Class.new { include OCRSDK::Verifiers::Format } }
  subject { class_with_module.new }

  it "should have list of possible input formats" do
    OCRSDK::Verifiers::Format::INPUT_FORMATS.length.should be > 0
  end

  it "should have list of possible output formats" do
    OCRSDK::Verifiers::Format::OUTPUT_FORMATS.length.should be > 0
  end

  it "should convert format to string" do
    subject.format_to_s(:meow_meow).should eq('meowMeow')
  end

  describe ".supported_input_format?" do
    # Each row: example description, format handed in, expected verdict.
    [
      ["should return false for incorrect input format",        :meow_meow, false],
      ["should return true for correct input format as symbol", :pdf,       true],
      ["should return true for correct input format as string", "pdf",      true]
    ].each do |description, fmt, supported|
      it description do
        verdict = subject.supported_input_format?(fmt)
        supported ? verdict.should(be_true) : verdict.should(be_false)
      end
    end
  end

  describe ".supported_output_format?" do
    # Each row: example description, format handed in, expected verdict.
    [
      ["should return false for incorrect output format",        :meow_meow,      false],
      ["should return true for correct output format as symbol", :txt,            true],
      ["should return true for correct output format as string", "pdfSearchable", true]
    ].each do |description, fmt, supported|
      it description do
        verdict = subject.supported_output_format?(fmt)
        supported ? verdict.should(be_true) : verdict.should(be_false)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for the OCRSDK::Verifiers::Language mixin, exercised through an
# anonymous class that includes it.
describe OCRSDK::Verifiers::Language do
  let(:class_with_module) { Class.new { include OCRSDK::Verifiers::Language } }
  subject { class_with_module.new }

  it "should have list of languages" do
    OCRSDK::Verifiers::Language::LANGUAGES.length.should be > 0
  end

  it "should convert language to string" do
    subject.language_to_s(:meow_meow).should eq('MeowMeow')
  end

  describe ".language_to_sym" do
    it "should convert language to symbol" do
      subject.language_to_sym("MeowMeow").should eq(:meow_meow)
    end

    it "should produce reversible results" do
      # Symbol -> string -> symbol must land back on the starting symbol.
      as_string = subject.language_to_s(:meow_meow)
      subject.language_to_sym(as_string).should eq(:meow_meow)
    end
  end

  describe ".languages_to_s" do
    it "should convert list of languages to strings" do
      converted = subject.languages_to_s([:hebrew, :serbian_cyrillic])
      converted.should eq(['Hebrew', 'SerbianCyrillic'])
    end

    it "should raise an exception if language doesn't exist" do
      lambda {
        subject.languages_to_s([:meow_meow, :pew_pew])
      }.should raise_error(OCRSDK::UnsupportedLanguage)
    end
  end

  describe ".supported_language?" do
    # Each row: example description, language handed in, expected verdict.
    [
      ["should return false for incorrect language",       :meow_meow, false],
      ["should return true for correct language as symbol", :russian,  true],
      ["should return true for correct language as string", "Russian", true]
    ].each do |description, language, supported|
      it description do
        verdict = subject.supported_language?(language)
        supported ? verdict.should(be_true) : verdict.should(be_false)
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for the OCRSDK::Verifiers::Profile mixin, exercised through an
# anonymous class that includes it.
describe OCRSDK::Verifiers::Profile do
  let(:class_with_module) { Class.new { include OCRSDK::Verifiers::Profile } }
  subject { class_with_module.new }

  it "should have list of possible profiles" do
    OCRSDK::Verifiers::Profile::PROFILES.length.should be > 0
  end

  it "should convert profile to string" do
    subject.profile_to_s(:meow_meow).should eq('meowMeow')
  end

  describe ".supported_profile?" do
    # Each row: example description, profile handed in, expected verdict.
    [
      ["should return false for incorrect profile",        :meow_meow,           false],
      ["should return true for correct profile as symbol", :document_conversion, true],
      ["should return true for correct profile as string", "documentConversion", true]
    ].each do |description, profile, supported|
      it description do
        verdict = subject.supported_profile?(profile)
        supported ? verdict.should(be_true) : verdict.should(be_false)
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Specs for the OCRSDK::Verifiers::Status mixin, exercised through an
# anonymous class that includes it.
describe OCRSDK::Verifiers::Status do
  let(:class_with_module) {
    Class.new do
      include OCRSDK::Verifiers::Status
    end
  }
  subject { class_with_module.new }

  it "should have list of statuses" do
    OCRSDK::Verifiers::Status::STATUSES.length.should > 0
  end

  it "should convert status to string" do
    subject.status_to_s(:meow_meow).should == 'MeowMeow'
  end

  describe ".status_to_sym" do
    it "should convert status to symbol" do
      subject.status_to_sym("MeowMeow").should == :meow_meow
    end

    it "should produce reversible results" do
      # Symbol -> string -> symbol must land back on the starting symbol.
      subject.status_to_sym(subject.status_to_s(:meow_meow)).should == :meow_meow
    end
  end

  describe ".supported_status?" do
    # Description previously said "incorrect language" (copied from the
    # language verifier spec); this example is about statuses.
    it "should return false for incorrect status" do
      subject.supported_status?(:meow_meow).should be_false
    end

    it "should return true for correct status as symbol" do
      subject.supported_status?(:submitted).should be_true
    end

    it "should return true for correct status as string" do
      subject.supported_status?("Submitted").should be_true
    end
  end
end
@@ -0,0 +1,8 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
# Sanity check for the top-level OCRSDK namespace: the service URL constant
# must be defined and non-empty.
describe OCRSDK do
  it "should have service url" do
    url_length = OCRSDK::SERVICE_URL.length
    url_length.should be > 0
  end
end
@@ -0,0 +1,8 @@
1
+ if RUBY_VERSION >= '1.9'
2
+ require 'cover_me'
3
+ end
4
+
5
+ require 'rspec'
6
+ require 'ocrsdk'
7
+ require 'helpers/ocrsdk_helpers'
8
+ require 'support/test_files'
@@ -0,0 +1,36 @@
1
# Access to fixture files through dynamically resolved method names.
#
# Kept as a separate module because we might want to mock some files in the
# future or use a generator.
#
# A call such as TestFiles.lorem_complex_pdf is mapped to the file
# ../fixtures/files/lorem.complex.pdf relative to this file's directory
# (every underscore in the method name becomes a dot).
#
# * If the method name ends in `_path`, the suffix is stripped and the path
#   is returned as a String.
# * Otherwise an opened File is returned; the caller is responsible for
#   closing it. When a block is given, the File is yielded to it, closed
#   afterwards, and the block's value is returned.
# * If no such fixture exists, the call raises NoMethodError as usual.
module TestFiles
  # Reports real singleton methods (filename, only_path?, ...) as well as
  # names that resolve to an existing fixture file.
  #
  # Accepts the optional second argument of Object#respond_to? so callers
  # that pass include_private do not hit an ArgumentError.
  def self.respond_to?(method, include_private = false)
    super || File.exist?(filename(method))
  end

  # Lets Object#method and friends see fixture names on Rubies that support
  # respond_to_missing? (1.9.2+); older Rubies never call it.
  def self.respond_to_missing?(method, include_private = false)
    File.exist?(filename(method)) || super
  end

  # Resolves fixture names; see the module comment for the mapping rules.
  def self.method_missing(method, *args, &block)
    fname = filename(method)
    # Unknown fixture: fall through to the default NoMethodError.
    return super unless File.exist?(fname)
    return fname if only_path?(method)

    File.open(fname, &block)
  end

  # Returns the fixture path a method name maps to (the file need not exist).
  #
  # method - Symbol or String, optionally ending in `_path`.
  def self.filename(method)
    # Work on a String: Symbol#[] is not available on Ruby 1.8.7.
    name = method.to_s
    name = name[0..-6] if only_path?(name)
    File.join(File.dirname(File.expand_path(__FILE__)), '..', 'fixtures', 'files', name.tr('_', '.'))
  end

  # True when the method name asks for a path rather than an opened File.
  def self.only_path?(method)
    method.to_s.end_with?('_path')
  end

end
data/travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3