ocrsdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. data/.gitignore +5 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE +20 -0
  5. data/README.md +2 -0
  6. data/Rakefile +25 -0
  7. data/VERSION +1 -0
  8. data/coverage/index.css +56 -0
  9. data/coverage/index.html +244 -0
  10. data/coverage/jquery.js +154 -0
  11. data/coverage/jquery.tablesorter.js +2 -0
  12. data/coverage/lib/ocrsdk/abstract_entity.rb.html +207 -0
  13. data/coverage/lib/ocrsdk/document.rb.html +159 -0
  14. data/coverage/lib/ocrsdk/errors.rb.html +199 -0
  15. data/coverage/lib/ocrsdk/image.rb.html +543 -0
  16. data/coverage/lib/ocrsdk/pdf.rb.html +311 -0
  17. data/coverage/lib/ocrsdk/promise.rb.html +791 -0
  18. data/coverage/lib/ocrsdk/verifiers/format.rb.html +303 -0
  19. data/coverage/lib/ocrsdk/verifiers/language.rb.html +439 -0
  20. data/coverage/lib/ocrsdk/verifiers/profile.rb.html +215 -0
  21. data/coverage/lib/ocrsdk/verifiers/status.rb.html +247 -0
  22. data/coverage/lib/ocrsdk/verifiers.rb.html +167 -0
  23. data/coverage/lib/ocrsdk.rb.html +263 -0
  24. data/coverage/report.css +97 -0
  25. data/lib/ocrsdk/abstract_entity.rb +14 -0
  26. data/lib/ocrsdk/document.rb +8 -0
  27. data/lib/ocrsdk/errors.rb +13 -0
  28. data/lib/ocrsdk/image.rb +56 -0
  29. data/lib/ocrsdk/pdf.rb +27 -0
  30. data/lib/ocrsdk/promise.rb +87 -0
  31. data/lib/ocrsdk/verifiers/format.rb +26 -0
  32. data/lib/ocrsdk/verifiers/language.rb +43 -0
  33. data/lib/ocrsdk/verifiers/profile.rb +15 -0
  34. data/lib/ocrsdk/verifiers/status.rb +19 -0
  35. data/lib/ocrsdk/verifiers.rb +9 -0
  36. data/lib/ocrsdk.rb +21 -0
  37. data/ocrsdk.gemspec +28 -0
  38. data/spec/fixtures/files/lorem.complex.pdf +0 -0
  39. data/spec/fixtures/files/lorem.pdf +0 -0
  40. data/spec/fixtures/files/malformed.pdf +0 -0
  41. data/spec/fixtures/files/recognizeable.pdf +0 -0
  42. data/spec/fixtures/files/russian.jpg +0 -0
  43. data/spec/fixtures/files/searchable.malformed.pdf +0 -0
  44. data/spec/helpers/ocrsdk_helpers.rb +106 -0
  45. data/spec/ocrsdk/image_spec.rb +93 -0
  46. data/spec/ocrsdk/pdf_spec.rb +26 -0
  47. data/spec/ocrsdk/promise_spec.rb +165 -0
  48. data/spec/ocrsdk/verifiers/format_spec.rb +51 -0
  49. data/spec/ocrsdk/verifiers/language_spec.rb +55 -0
  50. data/spec/ocrsdk/verifiers/profile_spec.rb +33 -0
  51. data/spec/ocrsdk/verifiers/status_spec.rb +43 -0
  52. data/spec/ocrsdk_spec.rb +8 -0
  53. data/spec/spec_helper.rb +8 -0
  54. data/spec/support/test_files.rb +36 -0
  55. data/travis.yml +4 -0
  56. metadata +247 -0
@@ -0,0 +1,93 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Image do
5
+ describe ".as_text" do
6
+ subject { OCRSDK::Image.new 'image.jpg' }
7
+ before { mock_ocrsdk }
8
+
9
+ it "should call api and return Promise" do
10
+ subject.as_text([:russian]).should be_kind_of(OCRSDK::Promise)
11
+ end
12
+ end
13
+
14
+ describe ".as_text_sync" do
15
+ subject { OCRSDK::Image.new 'image.jpg' }
16
+ before { mock_ocrsdk }
17
+
18
+ it "should wait till Promise is done and return result" do
19
+ subject.as_text_sync([:russian], 0).should == 'meow'
20
+ end
21
+ end
22
+
23
+ describe ".as_pdf" do
24
+ subject { OCRSDK::Image.new 'image.jpg' }
25
+ before { mock_ocrsdk }
26
+
27
+ it "should call api and return Promise" do
28
+ subject.as_pdf([:russian]).should be_kind_of(OCRSDK::Promise)
29
+ end
30
+ end
31
+
32
+ describe ".as_pdf_sync" do
33
+ subject { OCRSDK::Image.new 'image.jpg' }
34
+ before { mock_ocrsdk }
35
+
36
+ it "should wait till Promise is done and return result if output file isn't specified" do
37
+ subject.as_pdf_sync([:russian], nil, 0).should == 'meow'
38
+ end
39
+
40
+ it "should wait till Promise is done and write result in file" do
41
+ outpath = File.join(File.dirname(File.expand_path(__FILE__)), 'output.pdf')
42
+ subject.as_pdf_sync([:russian], outpath, 0)
43
+ File.exists?(outpath).should be_true
44
+ File.delete outpath
45
+ end
46
+ end
47
+
48
+ describe ".api_process_image" do
49
+ subject { OCRSDK::Image.new 'image.jpg', 'app_id', 'pass' }
50
+
51
+ it "should raise UnsupportedLanguage on unsupported language" do
52
+ expect {
53
+ subject.instance_eval { api_process_image 'image.jpg', [:meow] }
54
+ }.to raise_error(OCRSDK::UnsupportedLanguage)
55
+ end
56
+
57
+ it "should raise UnsupportedInputFormat on unsupported input format" do
58
+ expect {
59
+ subject.instance_eval { api_process_image 'image.meow', [:russian] }
60
+ }.to raise_error(OCRSDK::UnsupportedInputFormat)
61
+ end
62
+
63
+ it "should raise UnsupportedOutputFormat on unsupported output format" do
64
+ expect {
65
+ subject.instance_eval { api_process_image 'image.jpg', [:russian], :meow }
66
+ }.to raise_error(OCRSDK::UnsupportedOutputFormat)
67
+ end
68
+
69
+ it "should raise UnsupportedProfile on unsupported profile" do
70
+ expect {
71
+ subject.instance_eval { api_process_image 'image.jpg', [:russian], :txt, :meow }
72
+ }.to raise_error(OCRSDK::UnsupportedProfile)
73
+ end
74
+
75
+ it "should raise NetworkError on problems with REST request" do
76
+ RestClient.stub(:post) {|url, params| raise RestClient::ExceptionWithResponse }
77
+ RestClient.should_receive(:post).once
78
+
79
+ expect {
80
+ subject.instance_eval { api_process_image TestFiles.russian_jpg_path, [:russian] }
81
+ }.to raise_error(OCRSDK::NetworkError)
82
+ end
83
+
84
+ it "should do a post request with correct url and file attached" do
85
+ RestClient.stub(:post) do |uri, params|
86
+ uri.to_s.should == "http://app_id:pass@#{OCRSDK::SERVICE_URL}/processImage?language=Russian%2CEnglish&exportFormat=txt&profile=documentConversion"
87
+ params[:upload][:file].should be_kind_of(File)
88
+ end
89
+ RestClient.should_receive(:post).once
90
+ subject.instance_eval { api_process_image TestFiles.russian_jpg_path, [:russian, :english], :txt, :document_conversion }
91
+ end
92
+ end
93
+ end
@@ -0,0 +1,26 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::PDF do
5
+ describe ".recognizeable?" do
6
+ it "should return false for a document with text only" do
7
+ OCRSDK::PDF.new(TestFiles.lorem_pdf).recognizeable?.should be_false
8
+ end
9
+
10
+ it "should return false for document with a lot of text and some images" do
11
+ OCRSDK::PDF.new(TestFiles.lorem_complex_pdf).recognizeable?.should be_false
12
+ end
13
+
14
+ it "should return true for document with images only" do
15
+ OCRSDK::PDF.new(TestFiles.recognizeable_pdf).recognizeable?.should be_true
16
+ end
17
+
18
+ it "should return false for malformed document" do
19
+ OCRSDK::PDF.new(TestFiles.malformed_pdf).recognizeable?.should be_false
20
+ end
21
+
22
+ it "should return true for 'searchable' document with malformed text underneath the pic" do
23
+ OCRSDK::PDF.new(TestFiles.searchable_malformed_pdf).recognizeable?.should be_true
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,165 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Promise do
5
+
6
+ describe ".parse_response" do
7
+ context "correct response" do
8
+ subject { OCRSDK::Promise.new(nil).parse_response process_image_response }
9
+
10
+ its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
11
+ its(:status) { should == :submitted }
12
+ its(:result_url) { should == 'http://domain/blob ID' }
13
+ its(:estimate_processing_time) { should == 3600 }
14
+ its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
15
+ end
16
+
17
+ context "incorrect response" do
18
+ subject { OCRSDK::Promise.new nil }
19
+
20
+ it "should raise OCRSDKError" do
21
+ expect {
22
+ subject.parse_response ''
23
+ }.to raise_error(OCRSDK::OCRSDKError)
24
+ end
25
+ end
26
+
27
+ context "insufficient credits" do
28
+ subject { OCRSDK::Promise.new nil }
29
+
30
+ it "should raise an OCRSDK::NotEnoughCredits error" do
31
+ expect {
32
+ subject.parse_response process_image_response_credits
33
+ }.to raise_error(OCRSDK::NotEnoughCredits)
34
+ end
35
+ end
36
+ end
37
+
38
+ describe "self.from_response" do
39
+ subject { OCRSDK::Promise.from_response process_image_response }
40
+
41
+ its(:task_id) { should == '22345200-abe8-4f60-90c8-0d43c5f6c0f6' }
42
+ its(:status) { should == :submitted }
43
+ its(:result_url) { should == 'http://domain/blob ID' }
44
+ its(:estimate_processing_time) { should == 3600 }
45
+ its(:estimate_completion) { should == DateTime.parse("2001-01-01T13:18:22Z") + 3600.seconds }
46
+ end
47
+
48
+ describe ".update" do
49
+ subject { OCRSDK::Promise.new 'update-task-id' }
50
+ before do
51
+ subject.stub(:api_update_status) { process_image_updated_response }
52
+ subject.update
53
+ end
54
+
55
+ its(:task_id) { should == 'update-task-id' }
56
+ its(:status) { should == :in_progress }
57
+ end
58
+
59
+ describe ".api_update_status" do
60
+ subject { OCRSDK::Promise.new 'test', 'app_id', 'pass' }
61
+
62
+ it "should make an api call with correct url" do
63
+ RestClient.stub(:get) do |url|
64
+ url.to_s.should == "http://app_id:pass@#{OCRSDK::SERVICE_URL}/getTaskStatus?taskId=test"
65
+ end
66
+ RestClient.should_receive(:get).once
67
+ subject.instance_eval { api_update_status }
68
+ end
69
+
70
+ it "should raise a NetworkError in case REST request fails" do
71
+ RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }
72
+ RestClient.should_receive(:get).once
73
+
74
+ expect {
75
+ subject.instance_eval { api_update_status }
76
+ }.to raise_error(OCRSDK::NetworkError)
77
+ end
78
+ end
79
+
80
+ describe ".result" do
81
+ context "processing completed without errors" do
82
+ subject { OCRSDK::Promise.from_response process_image_completed_response }
83
+
84
+ it "should call api method" do
85
+ subject.should_receive(:api_get_result).once
86
+ subject.result
87
+ end
88
+
89
+ it "should get file with coorect url and return its contents" do
90
+ RestClient.stub(:get) do |url|
91
+ url.to_s.should == "http://domain/blob ID"
92
+ "meow"
93
+ end
94
+ RestClient.should_receive(:get).once
95
+ subject.result.should == 'meow'
96
+ end
97
+
98
+ it "should raise NetworkError in case getting file fails" do
99
+ RestClient.stub(:get) {|url| raise RestClient::ExceptionWithResponse }
100
+ RestClient.should_receive(:get).once
101
+
102
+ expect {
103
+ subject.result
104
+ }.to raise_error(OCRSDK::NetworkError)
105
+ end
106
+ end
107
+
108
+ context "processing failed" do
109
+ subject { OCRSDK::Promise.from_response process_image_failed_response }
110
+
111
+ it "should raise an ProcessingFailed" do
112
+ expect {
113
+ subject.result
114
+ }.to raise_error(OCRSDK::ProcessingFailed)
115
+ end
116
+ end
117
+ end
118
+
119
+ describe ".completed? and .failed?" do
120
+ context "processed job" do
121
+ subject { OCRSDK::Promise.from_response process_image_updated_response }
122
+
123
+ its(:processing?) { should be_true }
124
+ its(:completed?) { should be_false }
125
+ its(:failed?) { should be_false }
126
+ end
127
+
128
+ context "completed job" do
129
+ subject { OCRSDK::Promise.from_response process_image_completed_response }
130
+
131
+ its(:processing?) { should be_false }
132
+ its(:completed?) { should be_true }
133
+ its(:failed?) { should be_false }
134
+ end
135
+
136
+ context "failed job" do
137
+ subject { OCRSDK::Promise.from_response process_image_failed_response }
138
+
139
+ its(:processing?) { should be_false }
140
+ its(:completed?) { should be_false }
141
+ its(:failed?) { should be_true }
142
+ end
143
+ end
144
+
145
+ describe ".wait" do
146
+ subject { OCRSDK::Promise.from_response process_image_response }
147
+
148
+ it "should check the status as many times as needed waiting while ocr is completed" do
149
+ called_once = false
150
+ subject.stub(:update) do
151
+ if called_once
152
+ subject.parse_response process_image_completed_response
153
+ else
154
+ called_once = true
155
+ end
156
+ end
157
+ subject.should_receive(:update).twice
158
+
159
+ start = Time.now
160
+ subject.wait 0.1
161
+ (Time.now - start).should >= 0.2
162
+ end
163
+ end
164
+
165
+ end
@@ -0,0 +1,51 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Verifiers::Format do
5
+ let (:class_with_module) {
6
+ Class.new do
7
+ include OCRSDK::Verifiers::Format
8
+ end
9
+ }
10
+ subject { class_with_module.new }
11
+
12
+ it "should have list of possible input formats" do
13
+ OCRSDK::Verifiers::Format::INPUT_FORMATS.length.should > 0
14
+ end
15
+
16
+ it "should have list of possible output formats" do
17
+ OCRSDK::Verifiers::Format::OUTPUT_FORMATS.length.should > 0
18
+ end
19
+
20
+ it "should convert format to string" do
21
+ subject.format_to_s(:meow_meow).should == 'meowMeow'
22
+ end
23
+
24
+ describe ".supported_input_format?" do
25
+ it "should return false for incorrect input format" do
26
+ subject.supported_input_format?(:meow_meow).should be_false
27
+ end
28
+
29
+ it "should return true for correct input format as symbol" do
30
+ subject.supported_input_format?(:pdf).should be_true
31
+ end
32
+
33
+ it "should return true for correct input format as string" do
34
+ subject.supported_input_format?("pdf").should be_true
35
+ end
36
+ end
37
+
38
+ describe ".supported_output_format?" do
39
+ it "should return false for incorrect output format" do
40
+ subject.supported_output_format?(:meow_meow).should be_false
41
+ end
42
+
43
+ it "should return true for correct output format as symbol" do
44
+ subject.supported_output_format?(:txt).should be_true
45
+ end
46
+
47
+ it "should return true for correct output format as string" do
48
+ subject.supported_output_format?("pdfSearchable").should be_true
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,55 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Verifiers::Language do
5
+ let (:class_with_module) {
6
+ Class.new do
7
+ include OCRSDK::Verifiers::Language
8
+ end
9
+ }
10
+ subject { class_with_module.new }
11
+
12
+ it "should have list of languages" do
13
+ OCRSDK::Verifiers::Language::LANGUAGES.length.should > 0
14
+ end
15
+
16
+ it "should convert language to string" do
17
+ subject.language_to_s(:meow_meow).should == 'MeowMeow'
18
+ end
19
+
20
+ describe ".language_to_sym" do
21
+ it "should convert language to symbol" do
22
+ subject.language_to_sym("MeowMeow").should == :meow_meow
23
+ end
24
+
25
+ it "should produce reversible results" do
26
+ subject.language_to_sym(subject.language_to_s(:meow_meow)).should == :meow_meow
27
+ end
28
+ end
29
+
30
+ describe ".languages_to_s" do
31
+ it "should convert list of languages to strings" do
32
+ subject.languages_to_s([:hebrew, :serbian_cyrillic]).should == ['Hebrew', 'SerbianCyrillic']
33
+ end
34
+
35
+ it "should raise an exception if language doesn't exist" do
36
+ expect {
37
+ subject.languages_to_s([:meow_meow, :pew_pew])
38
+ }.to raise_error(OCRSDK::UnsupportedLanguage)
39
+ end
40
+ end
41
+
42
+ describe ".supported_language?" do
43
+ it "should return false for incorrect language" do
44
+ subject.supported_language?(:meow_meow).should be_false
45
+ end
46
+
47
+ it "should return true for correct language as symbol" do
48
+ subject.supported_language?(:russian).should be_true
49
+ end
50
+
51
+ it "should return true for correct language as string" do
52
+ subject.supported_language?("Russian").should be_true
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,33 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Verifiers::Profile do
5
+ let (:class_with_module) {
6
+ Class.new do
7
+ include OCRSDK::Verifiers::Profile
8
+ end
9
+ }
10
+ subject { class_with_module.new }
11
+
12
+ it "should have list of possible profiles" do
13
+ OCRSDK::Verifiers::Profile::PROFILES.length.should > 0
14
+ end
15
+
16
+ it "should convert profile to string" do
17
+ subject.profile_to_s(:meow_meow).should == 'meowMeow'
18
+ end
19
+
20
+ describe ".supported_profile?" do
21
+ it "should return false for incorrect profile" do
22
+ subject.supported_profile?(:meow_meow).should be_false
23
+ end
24
+
25
+ it "should return true for correct profile as symbol" do
26
+ subject.supported_profile?(:document_conversion).should be_true
27
+ end
28
+
29
+ it "should return true for correct profile as string" do
30
+ subject.supported_profile?("documentConversion").should be_true
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,43 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK::Verifiers::Status do
5
+ let (:class_with_module) {
6
+ Class.new do
7
+ include OCRSDK::Verifiers::Status
8
+ end
9
+ }
10
+ subject { class_with_module.new }
11
+
12
+ it "should have list of statuses" do
13
+ OCRSDK::Verifiers::Status::STATUSES.length.should > 0
14
+ end
15
+
16
+ it "should convert status to string" do
17
+ subject.status_to_s(:meow_meow).should == 'MeowMeow'
18
+ end
19
+
20
+ describe ".status_to_sym" do
21
+ it "should convert status to symbol" do
22
+ subject.status_to_sym("MeowMeow").should == :meow_meow
23
+ end
24
+
25
+ it "should produce reversible results" do
26
+ subject.status_to_sym(subject.status_to_s(:meow_meow)).should == :meow_meow
27
+ end
28
+ end
29
+
30
+ describe ".supported_status?" do
31
+ it "should return false for incorrect language" do
32
+ subject.supported_status?(:meow_meow).should be_false
33
+ end
34
+
35
+ it "should return true for correct status as symbol" do
36
+ subject.supported_status?(:submitted).should be_true
37
+ end
38
+
39
+ it "should return true for correct status as string" do
40
+ subject.supported_status?("Submitted").should be_true
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,8 @@
1
+ # -*- encoding : utf-8 -*-
2
+ require 'spec_helper'
3
+
4
+ describe OCRSDK do
5
+ it "should have service url" do
6
+ OCRSDK::SERVICE_URL.length.should > 0
7
+ end
8
+ end
@@ -0,0 +1,8 @@
1
+ if RUBY_VERSION >= '1.9'
2
+ require 'cover_me'
3
+ end
4
+
5
+ require 'rspec'
6
+ require 'ocrsdk'
7
+ require 'helpers/ocrsdk_helpers'
8
+ require 'support/test_files'
@@ -0,0 +1,36 @@
1
+ # This is a separate module because we might want to
2
+ # mock some files in the future or use a generator;
3
+ # for the same reason it returns an opened instance of the file
4
+ #
5
+ # Each method checks whether the corresponding file exists in
6
+ # spec/fixtures/files/file.name.kitten.ext (underscores in the method name become dots);
7
+ # if the method name ends in `_path`, the file's path is returned as a String,
8
+ # otherwise an opened File instance is returned
9
+ module TestFiles
10
+ def self.respond_to?(method)
11
+ File.exists? self.filename(method)
12
+ end
13
+
14
+ def self.method_missing(method, *args, &block)
15
+ fname = self.filename(method)
16
+ if File.exists? fname
17
+ if only_path? method
18
+ fname
19
+ else
20
+ File.open(fname)
21
+ end
22
+ else
23
+ super
24
+ end
25
+ end
26
+
27
+ def self.filename(method)
28
+ method = method[0..-6] if only_path? method
29
+ File.join(File.dirname(File.expand_path(__FILE__)), '..', 'fixtures', 'files', method.to_s.gsub('_', '.'))
30
+ end
31
+
32
+ def self.only_path?(method)
33
+ method[-5..-1] == '_path'
34
+ end
35
+
36
+ end
data/travis.yml ADDED
@@ -0,0 +1,4 @@
1
+ rvm:
2
+ - 1.8.7
3
+ - 1.9.2
4
+ - 1.9.3