indico 0.10.3 → 0.10.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/indico.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'indico/version'
2
2
  require 'indico/helper'
3
3
  require 'indico/image'
4
+ require 'indico/pdf'
4
5
  require 'indico/multi'
5
6
  require 'indico/settings'
6
7
  require 'indico/errors'
@@ -127,6 +128,10 @@ module Indico
127
128
  api_handler(text, "summarization", config)
128
129
  end
129
130
 
131
+ def self.pdf_extraction(pdf, config = {})
132
+ api_handler(preprocess_pdf(pdf), "pdfextraction", config)
133
+ end
134
+
130
135
  def self.fer(image, config = nil)
131
136
  size = (config != nil and config["detect"] == true) ? false : 48
132
137
  api_handler(preprocess(image, size, false), 'fer', config)
data/lib/indico/pdf.rb ADDED
@@ -0,0 +1,26 @@
1
+ require 'base64'
2
+
3
+ module Indico
4
+ def self.preprocess_pdf(pdf)
5
+ if pdf.class == Array
6
+ # Batch Request
7
+ pdf_array = Array.new
8
+
9
+ # process each PDF
10
+ pdf.each do |_pdf|
11
+ pdf_array.push(preprocess_pdf(_pdf))
12
+ end
13
+
14
+ return pdf_array
15
+ elsif pdf.class != String
16
+ raise Exception.new("PDF input must be filename, url or base64 string")
17
+ end
18
+
19
+ begin
20
+ return Base64.encode64(File.read(pdf))
21
+ rescue
22
+ # likely a url or a base64 encoded string already
23
+ return pdf
24
+ end
25
+ end
26
+ end
@@ -1,3 +1,3 @@
1
1
  module Indico
2
- VERSION = '0.10.3'
2
+ VERSION = '0.10.4'
3
3
  end
Binary file
@@ -0,0 +1,43 @@
1
+ require 'spec_helper'
2
+
3
+ describe Indico do
4
+ before do
5
+ api_key = ENV['INDICO_API_KEY']
6
+ private_cloud = 'indico-test'
7
+ @config = { api_key: api_key, cloud: private_cloud}
8
+ end
9
+
10
+ it 'should properly handle pdf urls' do
11
+ pdf_url = "https://s3-us-west-2.amazonaws.com/indico-test-data/test.pdf"
12
+
13
+ response = Indico.pdf_extraction(pdf_url, @config)
14
+ expected_keys = Set.new(%w(metadata text))
15
+
16
+ expect(Set.new(response.keys)).to eql(expected_keys)
17
+ end
18
+
19
+ it 'should properly handle local pdf files' do
20
+ pdf_path = File.expand_path(
21
+ File.join(File.dirname(__FILE__), "data", "test.pdf")
22
+ )
23
+
24
+ response = Indico.pdf_extraction(pdf_path, @config)
25
+ expected_keys = Set.new(%w(metadata text))
26
+
27
+ expect(Set.new(response.keys)).to eql(expected_keys)
28
+ end
29
+
30
+ it 'should properly handle array of local pdf files' do
31
+ pdf_path = File.expand_path(
32
+ File.join(File.dirname(__FILE__), "data", "test.pdf")
33
+ )
34
+ arr = Array.new()
35
+ arr.push(pdf_path)
36
+ arr.push(pdf_path)
37
+
38
+ response = Indico.pdf_extraction(arr, @config)
39
+ expected_keys = Set.new(%w(metadata text))
40
+
41
+ expect(Set.new(response[0].keys)).to eql(expected_keys)
42
+ end
43
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: indico
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.3
4
+ version: 0.10.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2016-10-19 00:00:00.000000000 Z
15
+ date: 2016-12-16 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: inifile
@@ -117,6 +117,7 @@ files:
117
117
  - lib/indico/helper.rb
118
118
  - lib/indico/image.rb
119
119
  - lib/indico/multi.rb
120
+ - lib/indico/pdf.rb
120
121
  - lib/indico/settings.rb
121
122
  - lib/indico/version.rb
122
123
  - spec/config/.indicorc.test
@@ -125,10 +126,12 @@ files:
125
126
  - spec/data/dog.jpg
126
127
  - spec/data/happy.png
127
128
  - spec/data/happy64.txt
129
+ - spec/data/test.pdf
128
130
  - spec/imagerecognition_spec.rb
129
131
  - spec/indico_batch_spec.rb
130
132
  - spec/indico_spec.rb
131
133
  - spec/keywords_v2_spec.rb
134
+ - spec/pdf_extraction_spec.rb
132
135
  - spec/political_spec.rb
133
136
  - spec/settings_spec.rb
134
137
  - spec/spec_helper.rb