indico 0.10.3 → 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/indico.rb +5 -0
- data/lib/indico/pdf.rb +26 -0
- data/lib/indico/version.rb +1 -1
- data/spec/data/test.pdf +0 -0
- data/spec/pdf_extraction_spec.rb +43 -0
- metadata +5 -2
data/lib/indico.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'indico/version'
|
2
2
|
require 'indico/helper'
|
3
3
|
require 'indico/image'
|
4
|
+
require 'indico/pdf'
|
4
5
|
require 'indico/multi'
|
5
6
|
require 'indico/settings'
|
6
7
|
require 'indico/errors'
|
@@ -127,6 +128,10 @@ module Indico
|
|
127
128
|
api_handler(text, "summarization", config)
|
128
129
|
end
|
129
130
|
|
131
|
+
# Sends one PDF, or a batch of PDFs, to the "pdfextraction" endpoint.
#
# The input is first normalised by preprocess_pdf and the result is then
# passed to api_handler together with the caller's config.
#
# @param pdf [String, Array] a filename, url or base64 string, or an
#   Array of those for a batch request
# @param config [Hash] options forwarded unchanged to api_handler
# @return whatever api_handler returns for the "pdfextraction" API
def self.pdf_extraction(pdf, config = {})
  payload = preprocess_pdf(pdf)
  api_handler(payload, "pdfextraction", config)
end
|
134
|
+
|
130
135
|
def self.fer(image, config = nil)
|
131
136
|
size = (config != nil and config["detect"] == true) ? false : 48
|
132
137
|
api_handler(preprocess(image, size, false), 'fer', config)
|
data/lib/indico/pdf.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
module Indico
  # Normalises PDF input before it is sent to the API.
  #
  # * An Array is treated as a batch request: every element is processed
  #   recursively and an Array of the results is returned.
  # * A String is first tried as a local file path. When the file can be
  #   read, its raw bytes are returned Base64-encoded. When reading fails
  #   the String is returned untouched, since it is then likely a url or
  #   an already base64 encoded document.
  # * Anything else is rejected.
  #
  # @param pdf [String, Array] filename, url or base64 string, or an
  #   Array of those
  # @return [String, Array<String>] the prepared payload(s)
  # @raise [ArgumentError] when an input is neither a String nor an Array
  def self.preprocess_pdf(pdf)
    case pdf
    when Array
      # Batch request: process each PDF in turn.
      pdf.map { |single_pdf| preprocess_pdf(single_pdf) }
    when String
      begin
        # PDFs are binary, so read the bytes verbatim; a text-mode read can
        # alter the data on platforms that translate newlines.
        Base64.encode64(File.binread(pdf))
      rescue StandardError
        # Not a readable file: likely a url or a base64 encoded string already.
        pdf
      end
    else
      # ArgumentError is a subclass of Exception, so existing
      # `rescue Exception` handlers keep working.
      raise ArgumentError, "PDF input must be filename, url or base64 string"
    end
  end
end
|
data/lib/indico/version.rb
CHANGED
data/spec/data/test.pdf
ADDED
Binary file
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Indico do
  # API key is read from the environment; requests use the 'indico-test' cloud.
  let(:config) { { api_key: ENV['INDICO_API_KEY'], cloud: 'indico-test' } }

  # Absolute path of the sample PDF stored next to this spec.
  let(:pdf_path) do
    File.expand_path(File.join(File.dirname(__FILE__), "data", "test.pdf"))
  end

  # Keys every pdf_extraction result is expected to expose.
  let(:expected_keys) { Set.new(%w(metadata text)) }

  it 'should properly handle pdf urls' do
    pdf_url = "https://s3-us-west-2.amazonaws.com/indico-test-data/test.pdf"
    response = Indico.pdf_extraction(pdf_url, config)

    expect(Set.new(response.keys)).to eql(expected_keys)
  end

  it 'should properly handle local pdf files' do
    response = Indico.pdf_extraction(pdf_path, config)

    expect(Set.new(response.keys)).to eql(expected_keys)
  end

  it 'should properly handle array of local pdf files' do
    # Same file twice to exercise the batch code path.
    response = Indico.pdf_extraction([pdf_path, pdf_path], config)

    expect(Set.new(response[0].keys)).to eql(expected_keys)
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: indico
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.3
|
4
|
+
version: 0.10.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2016-
|
15
|
+
date: 2016-12-16 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: inifile
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/indico/helper.rb
|
118
118
|
- lib/indico/image.rb
|
119
119
|
- lib/indico/multi.rb
|
120
|
+
- lib/indico/pdf.rb
|
120
121
|
- lib/indico/settings.rb
|
121
122
|
- lib/indico/version.rb
|
122
123
|
- spec/config/.indicorc.test
|
@@ -125,10 +126,12 @@ files:
|
|
125
126
|
- spec/data/dog.jpg
|
126
127
|
- spec/data/happy.png
|
127
128
|
- spec/data/happy64.txt
|
129
|
+
- spec/data/test.pdf
|
128
130
|
- spec/imagerecognition_spec.rb
|
129
131
|
- spec/indico_batch_spec.rb
|
130
132
|
- spec/indico_spec.rb
|
131
133
|
- spec/keywords_v2_spec.rb
|
134
|
+
- spec/pdf_extraction_spec.rb
|
132
135
|
- spec/political_spec.rb
|
133
136
|
- spec/settings_spec.rb
|
134
137
|
- spec/spec_helper.rb
|