parsefile 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ocrfile.rb +6 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 99b0e344729338584b0af0696cb2e6e6ef18edef
|
4
|
+
data.tar.gz: 79b40b78a7af582e57492a4daa75fab5db0462b7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f2683b2aa5ba9b328f3d8b1bbd9b75bcb5cf9f20aa4c434ee64db7b9ac798e842b998e315532b1253f7b955a2d952abcb9f67fcd9311bc4bf76a473a582a765
|
7
|
+
data.tar.gz: 2dc0d20a1c0eceb636ff89c15c2311a235a23814cae04bf11e41e2b305d0ca7508ce68180d7a441617cb273f0d9158881b8e28e235422ad48ef3886606a72623
|
data/lib/ocrfile.rb
CHANGED
@@ -18,13 +18,14 @@ class OCRFile
|
|
18
18
|
begin
|
19
19
|
if File.exist?(@output_dir+@rel_path+".json")
|
20
20
|
load_extracted_text(@output_dir+@rel_path+".json")
|
21
|
-
elsif @path.include?(".pdf")
|
22
|
-
|
21
|
+
#elsif @path.include?(".pdf")
|
22
|
+
# ocr_pdf
|
23
23
|
else
|
24
24
|
if @tika
|
25
25
|
give_me_text_local
|
26
26
|
else
|
27
|
-
|
27
|
+
@text = File.read(@path)
|
28
|
+
# give_me_text
|
28
29
|
end
|
29
30
|
end
|
30
31
|
rescue # Detect errors
|
@@ -49,7 +50,8 @@ class OCRFile
|
|
49
50
|
|
50
51
|
# Send file to give me text
|
51
52
|
def give_me_text
|
52
|
-
|
53
|
+
puts "using: give_me_text"
|
54
|
+
|
53
55
|
c = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
|
54
56
|
c.multipart_form_post = true
|
55
57
|
c.http_post(Curl::PostField.file('file', @path))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parsefile
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-03-07 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: OCR file and extract metadata using Apache Tika and Tesseract
|
15
15
|
email: shidash@shidash.com
|