uploadconvert 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/uploadconvert.rb +9 -6
- metadata +1 -1
data/lib/uploadconvert.rb
CHANGED
@@ -35,13 +35,16 @@ class UploadConvert
|
|
35
35
|
|
36
36
|
# Extract text from embedded text PDFs
|
37
37
|
def embedPDF
|
38
|
-
|
39
|
-
|
40
|
-
|
38
|
+
begin
|
39
|
+
Docsplit.extract_text(@input, :ocr => false)
|
40
|
+
outfile = @input.split(".pdf")
|
41
|
+
text = File.read(outfile[0]+".txt")
|
41
42
|
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
# Clean up text and delete file
|
44
|
+
File.delete(outfile[0]+".txt")
|
45
|
+
cleanPDF(text)
|
46
|
+
rescue
|
47
|
+
end
|
45
48
|
end
|
46
49
|
|
47
50
|
# OCR PDFs and turn that text into a JSON
|