RubyGems - parsefile - Versions diffs - 0.0.7 → 0.0.8 - Mend

parsefile 0.0.7 → 0.0.8

Files changed (3) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0b13ab5008debd780cd15627b816d35ffc40fae1
-  data.tar.gz: 34b49fcc2c705fb4f00cd782133b68ed44d37886
+  metadata.gz: 2738907314aac77a653fd655f4a200e2a63b6ed6
+  data.tar.gz: 8c780c9b2304699e37f71cdb7f4842918bc61fc9
 SHA512:
-  metadata.gz: d08d0c4314107dc2aa413af4cb4144181330fd7b4c545cd6320873ff833e1fa22a33f5e9d2001831dae23bcc17b95df48f5a063849a39d0fed82c1e92209cf61
-  data.tar.gz: 450304b1c5d2ab8d3a7cc43f8fe71d9aed02c5dab8f3288ba213b1d599351751da552a26e3a04ba11f368bad4262a8703dc1b6c11285df2d891b2795082759e1
+  metadata.gz: e1385fdeb0923ebe72068c529341314aabebe1123f161e636762c19ad3325518716c759f9eae5fdd4bc6f12892709a45fe2dc5381e6151e6472677800fb31865
+  data.tar.gz: 919fbc77f37f6d40b15726ae9d5f36b622fdbe37d2789f730da61b5dea61d24ec12f359b607b096dbc5f0afd936e30fb68f23ee50efdd6276e3aa3c9273d6ade

data/lib/ocrfile.rb CHANGED Viewed

@@ -28,7 +28,7 @@ class OCRFile
         end
       end
     rescue # Detect errors
-      binding.pry
+      #binding.pry
     end
     return @text
@@ -36,17 +36,20 @@ class OCRFile
   # Check if file is pdf
   def is_pdf?
+    puts "determined: is_pdf"
     file_start = File.open(@path, 'r') { |f| f.read(8)}
     file_start.match(/\%PDF-\d+\.?\d+/)
   end
   # Load text that is already extracted
   def load_extracted_text(file)
+	puts "file exists: load_extracted_text"
     @text = JSON.parse(File.read(file))["text"]
   end
   # Send file to give me text
   def give_me_text
+	puts "using: give_me_text"
     c = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
     c.multipart_form_post = true
     c.http_post(Curl::PostField.file('file', @path))
@@ -56,6 +59,7 @@ class OCRFile
   end
   def give_me_text_local
+	puts "using: give_me_text_local"
 	c = Curl::Easy.new(@tika + "/tika")
 	# TODO: move this mime filtering to a higher global level
 	mime_magic = MimeMagic.by_path(@path)
@@ -65,7 +69,6 @@ class OCRFile
 	c.http_put(file_data)
 	#binding.pry
 	@text = c.body_str
 	gotten_text_ok?(@text)
   end
@@ -77,6 +80,7 @@ class OCRFile
   # OCR with tesseract
   def ocr_pdf
+	puts "using: ocr_pdf"
     # Dir_paths
     base = Dir.pwd+"/"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: parsefile
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - M. C. McGrath
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-10-10 00:00:00.000000000 Z
+date: 2017-01-19 00:00:00.000000000 Z
 dependencies: []
 description: OCR file and extract metadata using Apache Tika and Tesseract
 email: shidash@shidash.com