RubyGems - parsefile - Versions diffs - 0.0.7 → 0.0.8 - Mend

parsefile 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0b13ab5008debd780cd15627b816d35ffc40fae1
-  data.tar.gz: 34b49fcc2c705fb4f00cd782133b68ed44d37886
+  metadata.gz: 2738907314aac77a653fd655f4a200e2a63b6ed6
+  data.tar.gz: 8c780c9b2304699e37f71cdb7f4842918bc61fc9
 SHA512:
-  metadata.gz: d08d0c4314107dc2aa413af4cb4144181330fd7b4c545cd6320873ff833e1fa22a33f5e9d2001831dae23bcc17b95df48f5a063849a39d0fed82c1e92209cf61
-  data.tar.gz: 450304b1c5d2ab8d3a7cc43f8fe71d9aed02c5dab8f3288ba213b1d599351751da552a26e3a04ba11f368bad4262a8703dc1b6c11285df2d891b2795082759e1
+  metadata.gz: e1385fdeb0923ebe72068c529341314aabebe1123f161e636762c19ad3325518716c759f9eae5fdd4bc6f12892709a45fe2dc5381e6151e6472677800fb31865
+  data.tar.gz: 919fbc77f37f6d40b15726ae9d5f36b622fdbe37d2789f730da61b5dea61d24ec12f359b607b096dbc5f0afd936e30fb68f23ee50efdd6276e3aa3c9273d6ade

data/lib/ocrfile.rb CHANGED Viewed

@@ -28,7 +28,7 @@ class OCRFile
         end
       end
     rescue # Detect errors
-      binding.pry
+      #binding.pry
     end
     return @text
@@ -36,17 +36,20 @@ class OCRFile
   # Check if file is pdf
   def is_pdf?
+    puts "determined: is_pdf"
     file_start = File.open(@path, 'r') { |f| f.read(8)}
     file_start.match(/\%PDF-\d+\.?\d+/)
   end
   # Load text that is already extracted
   def load_extracted_text(file)
+	puts "file exists: load_extracted_text"
     @text = JSON.parse(File.read(file))["text"]
   end
   # Send file to give me text
   def give_me_text
+	puts "using: give_me_text"
     c = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
     c.multipart_form_post = true
     c.http_post(Curl::PostField.file('file', @path))
@@ -56,6 +59,7 @@ class OCRFile
   end
   def give_me_text_local
+	puts "using: give_me_text_local"
 	c = Curl::Easy.new(@tika + "/tika")
 	# TODO: move this mime filtering to a higher global level
 	mime_magic = MimeMagic.by_path(@path)
@@ -65,7 +69,6 @@ class OCRFile
 	c.http_put(file_data)
 	#binding.pry
 	@text = c.body_str
 	gotten_text_ok?(@text)
   end
@@ -77,6 +80,7 @@ class OCRFile
   # OCR with tesseract
   def ocr_pdf
+	puts "using: ocr_pdf"
     # Dir_paths
     base = Dir.pwd+"/"

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: parsefile
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - M. C. McGrath
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-10-10 00:00:00.000000000 Z
+date: 2017-01-19 00:00:00.000000000 Z
 dependencies: []
 description: OCR file and extract metadata using Apache Tika and Tesseract
 email: shidash@shidash.com