parsefile 0.0.8 → 0.0.9

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/ocrfile.rb +6 -4
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2738907314aac77a653fd655f4a200e2a63b6ed6
4
- data.tar.gz: 8c780c9b2304699e37f71cdb7f4842918bc61fc9
3
+ metadata.gz: 99b0e344729338584b0af0696cb2e6e6ef18edef
4
+ data.tar.gz: 79b40b78a7af582e57492a4daa75fab5db0462b7
5
5
  SHA512:
6
- metadata.gz: e1385fdeb0923ebe72068c529341314aabebe1123f161e636762c19ad3325518716c759f9eae5fdd4bc6f12892709a45fe2dc5381e6151e6472677800fb31865
7
- data.tar.gz: 919fbc77f37f6d40b15726ae9d5f36b622fdbe37d2789f730da61b5dea61d24ec12f359b607b096dbc5f0afd936e30fb68f23ee50efdd6276e3aa3c9273d6ade
6
+ metadata.gz: 2f2683b2aa5ba9b328f3d8b1bbd9b75bcb5cf9f20aa4c434ee64db7b9ac798e842b998e315532b1253f7b955a2d952abcb9f67fcd9311bc4bf76a473a582a765
7
+ data.tar.gz: 2dc0d20a1c0eceb636ff89c15c2311a235a23814cae04bf11e41e2b305d0ca7508ce68180d7a441617cb273f0d9158881b8e28e235422ad48ef3886606a72623
data/lib/ocrfile.rb CHANGED
@@ -18,13 +18,14 @@ class OCRFile
18
18
  begin
19
19
  if File.exist?(@output_dir+@rel_path+".json")
20
20
  load_extracted_text(@output_dir+@rel_path+".json")
21
- elsif @path.include?(".pdf")
22
- ocr_pdf
21
+ #elsif @path.include?(".pdf")
22
+ # ocr_pdf
23
23
  else
24
24
  if @tika
25
25
  give_me_text_local
26
26
  else
27
- give_me_text
27
+ @text = File.read(@path)
28
+ # give_me_text
28
29
  end
29
30
  end
30
31
  rescue # Detect errors
@@ -49,7 +50,8 @@ class OCRFile
49
50
 
50
51
  # Send file to give me text
51
52
  def give_me_text
52
- puts "using: give_me_text"
53
+ puts "using: give_me_text"
54
+
53
55
  c = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
54
56
  c.multipart_form_post = true
55
57
  c.http_post(Curl::PostField.file('file', @path))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parsefile
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-01-19 00:00:00.000000000 Z
12
+ date: 2017-03-07 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: OCR file and extract metadata using Apache Tika and Tesseract
15
15
  email: shidash@shidash.com