parsefile 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/ocrfile.rb +6 -4
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2738907314aac77a653fd655f4a200e2a63b6ed6
4
- data.tar.gz: 8c780c9b2304699e37f71cdb7f4842918bc61fc9
3
+ metadata.gz: 99b0e344729338584b0af0696cb2e6e6ef18edef
4
+ data.tar.gz: 79b40b78a7af582e57492a4daa75fab5db0462b7
5
5
  SHA512:
6
- metadata.gz: e1385fdeb0923ebe72068c529341314aabebe1123f161e636762c19ad3325518716c759f9eae5fdd4bc6f12892709a45fe2dc5381e6151e6472677800fb31865
7
- data.tar.gz: 919fbc77f37f6d40b15726ae9d5f36b622fdbe37d2789f730da61b5dea61d24ec12f359b607b096dbc5f0afd936e30fb68f23ee50efdd6276e3aa3c9273d6ade
6
+ metadata.gz: 2f2683b2aa5ba9b328f3d8b1bbd9b75bcb5cf9f20aa4c434ee64db7b9ac798e842b998e315532b1253f7b955a2d952abcb9f67fcd9311bc4bf76a473a582a765
7
+ data.tar.gz: 2dc0d20a1c0eceb636ff89c15c2311a235a23814cae04bf11e41e2b305d0ca7508ce68180d7a441617cb273f0d9158881b8e28e235422ad48ef3886606a72623
data/lib/ocrfile.rb CHANGED
@@ -18,13 +18,14 @@ class OCRFile
18
18
  begin
19
19
  if File.exist?(@output_dir+@rel_path+".json")
20
20
  load_extracted_text(@output_dir+@rel_path+".json")
21
- elsif @path.include?(".pdf")
22
- ocr_pdf
21
+ #elsif @path.include?(".pdf")
22
+ # ocr_pdf
23
23
  else
24
24
  if @tika
25
25
  give_me_text_local
26
26
  else
27
- give_me_text
27
+ @text = File.read(@path)
28
+ # give_me_text
28
29
  end
29
30
  end
30
31
  rescue # Detect errors
@@ -49,7 +50,8 @@ class OCRFile
49
50
 
50
51
  # Send file to give me text
51
52
  def give_me_text
52
- puts "using: give_me_text"
53
+ puts "using: give_me_text"
54
+
53
55
  c = Curl::Easy.new("http://givemetext.okfnlabs.org/tika/tika/form")
54
56
  c.multipart_form_post = true
55
57
  c.http_post(Curl::PostField.file('file', @path))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parsefile
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-01-19 00:00:00.000000000 Z
12
+ date: 2017-03-07 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: OCR file and extract metadata using Apache Tika and Tesseract
15
15
  email: shidash@shidash.com