rtesseract 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/.document CHANGED
File without changes
data/.gitignore CHANGED
File without changes
data/LICENSE CHANGED
File without changes
File without changes
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "rtesseract"
8
- gem.version = '0.0.2'
8
+ gem.version = '0.0.3'
9
9
  gem.summary = "Ruby library for working with the Tesseract OCR."
10
10
  gem.description = "Ruby library for working with the Tesseract OCR."
11
11
  gem.email = "dannnylo@gmail.com"
@@ -3,18 +3,21 @@ require "pathname"
3
3
  require "tempfile"
4
4
 
5
5
  class RTesseract
6
- VERSION = '0.0.2'
6
+ VERSION = '0.0.3'
7
+ attr_accessor :options
8
+ attr_writer :lang
7
9
 
8
10
  def initialize(src="", options={})
9
- @source = Pathname.new src
10
- @command = options[:command] || "tesseract"
11
+ @source = Pathname.new src
12
+ @command = options.delete(:command) || "tesseract"
13
+ @lang = options.delete(:lang) || options.delete("lang") || ""
11
14
  @options = options
12
15
  @value = ""
13
16
  end
14
17
 
15
18
  def source= src
16
- @source = Pathname.new src
17
19
  @value = ""
20
+ @source = Pathname.new src
18
21
  end
19
22
 
20
23
  def image_name
@@ -30,7 +33,7 @@ class RTesseract
30
33
  end
31
34
 
32
35
  #Remove files
33
- def remove_file(files)
36
+ def remove_file(files=[])
34
37
  files.each do |file|
35
38
  begin
36
39
  File.unlink(file) if File.exist?(file)
@@ -43,11 +46,42 @@ class RTesseract
43
46
  raise "Error on remove file."
44
47
  end
45
48
 
49
+ # Select the language
50
+ #===Languages
51
+ ## * eng - English
52
+ ## * deu - German
53
+ ## * deu-f - German fraktur
54
+ ## * fra - French
55
+ ## * ita - Italian
56
+ ## * nld - Dutch
57
+ ## * por - Portuguese
58
+ ## * spa - Spanish
59
+ ## * vie - Vietnamese
60
+ ## Note: Make sure you have installed the language to tesseract
61
+ def lang
62
+ language = "#{@lang}".strip
63
+ {"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
64
+ return " -l #{value} " if names.include? language.downcase
65
+ end
66
+ ""
67
+ rescue
68
+ ""
69
+ end
70
+
71
+ def config
72
+ @options ||= {}
73
+ @options.collect{|k,v| "#{k} #{v}" }.join("\n")
74
+ end
75
+
76
+ def config_file
77
+ #TODO: create the config
78
+ end
79
+
46
80
  #Convert image to string
47
81
  def convert
48
82
  tmp_file = Pathname.new(Dir::tmpdir).join("#{@source.basename}")
49
83
  tmp_image = image_to_tiff
50
- `#{@command} #{tmp_image} #{tmp_file.to_s}`
84
+ `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang}`
51
85
  @value = File.read("#{tmp_file.to_s}.txt").to_s
52
86
  remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
53
87
  rescue
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rtesseract}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = %q{2010-09-10}
12
+ s.date = %q{2010-10-11}
13
13
  s.description = %q{Ruby library for working with the Tesseract OCR.}
14
14
  s.email = %q{dannnylo@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,11 @@ Gem::Specification.new do |s|
25
25
  "lib/rtesseract.rb",
26
26
  "rtesseract.gemspec",
27
27
  "test/helper.rb",
28
+ "test/images/test.bmp",
29
+ "test/images/test.jpg",
30
+ "test/images/test.png",
31
+ "test/images/test.tif",
32
+ "test/images/test1.tif",
28
33
  "test/test_rtesseract.rb"
29
34
  ]
30
35
  s.homepage = %q{http://github.com/dannnylo/rtesseract}
File without changes
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,9 +1,62 @@
1
1
  require 'helper'
2
+ require 'pathname'
2
3
 
3
4
  class TestRtesseract < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
6
- end
5
+ context "Path" do
6
+ setup do
7
+ @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
8
+ @image_tiff = @path.join("images","test.tif").to_s
9
+ end
10
+
11
+ should "be instantiable" do
12
+ assert_equal RTesseract.new.class , RTesseract
13
+ assert_equal RTesseract.new("").class , RTesseract
14
+ assert_equal RTesseract.new(@image_tiff).class , RTesseract
15
+ end
16
+
17
+ should "translate image to text" do
18
+ assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
19
+ assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
20
+ end
21
+
22
+ should "translate images .png, .jpg, .bmp" do
23
+ assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
24
+ assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
25
+ assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
26
+ end
27
+
28
+ should "change the image" do
29
+ image = RTesseract.new(@image_tiff)
30
+ assert_equal image.to_s_without_spaces,"43ZZ"
31
+ image.source = @path.join("images","test1.tif").to_s
32
+ assert_equal image.to_s_without_spaces,"V2V4"
33
+ end
7
34
 
35
+ should "select the language" do
36
+ #English
37
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
38
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
39
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
40
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
41
+
42
+ #Portuguese
43
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
44
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
45
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
46
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
47
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
48
+
49
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
50
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
51
+
52
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
53
+ end
54
+
55
+ should "be configurable" do
56
+ assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
57
+ assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
58
+ assert_equal RTesseract.new(@image_tiff,{:enable_assoc=>0,:chop_enable=>0}).config , "chop_enable 0\nenable_assoc 0"
59
+ end
60
+ end
8
61
  end
9
62
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Danilo Jeremias da Silva
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-10 00:00:00 -03:00
18
+ date: 2010-10-11 00:00:00 -03:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -66,6 +66,11 @@ files:
66
66
  - lib/rtesseract.rb
67
67
  - rtesseract.gemspec
68
68
  - test/helper.rb
69
+ - test/images/test.bmp
70
+ - test/images/test.jpg
71
+ - test/images/test.png
72
+ - test/images/test.tif
73
+ - test/images/test1.tif
69
74
  - test/test_rtesseract.rb
70
75
  has_rdoc: true
71
76
  homepage: http://github.com/dannnylo/rtesseract