rtesseract 0.0.2 → 0.0.3

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/.document CHANGED
File without changes
data/.gitignore CHANGED
File without changes
data/LICENSE CHANGED
File without changes
File without changes
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "rtesseract"
8
- gem.version = '0.0.2'
8
+ gem.version = '0.0.3'
9
9
  gem.summary = "Ruby library for working with the Tesseract OCR."
10
10
  gem.description = "Ruby library for working with the Tesseract OCR."
11
11
  gem.email = "dannnylo@gmail.com"
data/lib/rtesseract.rb CHANGED
@@ -3,18 +3,21 @@ require "pathname"
3
3
  require "tempfile"
4
4
 
5
5
  class RTesseract
6
- VERSION = '0.0.2'
6
+ VERSION = '0.0.3'
7
+ attr_accessor :options
8
+ attr_writer :lang
7
9
 
8
10
  def initialize(src="", options={})
9
- @source = Pathname.new src
10
- @command = options[:command] || "tesseract"
11
+ @source = Pathname.new src
12
+ @command = options.delete(:command) || "tesseract"
13
+ @lang = options.delete(:lang) || options.delete("lang") || ""
11
14
  @options = options
12
15
  @value = ""
13
16
  end
14
17
 
15
18
  def source= src
16
- @source = Pathname.new src
17
19
  @value = ""
20
+ @source = Pathname.new src
18
21
  end
19
22
 
20
23
  def image_name
@@ -30,7 +33,7 @@ class RTesseract
30
33
  end
31
34
 
32
35
  #Remove files
33
- def remove_file(files)
36
+ def remove_file(files=[])
34
37
  files.each do |file|
35
38
  begin
36
39
  File.unlink(file) if File.exist?(file)
@@ -43,11 +46,42 @@ class RTesseract
43
46
  raise "Error on remove file."
44
47
  end
45
48
 
49
+ # Select the language
50
+ #===Languages
51
+ ## * eng - English
52
+ ## * deu - German
53
+ ## * deu-f - German fraktur
54
+ ## * fra - French
55
+ ## * ita - Italian
56
+ ## * nld - Dutch
57
+ ## * por - Portuguese
58
+ ## * spa - Spanish
59
+ ## * vie - Vietnamese
60
+ ## Note: Make sure you have installed the language to tesseract
61
+ def lang
62
+ language = "#{@lang}".strip
63
+ {"eng" => ["eng","en","en-us","english"], "deu" => ["deu"], "deu-f" => ["deu-f"] , "fra" => ["fra"], "ita" => ["ita","it"] , "nld" => ["nld"] , "por" => ["por","pt","pt-br","portuguese"] , "spa" => ["spa"] , "vie" => ["vie"]}.each do |value,names|
64
+ return " -l #{value} " if names.include? language.downcase
65
+ end
66
+ ""
67
+ rescue
68
+ ""
69
+ end
70
+
71
+ def config
72
+ @options ||= {}
73
+ @options.collect{|k,v| "#{k} #{v}" }.join("\n")
74
+ end
75
+
76
+ def config_file
77
+ #TODO: create the config
78
+ end
79
+
46
80
  #Convert image to string
47
81
  def convert
48
82
  tmp_file = Pathname.new(Dir::tmpdir).join("#{@source.basename}")
49
83
  tmp_image = image_to_tiff
50
- `#{@command} #{tmp_image} #{tmp_file.to_s}`
84
+ `#{@command} #{tmp_image} #{tmp_file.to_s} #{lang}`
51
85
  @value = File.read("#{tmp_file.to_s}.txt").to_s
52
86
  remove_file([tmp_image,"#{tmp_file.to_s}.txt"])
53
87
  rescue
data/rtesseract.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rtesseract}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Danilo Jeremias da Silva"]
12
- s.date = %q{2010-09-10}
12
+ s.date = %q{2010-10-11}
13
13
  s.description = %q{Ruby library for working with the Tesseract OCR.}
14
14
  s.email = %q{dannnylo@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,11 @@ Gem::Specification.new do |s|
25
25
  "lib/rtesseract.rb",
26
26
  "rtesseract.gemspec",
27
27
  "test/helper.rb",
28
+ "test/images/test.bmp",
29
+ "test/images/test.jpg",
30
+ "test/images/test.png",
31
+ "test/images/test.tif",
32
+ "test/images/test1.tif",
28
33
  "test/test_rtesseract.rb"
29
34
  ]
30
35
  s.homepage = %q{http://github.com/dannnylo/rtesseract}
File without changes
Binary file
Binary file
Binary file
Binary file
Binary file
data/test/test_rtesseract.rb CHANGED
@@ -1,9 +1,62 @@
1
1
  require 'helper'
2
+ require 'pathname'
2
3
 
3
4
  class TestRtesseract < Test::Unit::TestCase
4
- should "probably rename this file and start testing for real" do
5
- flunk "hey buddy, you should probably rename this file and start testing for real"
6
- end
5
+ context "Path" do
6
+ setup do
7
+ @path = Pathname.new(__FILE__.gsub("test_rtesseract.rb","")).expand_path
8
+ @image_tiff = @path.join("images","test.tif").to_s
9
+ end
10
+
11
+ should "be instantiable" do
12
+ assert_equal RTesseract.new.class , RTesseract
13
+ assert_equal RTesseract.new("").class , RTesseract
14
+ assert_equal RTesseract.new(@image_tiff).class , RTesseract
15
+ end
16
+
17
+ should "translate image to text" do
18
+ assert_equal RTesseract.new(@image_tiff).to_s_without_spaces , "43ZZ"
19
+ assert_equal RTesseract.new(@path.join("images","test1.tif").to_s).to_s_without_spaces , "V2V4"
20
+ end
21
+
22
+ should "translate images .png, .jpg, .bmp" do
23
+ assert_equal RTesseract.new(@path.join("images","test.png").to_s).to_s_without_spaces , "HW9W"
24
+ assert_equal RTesseract.new(@path.join("images","test.jpg").to_s).to_s_without_spaces , "3R8Z"
25
+ assert_equal RTesseract.new(@path.join("images","test.bmp").to_s).to_s_without_spaces , "ZLA6"
26
+ end
27
+
28
+ should "change the image" do
29
+ image = RTesseract.new(@image_tiff)
30
+ assert_equal image.to_s_without_spaces,"43ZZ"
31
+ image.source = @path.join("images","test1.tif").to_s
32
+ assert_equal image.to_s_without_spaces,"V2V4"
33
+ end
7
34
 
35
+ should "select the language" do
36
+ #English
37
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
38
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"en"}).lang , " -l eng "
39
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"en-US"}).lang , " -l eng "
40
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"english"}).lang , " -l eng "
41
+
42
+ #Portuguese
43
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).lang , " -l por "
44
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-BR"}).lang , " -l por "
45
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt-br"}).lang , " -l por "
46
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"pt"}).lang , " -l por "
47
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"portuguese"}).lang , " -l por "
48
+
49
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).to_s_without_spaces , "43ZZ"
50
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"por"}).to_s_without_spaces , "43ZZ"
51
+
52
+ assert_equal RTesseract.new(@image_tiff,{:lang=>"eng"}).lang , " -l eng "
53
+ end
54
+
55
+ should "be configurable" do
56
+ assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0,:enable_assoc=>0,:display_text=>0}).config , "chop_enable 0\nenable_assoc 0\ndisplay_text 0"
57
+ assert_equal RTesseract.new(@image_tiff,{:chop_enable=>0}).config , "chop_enable 0"
58
+ assert_equal RTesseract.new(@image_tiff,{:enable_assoc=>0,:chop_enable=>0}).config , "chop_enable 0\nenable_assoc 0"
59
+ end
60
+ end
8
61
  end
9
62
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rtesseract
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Danilo Jeremias da Silva
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-10 00:00:00 -03:00
18
+ date: 2010-10-11 00:00:00 -03:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -66,6 +66,11 @@ files:
66
66
  - lib/rtesseract.rb
67
67
  - rtesseract.gemspec
68
68
  - test/helper.rb
69
+ - test/images/test.bmp
70
+ - test/images/test.jpg
71
+ - test/images/test.png
72
+ - test/images/test.tif
73
+ - test/images/test1.tif
69
74
  - test/test_rtesseract.rb
70
75
  has_rdoc: true
71
76
  homepage: http://github.com/dannnylo/rtesseract