gocr-ruby 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/README.md +22 -2
- data/Rakefile +3 -3
- data/ext/gocr/gocr.c +98 -25
- data/ext/gocr/unicode.h +1 -1
- data/image.png +0 -0
- data/lib/gocr.rb +1 -1
- data/lib/gocr/engine.rb +20 -0
- data/lib/gocr/version.rb +1 -1
- metadata +3 -3
- data/lib/gocr/image.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d257eb31f2bf08f056a30e19597da354ced4147
|
4
|
+
data.tar.gz: b4ed2b93a9fd6aeedf31e874bfec01b14f40b56c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8df4f786fbc95987f1cc37c722af93e768d3c71fd53ff108a23184aca8bfe2e1cb1c1b37d0647785ac1267d09ca3d863ea8dba7d9c3e6673b851808e6611622f
|
7
|
+
data.tar.gz: b7579da8c2e8f2fa1e385c0afbf484a105ecf7368b0727a55cdfff31c9451ef2d9223886a18580c6af647a53a621a5b4496a2147f851acd025699233e13223a0
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# gocr-ruby is a gocr binding for ruby
|
2
2
|
|
3
|
-
|
3
|
+
Ruby binding for the GOCR library (http://jocr.sourceforge.net/).
|
4
|
+
|
5
|
+
It is currently a work in progress and should be considered unstable.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -18,7 +20,25 @@ Or install it yourself as:
|
|
18
20
|
|
19
21
|
## Usage
|
20
22
|
|
21
|
-
|
23
|
+
The current API is almost the same as that of ruby-tesseract-ocr.
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require 'gocr'
|
27
|
+
e = GOCR::Engine.new(whitelist: '0-9').text_for('image.png')
|
28
|
+
```
|
29
|
+
|
30
|
+
### Available options
|
31
|
+
|
32
|
+
* `:whitelist` - char filter (ex. hexdigits: "0-9A-Fx", only ASCII)
|
33
|
+
* `:database` - database path including final slash (default is ./db/)
|
34
|
+
* `:format` - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)
|
35
|
+
* `:gray_level` - threshold grey level 0<160<=255 (0 = autodetect)
|
36
|
+
* `:numbers_only` - numbers only
|
37
|
+
* `:mode` - operation modes (bitpattern, see official gocr manual)
|
38
|
+
* `:certainty` - value of certainty (in percent, 0..100, default=95)
|
39
|
+
* `:unrecognize_char` - output this string for every unrecognized character
|
40
|
+
* `:dust_size` - dust_size (remove small clusters, -1 = autodetect)
|
41
|
+
* `:space_width` - spacewidth/dots (0 = autodetect)
|
22
42
|
|
23
43
|
## Contributing
|
24
44
|
|
data/Rakefile
CHANGED
@@ -9,7 +9,7 @@ require 'rubygems/package_task'
|
|
9
9
|
#
|
10
10
|
# See https://github.com/luislavena/rake-compiler for details
|
11
11
|
|
12
|
-
Rake::ExtensionTask.new 'gocr-ruby' do |ext|
|
12
|
+
Rake::ExtensionTask.new 'gocr' do |ext|
|
13
13
|
|
14
14
|
# This causes the shared object to be placed in lib/my_malloc/my_malloc.so
|
15
15
|
#
|
@@ -19,7 +19,7 @@ Rake::ExtensionTask.new 'gocr-ruby' do |ext|
|
|
19
19
|
ext.lib_dir = 'lib/gocr'
|
20
20
|
end
|
21
21
|
|
22
|
-
s = Gem::Specification.new 'gocr
|
22
|
+
s = Gem::Specification.new 'gocr', '0.0.1' do |s|
|
23
23
|
s.summary = 'simple gocr wrapper'
|
24
24
|
s.authors = %w[zyablitskiy@gmail.com]
|
25
25
|
|
@@ -40,7 +40,7 @@ Gem::PackageTask.new s do end
|
|
40
40
|
# This isn't a good test, but does provide a sanity check
|
41
41
|
|
42
42
|
task test: %w[compile] do
|
43
|
-
ruby '-Ilib', '-rgocr', '-e', 'p GOCR::
|
43
|
+
ruby '-Ilib', '-rgocr', '-e', 'p GOCR::Engine.new(numbers_only: false).text_for("image.png")'
|
44
44
|
# ruby '-Ilib', '-rgocr', '-e', 'p 3'
|
45
45
|
end
|
46
46
|
|
data/ext/gocr/gocr.c
CHANGED
@@ -42,7 +42,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
42
42
|
#include "ocr0.h" /* only_numbers */
|
43
43
|
#include "progress.h"
|
44
44
|
#include "version.h"
|
45
|
-
#include
|
45
|
+
#include <ruby.h>
|
46
46
|
|
47
47
|
/*
|
48
48
|
#ifndef RSTRING_PTR
|
@@ -322,9 +322,12 @@ static int read_picture(job_t *job) {
|
|
322
322
|
}
|
323
323
|
|
324
324
|
/* subject of change, we need more output for XML (ToDo) */
|
325
|
-
|
325
|
+
char* print_output(job_t *job) {
|
326
326
|
int linecounter = 0;
|
327
327
|
const char *line;
|
328
|
+
char** lines;
|
329
|
+
char* output;
|
330
|
+
int i = 0, j, n = 0;
|
328
331
|
|
329
332
|
assert(job);
|
330
333
|
|
@@ -332,61 +335,131 @@ void print_output(job_t *job) {
|
|
332
335
|
simplify code 2010-09-26
|
333
336
|
*/
|
334
337
|
linecounter = 0;
|
338
|
+
lines = (char**) malloc(job->res.lines.num);
|
335
339
|
line = getTextLine(&(job->res.linelist), linecounter++);
|
336
340
|
while (line) {
|
341
|
+
n += strlen(line) + 1;
|
342
|
+
lines[i] = (char*) malloc(strlen(line));
|
343
|
+
strcpy(lines[i++], line);
|
337
344
|
/* notice: decode() is shiftet to getTextLine since 0.38 */
|
338
|
-
fputs(line, stdout);
|
339
345
|
if (job->cfg.out_format==HTML) fputs("<br />",stdout);
|
340
346
|
if (job->cfg.out_format!=XML) fputc('\n', stdout);
|
341
347
|
line = getTextLine(&(job->res.linelist), linecounter++);
|
342
348
|
}
|
343
|
-
free_textlines(&(job->res.linelist));
|
349
|
+
// free_textlines(&(job->res.linelist));
|
350
|
+
|
351
|
+
output = (char*) malloc(n);
|
352
|
+
strcpy(output, lines[0]);
|
353
|
+
for(j = 1; j < i; j++) {
|
354
|
+
strcat(output, "\n");
|
355
|
+
strcat(output, lines[j]);
|
356
|
+
}
|
357
|
+
|
358
|
+
return output;
|
344
359
|
}
|
345
360
|
|
346
361
|
/* FIXME jb: remove JOB; renamed to OCR_JOB 2010-09-26 */
|
347
362
|
job_t *OCR_JOB;
|
348
363
|
|
349
364
|
|
350
|
-
char*
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
job=OCR_JOB=&job1;
|
355
|
-
|
356
|
-
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
|
365
|
+
char* gocr_main(job_t* job) {
|
366
|
+
int multipnm=1;
|
367
|
+
char* output;
|
368
|
+
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
|
357
369
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
if ( job->cfg.mode & 2 ) /* check for db-option flag */
|
362
|
-
load_db(job);
|
370
|
+
/* load character data base (JS1002: now outside pgm2asc) */
|
371
|
+
if ( job->cfg.mode & 2 ) /* check for db-option flag */
|
372
|
+
load_db(job);
|
363
373
|
/* load_db uses readpnm() and would conflict with multi images */
|
364
374
|
|
375
|
+
while (multipnm==1) { /* multi-image loop */
|
376
|
+
|
365
377
|
job_init_image(job); /* single image */
|
378
|
+
|
366
379
|
mark_start(job);
|
367
|
-
read_picture(job);
|
368
380
|
|
381
|
+
multipnm = read_picture(job);
|
382
|
+
/* separation of main and rest for using as lib
|
383
|
+
this will be changed later => introduction of set_option()
|
384
|
+
for better communication to the engine */
|
385
|
+
if (multipnm<0) break; /* read error */
|
386
|
+
|
387
|
+
/* call main loop */
|
369
388
|
pgm2asc(job);
|
389
|
+
|
370
390
|
mark_end(job);
|
371
|
-
|
391
|
+
|
392
|
+
output = print_output(job);
|
393
|
+
|
372
394
|
job_free_image(job);
|
373
|
-
|
395
|
+
|
396
|
+
}
|
397
|
+
|
398
|
+
// return ((multipnm<0)?multipnm:0); /* -1=255 on error, 0 ok */
|
399
|
+
return output;
|
374
400
|
}
|
375
401
|
|
376
402
|
|
377
403
|
static VALUE image_recognize(VALUE self, VALUE arg) {
|
378
|
-
|
379
|
-
|
404
|
+
VALUE tmp;
|
405
|
+
|
406
|
+
job_t job1, *job; /* fixme, dont want global variables for lib */
|
407
|
+
job=OCR_JOB=&job1;
|
408
|
+
job_init(job);
|
409
|
+
|
410
|
+
job->src.fname = StringValuePtr(arg);
|
411
|
+
tmp = rb_iv_get(self, "@database");
|
412
|
+
if (tmp != Qnil) {
|
413
|
+
job->cfg.db_path = StringValuePtr(tmp);
|
414
|
+
}
|
415
|
+
tmp = rb_iv_get(self, "@format");
|
416
|
+
if (tmp != Qnil) {
|
417
|
+
job->cfg.out_format = NUM2INT(tmp);
|
418
|
+
}
|
419
|
+
tmp = rb_iv_get(self, "@whitelist");
|
420
|
+
if (tmp != Qnil) {
|
421
|
+
if (strlen(StringValuePtr(tmp)) > 0)
|
422
|
+
job->cfg.cfilter = StringValuePtr(tmp);
|
423
|
+
}
|
424
|
+
tmp = rb_iv_get(self, "@dust_size");
|
425
|
+
if (tmp != Qnil) {
|
426
|
+
job->cfg.dust_size = NUM2INT(tmp);
|
427
|
+
}
|
428
|
+
tmp = rb_iv_get(self, "@gray_level");
|
429
|
+
if (tmp != Qnil) {
|
430
|
+
job->cfg.cs = NUM2INT(tmp);
|
431
|
+
}
|
432
|
+
tmp = rb_iv_get(self, "@space_width");
|
433
|
+
if (tmp != Qnil) {
|
434
|
+
job->cfg.spc = NUM2INT(tmp);
|
435
|
+
}
|
436
|
+
tmp = rb_iv_get(self, "@mode");
|
437
|
+
if (tmp != Qnil) {
|
438
|
+
job->cfg.mode |= NUM2INT(tmp);
|
439
|
+
}
|
440
|
+
tmp = rb_iv_get(self, "@numbers_only");
|
441
|
+
if (tmp == Qtrue) {
|
442
|
+
job->cfg.only_numbers = 1;
|
443
|
+
}
|
444
|
+
tmp = rb_iv_get(self, "@certainty");
|
445
|
+
if (tmp != Qnil) {
|
446
|
+
job->cfg.certainty = NUM2INT(tmp);
|
447
|
+
}
|
448
|
+
tmp = rb_iv_get(self, "@unrecognize_char");
|
449
|
+
if (tmp != Qnil) {
|
450
|
+
job->cfg.unrec_marker = StringValuePtr(tmp)[0];
|
451
|
+
}
|
452
|
+
return rb_str_new2( gocr_main(job) );
|
380
453
|
}
|
381
454
|
|
382
455
|
/*
|
383
|
-
* @brief define ruby class GOCR::
|
456
|
+
* @brief define ruby class GOCR::Engine and method text_for
|
384
457
|
*
|
385
458
|
*/
|
386
459
|
void Init_gocr() {
|
387
460
|
VALUE mGocr = rb_define_module("GOCR");
|
388
|
-
VALUE mImage = rb_define_class_under(mGocr, "
|
389
|
-
|
461
|
+
VALUE mImage = rb_define_class_under(mGocr, "Engine", rb_cObject);
|
462
|
+
rb_define_method(mImage, "text_for", image_recognize, 1);
|
390
463
|
}
|
391
464
|
|
392
465
|
|
@@ -433,4 +506,4 @@ void Init_gocr() {
|
|
433
506
|
// }
|
434
507
|
//
|
435
508
|
// return ((multipnm<0)?multipnm:0); /* -1=255 on error, 0 ok */
|
436
|
-
//}
|
509
|
+
//}
|
data/ext/gocr/unicode.h
CHANGED
data/image.png
CHANGED
Binary file
|
data/lib/gocr.rb
CHANGED
data/lib/gocr/engine.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "gocr/gocr"
|
2
|
+
|
3
|
+
module GOCR
|
4
|
+
class Engine
|
5
|
+
attr_accessor :whitelist, :blacklist, :database, :format, :gray_level, :numbers_only,
|
6
|
+
:mode, :certainty, :unrecognize_char, :dust_size, :space_width
|
7
|
+
|
8
|
+
FORMATS = Hash[%w(UTF8 ISO8859_1 TeX HTML XML ASCII).map.with_index.to_a].freeze
|
9
|
+
|
10
|
+
def initialize(options={})
|
11
|
+
options.each do |k, v|
|
12
|
+
send("#{k}=", v) if respond_to?(k)
|
13
|
+
end
|
14
|
+
@format = FORMATS[format].to_i
|
15
|
+
@dust_size = -1 if dust_size.nil?
|
16
|
+
@unrecognize_char = @unrecognize_char[0] unless unrecognize_char.nil?
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
data/lib/gocr/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gocr-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladimir Zyablitskiy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -126,7 +126,7 @@ files:
|
|
126
126
|
- gocr-ruby.gemspec
|
127
127
|
- image.png
|
128
128
|
- lib/gocr.rb
|
129
|
-
- lib/gocr/image.rb
|
129
|
+
- lib/gocr/engine.rb
|
130
130
|
- lib/gocr/version.rb
|
131
131
|
homepage: https://github.com/rainlabs/gocr-ruby
|
132
132
|
licenses:
|