gocr-ruby 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -1
- data/README.md +22 -2
- data/Rakefile +3 -3
- data/ext/gocr/gocr.c +98 -25
- data/ext/gocr/unicode.h +1 -1
- data/image.png +0 -0
- data/lib/gocr.rb +1 -1
- data/lib/gocr/engine.rb +20 -0
- data/lib/gocr/version.rb +1 -1
- metadata +3 -3
- data/lib/gocr/image.rb +0 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1d257eb31f2bf08f056a30e19597da354ced4147
|
4
|
+
data.tar.gz: b4ed2b93a9fd6aeedf31e874bfec01b14f40b56c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8df4f786fbc95987f1cc37c722af93e768d3c71fd53ff108a23184aca8bfe2e1cb1c1b37d0647785ac1267d09ca3d863ea8dba7d9c3e6673b851808e6611622f
|
7
|
+
data.tar.gz: b7579da8c2e8f2fa1e385c0afbf484a105ecf7368b0727a55cdfff31c9451ef2d9223886a18580c6af647a53a621a5b4496a2147f851acd025699233e13223a0
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# gocr-ruby is a gocr binding for ruby
|
2
2
|
|
3
|
-
|
3
|
+
Ruby binding for the GOCR library (http://jocr.sourceforge.net/).
|
4
|
+
|
5
|
+
This is currently a work-in-progress, unstable version.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -18,7 +20,25 @@ Or install it yourself as:
|
|
18
20
|
|
19
21
|
## Usage
|
20
22
|
|
21
|
-
|
23
|
+
The current API is almost the same as that of ruby-tesseract-ocr.
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require 'gocr'
|
27
|
+
e = GOCR::Engine.new(whitelist: '0-9').text_for('image.png')
|
28
|
+
```
|
29
|
+
|
30
|
+
### Available options
|
31
|
+
|
32
|
+
* `:whitelist` - char filter (ex. hexdigits: "0-9A-Fx", only ASCII)
|
33
|
+
* `:database` - database path including final slash (default is ./db/)
|
34
|
+
* `:format` - output format (ISO8859_1 TeX HTML XML UTF8 ASCII)
|
35
|
+
* `:gray_level` - threshold grey level 0<160<=255 (0 = autodetect)
|
36
|
+
* `:numbers_only` - numbers only
|
37
|
+
* `:mode` - operation modes (bit pattern, see official gocr manual)
|
38
|
+
* `:certainty` - value of certainty (in percent, 0..100, default=95)
|
39
|
+
* `:unrecognize_char` - output this character for every unrecognized character (only the first character of the given string is used)
|
40
|
+
* `:dust_size` - dust_size (remove small clusters, -1 = autodetect)
|
41
|
+
* `:space_width` - spacewidth/dots (0 = autodetect)
|
22
42
|
|
23
43
|
## Contributing
|
24
44
|
|
data/Rakefile
CHANGED
@@ -9,7 +9,7 @@ require 'rubygems/package_task'
|
|
9
9
|
#
|
10
10
|
# See https://github.com/luislavena/rake-compiler for details
|
11
11
|
|
12
|
-
Rake::ExtensionTask.new 'gocr
|
12
|
+
Rake::ExtensionTask.new 'gocr' do |ext|
|
13
13
|
|
14
14
|
# This causes the shared object to be placed in lib/gocr/gocr.so
|
15
15
|
#
|
@@ -19,7 +19,7 @@ Rake::ExtensionTask.new 'gocr-ruby' do |ext|
|
|
19
19
|
ext.lib_dir = 'lib/gocr'
|
20
20
|
end
|
21
21
|
|
22
|
-
s = Gem::Specification.new 'gocr
|
22
|
+
s = Gem::Specification.new 'gocr', '0.0.1' do |s|
|
23
23
|
s.summary = 'simple gocr wrapper'
|
24
24
|
s.authors = %w[zyablitskiy@gmail.com]
|
25
25
|
|
@@ -40,7 +40,7 @@ Gem::PackageTask.new s do end
|
|
40
40
|
# This isn't a good test, but does provide a sanity check
|
41
41
|
|
42
42
|
task test: %w[compile] do
|
43
|
-
ruby '-Ilib', '-rgocr', '-e', 'p GOCR::
|
43
|
+
ruby '-Ilib', '-rgocr', '-e', 'p GOCR::Engine.new(numbers_only: false).text_for("image.png")'
|
44
44
|
# ruby '-Ilib', '-rgocr', '-e', 'p 3'
|
45
45
|
end
|
46
46
|
|
data/ext/gocr/gocr.c
CHANGED
@@ -42,7 +42,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
|
42
42
|
#include "ocr0.h" /* only_numbers */
|
43
43
|
#include "progress.h"
|
44
44
|
#include "version.h"
|
45
|
-
#include
|
45
|
+
#include <ruby.h>
|
46
46
|
|
47
47
|
/*
|
48
48
|
#ifndef RSTRING_PTR
|
@@ -322,9 +322,12 @@ static int read_picture(job_t *job) {
|
|
322
322
|
}
|
323
323
|
|
324
324
|
/* subject of change, we need more output for XML (ToDo) */
|
325
|
-
|
325
|
+
char* print_output(job_t *job) {
|
326
326
|
int linecounter = 0;
|
327
327
|
const char *line;
|
328
|
+
char** lines;
|
329
|
+
char* output;
|
330
|
+
int i = 0, j, n = 0;
|
328
331
|
|
329
332
|
assert(job);
|
330
333
|
|
@@ -332,61 +335,131 @@ void print_output(job_t *job) {
|
|
332
335
|
simplify code 2010-09-26
|
333
336
|
*/
|
334
337
|
linecounter = 0;
|
338
|
+
lines = (char**) malloc(job->res.lines.num);
|
335
339
|
line = getTextLine(&(job->res.linelist), linecounter++);
|
336
340
|
while (line) {
|
341
|
+
n += strlen(line) + 1;
|
342
|
+
lines[i] = (char*) malloc(strlen(line));
|
343
|
+
strcpy(lines[i++], line);
|
337
344
|
/* notice: decode() has been moved into getTextLine since 0.38 */
|
338
|
-
fputs(line, stdout);
|
339
345
|
if (job->cfg.out_format==HTML) fputs("<br />",stdout);
|
340
346
|
if (job->cfg.out_format!=XML) fputc('\n', stdout);
|
341
347
|
line = getTextLine(&(job->res.linelist), linecounter++);
|
342
348
|
}
|
343
|
-
free_textlines(&(job->res.linelist));
|
349
|
+
// free_textlines(&(job->res.linelist));
|
350
|
+
|
351
|
+
output = (char*) malloc(n);
|
352
|
+
strcpy(output, lines[0]);
|
353
|
+
for(j = 1; j < i; j++) {
|
354
|
+
strcat(output, "\n");
|
355
|
+
strcat(output, lines[j]);
|
356
|
+
}
|
357
|
+
|
358
|
+
return output;
|
344
359
|
}
|
345
360
|
|
346
361
|
/* FIXME jb: remove JOB; renamed to OCR_JOB 2010-09-26 */
|
347
362
|
job_t *OCR_JOB;
|
348
363
|
|
349
364
|
|
350
|
-
char*
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
job=OCR_JOB=&job1;
|
355
|
-
|
356
|
-
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
|
365
|
+
char* gocr_main(job_t* job) {
|
366
|
+
int multipnm=1;
|
367
|
+
char* output;
|
368
|
+
setvbuf(stdout, (char *) NULL, _IONBF, 0); /* not buffered */
|
357
369
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
if ( job->cfg.mode & 2 ) /* check for db-option flag */
|
362
|
-
load_db(job);
|
370
|
+
/* load character data base (JS1002: now outside pgm2asc) */
|
371
|
+
if ( job->cfg.mode & 2 ) /* check for db-option flag */
|
372
|
+
load_db(job);
|
363
373
|
/* load_db uses readpnm() and would conflict with multi images */
|
364
374
|
|
375
|
+
while (multipnm==1) { /* multi-image loop */
|
376
|
+
|
365
377
|
job_init_image(job); /* single image */
|
378
|
+
|
366
379
|
mark_start(job);
|
367
|
-
read_picture(job);
|
368
380
|
|
381
|
+
multipnm = read_picture(job);
|
382
|
+
/* separation of main and rest for using as lib
|
383
|
+
this will be changed later => introduction of set_option()
|
384
|
+
for better communication to the engine */
|
385
|
+
if (multipnm<0) break; /* read error */
|
386
|
+
|
387
|
+
/* call main loop */
|
369
388
|
pgm2asc(job);
|
389
|
+
|
370
390
|
mark_end(job);
|
371
|
-
|
391
|
+
|
392
|
+
output = print_output(job);
|
393
|
+
|
372
394
|
job_free_image(job);
|
373
|
-
|
395
|
+
|
396
|
+
}
|
397
|
+
|
398
|
+
// return ((multipnm<0)?multipnm:0); /* -1=255 on error, 0 ok */
|
399
|
+
return output;
|
374
400
|
}
|
375
401
|
|
376
402
|
|
377
403
|
static VALUE image_recognize(VALUE self, VALUE arg) {
|
378
|
-
|
379
|
-
|
404
|
+
VALUE tmp;
|
405
|
+
|
406
|
+
job_t job1, *job; /* fixme, dont want global variables for lib */
|
407
|
+
job=OCR_JOB=&job1;
|
408
|
+
job_init(job);
|
409
|
+
|
410
|
+
job->src.fname = StringValuePtr(arg);
|
411
|
+
tmp = rb_iv_get(self, "@database");
|
412
|
+
if (tmp != Qnil) {
|
413
|
+
job->cfg.db_path = StringValuePtr(tmp);
|
414
|
+
}
|
415
|
+
tmp = rb_iv_get(self, "@format");
|
416
|
+
if (tmp != Qnil) {
|
417
|
+
job->cfg.out_format = NUM2INT(tmp);
|
418
|
+
}
|
419
|
+
tmp = rb_iv_get(self, "@whitelist");
|
420
|
+
if (tmp != Qnil) {
|
421
|
+
if (strlen(StringValuePtr(tmp)) > 0)
|
422
|
+
job->cfg.cfilter = StringValuePtr(tmp);
|
423
|
+
}
|
424
|
+
tmp = rb_iv_get(self, "@dust_size");
|
425
|
+
if (tmp != Qnil) {
|
426
|
+
job->cfg.dust_size = NUM2INT(tmp);
|
427
|
+
}
|
428
|
+
tmp = rb_iv_get(self, "@gray_level");
|
429
|
+
if (tmp != Qnil) {
|
430
|
+
job->cfg.cs = NUM2INT(tmp);
|
431
|
+
}
|
432
|
+
tmp = rb_iv_get(self, "@space_width");
|
433
|
+
if (tmp != Qnil) {
|
434
|
+
job->cfg.spc = NUM2INT(tmp);
|
435
|
+
}
|
436
|
+
tmp = rb_iv_get(self, "@mode");
|
437
|
+
if (tmp != Qnil) {
|
438
|
+
job->cfg.mode |= NUM2INT(tmp);
|
439
|
+
}
|
440
|
+
tmp = rb_iv_get(self, "@numbers_only");
|
441
|
+
if (tmp == Qtrue) {
|
442
|
+
job->cfg.only_numbers = 1;
|
443
|
+
}
|
444
|
+
tmp = rb_iv_get(self, "@certainty");
|
445
|
+
if (tmp != Qnil) {
|
446
|
+
job->cfg.certainty = NUM2INT(tmp);
|
447
|
+
}
|
448
|
+
tmp = rb_iv_get(self, "@unrecognize_char");
|
449
|
+
if (tmp != Qnil) {
|
450
|
+
job->cfg.unrec_marker = StringValuePtr(tmp)[0];
|
451
|
+
}
|
452
|
+
return rb_str_new2( gocr_main(job) );
|
380
453
|
}
|
381
454
|
|
382
455
|
/*
|
383
|
-
* @brief define ruby class GOCR::
|
456
|
+
* @brief define ruby class GOCR::Engine and method text_for
|
384
457
|
*
|
385
458
|
*/
|
386
459
|
void Init_gocr() {
|
387
460
|
VALUE mGocr = rb_define_module("GOCR");
|
388
|
-
VALUE mImage = rb_define_class_under(mGocr, "
|
389
|
-
|
461
|
+
VALUE mImage = rb_define_class_under(mGocr, "Engine", rb_cObject);
|
462
|
+
rb_define_method(mImage, "text_for", image_recognize, 1);
|
390
463
|
}
|
391
464
|
|
392
465
|
|
@@ -433,4 +506,4 @@ void Init_gocr() {
|
|
433
506
|
// }
|
434
507
|
//
|
435
508
|
// return ((multipnm<0)?multipnm:0); /* -1=255 on error, 0 ok */
|
436
|
-
//}
|
509
|
+
//}
|
data/ext/gocr/unicode.h
CHANGED
data/image.png
CHANGED
Binary file
|
data/lib/gocr.rb
CHANGED
data/lib/gocr/engine.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "gocr/gocr"
|
2
|
+
|
3
|
+
module GOCR
|
4
|
+
class Engine
|
5
|
+
attr_accessor :whitelist, :blacklist, :database, :format, :gray_level, :numbers_only,
|
6
|
+
:mode, :certainty, :unrecognize_char, :dust_size, :space_width
|
7
|
+
|
8
|
+
FORMATS = Hash[%w(UTF8 ISO8859_1 TeX HTML XML ASCII).map.with_index.to_a].freeze
|
9
|
+
|
10
|
+
def initialize(options={})
|
11
|
+
options.each do |k, v|
|
12
|
+
send("#{k}=", v) if respond_to?(k)
|
13
|
+
end
|
14
|
+
@format = FORMATS[format].to_i
|
15
|
+
@dust_size = -1 if dust_size.nil?
|
16
|
+
@unrecognize_char = @unrecognize_char[0] unless unrecognize_char.nil?
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
end
|
data/lib/gocr/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gocr-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Vladimir Zyablitskiy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -126,7 +126,7 @@ files:
|
|
126
126
|
- gocr-ruby.gemspec
|
127
127
|
- image.png
|
128
128
|
- lib/gocr.rb
|
129
|
-
- lib/gocr/
|
129
|
+
- lib/gocr/engine.rb
|
130
130
|
- lib/gocr/version.rb
|
131
131
|
homepage: https://github.com/rainlabs/gocr-ruby
|
132
132
|
licenses:
|