simple-ocr 1.0.2 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
4
- data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
3
+ metadata.gz: 0be030f3372adbcdaf03d9eeaa5be0a4b09a138f
4
+ data.tar.gz: 9fb31cb8b19cd79e6b03ad460867b17d96ffc581
5
5
  SHA512:
6
- metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
7
- data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
6
+ metadata.gz: 395dd09141e4ee03841e23ed86e3333943b97fb3f47b8a46bb82f0513d01d844969cefc20576e4c129179d258df8c5579fdc4d9bb72cc9eb40112cbf5043425a
7
+ data.tar.gz: b0de671ff7cf564bc2d90b57d003bff019097ca22b1fa9597fe99ed66b72df0c7ff53f75e8bf81c7045223f7c4b07ad35ebfe7c8ccf334dd86ef7f36eac0b067
@@ -13,7 +13,8 @@ module OCR
13
13
  #
14
14
  # @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
15
15
  def scan(output_file, options, type)
16
- Scan.new(@image, output_file, options, type).scan_img
17
- end
16
+ Scan.new(@image, output_file, options, type).scan_img
17
+ end
18
+
18
19
  end
19
20
  end
@@ -21,7 +21,7 @@ module OCR
21
21
  #
22
22
  # @return [String] input file path
23
23
  def duplicate_path
24
- @input_file.dup
24
+ return @input_file.dup
25
25
  end
26
26
 
27
27
  # From PDF to Image conversion
@@ -30,7 +30,7 @@ module OCR
30
30
  def image_path
31
31
  duppath = duplicate_path
32
32
  duppath[name_exten[1]] = Path::EXTENS[:png]
33
- duppath
33
+ return duppath
34
34
  end
35
35
 
36
36
  # Clean your Input File
@@ -38,15 +38,15 @@ module OCR
38
38
  # @return [String] Cleaned Image Path
39
39
  def clean_image_path
40
40
  duppath = duplicate_path
41
- duppath[get_filename] = "cleaned_"+get_filename+".png"
42
- duppath
41
+ duppath[get_filename] = "cleaned_"+name_exten[0]+".png"
42
+ return duppath
43
43
  end
44
44
 
45
45
  # Get the FileName
46
46
  #
47
47
  # @return [String] Filename
48
48
  def get_filename
49
- File.basename(@input_file)
49
+ File.basename(@input_file).split("/")[0]
50
50
  end
51
51
  end
52
52
  end
@@ -1,5 +1,4 @@
1
1
  require 'open3'
2
- require 'fileutils'
3
2
 
4
3
  module OCR
5
4
  class Scan
@@ -14,20 +13,20 @@ module OCR
14
13
  @options = options
15
14
  @type = handle_output_type(type)
16
15
  @input_file = input_file
17
- if pdf?(input_file)
16
+ if OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
18
17
  @image = OCR::Path.new(input_file).image_path
19
18
  convert_to_img
20
19
  else
21
20
  @image = input_file
22
21
  end
23
- @clean_image = OCR::Path.new(output_file).clean_image_path
22
+ @clean_image = OCR::Path.new(input_file).clean_image_path
24
23
  end
25
24
 
26
25
  def handle_output_type(type)
27
26
  if type == :pdf
28
- 'pdf'
27
+ "pdf"
29
28
  elsif type == :hocr
30
- 'hocr'
29
+ "hocr"
31
30
  else
32
31
  nil.to_s
33
32
  end
@@ -60,11 +59,8 @@ module OCR
60
59
  # Deleting unnecessary files after processing.
61
60
  def delete_files
62
61
  FileUtils.rm_rf(@clean_image)
63
- FileUtils.rm_rf(@image) if pdf?
62
+ FileUtils.rm_rf(@image) if OCR::Path.new(@input_file).name_exten[1] == "pdf"
64
63
  end
65
64
 
66
- def pdf?(input_file = @input_file)
67
- OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
68
- end
69
65
  end
70
66
  end
@@ -0,0 +1,564 @@
1
+ #!/bin/bash
2
+ #
3
+ # Developed by Fred Weinhaus 6/9/2009 .......... revised 6/26/2015
4
+ #
5
+ # ------------------------------------------------------------------------------
6
+ #
7
+ # Licensing:
8
+ #
9
+ # Copyright © Fred Weinhaus
10
+ #
11
+ # My scripts are available free of charge for non-commercial use, ONLY.
12
+ #
13
+ # For use of my scripts in commercial (for-profit) environments or
14
+ # non-free applications, please contact me (Fred Weinhaus) for
15
+ # licensing arrangements. My email address is fmw at alink dot net.
16
+ #
17
+ # If you: 1) redistribute, 2) incorporate any of these scripts into other
18
+ # free applications or 3) reprogram them in another scripting language,
19
+ # then you must contact me for permission, especially if the result might
20
+ # be used in a commercial or for-profit environment.
21
+ #
22
+ # My scripts are also subject, in a subordinate manner, to the ImageMagick
23
+ # license, which can be found at: http://www.imagemagick.org/script/license.php
24
+ #
25
+ # ------------------------------------------------------------------------------
26
+ #
27
+ ####
28
+ #
29
+ # USAGE: textcleaner [-r rotate] [-l layout] [-c cropoff] [-g] [-e enhance ] [-f filtersize] [-o offset] [-u] [-t threshold] [-s sharpamt] [-S saturation] [-a adaptblur] [-T] [-p padamt] [-b bgcolor] infile outfile
30
+ # USAGE: textcleaner [-help]
31
+ #
32
+ # OPTIONS:
33
+ #
34
+ # -r rotate rotate image 90 degrees in direction specified if
35
+ # aspect ratio does not match layout; options are cw
36
+ # (or clockwise), ccw (or counterclockwise) and n
37
+ # (or none); default=none or no rotation
38
+ # -l layout desired layout; options are p (or portrait) or
39
+ # l (or landscape); default=portrait
40
+ # -c cropoff image cropping offsets after potential rotate 90;
41
+ # choices: one, two or four non-negative integer comma
42
+ # separated values; one value will crop all around;
43
+ # two values will crop at left/right,top/bottom;
44
+ # four values will crop left,top,right,bottom
45
+ # -g convert document to grayscale before enhancing
46
+ # -e enhance enhance image brightness before cleaning;
47
+ # choices are: none, stretch or normalize;
48
+ # default=stretch
49
+ # -f filtersize size of filter used to clean background;
50
+ # integer>0; default=15
51
+ # -o offset offset of filter in percent used to reduce noise;
52
+ # integer>=0; default=5
53
+ # -u unrotate image; cannot unrotate more than
54
+ # about 5 degrees
55
+ # -t threshold text smoothing threshold; 0<=threshold<=100;
56
+ # nominal value is about 50; default is no smoothing
57
+ # -s sharpamt sharpening amount in pixels; float>=0;
58
+ # nominal about 1; default=0
59
+ # -S saturation color saturation expressed as percent; integer>=0;
60
+ # only applicable if -g not set; a value of 100 is
61
+ # no change; default=200 (double saturation)
62
+ # -a adaptblur alternate text smoothing using adaptive blur;
63
+ # floats>=0; default=0 (no smoothing)
64
+ # -T trim background around outer part of image
65
+ # -p padamt border pad amount around outer part of image;
66
+ # integer>=0; default=0
67
+ # -b bgcolor desired color for background; default=white
68
+ #
69
+ ###
70
+ #
71
+ # NAME: TEXTCLEANER
72
+ #
73
+ # PURPOSE: To process a scanned document of text to clean the text background.
74
+ #
75
+ # DESCRIPTION: TEXTCLEANER processes a scanned document of text to clean
76
+ # the text background and enhance the text. The order of processing is:
77
+ # 1) optional 90 degree rotate if aspect does not match layout
78
+ # 2) optional crop,
79
+ # 3) optional convert to grayscale,
80
+ # 4) optional enhance,
81
+ # 5) filter to clean background and optionally smooth/antialias,
82
+ # 6) optional unrotate (limited to about 5 degrees or less),
83
+ # 7) optional text smoothing,
84
+ # 8) optional sharpening,
85
+ # 9) optional saturation change (if -g is not specified),
86
+ # 10) optional alternate text smoothing via adaptive blur
87
+ # 11) optional auto trim of border (effective only if background well-cleaned),
88
+ # 12) optional pad of border
89
+ #
90
+ # OPTIONS:
91
+ #
92
+ # -r rotate ... ROTATE image either clockwise or counterclockwise by 90 degrees,
93
+ # if image aspect ratio does not match the layout mode. Choices are: cw (or
94
+ # clockwise), ccw (or counterclockwise) and n (or none). The default is no rotation.
95
+ #
96
+ # -l layout ... LAYOUT for determining if rotation is to be applied. The choices
97
+ # are p (or portrait) or l (or landscape). The image will be rotated if rotate is
98
+ # specified and the aspect ratio of the image does not match the layout chosen.
99
+ # The default is portrait.
100
+ #
101
+ # -c cropoffsets ... CROPOFFSETS are the image cropping offsets after potential
102
+ # rotate 90. Choices: one, two or four non-negative integer comma separated
103
+ # values. One value will crop all around. Two values will crop at
104
+ # left/right,top/bottom. Four values will crop left,top,right,bottom.
105
+ #
106
+ # -g ... Convert the document to grayscale.
107
+ #
108
+ # -e enhance ... ENHANCE brightness of image. The choices are: none, stretch,
109
+ # or normalize. The default=stretch.
110
+ #
111
+ # -f filtersize ... FILTERSIZE is the size of the filter used to clean up the
112
+ # background. Values are integers>0. The filtersize needs to be larger than
113
+ # the thickness of the writing, but the smaller the better beyond this. Making it
114
+ # larger will increase the processing time and may lose text. The default is 15.
115
+ #
116
+ # -o offset ... OFFSET is the offset threshold in percent used by the filter
117
+ # to eliminate noise. Values are integers>=0. Values too small will leave much
118
+ # noise and artifacts in the result. Values too large will remove too much
119
+ # text leaving gaps. The default is 5.
120
+ #
121
+ # -u ... UNROTATE the image. This is limited to about 5 degrees or less.
122
+ #
123
+ # -t threshold ... THRESHOLD is the text smoothing threshold. Values are integers
124
+ # between 0 and 100. Smaller values smooth/thicken the text more. Larger values
125
+ # thin, but can result in gaps in the text. Nominal value is in the middle at
126
+ # about 50. The default is to disable smoothing.
127
+ #
128
+ # -s sharpamt ... SHARPAMT is the amount of pixel sharpening to be applied to
129
+ # the resulting text. Values are floats>=0. If used, it should be small
130
+ # (suggested about 1). The default=0 (no sharpening).
131
+ #
132
+ # -S saturation ... SATURATION is the desired color saturation of the text
133
+ # expressed as a percentage. Values are integers>=0. A value of 100 is no change.
134
+ # Larger values will make the text colors more saturated. The default=200
135
+ # indicates double saturation. Not applicable when -g option specified.
136
+ #
137
+ # -a adaptblur ... ADAPTBLUR applies an alternate text smoothing using
138
+ # an adaptive blur. The values are floats>=0. The default=0 indicates no
139
+ # blurring.
140
+ #
141
+ # -T ... TRIM the border around the image.
142
+ #
143
+ # -p padamt ... PADAMT is the border pad amount in pixels. The default=0.
144
+ #
145
+ # -b bgcolor ... BGCOLOR is the desired background color after it has been
146
+ # cleaned up. Any valid IM color may be used. The default is white.
147
+ #
148
+ # CAVEAT: No guarantee that this script will work on all platforms,
149
+ # nor that trapping of inconsistent parameters is complete and
150
+ # foolproof. Use At Your Own Risk.
151
+ #
152
+ ######
153
+ #
154
+
155
+ # set default values
156
+ rotate="none" # rotate 90 clockwise (cw) or counterclockwise (ccw)
157
+ layout="portrait" # rotate 90 to match layout; portrait or landscape
158
+ cropoff="" # crop amounts; comma separate list of 1, 2 or 4 integers
159
+ numcrops=0 # number of crops flag
160
+ gray="no" # convert to grayscale flag
161
+ enhance="stretch" # none, stretch, normalize
162
+ filtersize=15 # local area filter size
163
+ offset=5 # local area offset to remove "noise"; too small-get noise, too large-lose text
164
+ threshold="" # smoothing threshold
165
+ sharpamt=0 # sharpen sigma
166
+ saturation=200 # color saturation percent; 100 is no change
167
+ adaptblur=0 # adaptive blur
168
+ unrotate="no" # unrotate flag
169
+ trim="no" # trim flag
170
+ padamt=0 # pad amount
171
+ bgcolor="white" # color for output whiteboard background
172
+
173
+ # set directory for temporary files
174
+ dir="/tmp" # suggestions are dir="." or dir="/tmp"
175
+
176
+ # set up functions to report Usage and Usage with Description
177
+ PROGNAME=`type $0 | awk '{print $3}'` # search for executable on path
178
+ PROGDIR=`dirname $PROGNAME` # extract directory of program
179
+ PROGNAME=`basename $PROGNAME` # base name of program
180
+ usage1()
181
+ {
182
+ echo >&2 ""
183
+ echo >&2 "$PROGNAME:" "$@"
184
+ sed >&2 -e '1,/^####/d; /^###/g; /^#/!q; s/^#//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
185
+ }
186
+ usage2()
187
+ {
188
+ echo >&2 ""
189
+ echo >&2 "$PROGNAME:" "$@"
190
+ sed >&2 -e '1,/^####/d; /^######/g; /^#/!q; s/^#*//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
191
+ }
192
+
193
+
194
+ # function to report error messages
195
+ errMsg()
196
+ {
197
+ echo ""
198
+ echo $1
199
+ echo ""
200
+ usage1
201
+ exit 1
202
+ }
203
+
204
+
205
+ # function to test for minus at start of value of second part of option 1 or 2
206
+ checkMinus()
207
+ {
208
+ test=`echo "$1" | grep -c '^-.*$'` # returns 1 if match; 0 otherwise
209
+ [ $test -eq 1 ] && errMsg "$errorMsg"
210
+ }
211
+
212
+ # test for correct number of arguments and get values
213
+ if [ $# -eq 0 ]
214
+ then
215
+ # help information
216
+ echo ""
217
+ usage2
218
+ exit 0
219
+ elif [ $# -gt 27 ]
220
+ then
221
+ errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
222
+ else
223
+ while [ $# -gt 0 ]
224
+ do
225
+ # get parameter values
226
+ case "$1" in
227
+ -h|-help) # help information
228
+ echo ""
229
+ usage2
230
+ exit 0
231
+ ;;
232
+ -r) # rotate
233
+ shift # to get the next parameter
234
+ # test if parameter starts with minus sign
235
+ errorMsg="--- INVALID ROTATE SPECIFICATION ---"
236
+ checkMinus "$1"
237
+ rotate=`echo "$1" | tr "[:upper:]" "[:lower:]"`
238
+ case "$rotate" in
239
+ none|n) rotate="none" ;;
240
+ clockwise|cw) rotate="cw" ;;
241
+ counterclockwise|ccw) rotate="ccw" ;;
242
+ *) errMsg "--- ROTATE=$rotate IS NOT A VALID CHOICE ---" ;;
243
+ esac
244
+ ;;
245
+ -l) # layout
246
+ shift # to get the next parameter
247
+ # test if parameter starts with minus sign
248
+ errorMsg="--- INVALID LAYOUT SPECIFICATION ---"
249
+ checkMinus "$1"
250
+ layout=`echo "$1" | tr "[:upper:]" "[:lower:]"`
251
+ case "$layout" in
252
+ portrait|p) layout="portrait" ;;
253
+ landscape|l) layout="landscape" ;;
254
+ *) errMsg "--- LAYOUT=$layout IS NOT A VALID CHOICE ---" ;;
255
+ esac
256
+ ;;
257
+ -c) # get cropoffsets
258
+ shift # to get the next parameter
259
+ # test if parameter starts with minus sign
260
+ errorMsg="--- INVALID CROPOFFSETS SPECIFICATION ---"
261
+ checkMinus "$1"
262
+ cropoff="$1"
263
+ cropoff="${cropoff},"
264
+ cropoff=`expr "$cropoff" : '\([,0-9]*\)'`
265
+ numcrops=`echo "$cropoff" | tr "," " " | wc -w`
266
+ [ "$cropoff" = "" ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
267
+ [ $numcrops -ne 1 -a $numcrops -ne 2 -a $numcrops -ne 4 ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
268
+ crop1=`echo "$cropoff" | cut -d, -f1`
269
+ crop2=`echo "$cropoff" | cut -d, -f2`
270
+ crop3=`echo "$cropoff" | cut -d, -f3`
271
+ crop4=`echo "$cropoff" | cut -d, -f4`
272
+ ;;
273
+ -g) # set grayscale
274
+ gray="yes"
275
+ ;;
276
+ -e) # get enhance
277
+ shift # to get the next parameter
278
+ # test if parameter starts with minus sign
279
+ errorMsg="--- INVALID ENHANCE SPECIFICATION ---"
280
+ checkMinus "$1"
281
+ enhance="$1"
282
+ case "$1" in
283
+ none) ;;
284
+ stretch) ;;
285
+ normalize) ;;
286
+ *) errMsg "--- ENHANCE=$enhance IS NOT A VALID CHOICE ---" ;;
287
+ esac
288
+ ;;
289
+ -f) # get filtersize
290
+ shift # to get the next parameter
291
+ # test if parameter starts with minus sign
292
+ errorMsg="--- INVALID FILTERSIZE SPECIFICATION ---"
293
+ checkMinus "$1"
294
+ filtersize=`expr "$1" : '\([0-9]*\)'`
295
+ [ "$filtersize" = "" ] && errMsg "--- FILTERSIZE=$filtersize MUST BE A NON-NEGATIVE INTEGER ---"
296
+ filtersizetest=`echo "$filtersize < 1" | bc`
297
+ [ $filtersizetest -eq 1 ] && errMsg "--- FILTERSIZE=$filtersize MUST BE AN INTEGER GREATER THAN 0 ---"
298
+ ;;
299
+ -o) # get offset
300
+ shift # to get the next parameter
301
+ # test if parameter starts with minus sign
302
+ errorMsg="--- INVALID OFFSET SPECIFICATION ---"
303
+ checkMinus "$1"
304
+ offset=`expr "$1" : '\([0-9]*\)'`
305
+ [ "$offset" = "" ] && errMsg "--- OFFSET=$offset MUST BE A NON-NEGATIVE INTEGER ---"
306
+ ;;
307
+ -t) # get threshold
308
+ shift # to get the next parameter
309
+ # test if parameter starts with minus sign
310
+ errorMsg="--- INVALID THRESHOLD SPECIFICATION ---"
311
+ checkMinus "$1"
312
+ threshold=`expr "$1" : '\([0-9]*\)'`
313
+ [ "$threshold" = "" ] && errMsg "--- THRESHOLD=$threshold MUST BE A NON-NEGATIVE INTEGER ---"
314
+ thresholdtestA=`echo "$threshold < 0" | bc`
315
+ thresholdtestB=`echo "$threshold > 100" | bc`
316
+ [ $thresholdtestA -eq 1 -o $thresholdtestB -eq 1 ] && errMsg "--- THRESHOLD=$threshold MUST BE AN INTEGER GREATER BETWEEN 0 AND 100 ---"
317
+ ;;
318
+ -s) # get sharpamt
319
+ shift # to get the next parameter
320
+ # test if parameter starts with minus sign
321
+ errorMsg="--- INVALID SHARPAMT SPECIFICATION ---"
322
+ checkMinus "$1"
323
+ sharpamt=`expr "$1" : '\([.0-9]*\)'`
324
+ [ "$sharpamt" = "" ] && errMsg "--- SHARPAMT=$sharpamt MUST BE A NON-NEGATIVE FLOAT ---"
325
+ ;;
326
+ -S) # get saturation
327
+ shift # to get the next parameter
328
+ # test if parameter starts with minus sign
329
+ errorMsg="--- INVALID SATURATION SPECIFICATION ---"
330
+ checkMinus "$1"
331
+ saturation=`expr "$1" : '\([0-9]*\)'`
332
+ [ "$saturation" = "" ] && errMsg "--- SATURATION=$saturation MUST BE A NON-NEGATIVE INTEGER ---"
333
+ ;;
334
+ -a) # get adaptblur
335
+ shift # to get the next parameter
336
+ # test if parameter starts with minus sign
337
+ errorMsg="--- INVALID ADAPTBLUR SPECIFICATION ---"
338
+ checkMinus "$1"
339
+ adaptblur=`expr "$1" : '\([.0-9]*\)'`
340
+ [ "$adaptblur" = "" ] && errMsg "--- ADAPTBLUR=$adaptblur MUST BE A NON-NEGATIVE FLOAT ---"
341
+ ;;
342
+ -u) # set unrotate
343
+ unrotate="yes"
344
+ ;;
345
+ -T) # set trim
346
+ trim="yes"
347
+ ;;
348
+ -p) # get padamt
349
+ shift # to get the next parameter
350
+ # test if parameter starts with minus sign
351
+ errorMsg="--- INVALID PADAMT SPECIFICATION ---"
352
+ checkMinus "$1"
353
+ padamt=`expr "$1" : '\([0-9]*\)'`
354
+ [ "$padamt" = "" ] && errMsg "--- PADAMT=$padamt MUST BE A NON-NEGATIVE INTEGER ---"
355
+ ;;
356
+ -b) # get bgcolor
357
+ shift # to get the next parameter
358
+ # test if parameter starts with minus sign
359
+ errorMsg="--- INVALID BACKGROUND COLOR SPECIFICATION ---"
360
+ checkMinus "$1"
361
+ bgcolor="$1"
362
+ ;;
363
+ -) # STDIN and end of arguments
364
+ break
365
+ ;;
366
+ -*) # any other - argument
367
+ errMsg "--- UNKNOWN OPTION ---"
368
+ ;;
369
+ *) # end of arguments
370
+ break
371
+ ;;
372
+ esac
373
+ shift # next option
374
+ done
375
+ #
376
+ # get infile and outfile
377
+ infile="$1"
378
+ outfile="$2"
379
+ fi
380
+
381
+ # test that infile provided
382
+ [ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"
383
+
384
+ # test that outfile provided
385
+ [ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"
386
+
387
+ # get im version
388
+ im_version=`convert -list configure | \
389
+ sed '/^LIB_VERSION_NUMBER /!d; s//,/; s/,/,0/g; s/,0*\([0-9][0-9]\)/\1/g' | head -n 1`
390
+
391
+ tmpA1="$dir/textcleaner_1_$$.mpc"
392
+ tmpA2="$dir/textcleaner_1_$$.cache"
393
+ trap "rm -f $tmpA1 $tmpA2 exit 0;" 0
394
+ trap "rm -f $tmpA1 $tmpA2; exit 1" 1 2 3 15
395
+ #trap "rm -f $tmpA1 $tmpA2; exit 1" ERR
396
+
397
+
398
+ # test for hdri enabled
399
+ # NOTE: must put grep before trap using ERR in case it does not find a match
400
+ if [ "$im_version" -ge "07000000" ]; then
401
+ hdri_on=`convert -version | grep "HDRI"`
402
+ else
403
+ hdri_on=`convert -list configure | grep "enable-hdri"`
404
+ fi
405
+
406
+
407
+ # colorspace RGB and sRGB swapped between 6.7.5.5 and 6.7.6.7
408
+ # though probably not resolved until the latter
409
+ # then -colorspace gray changed to linear between 6.7.6.7 and 6.7.8.2
410
+ # then -separate converted to linear gray channels between 6.7.6.7 and 6.7.8.2,
411
+ # though probably not resolved until the latter
412
+ # so -colorspace HSL/HSB -separate and -colorspace gray became linear
413
+ # but we need to use -set colorspace RGB before using them at appropriate times
414
+ # so that results stay as in original script
415
+ # The following was determined from various version tests using textcleaner
416
+ # with IM 6.7.4.10, 6.7.6.10, 6.7.9.0
417
+ if [ "$im_version" -lt "06070607" -o "$im_version" -gt "06070707" ]; then
418
+ setcspace="-set colorspace RGB"
419
+ else
420
+ setcspace=""
421
+ fi
422
+ # no need for setcspace for grayscale or channels after 6.8.5.4
423
+ if [ "$im_version" -gt "06080504" ]; then
424
+ setcspace=""
425
+ fi
426
+
427
+
428
+ # read the input image into the TMP cached image.
429
+ convert -quiet "$infile" +repage "$tmpA1" ||
430
+ errMsg "--- FILE $infile NOT READABLE OR HAS ZERO SIZE ---"
431
+
432
+ # get image size
433
+ ww=`convert $tmpA1 -ping -format "%w" info:`
434
+ hh=`convert $tmpA1 -ping -format "%h" info:`
435
+
436
+ # get image h/w aspect ratio and determine if portrait=1 (h/w>1) or landscape=0 (h/w<1)
437
+ aspect=`convert xc: -format "%[fx:($hh/$ww)>=1?1:0]" info:`
438
+
439
+ #echo "ww=$ww; hh=$hh; aspect=$aspect"
440
+
441
+ # set up rotation
442
+ if [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "cw" ]; then
443
+ rotation="-rotate 90"
444
+ elif [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "ccw" ]; then
445
+ rotation="-rotate -90"
446
+ elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "cw" ]; then
447
+ rotation="-rotate 90"
448
+ elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "ccw" ]; then
449
+ rotation="-rotate -90"
450
+ else
451
+ rotation=""
452
+ fi
453
+
454
+ # set up cropping
455
+ if [ "$cropoff" != "" -a $numcrops -eq 1 ]; then
456
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
457
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop1]" info:`
458
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop1 +repage"
459
+ elif [ "$cropoff" != "" -a $numcrops -eq 2 ]; then
460
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
461
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop2]" info:`
462
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
463
+ elif [ "$cropoff" != "" -a $numcrops -eq 4 ]; then
464
+ wwc=`convert xc: -format "%[fx:$ww-($crop1+$crop3)]" info:`
465
+ hhc=`convert xc: -format "%[fx:$hh-($crop2+$crop4)]" info:`
466
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
467
+ else
468
+ cropping=""
469
+ fi
470
+ #echo "cropoff=$cropoff; numcrops=$numcrops; cropping=$cropping"
471
+
472
+ # test if grayscale
473
+ grayscale=`convert $tmpA1 -format "%[colorspace]" info:`
474
+ typegray=`convert $tmpA1 -format '%r' info: | grep 'Gray'`
475
+ if [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ]; then
476
+ makegray="$setcspace -colorspace gray -type grayscale"
477
+ else
478
+ makegray=""
479
+ fi
480
+ #echo "makegray=$makegray"
481
+
482
+ # set up enhance
483
+ if [ "$enhance" = "stretch" ]; then
484
+ enhancing="$setcspace -contrast-stretch 0"
485
+ elif [ "$enhance" = "normalize" ]; then
486
+ enhancing="$setcspace -normalize"
487
+ else
488
+ enhancing=""
489
+ fi
490
+ #echo "enhancing=$enhancing"
491
+
492
+ # setup blurring
493
+ if [ "$threshold" = "" ]; then
494
+ blurring=""
495
+ else
496
+ # note: any 0<bluramt<=1, will be the same as using bluramt=1, since radius must be used as an integer
497
+ # bluramt=`convert xc: -format "%[fx:$threshold/100]" info:`
498
+ # blurring="-blur ${bluramt}x65535 -level ${threshold}x100%"
499
+ blurring="-blur 1x65535 -level ${threshold}x100%"
500
+ fi
501
+ #echo "blurring=$blurring"
502
+
503
+ # set up unrotate
504
+ if [ "$unrotate" = "yes" ]; then
505
+ unrotating="-background $bgcolor -deskew 40%"
506
+ else
507
+ unrotating=""
508
+ fi
509
+ #echo "unrotating=$unrotating"
510
+
511
+ # setup sharpening
512
+ if [ "$sharpamt" = "0" -o "$sharpamt" = "0.0" ]; then
513
+ sharpening=""
514
+ else
515
+ sharpening="-sharpen 0x${sharpamt}"
516
+ fi
517
+ #echo "sharpening=$sharpening"
518
+
519
+ # setup modulation
520
+ [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ] && saturation=100
521
+ if [ $saturation -eq 100 ]; then
522
+ modulation=""
523
+ else
524
+ modulation="-modulate 100,$saturation,100"
525
+ fi
526
+ #echo "modulation=$modulation"
527
+
528
+ # set up adaptiveblurring
529
+ if [ "$adaptblur" = "0" ]; then
530
+ adaptiveblurring=""
531
+ else
532
+ adaptiveblurring="-adaptive-blur $adaptblur"
533
+ fi
534
+
535
+ # set up trim
536
+ if [ "$trim" = "yes" -a "$hdri_on" != "" ]; then
537
+ # hdri is enabled
538
+ # need to round near white to pure white for trim to work
539
+ trimming="-white-threshold 99.9% -trim +repage "
540
+ elif [ "$trim" = "yes" -a "$hdri_on" = "" ]; then
541
+ # hdri is not enabled
542
+ trimming="-trim +repage "
543
+ else
544
+ trimming=""
545
+ fi
546
+ #echo "trimming=$trimming"
547
+
548
+ # set up pad
549
+ if [ $padamt -gt 0 ]; then
550
+ # note must reset -compose from -compose copy_opacity as -border uses -compose
551
+ padding="-compose over -bordercolor $bgcolor -border $padamt"
552
+ else
553
+ padding=""
554
+ fi
555
+ #echo "padding=$padding"
556
+
557
+
558
+ # process image
559
+ convert -respect-parenthesis \( $tmpA1 $rotation $cropping $makegray $enhancing \) \
560
+ \( -clone 0 $setcspace -colorspace gray -negate -lat ${filtersize}x${filtersize}+${offset}% -contrast-stretch 0 $blurring \) \
561
+ -compose copy_opacity -composite -fill "$bgcolor" -opaque none -alpha off \
562
+ $unrotating $sharpening $modulation $adaptiveblurring $trimming $padding \
563
+ "$outfile"
564
+ exit 0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Swaathi Kakarla
@@ -22,6 +22,7 @@ files:
22
22
  - lib/simple-ocr/path.rb
23
23
  - lib/simple-ocr/scan.rb
24
24
  - lib/simple-ocr/zonal_ocr.rb
25
+ - lib/textcleaner
25
26
  homepage: http://www.skcript.com
26
27
  licenses:
27
28
  - Closed
@@ -42,7 +43,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
43
  version: '0'
43
44
  requirements: []
44
45
  rubyforge_project:
45
- rubygems_version: 2.4.8
46
+ rubygems_version: 2.4.5
46
47
  signing_key:
47
48
  specification_version: 4
48
49
  summary: OCR Engine by Skcript