simple-ocr 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9709934460b8582dd2a2ddf24c9c46a6ea1fed9b
4
- data.tar.gz: da7c6d2f038b5dade09dc8e18a51d1a83b2be5bb
3
+ metadata.gz: 0be030f3372adbcdaf03d9eeaa5be0a4b09a138f
4
+ data.tar.gz: 9fb31cb8b19cd79e6b03ad460867b17d96ffc581
5
5
  SHA512:
6
- metadata.gz: a8b518651bc8d31ad658199302917d0b313d753d39128b4ceb67404ffbaed31bbc5343a03b90d43a50be60e529c4b2cce157f7e56bfce0007ea8d0a298e87401
7
- data.tar.gz: 68a05698d7de9d6738d0a4f68aebfc2455ad75a2a6f7930a2e5db9fa9b8f2aac3885a0f01f83dec916c2c0217e1d31f4ec8963f13bad796c9fce299e071b7811
6
+ metadata.gz: 395dd09141e4ee03841e23ed86e3333943b97fb3f47b8a46bb82f0513d01d844969cefc20576e4c129179d258df8c5579fdc4d9bb72cc9eb40112cbf5043425a
7
+ data.tar.gz: b0de671ff7cf564bc2d90b57d003bff019097ca22b1fa9597fe99ed66b72df0c7ff53f75e8bf81c7045223f7c4b07ad35ebfe7c8ccf334dd86ef7f36eac0b067
@@ -13,7 +13,8 @@ module OCR
13
13
  #
14
14
  # @params [String, String, String] path to output file, options of conversion (e.g. Language), output format of file.
15
15
  def scan(output_file, options, type)
16
- Scan.new(@image, output_file, options, type).scan_img
17
- end
16
+ Scan.new(@image, output_file, options, type).scan_img
17
+ end
18
+
18
19
  end
19
20
  end
@@ -21,7 +21,7 @@ module OCR
21
21
  #
22
22
  # @return [String] input file path
23
23
  def duplicate_path
24
- @input_file.dup
24
+ return @input_file.dup
25
25
  end
26
26
 
27
27
  # From PDF to Image conversion
@@ -30,7 +30,7 @@ module OCR
30
30
  def image_path
31
31
  duppath = duplicate_path
32
32
  duppath[name_exten[1]] = Path::EXTENS[:png]
33
- duppath
33
+ return duppath
34
34
  end
35
35
 
36
36
  # Clean your Input File
@@ -38,15 +38,15 @@ module OCR
38
38
  # @return [String] Cleaned Image Path
39
39
  def clean_image_path
40
40
  duppath = duplicate_path
41
- duppath[get_filename] = "cleaned_"+get_filename+".png"
42
- duppath
41
+ duppath[get_filename] = "cleaned_"+name_exten[0]+".png"
42
+ return duppath
43
43
  end
44
44
 
45
45
  # Get the FileName
46
46
  #
47
47
  # @return [String] Filename
48
48
  def get_filename
49
- File.basename(@input_file)
49
+ File.basename(@input_file).split("/")[0]
50
50
  end
51
51
  end
52
52
  end
@@ -1,5 +1,4 @@
1
1
  require 'open3'
2
- require 'fileutils'
3
2
 
4
3
  module OCR
5
4
  class Scan
@@ -14,20 +13,20 @@ module OCR
14
13
  @options = options
15
14
  @type = handle_output_type(type)
16
15
  @input_file = input_file
17
- if pdf?(input_file)
16
+ if OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
18
17
  @image = OCR::Path.new(input_file).image_path
19
18
  convert_to_img
20
19
  else
21
20
  @image = input_file
22
21
  end
23
- @clean_image = OCR::Path.new(output_file).clean_image_path
22
+ @clean_image = OCR::Path.new(input_file).clean_image_path
24
23
  end
25
24
 
26
25
  def handle_output_type(type)
27
26
  if type == :pdf
28
- 'pdf'
27
+ "pdf"
29
28
  elsif type == :hocr
30
- 'hocr'
29
+ "hocr"
31
30
  else
32
31
  nil.to_s
33
32
  end
@@ -60,11 +59,8 @@ module OCR
60
59
  # Deleting unnecessary files after processing.
61
60
  def delete_files
62
61
  FileUtils.rm_rf(@clean_image)
63
- FileUtils.rm_rf(@image) if pdf?
62
+ FileUtils.rm_rf(@image) if OCR::Path.new(@input_file).name_exten[1] == "pdf"
64
63
  end
65
64
 
66
- def pdf?(input_file = @input_file)
67
- OCR::Path.new(input_file).name_exten[1] == OCR::Path::EXTENS[:pdf]
68
- end
69
65
  end
70
66
  end
@@ -0,0 +1,564 @@
1
+ #!/bin/bash
2
+ #
3
+ # Developed by Fred Weinhaus 6/9/2009 .......... revised 6/26/2015
4
+ #
5
+ # ------------------------------------------------------------------------------
6
+ #
7
+ # Licensing:
8
+ #
9
+ # Copyright © Fred Weinhaus
10
+ #
11
+ # My scripts are available free of charge for non-commercial use, ONLY.
12
+ #
13
+ # For use of my scripts in commercial (for-profit) environments or
14
+ # non-free applications, please contact me (Fred Weinhaus) for
15
+ # licensing arrangements. My email address is fmw at alink dot net.
16
+ #
17
+ # If you: 1) redistribute, 2) incorporate any of these scripts into other
18
+ # free applications or 3) reprogram them in another scripting language,
19
+ # then you must contact me for permission, especially if the result might
20
+ # be used in a commercial or for-profit environment.
21
+ #
22
+ # My scripts are also subject, in a subordinate manner, to the ImageMagick
23
+ # license, which can be found at: http://www.imagemagick.org/script/license.php
24
+ #
25
+ # ------------------------------------------------------------------------------
26
+ #
27
+ ####
28
+ #
29
+ # USAGE: textcleaner [-r rotate] [-l layout] [-c cropoff] [-g] [-e enhance] [-f filtersize] [-o offset] [-u] [-t threshold] [-s sharpamt] [-S saturation] [-a adaptblur] [-T] [-p padamt] [-b bgcolor] infile outfile
30
+ # USAGE: textcleaner [-help]
31
+ #
32
+ # OPTIONS:
33
+ #
34
+ # -r rotate rotate image 90 degrees in direction specified if
35
+ # aspect ratio does not match layout; options are cw
36
+ # (or clockwise), ccw (or counterclockwise) and n
37
+ # (or none); default=none or no rotation
38
+ # -l layout desired layout; options are p (or portrait) or
39
+ # l (or landscape); default=portrait
40
+ # -c cropoff image cropping offsets after potential rotate 90;
41
+ # choices: one, two or four non-negative integer comma
42
+ # separated values; one value will crop all around;
43
+ # two values will crop at left/right,top/bottom;
44
+ # four values will crop left,top,right,bottom
45
+ # -g convert document to grayscale before enhancing
46
+ # -e enhance enhance image brightness before cleaning;
47
+ # choices are: none, stretch or normalize;
48
+ # default=stretch
49
+ # -f filtersize size of filter used to clean background;
50
+ # integer>0; default=15
51
+ # -o offset offset of filter in percent used to reduce noise;
52
+ # integer>=0; default=5
53
+ # -u unrotate image; cannot unrotate more than
54
+ # about 5 degrees
55
+ # -t threshold text smoothing threshold; 0<=threshold<=100;
56
+ # nominal value is about 50; default is no smoothing
57
+ # -s sharpamt sharpening amount in pixels; float>=0;
58
+ # nominal about 1; default=0
59
+ # -S saturation color saturation expressed as percent; integer>=0;
60
+ # only applicable if -g not set; a value of 100 is
61
+ # no change; default=200 (double saturation)
62
+ # -a adaptblur alternate text smoothing using adaptive blur;
63
+ # floats>=0; default=0 (no smoothing)
64
+ # -T trim background around outer part of image
65
+ # -p padamt border pad amount around outer part of image;
66
+ # integer>=0; default=0
67
+ # -b bgcolor desired color for background; default=white
68
+ #
69
+ ###
70
+ #
71
+ # NAME: TEXTCLEANER
72
+ #
73
+ # PURPOSE: To process a scanned document of text to clean the text background.
74
+ #
75
+ # DESCRIPTION: TEXTCLEANER processes a scanned document of text to clean
76
+ # the text background and enhance the text. The order of processing is:
77
+ # 1) optional 90 degree rotate if aspect does not match layout
78
+ # 2) optional crop,
79
+ # 3) optional convert to grayscale,
80
+ # 4) optional enhance,
81
+ # 5) filter to clean background and optionally smooth/antialias,
82
+ # 6) optional unrotate (limited to about 5 degrees or less),
83
+ # 7) optional text smoothing,
84
+ # 8) optional sharpening,
85
+ # 9) optional saturation change (if -g is not specified),
86
+ # 10) optional alternate text smoothing via adaptive blur
87
+ # 11) optional auto trim of border (effective only if background well-cleaned),
88
+ # 12) optional pad of border
89
+ #
90
+ # OPTIONS:
91
+ #
92
+ # -r rotate ... ROTATE image either clockwise or counterclockwise by 90 degrees,
93
+ # if image aspect ratio does not match the layout mode. Choices are: cw (or
94
+ # clockwise), ccw (or counterclockwise) and n (or none). The default is no rotation.
95
+ #
96
+ # -l layout ... LAYOUT for determining if rotation is to be applied. The choices
97
+ # are p (or portrait) or l (or landscape). The image will be rotated if rotate is
98
+ # specified and the aspect ratio of the image does not match the layout chosen.
99
+ # The default is portrait.
100
+ #
101
+ # -c cropoffsets ... CROPOFFSETS are the image cropping offsets after potential
102
+ # rotate 90. Choices: one, two or four non-negative integer comma separated
103
+ # values. One value will crop all around. Two values will crop at
104
+ # left/right,top/bottom. Four values will crop left,top,right,bottom.
105
+ #
106
+ # -g ... Convert the document to grayscale.
107
+ #
108
+ # -e enhance ... ENHANCE brightness of image. The choices are: none, stretch,
109
+ # or normalize. The default=stretch.
110
+ #
111
+ # -f filtersize ... FILTERSIZE is the size of the filter used to clean up the
112
+ # background. Values are integers>0. The filtersize needs to be larger than
113
+ # the thickness of the writing, but the smaller the better beyond this. Making it
114
+ # larger will increase the processing time and may lose text. The default is 15.
115
+ #
116
+ # -o offset ... OFFSET is the offset threshold in percent used by the filter
117
+ # to eliminate noise. Values are integers>=0. Values too small will leave much
118
+ # noise and artifacts in the result. Values too large will remove too much
119
+ # text leaving gaps. The default is 5.
120
+ #
121
+ # -u ... UNROTATE the image. This is limited to about 5 degrees or less.
122
+ #
123
+ # -t threshold ... THRESHOLD is the text smoothing threshold. Values are integers
124
+ # between 0 and 100. Smaller values smooth/thicken the text more. Larger values
125
+ # thin, but can result in gaps in the text. Nominal value is in the middle at
126
+ # about 50. The default is to disable smoothing.
127
+ #
128
+ # -s sharpamt ... SHARPAMT is the amount of pixel sharpening to be applied to
129
+ # the resulting text. Values are floats>=0. If used, it should be small
130
+ # (suggested about 1). The default=0 (no sharpening).
131
+ #
132
+ # -S saturation ... SATURATION is the desired color saturation of the text
133
+ # expressed as a percentage. Values are integers>=0. A value of 100 is no change.
134
+ # Larger values will make the text colors more saturated. The default=200
135
+ # indicates double saturation. Not applicable when -g option specified.
136
+ #
137
+ # -a adaptblur ... ADAPTBLUR applies an alternate text smoothing using
138
+ # an adaptive blur. The values are floats>=0. The default=0 indicates no
139
+ # blurring.
140
+ #
141
+ # -T ... TRIM the border around the image.
142
+ #
143
+ # -p padamt ... PADAMT is the border pad amount in pixels. The default=0.
144
+ #
145
+ # -b bgcolor ... BGCOLOR is the desired background color after it has been
146
+ # cleaned up. Any valid IM color may be used. The default is white.
147
+ #
148
+ # CAVEAT: No guarantee that this script will work on all platforms,
149
+ # nor that trapping of inconsistent parameters is complete and
150
+ # foolproof. Use At Your Own Risk.
151
+ #
152
+ ######
153
+ #
154
+
155
+ # set default values
156
+ rotate="none" # rotate 90 clockwise (cw) or counterclockwise (ccw)
157
+ layout="portrait" # rotate 90 to match layout; portrait or landscape
158
+ cropoff="" # crop amounts; comma separate list of 1, 2 or 4 integers
159
+ numcrops=0 # number of crops flag
160
+ gray="no" # convert to grayscale flag
161
+ enhance="stretch" # none, stretch, normalize
162
+ filtersize=15 # local area filter size
163
+ offset=5 # local area offset to remove "noise"; too small-get noise, too large-lose text
164
+ threshold="" # smoothing threshold
165
+ sharpamt=0 # sharpen sigma
166
+ saturation=200 # color saturation percent; 100 is no change
167
+ adaptblur=0 # adaptive blur
168
+ unrotate="no" # unrotate flag
169
+ trim="no" # trim flag
170
+ padamt=0 # pad amount
171
+ bgcolor="white" # color for output whiteboard background
172
+
173
+ # set directory for temporary files
174
+ dir="/tmp" # suggestions are dir="." or dir="/tmp"
175
+
176
+ # set up functions to report Usage and Usage with Description
177
+ PROGNAME=`type $0 | awk '{print $3}'` # search for executable on path
178
+ PROGDIR=`dirname $PROGNAME` # extract directory of program
179
+ PROGNAME=`basename $PROGNAME` # base name of program
180
+ usage1()
181
+ {
182
+ echo >&2 ""
183
+ echo >&2 "$PROGNAME:" "$@"
184
+ sed >&2 -e '1,/^####/d; /^###/g; /^#/!q; s/^#//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
185
+ }
186
+ usage2()
187
+ {
188
+ echo >&2 ""
189
+ echo >&2 "$PROGNAME:" "$@"
190
+ sed >&2 -e '1,/^####/d; /^######/g; /^#/!q; s/^#*//; s/^ //; 4,$p' "$PROGDIR/$PROGNAME"
191
+ }
192
+
193
+
194
+ # function to report error messages
195
+ errMsg()
196
+ {
197
+ echo ""
198
+ echo $1
199
+ echo ""
200
+ usage1
201
+ exit 1
202
+ }
203
+
204
+
205
+ # function to test for minus at start of value of second part of option 1 or 2
206
+ checkMinus()
207
+ {
208
+ test=`echo "$1" | grep -c '^-.*$'` # returns 1 if match; 0 otherwise
209
+ [ $test -eq 1 ] && errMsg "$errorMsg"
210
+ }
211
+
212
+ # test for correct number of arguments and get values
213
+ if [ $# -eq 0 ]
214
+ then
215
+ # help information
216
+ echo ""
217
+ usage2
218
+ exit 0
219
+ elif [ $# -gt 27 ]
220
+ then
221
+ errMsg "--- TOO MANY ARGUMENTS WERE PROVIDED ---"
222
+ else
223
+ while [ $# -gt 0 ]
224
+ do
225
+ # get parameter values
226
+ case "$1" in
227
+ -h|-help) # help information
228
+ echo ""
229
+ usage2
230
+ exit 0
231
+ ;;
232
+ -r) # rotate
233
+ shift # to get the next parameter
234
+ # test if parameter starts with minus sign
235
+ errorMsg="--- INVALID ROTATE SPECIFICATION ---"
236
+ checkMinus "$1"
237
+ rotate=`echo "$1" | tr "[:upper:]" "[:lower:]"`
238
+ case "$rotate" in
239
+ none|n) rotate="none" ;;
240
+ clockwise|cw) rotate="cw" ;;
241
+ counterclockwise|ccw) rotate="ccw" ;;
242
+ *) errMsg "--- ROTATE=$rotate IS NOT A VALID CHOICE ---" ;;
243
+ esac
244
+ ;;
245
+ -l) # layout
246
+ shift # to get the next parameter
247
+ # test if parameter starts with minus sign
248
+ errorMsg="--- INVALID LAYOUT SPECIFICATION ---"
249
+ checkMinus "$1"
250
+ layout=`echo "$1" | tr "[:upper:]" "[:lower:]"`
251
+ case "$layout" in
252
+ portrait|p) layout="portrait" ;;
253
+ landscape|l) layout="landscape" ;;
254
+ *) errMsg "--- LAYOUT=$layout IS NOT A VALID CHOICE ---" ;;
255
+ esac
256
+ ;;
257
+ -c) # get cropoffsets
258
+ shift # to get the next parameter
259
+ # test if parameter starts with minus sign
260
+ errorMsg="--- INVALID CROPOFFSETS SPECIFICATION ---"
261
+ checkMinus "$1"
262
+ cropoff="$1"
263
+ cropoff="${cropoff},"
264
+ cropoff=`expr "$cropoff" : '\([,0-9]*\)'`
265
+ numcrops=`echo "$cropoff" | tr "," " " | wc -w`
266
+ [ "$cropoff" = "" ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
267
+ [ $numcrops -ne 1 -a $numcrops -ne 2 -a $numcrops -ne 4 ] && errMsg "--- ONE OR TWO OR FOUR OFFSETS MUST BE PROVIDED ---"
268
+ crop1=`echo "$cropoff" | cut -d, -f1`
269
+ crop2=`echo "$cropoff" | cut -d, -f2`
270
+ crop3=`echo "$cropoff" | cut -d, -f3`
271
+ crop4=`echo "$cropoff" | cut -d, -f4`
272
+ ;;
273
+ -g) # set grayscale
274
+ gray="yes"
275
+ ;;
276
+ -e) # get enhance
277
+ shift # to get the next parameter
278
+ # test if parameter starts with minus sign
279
+ errorMsg="--- INVALID ENHANCE SPECIFICATION ---"
280
+ checkMinus "$1"
281
+ enhance="$1"
282
+ case "$1" in
283
+ none) ;;
284
+ stretch) ;;
285
+ normalize) ;;
286
+ *) errMsg "--- ENHANCE=$enhance IS NOT A VALID CHOICE ---" ;;
287
+ esac
288
+ ;;
289
+ -f) # get filtersize
290
+ shift # to get the next parameter
291
+ # test if parameter starts with minus sign
292
+ errorMsg="--- INVALID FILTERSIZE SPECIFICATION ---"
293
+ checkMinus "$1"
294
+ filtersize=`expr "$1" : '\([0-9]*\)'`
295
+ [ "$filtersize" = "" ] && errMsg "--- FILTERSIZE=$filtersize MUST BE A NON-NEGATIVE INTEGER ---"
296
+ filtersizetest=`echo "$filtersize < 1" | bc`
297
+ [ $filtersizetest -eq 1 ] && errMsg "--- FILTERSIZE=$filtersize MUST BE AN INTEGER GREATER THAN 0 ---"
298
+ ;;
299
+ -o) # get offset
300
+ shift # to get the next parameter
301
+ # test if parameter starts with minus sign
302
+ errorMsg="--- INVALID OFFSET SPECIFICATION ---"
303
+ checkMinus "$1"
304
+ offset=`expr "$1" : '\([0-9]*\)'`
305
+ [ "$offset" = "" ] && errMsg "--- OFFSET=$offset MUST BE A NON-NEGATIVE INTEGER ---"
306
+ ;;
307
+ -t) # get threshold
308
+ shift # to get the next parameter
309
+ # test if parameter starts with minus sign
310
+ errorMsg="--- INVALID THRESHOLD SPECIFICATION ---"
311
+ checkMinus "$1"
312
+ threshold=`expr "$1" : '\([0-9]*\)'`
313
+ [ "$threshold" = "" ] && errMsg "--- THRESHOLD=$threshold MUST BE A NON-NEGATIVE INTEGER ---"
314
+ thresholdtestA=`echo "$threshold < 0" | bc`
315
+ thresholdtestB=`echo "$threshold > 100" | bc`
316
+ [ $thresholdtestA -eq 1 -o $thresholdtestB -eq 1 ] && errMsg "--- THRESHOLD=$threshold MUST BE AN INTEGER GREATER BETWEEN 0 AND 100 ---"
317
+ ;;
318
+ -s) # get sharpamt
319
+ shift # to get the next parameter
320
+ # test if parameter starts with minus sign
321
+ errorMsg="--- INVALID SHARPAMT SPECIFICATION ---"
322
+ checkMinus "$1"
323
+ sharpamt=`expr "$1" : '\([.0-9]*\)'`
324
+ [ "$sharpamt" = "" ] && errMsg "--- SHARPAMT=$sharpamt MUST BE A NON-NEGATIVE FLOAT ---"
325
+ ;;
326
+ -S) # get saturation
327
+ shift # to get the next parameter
328
+ # test if parameter starts with minus sign
329
+ errorMsg="--- INVALID SATURATION SPECIFICATION ---"
330
+ checkMinus "$1"
331
+ saturation=`expr "$1" : '\([0-9]*\)'`
332
+ [ "$saturation" = "" ] && errMsg "--- SATURATION=$saturation MUST BE A NON-NEGATIVE INTEGER ---"
333
+ ;;
334
+ -a) # get adaptblur
335
+ shift # to get the next parameter
336
+ # test if parameter starts with minus sign
337
+ errorMsg="--- INVALID ADAPTBLUR SPECIFICATION ---"
338
+ checkMinus "$1"
339
+ adaptblur=`expr "$1" : '\([.0-9]*\)'`
340
+ [ "$adaptblur" = "" ] && errMsg "--- ADAPTBLUR=$adaptblur MUST BE A NON-NEGATIVE FLOAT ---"
341
+ ;;
342
+ -u) # set unrotate
343
+ unrotate="yes"
344
+ ;;
345
+ -T) # set trim
346
+ trim="yes"
347
+ ;;
348
+ -p) # get padamt
349
+ shift # to get the next parameter
350
+ # test if parameter starts with minus sign
351
+ errorMsg="--- INVALID PADAMT SPECIFICATION ---"
352
+ checkMinus "$1"
353
+ padamt=`expr "$1" : '\([0-9]*\)'`
354
+ [ "$padamt" = "" ] && errMsg "--- PADAMT=$padamt MUST BE A NON-NEGATIVE INTEGER ---"
355
+ ;;
356
+ -b) # get bgcolor
357
+ shift # to get the next parameter
358
+ # test if parameter starts with minus sign
359
+ errorMsg="--- INVALID BACKGROUND COLOR SPECIFICATION ---"
360
+ checkMinus "$1"
361
+ bgcolor="$1"
362
+ ;;
363
+ -) # STDIN and end of arguments
364
+ break
365
+ ;;
366
+ -*) # any other - argument
367
+ errMsg "--- UNKNOWN OPTION ---"
368
+ ;;
369
+ *) # end of arguments
370
+ break
371
+ ;;
372
+ esac
373
+ shift # next option
374
+ done
375
+ #
376
+ # get infile and outfile
377
+ infile="$1"
378
+ outfile="$2"
379
+ fi
380
+
381
+ # test that infile provided
382
+ [ "$infile" = "" ] && errMsg "NO INPUT FILE SPECIFIED"
383
+
384
+ # test that outfile provided
385
+ [ "$outfile" = "" ] && errMsg "NO OUTPUT FILE SPECIFIED"
386
+
387
+ # get im version
388
+ im_version=`convert -list configure | \
389
+ sed '/^LIB_VERSION_NUMBER /!d; s//,/; s/,/,0/g; s/,0*\([0-9][0-9]\)/\1/g' | head -n 1`
390
+
391
+ tmpA1="$dir/textcleaner_1_$$.mpc"
392
+ tmpA2="$dir/textcleaner_1_$$.cache"
393
+ trap "rm -f $tmpA1 $tmpA2 exit 0;" 0
394
+ trap "rm -f $tmpA1 $tmpA2; exit 1" 1 2 3 15
395
+ #trap "rm -f $tmpA1 $tmpA2; exit 1" ERR
396
+
397
+
398
+ # test for hdri enabled
399
+ # NOTE: must put grep before trap using ERR in case it does not find a match
400
+ if [ "$im_version" -ge "07000000" ]; then
401
+ hdri_on=`convert -version | grep "HDRI"`
402
+ else
403
+ hdri_on=`convert -list configure | grep "enable-hdri"`
404
+ fi
405
+
406
+
407
+ # colorspace RGB and sRGB swapped between 6.7.5.5 and 6.7.6.7
408
+ # though probably not resolved until the latter
409
+ # then -colorspace gray changed to linear between 6.7.6.7 and 6.7.8.2
410
+ # then -separate converted to linear gray channels between 6.7.6.7 and 6.7.8.2,
411
+ # though probably not resolved until the latter
412
+ # so -colorspace HSL/HSB -separate and -colorspace gray became linear
413
+ # but we need to use -set colorspace RGB before using them at appropriate times
414
+ # so that results stay as in original script
415
+ # The following was determined from various version tests using textcleaner
416
+ # with IM 6.7.4.10, 6.7.6.10, 6.7.9.0
417
+ if [ "$im_version" -lt "06070607" -o "$im_version" -gt "06070707" ]; then
418
+ setcspace="-set colorspace RGB"
419
+ else
420
+ setcspace=""
421
+ fi
422
+ # no need for setcspace for grayscale or channels after 6.8.5.4
423
+ if [ "$im_version" -gt "06080504" ]; then
424
+ setcspace=""
425
+ fi
426
+
427
+
428
+ # read the input image into the TMP cached image.
429
+ convert -quiet "$infile" +repage "$tmpA1" ||
430
+ errMsg "--- FILE $infile NOT READABLE OR HAS ZERO SIZE ---"
431
+
432
+ # get image size
433
+ ww=`convert $tmpA1 -ping -format "%w" info:`
434
+ hh=`convert $tmpA1 -ping -format "%h" info:`
435
+
436
+ # get image h/w aspect ratio and determine if portrait=1 (h/w>=1) or landscape=0 (h/w<1)
437
+ aspect=`convert xc: -format "%[fx:($hh/$ww)>=1?1:0]" info:`
438
+
439
+ #echo "ww=$ww; hh=$hh; aspect=$aspect"
440
+
441
+ # set up rotation
442
+ if [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "cw" ]; then
443
+ rotation="-rotate 90"
444
+ elif [ "$layout" = "portrait" -a $aspect -eq 0 -a "$rotate" = "ccw" ]; then
445
+ rotation="-rotate -90"
446
+ elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "cw" ]; then
447
+ rotation="-rotate 90"
448
+ elif [ "$layout" = "landscape" -a $aspect -eq 1 -a "$rotate" = "ccw" ]; then
449
+ rotation="-rotate -90"
450
+ else
451
+ rotation=""
452
+ fi
453
+
454
+ # set up cropping
455
+ if [ "$cropoff" != "" -a $numcrops -eq 1 ]; then
456
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
457
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop1]" info:`
458
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop1 +repage"
459
+ elif [ "$cropoff" != "" -a $numcrops -eq 2 ]; then
460
+ wwc=`convert xc: -format "%[fx:$ww-2*$crop1]" info:`
461
+ hhc=`convert xc: -format "%[fx:$hh-2*$crop2]" info:`
462
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
463
+ elif [ "$cropoff" != "" -a $numcrops -eq 4 ]; then
464
+ wwc=`convert xc: -format "%[fx:$ww-($crop1+$crop3)]" info:`
465
+ hhc=`convert xc: -format "%[fx:$hh-($crop2+$crop4)]" info:`
466
+ cropping="-crop ${wwc}x${hhc}+$crop1+$crop2 +repage"
467
+ else
468
+ cropping=""
469
+ fi
470
+ #echo "cropoff=$cropoff; numcrops=$numcrops; cropping=$cropping"
471
+
472
+ # test if grayscale
473
+ grayscale=`convert $tmpA1 -format "%[colorspace]" info:`
474
+ typegray=`convert $tmpA1 -format '%r' info: | grep 'Gray'`
475
+ if [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ]; then
476
+ makegray="$setcspace -colorspace gray -type grayscale"
477
+ else
478
+ makegray=""
479
+ fi
480
+ #echo "makegray=$makegray"
481
+
482
+ # set up enhance
483
+ if [ "$enhance" = "stretch" ]; then
484
+ enhancing="$setcspace -contrast-stretch 0"
485
+ elif [ "$enhance" = "normalize" ]; then
486
+ enhancing="$setcspace -normalize"
487
+ else
488
+ enhancing=""
489
+ fi
490
+ #echo "enhancing=$enhancing"
491
+
492
+ # setup blurring
493
+ if [ "$threshold" = "" ]; then
494
+ blurring=""
495
+ else
496
+ # note: any 0<bluramt<=1, will be the same as using bluramt=1, since radius must be used as an integer
497
+ # bluramt=`convert xc: -format "%[fx:$threshold/100]" info:`
498
+ # blurring="-blur ${bluramt}x65535 -level ${threshold}x100%"
499
+ blurring="-blur 1x65535 -level ${threshold}x100%"
500
+ fi
501
+ #echo "blurring=$blurring"
502
+
503
+ # set up unrotate
504
+ if [ "$unrotate" = "yes" ]; then
505
+ unrotating="-background $bgcolor -deskew 40%"
506
+ else
507
+ unrotating=""
508
+ fi
509
+ #echo "unrotating=$unrotating"
510
+
511
+ # setup sharpening
512
+ if [ "$sharpamt" = "0" -o "$sharpamt" = "0.0" ]; then
513
+ sharpening=""
514
+ else
515
+ sharpening="-sharpen 0x${sharpamt}"
516
+ fi
517
+ #echo "sharpening=$sharpening"
518
+
519
+ # setup modulation
520
+ [ "$gray" = "yes" -o "$grayscale" = "Gray" -o "$typegray" != "" ] && saturation=100
521
+ if [ $saturation -eq 100 ]; then
522
+ modulation=""
523
+ else
524
+ modulation="-modulate 100,$saturation,100"
525
+ fi
526
+ #echo "modulation=$modulation"
527
+
528
+ # set up adaptiveblurring
529
+ if [ "$adaptblur" = "0" ]; then
530
+ adaptiveblurring=""
531
+ else
532
+ adaptiveblurring="-adaptive-blur $adaptblur"
533
+ fi
534
+
535
+ # set up trim
536
+ if [ "$trim" = "yes" -a "$hdri_on" != "" ]; then
537
+ # hdri is enabled
538
+ # need to round near white to pure white for trim to work
539
+ trimming="-white-threshold 99.9% -trim +repage "
540
+ elif [ "$trim" = "yes" -a "$hdri_on" = "" ]; then
541
+ # hdri is not enabled
542
+ trimming="-trim +repage "
543
+ else
544
+ trimming=""
545
+ fi
546
+ #echo "trimming=$trimming"
547
+
548
+ # set up pad
549
+ if [ $padamt -gt 0 ]; then
550
+ # note must reset -compose from -compose copy_opacity as -border uses -compose
551
+ padding="-compose over -bordercolor $bgcolor -border $padamt"
552
+ else
553
+ padding=""
554
+ fi
555
+ #echo "padding=$padding"
556
+
557
+
558
+ # process image
559
+ convert -respect-parenthesis \( $tmpA1 $rotation $cropping $makegray $enhancing \) \
560
+ \( -clone 0 $setcspace -colorspace gray -negate -lat ${filtersize}x${filtersize}+${offset}% -contrast-stretch 0 $blurring \) \
561
+ -compose copy_opacity -composite -fill "$bgcolor" -opaque none -alpha off \
562
+ $unrotating $sharpening $modulation $adaptiveblurring $trimming $padding \
563
+ "$outfile"
564
+ exit 0
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple-ocr
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Swaathi Kakarla
@@ -22,6 +22,7 @@ files:
22
22
  - lib/simple-ocr/path.rb
23
23
  - lib/simple-ocr/scan.rb
24
24
  - lib/simple-ocr/zonal_ocr.rb
25
+ - lib/textcleaner
25
26
  homepage: http://www.skcript.com
26
27
  licenses:
27
28
  - Closed
@@ -42,7 +43,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
42
43
  version: '0'
43
44
  requirements: []
44
45
  rubyforge_project:
45
- rubygems_version: 2.4.8
46
+ rubygems_version: 2.4.5
46
47
  signing_key:
47
48
  specification_version: 4
48
49
  summary: OCR Engine by Skcript