asposeocrjava 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +33 -0
- data/Rakefile +2 -0
- data/asposeocrjava.gemspec +27 -0
- data/config/aspose.yml +5 -0
- data/data/SpanishLanguageResources.zip +0 -0
- data/data/multipage.tiff +0 -0
- data/data/ocr.png +0 -0
- data/data/sample_omr.jpg +0 -0
- data/data/spanish.png +0 -0
- data/data/template.amr +76 -0
- data/lib/asposeocrjava.rb +71 -0
- data/lib/asposeocrjava/OCR/applycorrectionfilters.rb +37 -0
- data/lib/asposeocrjava/OCR/correctspellings.rb +22 -0
- data/lib/asposeocrjava/OCR/detectreading.rb +22 -0
- data/lib/asposeocrjava/OCR/detecttextblocks.rb +22 -0
- data/lib/asposeocrjava/OCR/extracttextfrompartofimage.rb +40 -0
- data/lib/asposeocrjava/OCR/gettextparthierarchyoftext.rb +40 -0
- data/lib/asposeocrjava/OCR/ignorenontextualblocks.rb +22 -0
- data/lib/asposeocrjava/OCR/loadimagefromurl.rb +17 -0
- data/lib/asposeocrjava/OCR/multipagetiff.rb +25 -0
- data/lib/asposeocrjava/OCR/readpartinformationoftext.rb +34 -0
- data/lib/asposeocrjava/OCR/recognizetext.rb +19 -0
- data/lib/asposeocrjava/OCR/recognizewhitelistedcharacters.rb +38 -0
- data/lib/asposeocrjava/OCR/workwithdifferentlanguages.rb +25 -0
- data/lib/asposeocrjava/OMR/detectimageresolution.rb +48 -0
- data/lib/asposeocrjava/OMR/extractomrdatafromimage.rb +45 -0
- data/lib/asposeocrjava/OMR/setimageresolution.rb +20 -0
- data/lib/asposeocrjava/OMR/setmarkthreshold.rb +74 -0
- data/lib/asposeocrjava/asposeocr.rb +2 -0
- data/lib/asposeocrjava/ocr.rb +13 -0
- data/lib/asposeocrjava/omr.rb +4 -0
- data/lib/asposeocrjava/version.rb +3 -0
- data/samples/ocr.rb +21 -0
- data/samples/omr.rb +12 -0
- metadata +137 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the bundled ocr.png with the engine configuration's
  # RemoveNonText flag switched on, then print the recognized text.
  module IgnoreNonTextualBlocks
    # Runs the example. Prints "Text: ..." when the engine's process call
    # returns a truthy value; prints nothing otherwise.
    def initialize
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the engine the image loaded from the data directory.
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'ocr.png')
      ocr_engine.setImage(source_image)

      # Turn on the RemoveNonText setting of the engine configuration.
      ocr_engine.getConfig.setRemoveNonText(true)

      return unless ocr_engine.process

      puts "Text: #{ocr_engine.getText.to_string}"
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on an image loaded from a fixed remote URL and print the
  # recognized text.
  module LoadImageFromUrl
    # Runs the example. Prints "Text: ..." only when the engine's process call
    # returns a truthy value.
    def initialize
      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Build the image stream from the remote location and give it to the engine.
      remote_image = Rjb::import('com.aspose.ocr.ImageStream').fromUrl("http://cdn.aspose.com/tmp/ocr-sample.bmp")
      ocr_engine.setImage(remote_image)

      puts "Text: #{ocr_engine.getText.to_string}" if ocr_engine.process
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR over the bundled multipage.tiff with ProcessAllPages
  # enabled and print the text of each returned page.
  module MultipageTiff
    # Runs the example. One "Text: ..." line group is printed per page when
    # the engine's process call returns a truthy value.
    def initialize
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Load the TIFF from the data directory and give it to the engine.
      tiff_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'multipage.tiff')
      ocr_engine.setImage(tiff_image)

      # Ask the engine to handle every page of the TIFF in a single run.
      ocr_engine.setProcessAllPages(true)

      return unless ocr_engine.process

      ocr_engine.getPages.each { |page| puts "Text: #{page.getPageText.to_string}" }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the bundled ocr.png, print the recognized text, and
  # then print style/quality details for every recognized text part.
  module ReadPartInformationOfText
    # Runs the example. Nothing is printed when the engine's process call
    # returns a falsy value.
    def initialize
      # The data directory sits four path levels above this file.
      data_dir = File.expand_path('../../../../data', __FILE__)

      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Load the image from the data directory and give it to the engine.
      image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(File.join(data_dir, 'ocr.png'))
      ocr_engine.setImage(image)

      return unless ocr_engine.process

      # Fetch the recognized text once and reuse it for both outputs.
      recognized = ocr_engine.getText
      puts "Text: #{recognized.to_string}"

      # getPartsInfo is indexed like an Array in this sample, so iterate it
      # directly instead of keeping a manual counter.
      recognized.getPartsInfo.each do |part|
        puts "isItalic : #{part.getItalic}"
        puts "isUnderline : #{part.getUnderline}"
        puts "isBold : #{part.getBold}"
        puts "Text Color : #{part.getTextColor.to_string}"
        puts "Quality : #{part.getCharactersQuality}"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the bundled ocr.png and print the recognized text.
  module RecognizeText
    # Runs the example. Prints "Text: ..." only when the engine's process call
    # returns a truthy value.
    def initialize
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Load the image from the data directory and give it to the engine.
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'ocr.png')
      ocr_engine.setImage(source_image)

      puts "Text: #{ocr_engine.getText.to_string}" if ocr_engine.process
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the bundled ocr.png with RemoveNonText enabled, print
  # the recognized text, and print the first run of digits found in it.
  module RecognizeWhiteListedCharacters
    # Runs the example. Nothing is printed when the engine's process call
    # returns a falsy value; the "Found value" line is printed only when the
    # recognized text contains at least one digit.
    def initialize
      # The data directory sits four path levels above this file.
      data_dir = File.expand_path('../../../../data', __FILE__)

      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Configuration object of the engine, used for the settings below.
      ocr_config = ocr_engine.getConfig

      # Restrict recognition to digits only.
      # NOTE(review): this call is disabled; presumably the Ruby array is not
      # accepted where the Java API expects a char[] - confirm before enabling.
      # ocr_config.setWhitelist(['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'])

      # Load the image from the data directory and give it to the engine.
      image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(File.join(data_dir, 'ocr.png'))
      ocr_engine.setImage(image)

      # Turn on the RemoveNonText setting.
      ocr_config.setRemoveNonText(true)

      return unless ocr_engine.process

      recognized = ocr_engine.getText.to_string
      puts "Text: #{recognized}"

      # The recognized text is already a Ruby String, so search it with a Ruby
      # regexp rather than going through java.util.regex; String has no
      # to_string method, so the Java-style result handling would fail.
      first_number = recognized[/\d+/]
      puts "Found value: #{first_number}" if first_number
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the bundled spanish.png after replacing the engine's
  # languages with the one loaded from SpanishLanguageResources.zip, then
  # print the recognized text.
  module WorkWithDifferentLanguages
    # Runs the example. Prints "Text: ..." only when the engine's process call
    # returns a truthy value.
    def initialize
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Java classes are reached through the Rjb bridge.
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Load the image from the data directory and give it to the engine.
      spanish_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'spanish.png')
      ocr_engine.setImage(spanish_image)

      # Empty the engine's language container before adding a language.
      ocr_engine.getLanguageContainer.clear

      # Add the language loaded from the resource archive in the data directory.
      ocr_engine.getLanguageContainer.addLanguage(
        Rjb::import('com.aspose.ocr.LanguageFactory').load(data_dir + "SpanishLanguageResources.zip")
      )

      puts "Text: #{ocr_engine.getText.to_string}" if ocr_engine.process
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Asposeocrjava
  # Example: extract OMR data from the bundled sample_omr.jpg using
  # template.amr, with automatic resolution detection enabled on the image,
  # and print every key/value pair of every returned page.
  module DetectImageResolution
    # Runs the example and prints a "------Page: N" header followed by
    # "key: ..." / "value: ..." lines for each page of extracted data.
    def initialize
      # The data directory sits four path levels above this file.
      data_dir = File.expand_path('../../../../data', __FILE__)

      # Locations of the OMR template and of the scanned image.
      template_file = File.join(data_dir, 'template.amr')
      image_file = File.join(data_dir, 'sample_omr.jpg')

      # Java classes are reached through the Rjb bridge; both objects are
      # created through their load factory methods.
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Enable automatic image resolution detection.
      image.setAutoDetectResolution(true)

      # Build the engine for this template and extract the OMR data.
      engine = Rjb::import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      result.getPageData.each_with_index do |page, index|
        puts "------Page: #{index + 1}"

        # The key set is read through its string form, assumed to look like
        # "[k1, k2]". Drop the surrounding brackets, split on commas and strip
        # the blank after each comma; without the strip every key but the
        # first keeps a leading space and the lookup below misses.
        # NOTE(review): keys that themselves contain a comma are still split.
        listing = page.keySet.to_string
        keys = listing[1, listing.length - 2].split(',').map(&:strip)

        keys.each do |key|
          puts "key: #{key}"
          puts "value: #{page.get(key)}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Asposeocrjava
  # Example: extract OMR data from the bundled sample_omr.jpg using
  # template.amr and print every key/value pair of every returned page.
  module ExtractOmrDataFromImage
    # Runs the example and prints a "------Page: N" header followed by
    # "key: ..." / "value: ..." lines for each page of extracted data.
    def initialize
      # The data directory sits four path levels above this file.
      data_dir = File.expand_path('../../../../data', __FILE__)

      # Locations of the OMR template and of the scanned image.
      template_file = File.join(data_dir, 'template.amr')
      image_file = File.join(data_dir, 'sample_omr.jpg')

      # Java classes are reached through the Rjb bridge; both objects are
      # created through their load factory methods.
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Build the engine for this template and extract the OMR data.
      engine = Rjb::import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      result.getPageData.each_with_index do |page, index|
        puts "------Page: #{index + 1}"

        # The key set is read through its string form, assumed to look like
        # "[k1, k2]". Drop the surrounding brackets, split on commas and strip
        # the blank after each comma; without the strip every key but the
        # first keeps a leading space and the lookup below misses.
        # NOTE(review): keys that themselves contain a comma are still split.
        listing = page.keySet.to_string
        keys = listing[1, listing.length - 2].split(',').map(&:strip)

        keys.each do |key|
          puts "key: #{key}"
          puts "value: #{page.get(key)}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Asposeocrjava
  # Example: load the bundled sample_omr.jpg as an OMR image and set its
  # resolution explicitly.
  module SetImageResolution
    # Runs the example and prints a confirmation line.
    def initialize
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Location of the scanned image.
      image_file = "#{data_dir}sample_omr.jpg"

      # Load the image through the OmrImage load factory method (via Rjb).
      omr_image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Assign a new resolution value, given as a floating point number.
      omr_image.setResolution(210.0)

      # Further processing would go here.

      puts "Set image resolution."
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Asposeocrjava
  # Example: set the OMR fill threshold in two ways - on the engine's
  # configuration and on the configuration of the template's first page.
  module SetMarkThreshold
    # Runs both variants, engine-level first and page-level second.
    def initialize
      set_global_threshold
      set_page_level_threshold
    end

    # Sets the fill threshold (0.12) on the engine configuration, runs the
    # extraction on sample_omr.jpg and prints a confirmation line.
    def set_global_threshold
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Locations of the OMR template and of the scanned image.
      template_file = "#{data_dir}template.amr"
      image_file = "#{data_dir}sample_omr.jpg"

      # Both objects come from their load factory methods (via Rjb).
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Build the engine for this template and adjust its configuration.
      engine = Rjb::import('com.aspose.omr.OmrEngine').new(template)
      engine.getConfiguration.setFillThreshold(0.12)

      # Run the extraction; the returned data is not used by this sample.
      engine.extractData([image])

      puts "Set global threshold."
    end

    # Gives the template's first page its own configuration with a fill
    # threshold of 0.21, runs the extraction on sample_omr.jpg and prints a
    # confirmation line.
    def set_page_level_threshold
      # Apply File.dirname four times to this file's path, then append /data/.
      data_dir = 4.times.inject(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Locations of the OMR template and of the scanned image.
      template_file = "#{data_dir}template.amr"
      image_file = "#{data_dir}sample_omr.jpg"

      # Both objects come from their load factory methods (via Rjb).
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Take the first page of the template and attach a fresh configuration.
      first_page = template.getPages.getItem(0)
      first_page.setConfiguration(Rjb::import('com.aspose.omr.OmrConfig').new)
      first_page.getConfiguration.setFillThreshold(0.21)

      # Build the engine and run the extraction; the returned data is not
      # used by this sample.
      Rjb::import('com.aspose.omr.OmrEngine').new(template).extractData([image])

      puts "Set page level threshold."
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative 'OCR/recognizetext'
|
2
|
+
require_relative 'OCR/readpartinformationoftext'
|
3
|
+
require_relative 'OCR/extracttextfrompartofimage'
|
4
|
+
require_relative 'OCR/loadimagefromurl'
|
5
|
+
require_relative 'OCR/multipagetiff'
|
6
|
+
require_relative 'OCR/gettextparthierarchyoftext'
|
7
|
+
require_relative 'OCR/applycorrectionfilters'
|
8
|
+
require_relative 'OCR/correctspellings'
|
9
|
+
require_relative 'OCR/detectreading'
|
10
|
+
require_relative 'OCR/detecttextblocks'
|
11
|
+
require_relative 'OCR/ignorenontextualblocks'
|
12
|
+
require_relative 'OCR/recognizewhitelistedcharacters'
|
13
|
+
require_relative 'OCR/workwithdifferentlanguages'
|
data/samples/ocr.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Sample runner for the OCR examples.
#
# Uncomment the include line of the example you want to execute.

# Resolve the library relative to this file rather than the current working
# directory, so the sample can be started from any directory.
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::RecognizeText
#include Asposeocrjava::ReadPartInformationOfText
#include Asposeocrjava::ExtractTextFromPartOfImage
#include Asposeocrjava::LoadImageFromUrl
#include Asposeocrjava::MultipageTiff
#include Asposeocrjava::GetTextPartHierarchyOfText
#include Asposeocrjava::ApplyCorrectionFilters
#include Asposeocrjava::CorrectSpellings
#include Asposeocrjava::DetectReading
#include Asposeocrjava::DetectTextBlocks
#include Asposeocrjava::IgnoreNonTextualBlocks
#include Asposeocrjava::RecognizeWhiteListedCharacters
#include Asposeocrjava::WorkWithDifferentLanguages

initialize_aspose_ocr
|
data/samples/omr.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Sample runner for the OMR examples.
#
# Uncomment the include line of the example you want to execute.

# Resolve the library relative to this file rather than the current working
# directory, so the sample can be started from any directory.
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::ExtractOmrDataFromImage
#include Asposeocrjava::SetMarkThreshold
#include Asposeocrjava::SetImageResolution
#include Asposeocrjava::DetectImageResolution

initialize_aspose_ocr
|