asposeocrjava 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +33 -0
- data/Rakefile +2 -0
- data/asposeocrjava.gemspec +27 -0
- data/config/aspose.yml +5 -0
- data/data/SpanishLanguageResources.zip +0 -0
- data/data/multipage.tiff +0 -0
- data/data/ocr.png +0 -0
- data/data/sample_omr.jpg +0 -0
- data/data/spanish.png +0 -0
- data/data/template.amr +76 -0
- data/lib/asposeocrjava.rb +71 -0
- data/lib/asposeocrjava/OCR/applycorrectionfilters.rb +37 -0
- data/lib/asposeocrjava/OCR/correctspellings.rb +22 -0
- data/lib/asposeocrjava/OCR/detectreading.rb +22 -0
- data/lib/asposeocrjava/OCR/detecttextblocks.rb +22 -0
- data/lib/asposeocrjava/OCR/extracttextfrompartofimage.rb +40 -0
- data/lib/asposeocrjava/OCR/gettextparthierarchyoftext.rb +40 -0
- data/lib/asposeocrjava/OCR/ignorenontextualblocks.rb +22 -0
- data/lib/asposeocrjava/OCR/loadimagefromurl.rb +17 -0
- data/lib/asposeocrjava/OCR/multipagetiff.rb +25 -0
- data/lib/asposeocrjava/OCR/readpartinformationoftext.rb +34 -0
- data/lib/asposeocrjava/OCR/recognizetext.rb +19 -0
- data/lib/asposeocrjava/OCR/recognizewhitelistedcharacters.rb +38 -0
- data/lib/asposeocrjava/OCR/workwithdifferentlanguages.rb +25 -0
- data/lib/asposeocrjava/OMR/detectimageresolution.rb +48 -0
- data/lib/asposeocrjava/OMR/extractomrdatafromimage.rb +45 -0
- data/lib/asposeocrjava/OMR/setimageresolution.rb +20 -0
- data/lib/asposeocrjava/OMR/setmarkthreshold.rb +74 -0
- data/lib/asposeocrjava/asposeocr.rb +2 -0
- data/lib/asposeocrjava/ocr.rb +13 -0
- data/lib/asposeocrjava/omr.rb +4 -0
- data/lib/asposeocrjava/version.rb +3 -0
- data/samples/ocr.rb +21 -0
- data/samples/omr.rb +12 -0
- metadata +137 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the sample image ocr.png with the RemoveNonText
  # option switched on in the engine configuration, then print the text.
  module IgnoreNonTextualBlocks
    # Runs the example. Writes "Text: ..." to standard output when
    # OcrEngine#process returns a truthy value; prints nothing otherwise.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Give the engine the image to recognize, loaded from the sample directory.
      source_image = Rjb.import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'ocr.png')
      engine.setImage(source_image)

      # Enable RemoveNonText on the engine configuration.
      engine.getConfig().setRemoveNonText(true)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on an image fetched from a fixed remote URL and print
  # the recognized text.
  module LoadImageFromUrl
    # Runs the example. Writes "Text: ..." to standard output when
    # OcrEngine#process returns a truthy value; prints nothing otherwise.
    def initialize
      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Build the image stream from the remote location and hand it to the engine.
      remote_image = Rjb.import('com.aspose.ocr.ImageStream').fromUrl("http://cdn.aspose.com/tmp/ocr-sample.bmp")
      engine.setImage(remote_image)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR over every page of the sample multipage.tiff in one
  # pass and print the text of each page.
  module MultipageTiff
    # Runs the example. Writes one "Text: ..." line group per page to
    # standard output when OcrEngine#process returns a truthy value.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Give the engine the TIFF to recognize.
      tiff_stream = Rjb.import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'multipage.tiff')
      engine.setImage(tiff_stream)

      # Ask the engine to process all pages of the TIFF in a single run.
      engine.setProcessAllPages(true)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      # Print the text recognized on each page.
      engine.getPages().each do |tiff_page|
        puts "Text: " + tiff_page.getPageText().to_string
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the sample image ocr.png, print the recognized text,
  # then print style and quality information for each recognized part.
  module ReadPartInformationOfText
    # Runs the example. All output goes to standard output; nothing is
    # printed unless OcrEngine#process returns a truthy value.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Give the engine the image to recognize.
      source_image = Rjb.import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'ocr.png')
      engine.setImage(source_image)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      puts "Text: " + engine.getText().to_string

      # Walk the per-part information of the recognized text and report
      # the attributes of each part.
      engine.getText().getPartsInfo().each do |part|
        puts "isItalic : #{part.getItalic()}"
        puts "isUnderline : #{part.getUnderline()}"
        puts "isBold : #{part.getBold()}"
        puts "Text Color : " + part.getTextColor().to_string
        puts "Quality : #{part.getCharactersQuality()}"
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the sample image ocr.png and print the recognized text.
  module RecognizeText
    # Runs the example. Writes "Text: ..." to standard output when
    # OcrEngine#process returns a truthy value; prints nothing otherwise.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Give the engine the image to recognize.
      source_image = Rjb.import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'ocr.png')
      engine.setImage(source_image)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the sample image ocr.png with RemoveNonText enabled,
  # print the recognized text, then print the first run of digits found in it.
  module RecognizeWhiteListedCharacters
    # Runs the example. All output goes to standard output; nothing is
    # printed unless OcrEngine#process returns a truthy value.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Initialize an instance of OcrEngine
      ocr_engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Retrieve the OcrConfig of the OcrEngine object
      ocr_config = ocr_engine.getConfig()

      # Restricting recognition to digits via the whitelist is left disabled,
      # exactly as in the original example.
      #ocr_config.setWhitelist(['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'])

      # Set the Image property by loading the image from file path location
      ocr_engine.setImage(Rjb.import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'ocr.png'))

      # Set the RemoveNonText to true
      ocr_config.setRemoveNonText(true)

      # Nothing to show when processing does not succeed.
      return unless ocr_engine.process()

      text = ocr_engine.getText()
      puts "Text: " + text.to_string

      # Search the recognized text for the first run of digits using
      # java.util.regex.
      pattern = Rjb.import('java.util.regex.Pattern').compile("(\\d+)")
      matcher = pattern.matcher(text.toString())
      return unless matcher.find()

      # Matcher#group yields a java.lang.String, which the bridge returns as a
      # Ruby String; interpolation works on it, whereas calling #to_string on a
      # Ruby String raises NoMethodError.
      puts "Found value: #{matcher.group(0)}"
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Asposeocrjava
  # Example: run OCR on the sample image spanish.png after replacing the
  # engine's languages with the one loaded from SpanishLanguageResources.zip.
  module WorkWithDifferentLanguages
    # Runs the example. Writes "Text: ..." to standard output when
    # OcrEngine#process returns a truthy value; prints nothing otherwise.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Give the engine the image to recognize.
      source_image = Rjb.import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'spanish.png')
      engine.setImage(source_image)

      # Empty the language container before adding the new language.
      engine.getLanguageContainer().clear()

      # Load the language resources from the sample directory and register them.
      spanish = Rjb.import('com.aspose.ocr.LanguageFactory').load(sample_dir + "SpanishLanguageResources.zip")
      engine.getLanguageContainer().addLanguage(spanish)

      # Nothing to show when processing does not succeed.
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Asposeocrjava
  # Example: extract OMR data from the sample scan with automatic image
  # resolution detection enabled, and print every key/value pair per page.
  module DetectImageResolution
    # Runs the example. For each page of extracted data it prints a page
    # header followed by "key: ..." / "value: ..." lines to standard output.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Locations of the OMR template and of the scanned image.
      template_file = data_dir + "template.amr"
      image_file = data_dir + "sample_omr.jpg"

      # Load the template and the scanned image through their load factories.
      template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb.import('com.aspose.omr.OmrImage').load(image_file)

      # Set the auto image resolution detection property
      image.setAutoDetectResolution(true)

      # Build an engine for the template and extract the OMR data.
      engine = Rjb.import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      # Iterate over the pages and display the data
      result.getPageData().each_with_index do |page, position|
        puts "------Page: #{position + 1}"

        # The key set is read through its string form "[k1, k2, ...]".
        # Java collections separate elements with ", " (comma AND space), so
        # the split must use that exact separator; splitting on "," alone
        # leaves a leading space on every key after the first and the lookup
        # below then misses.
        # NOTE(review): a key that itself contains ", " would still be split.
        key_list = page.keySet().to_string
        key_list[1, key_list.length - 2].split(', ').each do |key|
          puts "key: #{key}"
          puts "value: #{page.get(key)}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module Asposeocrjava
  # Example: extract OMR data from the sample scan using the sample
  # template, and print every key/value pair per page.
  module ExtractOmrDataFromImage
    # Runs the example. For each page of extracted data it prints a page
    # header followed by "key: ..." / "value: ..." lines to standard output.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Locations of the OMR template and of the scanned image.
      template_file = data_dir + "template.amr"
      image_file = data_dir + "sample_omr.jpg"

      # Load the template and the scanned image through their load factories.
      template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb.import('com.aspose.omr.OmrImage').load(image_file)

      # Build an engine for the template and extract the OMR data.
      engine = Rjb.import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      # Iterate over the pages and display the data
      result.getPageData().each_with_index do |page, position|
        puts "------Page: #{position + 1}"

        # The key set is read through its string form "[k1, k2, ...]".
        # Java collections separate elements with ", " (comma AND space), so
        # the split must use that exact separator; splitting on "," alone
        # leaves a leading space on every key after the first and the lookup
        # below then misses.
        # NOTE(review): a key that itself contains ", " would still be split.
        key_list = page.keySet().to_string
        key_list[1, key_list.length - 2].split(', ').each do |key|
          puts "key: #{key}"
          puts "value: #{page.get(key)}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Asposeocrjava
  # Example: load the sample scan as an OmrImage and set its resolution
  # explicitly.
  module SetImageResolution
    # Runs the example and prints a confirmation line to standard output.
    def initialize
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Load the scanned image through the OmrImage load factory.
      scanned_image = Rjb.import('com.aspose.omr.OmrImage').load(sample_dir + "sample_omr.jpg")

      # Replace the image's DPI value with a new one, given as a float.
      scanned_image.setResolution(210.0)

      # No further processing is performed in this example.
      puts "Set image resolution."
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Asposeocrjava
  # Example: two ways of setting the OMR fill threshold -- on the engine's
  # configuration, and on the configuration of a single template page.
  module SetMarkThreshold
    # Runs both threshold examples, global first, then page level.
    def initialize
      set_global_threshold
      set_page_level_threshold
    end

    # Sets a fill threshold of 0.12 on the engine configuration, runs data
    # extraction on the sample scan and prints a confirmation line.
    def set_global_threshold
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Load the template and the scanned image through their load factories.
      omr_template = Rjb.import('com.aspose.omr.OmrTemplate').load(sample_dir + "template.amr")
      scanned_image = Rjb.import('com.aspose.omr.OmrImage').load(sample_dir + "sample_omr.jpg")

      # Build an engine for the template and adjust its configuration.
      omr_engine = Rjb.import('com.aspose.omr.OmrEngine').new(omr_template)
      omr_engine.getConfiguration().setFillThreshold(0.12)

      # Run the extraction; the result is not inspected in this example.
      omr_engine.extractData([scanned_image])

      puts "Set global threshold."
    end

    # Gives the first template page its own configuration with a fill
    # threshold of 0.21, runs data extraction on the sample scan and prints
    # a confirmation line.
    def set_page_level_threshold
      # The sample directory is '/data/' under the path obtained by applying
      # File.dirname to this file's path four times.
      sample_dir = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) } + '/data/'

      # Load the template and the scanned image through their load factories.
      omr_template = Rjb.import('com.aspose.omr.OmrTemplate').load(sample_dir + "template.amr")
      scanned_image = Rjb.import('com.aspose.omr.OmrImage').load(sample_dir + "sample_omr.jpg")

      # Attach a fresh configuration to the first page of the template and
      # set the threshold on it.
      first_page = omr_template.getPages().getItem(0)
      first_page.setConfiguration(Rjb.import('com.aspose.omr.OmrConfig').new)
      first_page.getConfiguration().setFillThreshold(0.21)

      # Build the engine only after the page configuration is in place.
      omr_engine = Rjb.import('com.aspose.omr.OmrEngine').new(omr_template)

      # Run the extraction; the result is not inspected in this example.
      omr_engine.extractData([scanned_image])

      puts "Set page level threshold."
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require_relative 'OCR/recognizetext'
|
2
|
+
require_relative 'OCR/readpartinformationoftext'
|
3
|
+
require_relative 'OCR/extracttextfrompartofimage'
|
4
|
+
require_relative 'OCR/loadimagefromurl'
|
5
|
+
require_relative 'OCR/multipagetiff'
|
6
|
+
require_relative 'OCR/gettextparthierarchyoftext'
|
7
|
+
require_relative 'OCR/applycorrectionfilters'
|
8
|
+
require_relative 'OCR/correctspellings'
|
9
|
+
require_relative 'OCR/detectreading'
|
10
|
+
require_relative 'OCR/detecttextblocks'
|
11
|
+
require_relative 'OCR/ignorenontextualblocks'
|
12
|
+
require_relative 'OCR/recognizewhitelistedcharacters'
|
13
|
+
require_relative 'OCR/workwithdifferentlanguages'
|
data/samples/ocr.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
=begin
|
2
|
+
Uncomment the `include` line below for the example you want to run.
|
3
|
+
=end
|
4
|
+
|
5
|
+
require '../lib/asposeocrjava'
|
6
|
+
include Asposeocrjava
|
7
|
+
#include Asposeocrjava::RecognizeText
|
8
|
+
#include Asposeocrjava::ReadPartInformationOfText
|
9
|
+
#include Asposeocrjava::ExtractTextFromPartOfImage
|
10
|
+
#include Asposeocrjava::LoadImageFromUrl
|
11
|
+
#include Asposeocrjava::MultipageTiff
|
12
|
+
#include Asposeocrjava::GetTextPartHierarchyOfText
|
13
|
+
#include Asposeocrjava::ApplyCorrectionFilters
|
14
|
+
#include Asposeocrjava::CorrectSpellings
|
15
|
+
#include Asposeocrjava::DetectReading
|
16
|
+
#include Asposeocrjava::DetectTextBlocks
|
17
|
+
#include Asposeocrjava::IgnoreNonTextualBlocks
|
18
|
+
#include Asposeocrjava::RecognizeWhiteListedCharacters
|
19
|
+
#include Asposeocrjava::WorkWithDifferentLanguages
|
20
|
+
|
21
|
+
initialize_aspose_ocr
|
data/samples/omr.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
=begin
|
2
|
+
Uncomment the `include` line below for the example you want to run.
|
3
|
+
=end
|
4
|
+
|
5
|
+
require '../lib/asposeocrjava'
|
6
|
+
include Asposeocrjava
|
7
|
+
#include Asposeocrjava::ExtractOmrDataFromImage
|
8
|
+
#include Asposeocrjava::SetMarkThreshold
|
9
|
+
#include Asposeocrjava::SetImageResolution
|
10
|
+
#include Asposeocrjava::DetectImageResolution
|
11
|
+
|
12
|
+
initialize_aspose_ocr
|