asposeocrjava 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +21 -0
  4. data/README.md +33 -0
  5. data/Rakefile +2 -0
  6. data/asposeocrjava.gemspec +27 -0
  7. data/config/aspose.yml +5 -0
  8. data/data/SpanishLanguageResources.zip +0 -0
  9. data/data/multipage.tiff +0 -0
  10. data/data/ocr.png +0 -0
  11. data/data/sample_omr.jpg +0 -0
  12. data/data/spanish.png +0 -0
  13. data/data/template.amr +76 -0
  14. data/lib/asposeocrjava.rb +71 -0
  15. data/lib/asposeocrjava/OCR/applycorrectionfilters.rb +37 -0
  16. data/lib/asposeocrjava/OCR/correctspellings.rb +22 -0
  17. data/lib/asposeocrjava/OCR/detectreading.rb +22 -0
  18. data/lib/asposeocrjava/OCR/detecttextblocks.rb +22 -0
  19. data/lib/asposeocrjava/OCR/extracttextfrompartofimage.rb +40 -0
  20. data/lib/asposeocrjava/OCR/gettextparthierarchyoftext.rb +40 -0
  21. data/lib/asposeocrjava/OCR/ignorenontextualblocks.rb +22 -0
  22. data/lib/asposeocrjava/OCR/loadimagefromurl.rb +17 -0
  23. data/lib/asposeocrjava/OCR/multipagetiff.rb +25 -0
  24. data/lib/asposeocrjava/OCR/readpartinformationoftext.rb +34 -0
  25. data/lib/asposeocrjava/OCR/recognizetext.rb +19 -0
  26. data/lib/asposeocrjava/OCR/recognizewhitelistedcharacters.rb +38 -0
  27. data/lib/asposeocrjava/OCR/workwithdifferentlanguages.rb +25 -0
  28. data/lib/asposeocrjava/OMR/detectimageresolution.rb +48 -0
  29. data/lib/asposeocrjava/OMR/extractomrdatafromimage.rb +45 -0
  30. data/lib/asposeocrjava/OMR/setimageresolution.rb +20 -0
  31. data/lib/asposeocrjava/OMR/setmarkthreshold.rb +74 -0
  32. data/lib/asposeocrjava/asposeocr.rb +2 -0
  33. data/lib/asposeocrjava/ocr.rb +13 -0
  34. data/lib/asposeocrjava/omr.rb +4 -0
  35. data/lib/asposeocrjava/version.rb +3 -0
  36. data/samples/ocr.rb +21 -0
  37. data/samples/omr.rb +12 -0
  38. metadata +137 -0
@@ -0,0 +1,22 @@
module Asposeocrjava
  # Sample: runs OCR on the bundled 'ocr.png' with the engine's RemoveNonText
  # option enabled and prints the recognized text.
  module IgnoreNonTextualBlocks
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize
      # The sample images live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      image_path = base_dir + '/data/' + 'ocr.png'

      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the image file to the engine
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(image_path)
      engine.setImage(source_image)

      # Switch RemoveNonText on
      engine.getConfig().setRemoveNonText(true)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
@@ -0,0 +1,17 @@
module Asposeocrjava
  # Sample: runs OCR on an image fetched from a fixed remote URL and prints
  # the recognized text.
  module LoadImageFromUrl
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize
      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Load the image from its remote location and hand it to the engine
      remote_image = Rjb::import('com.aspose.ocr.ImageStream').fromUrl("http://cdn.aspose.com/tmp/ocr-sample.bmp")
      engine.setImage(remote_image)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
@@ -0,0 +1,25 @@
module Asposeocrjava
  # Sample: runs OCR over every page of the bundled 'multipage.tiff' in a
  # single pass and prints the text of each page.
  module MultipageTiff
    # Executes the sample. One "Text: ..." line is printed per page returned
    # by the engine; nothing is printed when process() returns a falsy value.
    def initialize
      # The sample images live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      tiff_path = base_dir + '/data/' + 'multipage.tiff'

      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the TIFF file to the engine
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(tiff_path)
      engine.setImage(source_image)

      # Ask the engine to process all pages of the TIFF in one run
      engine.setProcessAllPages(true)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      # Print the text of each recognized page
      engine.getPages().each { |page| puts "Text: " + page.getPageText().to_string }
    end
  end
end
@@ -0,0 +1,34 @@
module Asposeocrjava
  # Sample: runs OCR on the bundled 'ocr.png', prints the recognized text and
  # then the formatting details reported for each recognized text part.
  module ReadPartInformationOfText
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize
      # The sample images live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      image_path = base_dir + '/data/' + 'ocr.png'

      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the image file to the engine
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(image_path)
      engine.setImage(source_image)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      # Whole recognized text first
      puts "Text: " + engine.getText().to_string

      # Then one group of lines per recognized part
      parts = engine.getText().getPartsInfo()
      parts.each do |part|
        puts "isItalic : " + part.getItalic().to_s
        puts "isUnderline : " + part.getUnderline().to_s
        puts "isBold : " + part.getBold().to_s
        puts "Text Color : " + part.getTextColor().to_string
        puts "Quality : " + part.getCharactersQuality().to_s
      end
    end
  end
end
@@ -0,0 +1,19 @@
module Asposeocrjava
  # Sample: the minimal OCR flow - load the bundled 'ocr.png', process it and
  # print the recognized text.
  module RecognizeText
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize
      # The sample images live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      image_path = base_dir + '/data/' + 'ocr.png'

      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the image file to the engine
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(image_path)
      engine.setImage(source_image)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
@@ -0,0 +1,38 @@
module Asposeocrjava
  # Sample: runs OCR on the bundled 'ocr.png' with RemoveNonText enabled,
  # prints the recognized text, then uses java.util.regex to find and print
  # the first run of digits in it.
  module RecognizeWhiteListedCharacters
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize()
      # The sample images live in 'data/' four directory levels above this file.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Initialize an instance of OcrEngine
      ocr_engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Retrieve the OcrConfig of the OcrEngine object
      ocr_config = ocr_engine.getConfig()

      # Set the Whitelist property to recognize numbers only.
      # NOTE(review): this call was already disabled in the original sample and
      # is kept disabled here; enable it to actually restrict recognition.
      #ocr_config.setWhitelist(['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'])

      # Set the Image property by loading the image from file path location
      ocr_engine.setImage(Rjb::import('com.aspose.ocr.ImageStream').fromFile(data_dir + 'ocr.png'))

      # Set the RemoveNonText to true
      ocr_config.setRemoveNonText(true)

      # Process the image; stop quietly if recognition did not succeed
      return unless ocr_engine.process()

      # Convert the recognized text once and reuse the string below
      recognized = ocr_engine.getText().to_string
      puts "Text: " + recognized

      # Create a Pattern object matching one or more digits
      pattern = Rjb::import('java.util.regex.Pattern').compile("(\\d+)")

      # Now create matcher object
      matcher = pattern.matcher(recognized)
      return unless matcher.find()

      # group(0) comes back across the bridge as a Ruby String, which has no
      # to_string method; to_s is safe whatever the bridge hands back.
      puts "Found value: " + matcher.group(0).to_s
    end
  end
end
@@ -0,0 +1,25 @@
module Asposeocrjava
  # Sample: runs OCR on the bundled 'spanish.png' after replacing the engine's
  # language set with the resources in 'SpanishLanguageResources.zip'.
  module WorkWithDifferentLanguages
    # Executes the sample. Output goes to standard output; nothing is printed
    # when the engine's process() call returns a falsy value.
    def initialize
      # The sample files live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      sample_dir = base_dir + '/data/'

      # Build the OCR engine through the Rjb bridge
      engine = Rjb::import('com.aspose.ocr.OcrEngine').new

      # Hand the image file to the engine
      source_image = Rjb::import('com.aspose.ocr.ImageStream').fromFile(sample_dir + 'spanish.png')
      engine.setImage(source_image)

      # Drop the languages currently registered (English by default, per the
      # original sample's comment)
      engine.getLanguageContainer().clear()

      # Load the language resources from the zip file and register them
      spanish = Rjb::import('com.aspose.ocr.LanguageFactory').load(sample_dir + "SpanishLanguageResources.zip")
      engine.getLanguageContainer().addLanguage(spanish)

      # Run recognition; stop quietly if it did not succeed
      return unless engine.process()

      puts "Text: " + engine.getText().to_string
    end
  end
end
@@ -0,0 +1,48 @@
module Asposeocrjava
  # Sample: extracts OMR data from the bundled 'sample_omr.jpg' using
  # 'template.amr', with automatic image-resolution detection enabled, and
  # prints every key/value pair found on each page.
  module DetectImageResolution
    # Executes the sample. For each page a "------Page: N" header (N starts
    # at 1) is printed, followed by a "key: ..." and "value: ..." line per key.
    def initialize()
      # The sample files live in 'data/' four directory levels above this file.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Initialize a string with template file location
      template_file = data_dir + "template.amr"

      # Initialize a string with scanned image file location
      image_file = data_dir + "sample_omr.jpg"

      # Load the template using the factory method load
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)

      # Load the scanned image using the factory method load
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Set the auto image resolution detection property
      image.setAutoDetectResolution(true)

      # Create an instance of OmrEngine class
      engine = Rjb::import('com.aspose.omr.OmrEngine').new(template)

      # Extract the OMR data
      result = engine.extractData([image])

      # Iterate over the page data and display it
      result.getPageData().each_with_index do |page, index|
        puts "------Page: " + (index + 1).to_s

        # The key set is read through its string form "[k1, k2, ...]":
        # drop the surrounding brackets, split on commas, and strip the space
        # that follows each comma so every key matches the map entry exactly.
        key_list = page.keySet().to_string
        keys = key_list[1, key_list.length - 2].split(",").map(&:strip)

        keys.each do |key|
          puts "key: " + key
          puts "value: " + page.get(key).to_s
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
module Asposeocrjava
  # Sample: extracts OMR data from the bundled 'sample_omr.jpg' using
  # 'template.amr' and prints every key/value pair found on each page.
  module ExtractOmrDataFromImage
    # Executes the sample. For each page a "------Page: N" header (N starts
    # at 1) is printed, followed by a "key: ..." and "value: ..." line per key.
    def initialize()
      # The sample files live in 'data/' four directory levels above this file.
      data_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__)))) + '/data/'

      # Initialize a string with template file location
      template_file = data_dir + "template.amr"

      # Initialize a string with scanned image file location
      image_file = data_dir + "sample_omr.jpg"

      # Load the template using the factory method load
      template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_file)

      # Load the scanned image using the factory method load
      image = Rjb::import('com.aspose.omr.OmrImage').load(image_file)

      # Create an instance of OmrEngine class
      engine = Rjb::import('com.aspose.omr.OmrEngine').new(template)

      # Extract the OMR data
      result = engine.extractData([image])

      # Iterate over the page data and display it
      result.getPageData().each_with_index do |page, index|
        puts "------Page: " + (index + 1).to_s

        # The key set is read through its string form "[k1, k2, ...]":
        # drop the surrounding brackets, split on commas, and strip the space
        # that follows each comma so every key matches the map entry exactly.
        key_list = page.keySet().to_string
        keys = key_list[1, key_list.length - 2].split(",").map(&:strip)

        keys.each do |key|
          puts "key: " + key
          puts "value: " + page.get(key).to_s
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Asposeocrjava
  # Sample: loads the bundled 'sample_omr.jpg' as an OMR image and overrides
  # its resolution value.
  module SetImageResolution
    # Executes the sample and prints a confirmation line. No OMR extraction is
    # performed here; the sample stops after setting the resolution.
    def initialize
      # The sample files live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))

      # Location of the scanned image
      scan_path = base_dir + '/data/' + "sample_omr.jpg"

      # Load the scanned image using the factory method load
      scan = Rjb::import('com.aspose.omr.OmrImage').load(scan_path)

      # New resolution as a floating-point number; replaces the previous DPI value
      scan.setResolution(210.0)

      # Further processing would go here

      puts "Set image resolution."
    end
  end
end
@@ -0,0 +1,74 @@
module Asposeocrjava
  # Sample: shows two ways of setting the OMR fill threshold - once on the
  # engine's configuration and once on the configuration of a single
  # template page.
  module SetMarkThreshold
    # Runs both variants in order: engine-wide first, then page level.
    def initialize
      set_global_threshold()
      set_page_level_threshold()
    end

    # Sets a fill threshold of 0.12 on the engine configuration, runs the
    # extraction on 'sample_omr.jpg' with 'template.amr', and prints a
    # confirmation line. The extraction result is not used.
    def set_global_threshold
      # The sample files live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      sample_dir = base_dir + '/data/'

      # Locations of the template and of the scanned image
      template_path = sample_dir + "template.amr"
      scan_path = sample_dir + "sample_omr.jpg"

      # Load the template, then the scanned image, via their load factories
      omr_template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_path)
      scan = Rjb::import('com.aspose.omr.OmrImage').load(scan_path)

      # Build the engine for this template
      omr_engine = Rjb::import('com.aspose.omr.OmrEngine').new(omr_template)

      # Set the fill threshold on the engine's configuration
      engine_config = omr_engine.getConfiguration()
      engine_config.setFillThreshold(0.12)

      # Extract the OMR data
      omr_engine.extractData([scan])

      puts "Set global threshold."
    end

    # Attaches a fresh OmrConfig with a fill threshold of 0.21 to the first
    # page of the template, runs the extraction on 'sample_omr.jpg', and
    # prints a confirmation line. The extraction result is not used.
    def set_page_level_threshold
      # The sample files live in 'data/' four directory levels above this file.
      base_dir = File.dirname(File.dirname(File.dirname(File.dirname(__FILE__))))
      sample_dir = base_dir + '/data/'

      # Locations of the template and of the scanned image
      template_path = sample_dir + "template.amr"
      scan_path = sample_dir + "sample_omr.jpg"

      # Load the template, then the scanned image, via their load factories
      omr_template = Rjb::import('com.aspose.omr.OmrTemplate').load(template_path)
      scan = Rjb::import('com.aspose.omr.OmrImage').load(scan_path)

      # First page of the template
      first_page = omr_template.getPages().getItem(0)

      # Give that page its own configuration and set the fill threshold on it
      first_page.setConfiguration(Rjb::import('com.aspose.omr.OmrConfig').new)
      first_page.getConfiguration().setFillThreshold(0.21)

      # Build the engine only after the page configuration is in place
      omr_engine = Rjb::import('com.aspose.omr.OmrEngine').new(omr_template)

      # Extract the OMR data
      omr_engine.extractData([scan])

      puts "Set page level threshold."
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require_relative 'ocr'
2
+ require_relative 'omr'
@@ -0,0 +1,13 @@
1
+ require_relative 'OCR/recognizetext'
2
+ require_relative 'OCR/readpartinformationoftext'
3
+ require_relative 'OCR/extracttextfrompartofimage'
4
+ require_relative 'OCR/loadimagefromurl'
5
+ require_relative 'OCR/multipagetiff'
6
+ require_relative 'OCR/gettextparthierarchyoftext'
7
+ require_relative 'OCR/applycorrectionfilters'
8
+ require_relative 'OCR/correctspellings'
9
+ require_relative 'OCR/detectreading'
10
+ require_relative 'OCR/detecttextblocks'
11
+ require_relative 'OCR/ignorenontextualblocks'
12
+ require_relative 'OCR/recognizewhitelistedcharacters'
13
+ require_relative 'OCR/workwithdifferentlanguages'
@@ -0,0 +1,4 @@
1
+ require_relative 'OMR/extractomrdatafromimage'
2
+ require_relative 'OMR/setmarkthreshold'
3
+ require_relative 'OMR/setimageresolution'
4
+ require_relative 'OMR/detectimageresolution'
@@ -0,0 +1,3 @@
module Asposeocrjava
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,21 @@
=begin
Please uncomment the include line of the OCR sample you want to execute.
=end

# Load the library relative to this file, so the script works from any
# working directory (a plain require of '../lib/...' resolves against the
# current directory instead).
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::RecognizeText
#include Asposeocrjava::ReadPartInformationOfText
#include Asposeocrjava::ExtractTextFromPartOfImage
#include Asposeocrjava::LoadImageFromUrl
#include Asposeocrjava::MultipageTiff
#include Asposeocrjava::GetTextPartHierarchyOfText
#include Asposeocrjava::ApplyCorrectionFilters
#include Asposeocrjava::CorrectSpellings
#include Asposeocrjava::DetectReading
#include Asposeocrjava::DetectTextBlocks
#include Asposeocrjava::IgnoreNonTextualBlocks
#include Asposeocrjava::RecognizeWhiteListedCharacters
#include Asposeocrjava::WorkWithDifferentLanguages

# Defined by the library loaded above (not shown in this file).
initialize_aspose_ocr
@@ -0,0 +1,12 @@
=begin
Please uncomment the include line of the OMR sample you want to execute.
=end

# Load the library relative to this file, so the script works from any
# working directory (a plain require of '../lib/...' resolves against the
# current directory instead).
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::ExtractOmrDataFromImage
#include Asposeocrjava::SetMarkThreshold
#include Asposeocrjava::SetImageResolution
#include Asposeocrjava::DetectImageResolution

# Defined by the library loaded above (not shown in this file).
initialize_aspose_ocr