asposeocrjava 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +3 -0
  3. data/LICENSE +21 -0
  4. data/README.md +33 -0
  5. data/Rakefile +2 -0
  6. data/asposeocrjava.gemspec +27 -0
  7. data/config/aspose.yml +5 -0
  8. data/data/SpanishLanguageResources.zip +0 -0
  9. data/data/multipage.tiff +0 -0
  10. data/data/ocr.png +0 -0
  11. data/data/sample_omr.jpg +0 -0
  12. data/data/spanish.png +0 -0
  13. data/data/template.amr +76 -0
  14. data/lib/asposeocrjava.rb +71 -0
  15. data/lib/asposeocrjava/OCR/applycorrectionfilters.rb +37 -0
  16. data/lib/asposeocrjava/OCR/correctspellings.rb +22 -0
  17. data/lib/asposeocrjava/OCR/detectreading.rb +22 -0
  18. data/lib/asposeocrjava/OCR/detecttextblocks.rb +22 -0
  19. data/lib/asposeocrjava/OCR/extracttextfrompartofimage.rb +40 -0
  20. data/lib/asposeocrjava/OCR/gettextparthierarchyoftext.rb +40 -0
  21. data/lib/asposeocrjava/OCR/ignorenontextualblocks.rb +22 -0
  22. data/lib/asposeocrjava/OCR/loadimagefromurl.rb +17 -0
  23. data/lib/asposeocrjava/OCR/multipagetiff.rb +25 -0
  24. data/lib/asposeocrjava/OCR/readpartinformationoftext.rb +34 -0
  25. data/lib/asposeocrjava/OCR/recognizetext.rb +19 -0
  26. data/lib/asposeocrjava/OCR/recognizewhitelistedcharacters.rb +38 -0
  27. data/lib/asposeocrjava/OCR/workwithdifferentlanguages.rb +25 -0
  28. data/lib/asposeocrjava/OMR/detectimageresolution.rb +48 -0
  29. data/lib/asposeocrjava/OMR/extractomrdatafromimage.rb +45 -0
  30. data/lib/asposeocrjava/OMR/setimageresolution.rb +20 -0
  31. data/lib/asposeocrjava/OMR/setmarkthreshold.rb +74 -0
  32. data/lib/asposeocrjava/asposeocr.rb +2 -0
  33. data/lib/asposeocrjava/ocr.rb +13 -0
  34. data/lib/asposeocrjava/omr.rb +4 -0
  35. data/lib/asposeocrjava/version.rb +3 -0
  36. data/samples/ocr.rb +21 -0
  37. data/samples/omr.rb +12 -0
  38. metadata +137 -0
@@ -0,0 +1,22 @@
module Asposeocrjava
  # Sample: run OCR on the bundled `ocr.png` with the engine's RemoveNonText
  # option set to true, then print the recognized text.
  module IgnoreNonTextualBlocks
    # Runs the sample. Prints "Text: ..." only when the engine's `process`
    # call returns a truthy value; otherwise prints nothing.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Load the sample image from disk and hand it to the engine.
      image_stream = Rjb.import('com.aspose.ocr.ImageStream')
      engine.setImage(image_stream.fromFile("#{gem_root}/data/ocr.png"))

      # Switch RemoveNonText on in the engine configuration.
      engine.getConfig.setRemoveNonText(true)

      return unless engine.process

      recognized = engine.getText.to_string
      puts "Text: " + recognized
    end
  end
end
@@ -0,0 +1,17 @@
module Asposeocrjava
  # Sample: run OCR on an image loaded from a remote URL and print the
  # recognized text.
  module LoadImageFromUrl
    # Runs the sample. Nothing is printed unless the engine's `process` call
    # returns a truthy value.
    def initialize
      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Build the image stream from the remote sample bitmap.
      remote_image = Rjb.import('com.aspose.ocr.ImageStream')
                        .fromUrl("http://cdn.aspose.com/tmp/ocr-sample.bmp")
      engine.setImage(remote_image)

      return unless engine.process

      puts "Text: " + engine.getText.to_string
    end
  end
end
@@ -0,0 +1,25 @@
module Asposeocrjava
  # Sample: run OCR over the bundled `multipage.tiff` with ProcessAllPages
  # enabled and print the text of every page the engine returns.
  module MultipageTiff
    # Runs the sample. Prints one "Text: ..." line per page when the engine's
    # `process` call returns a truthy value.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      tiff_stream = Rjb.import('com.aspose.ocr.ImageStream')
                       .fromFile("#{gem_root}/data/multipage.tiff")
      engine.setImage(tiff_stream)

      # Ask the engine to handle all pages of the TIFF in a single run.
      engine.setProcessAllPages(true)

      return unless engine.process

      engine.getPages.each do |tiff_page|
        puts "Text: " + tiff_page.getPageText.to_string
      end
    end
  end
end
@@ -0,0 +1,34 @@
module Asposeocrjava
  # Sample: run OCR on the bundled `ocr.png`, print the recognized text, then
  # print style and quality details for each part of that text.
  module ReadPartInformationOfText
    # Runs the sample. Prints nothing when the engine's `process` call
    # returns a falsy value.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      image_stream = Rjb.import('com.aspose.ocr.ImageStream')
      engine.setImage(image_stream.fromFile("#{gem_root}/data/ocr.png"))

      return unless engine.process

      puts "Text: " + engine.getText.to_string

      # Walk the recognized text part by part and report each part's details.
      engine.getText.getPartsInfo.each do |part|
        puts "isItalic : " + part.getItalic.to_s
        puts "isUnderline : " + part.getUnderline.to_s
        puts "isBold : " + part.getBold.to_s
        puts "Text Color : " + part.getTextColor.to_string
        puts "Quality : " + part.getCharactersQuality.to_s
      end

      # The index-based loop this replaces evaluated to nil; keep that result.
      nil
    end
  end
end
@@ -0,0 +1,19 @@
module Asposeocrjava
  # Sample: run OCR on the bundled `ocr.png` and print the recognized text.
  module RecognizeText
    # Runs the sample. Prints "Text: ..." only when the engine's `process`
    # call returns a truthy value.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      source_image = Rjb.import('com.aspose.ocr.ImageStream')
                        .fromFile("#{gem_root}/data/ocr.png")
      engine.setImage(source_image)

      return unless engine.process

      puts "Text: " + engine.getText.to_string
    end
  end
end
@@ -0,0 +1,38 @@
module Asposeocrjava
  # Sample: run OCR on the bundled `ocr.png` with RemoveNonText enabled, print
  # the recognized text, then print the first run of digits found in it.
  module RecognizeWhiteListedCharacters
    # Runs the sample. Prints nothing when the engine's `process` call
    # returns a falsy value.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      # Fetch the engine configuration. It is only needed by the whitelist
      # call below, which is currently disabled.
      ocr_config = engine.getConfig

      # NOTE(review): the whitelist that would restrict recognition to digits
      # is commented out, so no whitelist is applied — confirm this is intended.
      #ocr_config.setWhitelist(['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'])

      image_stream = Rjb.import('com.aspose.ocr.ImageStream')
      engine.setImage(image_stream.fromFile("#{gem_root}/data/ocr.png"))

      # Switch RemoveNonText on in the engine configuration.
      engine.getConfig.setRemoveNonText(true)

      return unless engine.process

      recognized = engine.getText
      puts "Text: " + recognized.to_string

      # Search the recognized text for one or more consecutive digits using
      # java.util.regex.
      expression = "(\\d+)"
      digit_pattern = Rjb.import('java.util.regex.Pattern').compile(expression)
      digit_matcher = digit_pattern.matcher(recognized.toString)

      # NOTE(review): if the Java bridge hands back a plain Ruby String from
      # group(0), `to_string` would not be defined on it — confirm.
      puts "Found value: " + digit_matcher.group(0).to_string if digit_matcher.find
    end
  end
end
@@ -0,0 +1,25 @@
module Asposeocrjava
  # Sample: run OCR on the bundled `spanish.png` after replacing the engine's
  # languages with the one loaded from `SpanishLanguageResources.zip`.
  module WorkWithDifferentLanguages
    # Runs the sample. Prints "Text: ..." only when the engine's `process`
    # call returns a truthy value.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }
      data_dir = "#{gem_root}/data/"

      engine = Rjb.import('com.aspose.ocr.OcrEngine').new

      image_stream = Rjb.import('com.aspose.ocr.ImageStream')
      engine.setImage(image_stream.fromFile(data_dir + 'spanish.png'))

      # Empty the language container (the original sample notes the default
      # language is English) before adding the replacement.
      engine.getLanguageContainer.clear

      # Load the language resources from the bundled archive and register them.
      spanish = Rjb.import('com.aspose.ocr.LanguageFactory')
                   .load(data_dir + "SpanishLanguageResources.zip")
      engine.getLanguageContainer.addLanguage(spanish)

      return unless engine.process

      puts "Text: " + engine.getText.to_string
    end
  end
end
@@ -0,0 +1,48 @@
module Asposeocrjava
  # Sample: extract OMR data from the bundled `sample_omr.jpg` using
  # `template.amr`, with automatic image-resolution detection enabled, and
  # print every key/value pair of each extracted page.
  module DetectImageResolution
    # Turns the text form of a Java collection (e.g. "[a, b, c]") into an
    # array of element strings.
    #
    # Java's collection `toString` separates elements with ", " (comma plus
    # space). Splitting on a bare "," left a leading space on every key after
    # the first, so those keys never matched on lookup.
    #
    # @param listing [String] bracketed, ", "-separated listing
    # @return [Array<String>] the element strings; empty for "[]"
    def self.key_names(listing)
      # Drop the surrounding brackets, then split on the exact separator.
      listing[1, listing.length - 2].split(', ')
    end

    # Runs the sample and prints the extracted data page by page.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }
      data_dir = "#{gem_root}/data/"

      # Locations of the OMR template and of the scanned image.
      template_file = data_dir + "template.amr"
      image_file = data_dir + "sample_omr.jpg"

      # Load the template and the image through their `load` factory methods.
      template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb.import('com.aspose.omr.OmrImage').load(image_file)

      # Enable automatic image-resolution detection on the image.
      image.setAutoDetectResolution(true)

      # Build the engine from the template and extract the OMR data.
      engine = Rjb.import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      # Print each page's data, numbering pages from 1.
      result.getPageData.each_with_index do |page, position|
        puts "------Page: " + (position + 1).to_s

        DetectImageResolution.key_names(page.keySet.to_string).each do |key|
          value = page.get(key)
          puts "key: " + key.to_s
          puts "value: " + value.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,45 @@
module Asposeocrjava
  # Sample: extract OMR data from the bundled `sample_omr.jpg` using
  # `template.amr` and print every key/value pair of each extracted page.
  module ExtractOmrDataFromImage
    # Turns the text form of a Java collection (e.g. "[a, b, c]") into an
    # array of element strings.
    #
    # Java's collection `toString` separates elements with ", " (comma plus
    # space). Splitting on a bare "," left a leading space on every key after
    # the first, so those keys never matched on lookup.
    #
    # @param listing [String] bracketed, ", "-separated listing
    # @return [Array<String>] the element strings; empty for "[]"
    def self.key_names(listing)
      # Drop the surrounding brackets, then split on the exact separator.
      listing[1, listing.length - 2].split(', ')
    end

    # Runs the sample and prints the extracted data page by page.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }
      data_dir = "#{gem_root}/data/"

      # Locations of the OMR template and of the scanned image.
      template_file = data_dir + "template.amr"
      image_file = data_dir + "sample_omr.jpg"

      # Load the template and the image through their `load` factory methods.
      template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_file)
      image = Rjb.import('com.aspose.omr.OmrImage').load(image_file)

      # Build the engine from the template and extract the OMR data.
      engine = Rjb.import('com.aspose.omr.OmrEngine').new(template)
      result = engine.extractData([image])

      # Print each page's data, numbering pages from 1.
      result.getPageData.each_with_index do |page, position|
        puts "------Page: " + (position + 1).to_s

        ExtractOmrDataFromImage.key_names(page.keySet.to_string).each do |key|
          value = page.get(key)
          puts "key: " + key.to_s
          puts "value: " + value.to_s
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module Asposeocrjava
  # Sample: load the bundled `sample_omr.jpg` as an OMR image and set its
  # resolution explicitly.
  module SetImageResolution
    # Runs the sample and prints a confirmation line. No OMR extraction is
    # performed here.
    def initialize
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }

      # Location of the scanned image.
      scan_path = "#{gem_root}/data/" + "sample_omr.jpg"

      # Load the image through the OmrImage `load` factory method.
      scan = Rjb.import('com.aspose.omr.OmrImage').load(scan_path)

      # Set the resolution as a floating-point value; the original sample
      # notes that this overwrites the previous DPI value.
      scan.setResolution(210.0)

      # Further processing would go here.

      puts "Set image resolution."
    end
  end
end
@@ -0,0 +1,74 @@
module Asposeocrjava
  # Sample: set the OMR fill threshold either on the engine configuration or
  # on a single template page, then extract data from the bundled
  # `sample_omr.jpg` using `template.amr`.
  module SetMarkThreshold
    # Runs both variants in order: engine-wide first, then page-level.
    def initialize
      set_global_threshold
      set_page_level_threshold
    end

    # Sets a fill threshold of 0.12 on the engine configuration, runs the
    # extraction and prints a confirmation line. The extracted data is not
    # inspected.
    def set_global_threshold
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }
      data_dir = "#{gem_root}/data/"

      # Locations of the OMR template and of the scanned image.
      template_path = data_dir + "template.amr"
      scan_path = data_dir + "sample_omr.jpg"

      # Load the template and the image through their `load` factory methods.
      omr_template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_path)
      scan = Rjb.import('com.aspose.omr.OmrImage').load(scan_path)

      omr_engine = Rjb.import('com.aspose.omr.OmrEngine').new(omr_template)

      # Apply the threshold through the engine configuration.
      omr_engine.getConfiguration.setFillThreshold(0.12)

      omr_engine.extractData([scan])

      puts "Set global threshold."
    end

    # Gives the first template page its own configuration with a fill
    # threshold of 0.21, runs the extraction and prints a confirmation line.
    # The extracted data is not inspected.
    def set_page_level_threshold
      # The data directory lives four directory levels above this file.
      gem_root = 4.times.reduce(__FILE__) { |path, _| File.dirname(path) }
      data_dir = "#{gem_root}/data/"

      # Locations of the OMR template and of the scanned image.
      template_path = data_dir + "template.amr"
      scan_path = data_dir + "sample_omr.jpg"

      # Load the template and the image through their `load` factory methods.
      omr_template = Rjb.import('com.aspose.omr.OmrTemplate').load(template_path)
      scan = Rjb.import('com.aspose.omr.OmrImage').load(scan_path)

      # Attach a fresh configuration to the first page and set its threshold.
      first_page = omr_template.getPages.getItem(0)
      first_page.setConfiguration(Rjb.import('com.aspose.omr.OmrConfig').new)
      first_page.getConfiguration.setFillThreshold(0.21)

      # The engine is created only after the page has been configured.
      omr_engine = Rjb.import('com.aspose.omr.OmrEngine').new(omr_template)
      omr_engine.extractData([scan])

      puts "Set page level threshold."
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require_relative 'ocr'
2
+ require_relative 'omr'
@@ -0,0 +1,13 @@
1
+ require_relative 'OCR/recognizetext'
2
+ require_relative 'OCR/readpartinformationoftext'
3
+ require_relative 'OCR/extracttextfrompartofimage'
4
+ require_relative 'OCR/loadimagefromurl'
5
+ require_relative 'OCR/multipagetiff'
6
+ require_relative 'OCR/gettextparthierarchyoftext'
7
+ require_relative 'OCR/applycorrectionfilters'
8
+ require_relative 'OCR/correctspellings'
9
+ require_relative 'OCR/detectreading'
10
+ require_relative 'OCR/detecttextblocks'
11
+ require_relative 'OCR/ignorenontextualblocks'
12
+ require_relative 'OCR/recognizewhitelistedcharacters'
13
+ require_relative 'OCR/workwithdifferentlanguages'
@@ -0,0 +1,4 @@
1
+ require_relative 'OMR/extractomrdatafromimage'
2
+ require_relative 'OMR/setmarkthreshold'
3
+ require_relative 'OMR/setimageresolution'
4
+ require_relative 'OMR/detectimageresolution'
@@ -0,0 +1,3 @@
module Asposeocrjava
  # Release version of the gem. Frozen so the constant's string cannot be
  # mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,21 @@
# Runner script for the OCR samples.
#
# Please uncomment the code which you want to execute.

# Load the library relative to this file rather than the current working
# directory, so the script also works when started from outside `samples/`.
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::RecognizeText
#include Asposeocrjava::ReadPartInformationOfText
#include Asposeocrjava::ExtractTextFromPartOfImage
#include Asposeocrjava::LoadImageFromUrl
#include Asposeocrjava::MultipageTiff
#include Asposeocrjava::GetTextPartHierarchyOfText
#include Asposeocrjava::ApplyCorrectionFilters
#include Asposeocrjava::CorrectSpellings
#include Asposeocrjava::DetectReading
#include Asposeocrjava::DetectTextBlocks
#include Asposeocrjava::IgnoreNonTextualBlocks
#include Asposeocrjava::RecognizeWhiteListedCharacters
#include Asposeocrjava::WorkWithDifferentLanguages

# NOTE(review): `initialize_aspose_ocr` is not defined in this script; it is
# presumably provided by lib/asposeocrjava.rb — confirm.
initialize_aspose_ocr
@@ -0,0 +1,12 @@
# Runner script for the OMR samples.
#
# Please uncomment the code which you want to execute.

# Load the library relative to this file rather than the current working
# directory, so the script also works when started from outside `samples/`.
require_relative '../lib/asposeocrjava'
include Asposeocrjava
#include Asposeocrjava::ExtractOmrDataFromImage
#include Asposeocrjava::SetMarkThreshold
#include Asposeocrjava::SetImageResolution
#include Asposeocrjava::DetectImageResolution

# NOTE(review): `initialize_aspose_ocr` is not defined in this script; it is
# presumably provided by lib/asposeocrjava.rb — confirm.
initialize_aspose_ocr