cloudmersive-ocr-api-client 1.3.1 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -5
- data/docs/ImageOcrApi.md +6 -2
- data/docs/ImageToWordsWithLocationResult.md +1 -1
- data/docs/OcrPageResultWithLinesWithLocation.md +10 -0
- data/docs/OcrPageResultWithWordsWithLocation.md +10 -0
- data/docs/PdfOcrApi.md +122 -0
- data/docs/PdfToLinesWithLocationResult.md +9 -0
- data/docs/PdfToWordsWithLocationResult.md +9 -0
- data/lib/cloudmersive-ocr-api-client.rb +4 -0
- data/lib/cloudmersive-ocr-api-client/api/image_ocr_api.rb +6 -0
- data/lib/cloudmersive-ocr-api-client/api/pdf_ocr_api.rb +124 -0
- data/lib/cloudmersive-ocr-api-client/models/image_to_words_with_location_result.rb +1 -0
- data/lib/cloudmersive-ocr-api-client/models/ocr_page_result_with_lines_with_location.rb +210 -0
- data/lib/cloudmersive-ocr-api-client/models/ocr_page_result_with_words_with_location.rb +210 -0
- data/lib/cloudmersive-ocr-api-client/models/pdf_to_lines_with_location_result.rb +199 -0
- data/lib/cloudmersive-ocr-api-client/models/pdf_to_words_with_location_result.rb +199 -0
- data/lib/cloudmersive-ocr-api-client/version.rb +1 -1
- data/spec/api/image_ocr_api_spec.rb +2 -0
- data/spec/api/pdf_ocr_api_spec.rb +28 -0
- data/spec/models/ocr_page_result_with_lines_with_location_spec.rb +54 -0
- data/spec/models/ocr_page_result_with_words_with_location_spec.rb +54 -0
- data/spec/models/pdf_to_lines_with_location_result_spec.rb +48 -0
- data/spec/models/pdf_to_words_with_location_result_spec.rb +48 -0
- metadata +14 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff04f846b8311bd12821460fa222360a4da5e2fe5e19e0a44474a4b2f2a68ab6
|
4
|
+
data.tar.gz: b4b8548a3506dcc5dcc97927a4dbc32f0c82585e7939632a37e2fbf2c6c11ea0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 41303026201fcc1208f9ac1574254a8cfb6b13d3cec4baef7b70f851e9f50e83f8268b34e597887a13da6c80725ce0f2c83a396d15c96f5c33c563eb3902693f
|
7
|
+
data.tar.gz: 487c14642e58d7854477ce89f4c109cf62918484de6ceb564d06f10c9f3962aaa9495144c8b500edfc757063451ca12b3a372677ee311df6ddcf1cf8220f9f2f
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@ The powerful Optical Character Recognition (OCR) APIs let you convert scanned im
|
|
7
7
|
This SDK is automatically generated by the [Swagger Codegen](https://github.com/swagger-api/swagger-codegen) project:
|
8
8
|
|
9
9
|
- API version: v1
|
10
|
-
- Package version: 1.3.
|
10
|
+
- Package version: 1.3.2
|
11
11
|
- Build package: io.swagger.codegen.languages.RubyClientCodegen
|
12
12
|
|
13
13
|
## Installation
|
@@ -23,15 +23,15 @@ gem build cloudmersive-ocr-api-client.gemspec
|
|
23
23
|
Then either install the gem locally:
|
24
24
|
|
25
25
|
```shell
|
26
|
-
gem install ./cloudmersive-ocr-api-client-1.3.
|
26
|
+
gem install ./cloudmersive-ocr-api-client-1.3.2.gem
|
27
27
|
```
|
28
|
-
(for development, run `gem install --dev ./cloudmersive-ocr-api-client-1.3.
|
28
|
+
(for development, run `gem install --dev ./cloudmersive-ocr-api-client-1.3.2.gem` to install the development dependencies)
|
29
29
|
|
30
30
|
or publish the gem to a gem hosting service, e.g. [RubyGems](https://rubygems.org/).
|
31
31
|
|
32
32
|
Finally add this to the Gemfile:
|
33
33
|
|
34
|
-
gem 'cloudmersive-ocr-api-client', '~> 1.3.
|
34
|
+
gem 'cloudmersive-ocr-api-client', '~> 1.3.2'
|
35
35
|
|
36
36
|
### Install from Git
|
37
37
|
|
@@ -67,7 +67,8 @@ api_instance = CloudmersiveOcrApiClient::ImageOcrApi.new
|
|
67
67
|
image_file = File.new("/path/to/file.txt") # File | Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
68
68
|
|
69
69
|
opts = {
|
70
|
-
language: "language_example" # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
70
|
+
language: "language_example", # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
71
|
+
preprocessing: "preprocessing_example" # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
71
72
|
}
|
72
73
|
|
73
74
|
begin
|
@@ -90,6 +91,8 @@ Class | Method | HTTP request | Description
|
|
90
91
|
*CloudmersiveOcrApiClient::ImageOcrApi* | [**image_ocr_image_words_with_location**](docs/ImageOcrApi.md#image_ocr_image_words_with_location) | **POST** /ocr/image/to/words-with-location | Convert a scanned image into words with location
|
91
92
|
*CloudmersiveOcrApiClient::ImageOcrApi* | [**image_ocr_photo_to_text**](docs/ImageOcrApi.md#image_ocr_photo_to_text) | **POST** /ocr/photo/toText | Convert a photo of a document into text
|
92
93
|
*CloudmersiveOcrApiClient::ImageOcrApi* | [**image_ocr_post**](docs/ImageOcrApi.md#image_ocr_post) | **POST** /ocr/image/toText | Convert a scanned image into text
|
94
|
+
*CloudmersiveOcrApiClient::PdfOcrApi* | [**pdf_ocr_pdf_to_lines_with_location**](docs/PdfOcrApi.md#pdf_ocr_pdf_to_lines_with_location) | **POST** /ocr/pdf/to/lines-with-location | Convert a PDF into text lines with location
|
95
|
+
*CloudmersiveOcrApiClient::PdfOcrApi* | [**pdf_ocr_pdf_to_words_with_location**](docs/PdfOcrApi.md#pdf_ocr_pdf_to_words_with_location) | **POST** /ocr/pdf/to/words-with-location | Convert a PDF into words with location
|
93
96
|
*CloudmersiveOcrApiClient::PdfOcrApi* | [**pdf_ocr_post**](docs/PdfOcrApi.md#pdf_ocr_post) | **POST** /ocr/pdf/toText | Converts an uploaded image in common formats such as JPEG, PNG into text via Optical Character Recognition.
|
94
97
|
*CloudmersiveOcrApiClient::PreprocessingApi* | [**preprocessing_unrotate**](docs/PreprocessingApi.md#preprocessing_unrotate) | **POST** /ocr/preprocessing/image/unrotate | Detect and unrotate a document image
|
95
98
|
*CloudmersiveOcrApiClient::PreprocessingApi* | [**preprocessing_unskew**](docs/PreprocessingApi.md#preprocessing_unskew) | **POST** /ocr/preprocessing/image/unskew | Detect and unskew a photo of a document
|
@@ -102,8 +105,12 @@ Class | Method | HTTP request | Description
|
|
102
105
|
- [CloudmersiveOcrApiClient::ImageToWordsWithLocationResult](docs/ImageToWordsWithLocationResult.md)
|
103
106
|
- [CloudmersiveOcrApiClient::OcrLineElement](docs/OcrLineElement.md)
|
104
107
|
- [CloudmersiveOcrApiClient::OcrPageResult](docs/OcrPageResult.md)
|
108
|
+
- [CloudmersiveOcrApiClient::OcrPageResultWithLinesWithLocation](docs/OcrPageResultWithLinesWithLocation.md)
|
109
|
+
- [CloudmersiveOcrApiClient::OcrPageResultWithWordsWithLocation](docs/OcrPageResultWithWordsWithLocation.md)
|
105
110
|
- [CloudmersiveOcrApiClient::OcrWordElement](docs/OcrWordElement.md)
|
111
|
+
- [CloudmersiveOcrApiClient::PdfToLinesWithLocationResult](docs/PdfToLinesWithLocationResult.md)
|
106
112
|
- [CloudmersiveOcrApiClient::PdfToTextResponse](docs/PdfToTextResponse.md)
|
113
|
+
- [CloudmersiveOcrApiClient::PdfToWordsWithLocationResult](docs/PdfToWordsWithLocationResult.md)
|
107
114
|
|
108
115
|
|
109
116
|
## Documentation for Authorization
|
data/docs/ImageOcrApi.md
CHANGED
@@ -34,7 +34,8 @@ api_instance = CloudmersiveOcrApiClient::ImageOcrApi.new
|
|
34
34
|
image_file = File.new("/path/to/file.txt") # File | Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
35
35
|
|
36
36
|
opts = {
|
37
|
-
language: "language_example" # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
37
|
+
language: "language_example", # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
38
|
+
preprocessing: "preprocessing_example" # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
38
39
|
}
|
39
40
|
|
40
41
|
begin
|
@@ -52,6 +53,7 @@ Name | Type | Description | Notes
|
|
52
53
|
------------- | ------------- | ------------- | -------------
|
53
54
|
**image_file** | **File**| Image file to perform OCR on. Common file formats such as PNG, JPEG are supported. |
|
54
55
|
**language** | **String**| Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) | [optional]
|
56
|
+
**preprocessing** | **String**| Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). | [optional]
|
55
57
|
|
56
58
|
### Return type
|
57
59
|
|
@@ -92,7 +94,8 @@ api_instance = CloudmersiveOcrApiClient::ImageOcrApi.new
|
|
92
94
|
image_file = File.new("/path/to/file.txt") # File | Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
93
95
|
|
94
96
|
opts = {
|
95
|
-
language: "language_example" # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
97
|
+
language: "language_example", # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
98
|
+
preprocessing: "preprocessing_example" # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
96
99
|
}
|
97
100
|
|
98
101
|
begin
|
@@ -110,6 +113,7 @@ Name | Type | Description | Notes
|
|
110
113
|
------------- | ------------- | ------------- | -------------
|
111
114
|
**image_file** | **File**| Image file to perform OCR on. Common file formats such as PNG, JPEG are supported. |
|
112
115
|
**language** | **String**| Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) | [optional]
|
116
|
+
**preprocessing** | **String**| Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). | [optional]
|
113
117
|
|
114
118
|
### Return type
|
115
119
|
|
@@ -4,6 +4,6 @@
|
|
4
4
|
Name | Type | Description | Notes
|
5
5
|
------------ | ------------- | ------------- | -------------
|
6
6
|
**successful** | **BOOLEAN** | | [optional]
|
7
|
-
**words** | [**Array<OcrWordElement>**](OcrWordElement.md) |
|
7
|
+
**words** | [**Array<OcrWordElement>**](OcrWordElement.md) | Word elements in the image | [optional]
|
8
8
|
|
9
9
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# CloudmersiveOcrApiClient::OcrPageResultWithLinesWithLocation
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
Name | Type | Description | Notes
|
5
|
+
------------ | ------------- | ------------- | -------------
|
6
|
+
**successful** | **BOOLEAN** | | [optional]
|
7
|
+
**page_number** | **Integer** | Page number of the page that was OCR-ed, starting with 1 for the first page in the PDF file | [optional]
|
8
|
+
**lines** | [**Array<OcrLineElement>**](OcrLineElement.md) | Word elements in the image | [optional]
|
9
|
+
|
10
|
+
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# CloudmersiveOcrApiClient::OcrPageResultWithWordsWithLocation
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
Name | Type | Description | Notes
|
5
|
+
------------ | ------------- | ------------- | -------------
|
6
|
+
**successful** | **BOOLEAN** | | [optional]
|
7
|
+
**page_number** | **Integer** | Page number of the page that was OCR-ed, starting with 1 for the first page in the PDF file | [optional]
|
8
|
+
**words** | [**Array<OcrWordElement>**](OcrWordElement.md) | Word elements in the image | [optional]
|
9
|
+
|
10
|
+
|
data/docs/PdfOcrApi.md
CHANGED
@@ -4,9 +4,131 @@ All URIs are relative to *https://api.cloudmersive.com*
|
|
4
4
|
|
5
5
|
Method | HTTP request | Description
|
6
6
|
------------- | ------------- | -------------
|
7
|
+
[**pdf_ocr_pdf_to_lines_with_location**](PdfOcrApi.md#pdf_ocr_pdf_to_lines_with_location) | **POST** /ocr/pdf/to/lines-with-location | Convert a PDF into text lines with location
|
8
|
+
[**pdf_ocr_pdf_to_words_with_location**](PdfOcrApi.md#pdf_ocr_pdf_to_words_with_location) | **POST** /ocr/pdf/to/words-with-location | Convert a PDF into words with location
|
7
9
|
[**pdf_ocr_post**](PdfOcrApi.md#pdf_ocr_post) | **POST** /ocr/pdf/toText | Converts an uploaded image in common formats such as JPEG, PNG into text via Optical Character Recognition.
|
8
10
|
|
9
11
|
|
12
|
+
# **pdf_ocr_pdf_to_lines_with_location**
|
13
|
+
> PdfToLinesWithLocationResult pdf_ocr_pdf_to_lines_with_location(image_file, opts)
|
14
|
+
|
15
|
+
Convert a PDF into text lines with location
|
16
|
+
|
17
|
+
Converts a PDF into lines/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
18
|
+
|
19
|
+
### Example
|
20
|
+
```ruby
|
21
|
+
# load the gem
|
22
|
+
require 'cloudmersive-ocr-api-client'
|
23
|
+
# setup authorization
|
24
|
+
CloudmersiveOcrApiClient.configure do |config|
|
25
|
+
# Configure API key authorization: Apikey
|
26
|
+
config.api_key['Apikey'] = 'YOUR API KEY'
|
27
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
28
|
+
#config.api_key_prefix['Apikey'] = 'Bearer'
|
29
|
+
end
|
30
|
+
|
31
|
+
api_instance = CloudmersiveOcrApiClient::PdfOcrApi.new
|
32
|
+
|
33
|
+
image_file = File.new("/path/to/file.txt") # File | Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
34
|
+
|
35
|
+
opts = {
|
36
|
+
language: "language_example", # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
37
|
+
preprocessing: "preprocessing_example" # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
38
|
+
}
|
39
|
+
|
40
|
+
begin
|
41
|
+
#Convert a PDF into text lines with location
|
42
|
+
result = api_instance.pdf_ocr_pdf_to_lines_with_location(image_file, opts)
|
43
|
+
p result
|
44
|
+
rescue CloudmersiveOcrApiClient::ApiError => e
|
45
|
+
puts "Exception when calling PdfOcrApi->pdf_ocr_pdf_to_lines_with_location: #{e}"
|
46
|
+
end
|
47
|
+
```
|
48
|
+
|
49
|
+
### Parameters
|
50
|
+
|
51
|
+
Name | Type | Description | Notes
|
52
|
+
------------- | ------------- | ------------- | -------------
|
53
|
+
**image_file** | **File**| Image file to perform OCR on. Common file formats such as PNG, JPEG are supported. |
|
54
|
+
**language** | **String**| Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) | [optional]
|
55
|
+
**preprocessing** | **String**| Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). | [optional]
|
56
|
+
|
57
|
+
### Return type
|
58
|
+
|
59
|
+
[**PdfToLinesWithLocationResult**](PdfToLinesWithLocationResult.md)
|
60
|
+
|
61
|
+
### Authorization
|
62
|
+
|
63
|
+
[Apikey](../README.md#Apikey)
|
64
|
+
|
65
|
+
### HTTP request headers
|
66
|
+
|
67
|
+
- **Content-Type**: multipart/form-data
|
68
|
+
- **Accept**: application/json, text/json, application/xml, text/xml
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
# **pdf_ocr_pdf_to_words_with_location**
|
73
|
+
> PdfToWordsWithLocationResult pdf_ocr_pdf_to_words_with_location(image_file, opts)
|
74
|
+
|
75
|
+
Convert a PDF into words with location
|
76
|
+
|
77
|
+
Converts a PDF into words/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
78
|
+
|
79
|
+
### Example
|
80
|
+
```ruby
|
81
|
+
# load the gem
|
82
|
+
require 'cloudmersive-ocr-api-client'
|
83
|
+
# setup authorization
|
84
|
+
CloudmersiveOcrApiClient.configure do |config|
|
85
|
+
# Configure API key authorization: Apikey
|
86
|
+
config.api_key['Apikey'] = 'YOUR API KEY'
|
87
|
+
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
|
88
|
+
#config.api_key_prefix['Apikey'] = 'Bearer'
|
89
|
+
end
|
90
|
+
|
91
|
+
api_instance = CloudmersiveOcrApiClient::PdfOcrApi.new
|
92
|
+
|
93
|
+
image_file = File.new("/path/to/file.txt") # File | Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
94
|
+
|
95
|
+
opts = {
|
96
|
+
language: "language_example", # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
97
|
+
preprocessing: "preprocessing_example" # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
98
|
+
}
|
99
|
+
|
100
|
+
begin
|
101
|
+
#Convert a PDF into words with location
|
102
|
+
result = api_instance.pdf_ocr_pdf_to_words_with_location(image_file, opts)
|
103
|
+
p result
|
104
|
+
rescue CloudmersiveOcrApiClient::ApiError => e
|
105
|
+
puts "Exception when calling PdfOcrApi->pdf_ocr_pdf_to_words_with_location: #{e}"
|
106
|
+
end
|
107
|
+
```
|
108
|
+
|
109
|
+
### Parameters
|
110
|
+
|
111
|
+
Name | Type | Description | Notes
|
112
|
+
------------- | ------------- | ------------- | -------------
|
113
|
+
**image_file** | **File**| Image file to perform OCR on. Common file formats such as PNG, JPEG are supported. |
|
114
|
+
**language** | **String**| Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) | [optional]
|
115
|
+
**preprocessing** | **String**| Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). | [optional]
|
116
|
+
|
117
|
+
### Return type
|
118
|
+
|
119
|
+
[**PdfToWordsWithLocationResult**](PdfToWordsWithLocationResult.md)
|
120
|
+
|
121
|
+
### Authorization
|
122
|
+
|
123
|
+
[Apikey](../README.md#Apikey)
|
124
|
+
|
125
|
+
### HTTP request headers
|
126
|
+
|
127
|
+
- **Content-Type**: multipart/form-data
|
128
|
+
- **Accept**: application/json, text/json, application/xml, text/xml
|
129
|
+
|
130
|
+
|
131
|
+
|
10
132
|
# **pdf_ocr_post**
|
11
133
|
> PdfToTextResponse pdf_ocr_post(image_file, opts)
|
12
134
|
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# CloudmersiveOcrApiClient::PdfToLinesWithLocationResult
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
Name | Type | Description | Notes
|
5
|
+
------------ | ------------- | ------------- | -------------
|
6
|
+
**successful** | **BOOLEAN** | | [optional]
|
7
|
+
**ocr_pages** | [**Array<OcrPageResultWithLinesWithLocation>**](OcrPageResultWithLinesWithLocation.md) | | [optional]
|
8
|
+
|
9
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
# CloudmersiveOcrApiClient::PdfToWordsWithLocationResult
|
2
|
+
|
3
|
+
## Properties
|
4
|
+
Name | Type | Description | Notes
|
5
|
+
------------ | ------------- | ------------- | -------------
|
6
|
+
**successful** | **BOOLEAN** | | [optional]
|
7
|
+
**ocr_pages** | [**Array<OcrPageResultWithWordsWithLocation>**](OcrPageResultWithWordsWithLocation.md) | | [optional]
|
8
|
+
|
9
|
+
|
@@ -22,8 +22,12 @@ require 'cloudmersive-ocr-api-client/models/image_to_text_response'
|
|
22
22
|
require 'cloudmersive-ocr-api-client/models/image_to_words_with_location_result'
|
23
23
|
require 'cloudmersive-ocr-api-client/models/ocr_line_element'
|
24
24
|
require 'cloudmersive-ocr-api-client/models/ocr_page_result'
|
25
|
+
require 'cloudmersive-ocr-api-client/models/ocr_page_result_with_lines_with_location'
|
26
|
+
require 'cloudmersive-ocr-api-client/models/ocr_page_result_with_words_with_location'
|
25
27
|
require 'cloudmersive-ocr-api-client/models/ocr_word_element'
|
28
|
+
require 'cloudmersive-ocr-api-client/models/pdf_to_lines_with_location_result'
|
26
29
|
require 'cloudmersive-ocr-api-client/models/pdf_to_text_response'
|
30
|
+
require 'cloudmersive-ocr-api-client/models/pdf_to_words_with_location_result'
|
27
31
|
|
28
32
|
# APIs
|
29
33
|
require 'cloudmersive-ocr-api-client/api/image_ocr_api'
|
@@ -25,6 +25,7 @@ module CloudmersiveOcrApiClient
|
|
25
25
|
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
26
26
|
# @param [Hash] opts the optional parameters
|
27
27
|
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
28
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
28
29
|
# @return [ImageToLinesWithLocationResult]
|
29
30
|
def image_ocr_image_lines_with_location(image_file, opts = {})
|
30
31
|
data, _status_code, _headers = image_ocr_image_lines_with_location_with_http_info(image_file, opts)
|
@@ -36,6 +37,7 @@ module CloudmersiveOcrApiClient
|
|
36
37
|
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
37
38
|
# @param [Hash] opts the optional parameters
|
38
39
|
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
40
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
39
41
|
# @return [Array<(ImageToLinesWithLocationResult, Fixnum, Hash)>] ImageToLinesWithLocationResult data, response status code and response headers
|
40
42
|
def image_ocr_image_lines_with_location_with_http_info(image_file, opts = {})
|
41
43
|
if @api_client.config.debugging
|
@@ -58,6 +60,7 @@ module CloudmersiveOcrApiClient
|
|
58
60
|
# HTTP header 'Content-Type'
|
59
61
|
header_params['Content-Type'] = @api_client.select_header_content_type(['multipart/form-data'])
|
60
62
|
header_params[:'language'] = opts[:'language'] if !opts[:'language'].nil?
|
63
|
+
header_params[:'preprocessing'] = opts[:'preprocessing'] if !opts[:'preprocessing'].nil?
|
61
64
|
|
62
65
|
# form parameters
|
63
66
|
form_params = {}
|
@@ -84,6 +87,7 @@ module CloudmersiveOcrApiClient
|
|
84
87
|
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
85
88
|
# @param [Hash] opts the optional parameters
|
86
89
|
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
90
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
87
91
|
# @return [ImageToWordsWithLocationResult]
|
88
92
|
def image_ocr_image_words_with_location(image_file, opts = {})
|
89
93
|
data, _status_code, _headers = image_ocr_image_words_with_location_with_http_info(image_file, opts)
|
@@ -95,6 +99,7 @@ module CloudmersiveOcrApiClient
|
|
95
99
|
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
96
100
|
# @param [Hash] opts the optional parameters
|
97
101
|
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
102
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
98
103
|
# @return [Array<(ImageToWordsWithLocationResult, Fixnum, Hash)>] ImageToWordsWithLocationResult data, response status code and response headers
|
99
104
|
def image_ocr_image_words_with_location_with_http_info(image_file, opts = {})
|
100
105
|
if @api_client.config.debugging
|
@@ -117,6 +122,7 @@ module CloudmersiveOcrApiClient
|
|
117
122
|
# HTTP header 'Content-Type'
|
118
123
|
header_params['Content-Type'] = @api_client.select_header_content_type(['multipart/form-data'])
|
119
124
|
header_params[:'language'] = opts[:'language'] if !opts[:'language'].nil?
|
125
|
+
header_params[:'preprocessing'] = opts[:'preprocessing'] if !opts[:'preprocessing'].nil?
|
120
126
|
|
121
127
|
# form parameters
|
122
128
|
form_params = {}
|
@@ -20,6 +20,130 @@ module CloudmersiveOcrApiClient
|
|
20
20
|
@api_client = api_client
|
21
21
|
end
|
22
22
|
|
23
|
+
# Convert a PDF into text lines with location
|
24
|
+
# Converts a PDF into lines/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
25
|
+
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
26
|
+
# @param [Hash] opts the optional parameters
|
27
|
+
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
28
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
29
|
+
# @return [PdfToLinesWithLocationResult]
|
30
|
+
def pdf_ocr_pdf_to_lines_with_location(image_file, opts = {})
|
31
|
+
data, _status_code, _headers = pdf_ocr_pdf_to_lines_with_location_with_http_info(image_file, opts)
|
32
|
+
return data
|
33
|
+
end
|
34
|
+
|
35
|
+
# Convert a PDF into text lines with location
|
36
|
+
# Converts a PDF into lines/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
37
|
+
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
38
|
+
# @param [Hash] opts the optional parameters
|
39
|
+
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
40
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
41
|
+
# @return [Array<(PdfToLinesWithLocationResult, Fixnum, Hash)>] PdfToLinesWithLocationResult data, response status code and response headers
|
42
|
+
def pdf_ocr_pdf_to_lines_with_location_with_http_info(image_file, opts = {})
|
43
|
+
if @api_client.config.debugging
|
44
|
+
@api_client.config.logger.debug "Calling API: PdfOcrApi.pdf_ocr_pdf_to_lines_with_location ..."
|
45
|
+
end
|
46
|
+
# verify the required parameter 'image_file' is set
|
47
|
+
if @api_client.config.client_side_validation && image_file.nil?
|
48
|
+
fail ArgumentError, "Missing the required parameter 'image_file' when calling PdfOcrApi.pdf_ocr_pdf_to_lines_with_location"
|
49
|
+
end
|
50
|
+
# resource path
|
51
|
+
local_var_path = "/ocr/pdf/to/lines-with-location"
|
52
|
+
|
53
|
+
# query parameters
|
54
|
+
query_params = {}
|
55
|
+
|
56
|
+
# header parameters
|
57
|
+
header_params = {}
|
58
|
+
# HTTP header 'Accept' (if needed)
|
59
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/json', 'application/xml', 'text/xml'])
|
60
|
+
# HTTP header 'Content-Type'
|
61
|
+
header_params['Content-Type'] = @api_client.select_header_content_type(['multipart/form-data'])
|
62
|
+
header_params[:'language'] = opts[:'language'] if !opts[:'language'].nil?
|
63
|
+
header_params[:'preprocessing'] = opts[:'preprocessing'] if !opts[:'preprocessing'].nil?
|
64
|
+
|
65
|
+
# form parameters
|
66
|
+
form_params = {}
|
67
|
+
form_params["imageFile"] = image_file
|
68
|
+
|
69
|
+
# http body (model)
|
70
|
+
post_body = nil
|
71
|
+
auth_names = ['Apikey']
|
72
|
+
data, status_code, headers = @api_client.call_api(:POST, local_var_path,
|
73
|
+
:header_params => header_params,
|
74
|
+
:query_params => query_params,
|
75
|
+
:form_params => form_params,
|
76
|
+
:body => post_body,
|
77
|
+
:auth_names => auth_names,
|
78
|
+
:return_type => 'PdfToLinesWithLocationResult')
|
79
|
+
if @api_client.config.debugging
|
80
|
+
@api_client.config.logger.debug "API called: PdfOcrApi#pdf_ocr_pdf_to_lines_with_location\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
81
|
+
end
|
82
|
+
return data, status_code, headers
|
83
|
+
end
|
84
|
+
|
85
|
+
# Convert a PDF into words with location
|
86
|
+
# Converts a PDF into words/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
87
|
+
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
88
|
+
# @param [Hash] opts the optional parameters
|
89
|
+
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
90
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
91
|
+
# @return [PdfToWordsWithLocationResult]
|
92
|
+
def pdf_ocr_pdf_to_words_with_location(image_file, opts = {})
|
93
|
+
data, _status_code, _headers = pdf_ocr_pdf_to_words_with_location_with_http_info(image_file, opts)
|
94
|
+
return data
|
95
|
+
end
|
96
|
+
|
97
|
+
# Convert a PDF into words with location
|
98
|
+
# Converts a PDF into words/text with location information and other metdata via Optical Character Recognition. This API is intended to be run on scanned documents. If you want to OCR photos (e.g. taken with a smart phone camera), be sure to use the photo/toText API instead, as it is designed to unskew the image first.
|
99
|
+
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|
100
|
+
# @param [Hash] opts the optional parameters
|
101
|
+
# @option opts [String] :language Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
|
102
|
+
# @option opts [String] :preprocessing Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
|
103
|
+
# @return [Array<(PdfToWordsWithLocationResult, Fixnum, Hash)>] PdfToWordsWithLocationResult data, response status code and response headers
|
104
|
+
def pdf_ocr_pdf_to_words_with_location_with_http_info(image_file, opts = {})
|
105
|
+
if @api_client.config.debugging
|
106
|
+
@api_client.config.logger.debug "Calling API: PdfOcrApi.pdf_ocr_pdf_to_words_with_location ..."
|
107
|
+
end
|
108
|
+
# verify the required parameter 'image_file' is set
|
109
|
+
if @api_client.config.client_side_validation && image_file.nil?
|
110
|
+
fail ArgumentError, "Missing the required parameter 'image_file' when calling PdfOcrApi.pdf_ocr_pdf_to_words_with_location"
|
111
|
+
end
|
112
|
+
# resource path
|
113
|
+
local_var_path = "/ocr/pdf/to/words-with-location"
|
114
|
+
|
115
|
+
# query parameters
|
116
|
+
query_params = {}
|
117
|
+
|
118
|
+
# header parameters
|
119
|
+
header_params = {}
|
120
|
+
# HTTP header 'Accept' (if needed)
|
121
|
+
header_params['Accept'] = @api_client.select_header_accept(['application/json', 'text/json', 'application/xml', 'text/xml'])
|
122
|
+
# HTTP header 'Content-Type'
|
123
|
+
header_params['Content-Type'] = @api_client.select_header_content_type(['multipart/form-data'])
|
124
|
+
header_params[:'language'] = opts[:'language'] if !opts[:'language'].nil?
|
125
|
+
header_params[:'preprocessing'] = opts[:'preprocessing'] if !opts[:'preprocessing'].nil?
|
126
|
+
|
127
|
+
# form parameters
|
128
|
+
form_params = {}
|
129
|
+
form_params["imageFile"] = image_file
|
130
|
+
|
131
|
+
# http body (model)
|
132
|
+
post_body = nil
|
133
|
+
auth_names = ['Apikey']
|
134
|
+
data, status_code, headers = @api_client.call_api(:POST, local_var_path,
|
135
|
+
:header_params => header_params,
|
136
|
+
:query_params => query_params,
|
137
|
+
:form_params => form_params,
|
138
|
+
:body => post_body,
|
139
|
+
:auth_names => auth_names,
|
140
|
+
:return_type => 'PdfToWordsWithLocationResult')
|
141
|
+
if @api_client.config.debugging
|
142
|
+
@api_client.config.logger.debug "API called: PdfOcrApi#pdf_ocr_pdf_to_words_with_location\nData: #{data.inspect}\nStatus code: #{status_code}\nHeaders: #{headers}"
|
143
|
+
end
|
144
|
+
return data, status_code, headers
|
145
|
+
end
|
146
|
+
|
23
147
|
# Converts an uploaded image in common formats such as JPEG, PNG into text via Optical Character Recognition.
|
24
148
|
#
|
25
149
|
# @param image_file Image file to perform OCR on. Common file formats such as PNG, JPEG are supported.
|