adobe_pdfservices_ruby 0.1.2 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +6 -6
- data/lib/adobe_pdfservices_ruby.rb +4 -1
- data/lib/pdfservices/asset.rb +8 -4
- data/lib/pdfservices/operations/base.rb +12 -3
- data/lib/pdfservices/operations/document_generation/internal.rb +3 -3
- data/lib/pdfservices/operations/document_generation.rb +0 -9
- data/lib/pdfservices/operations/extract_pdf.rb +2 -9
- data/lib/pdfservices/operations/html_to_pdf/internal.rb +19 -19
- data/lib/pdfservices/operations/html_to_pdf.rb +0 -9
- data/lib/pdfservices/operations/internal_external_operation.rb +2 -2
- data/lib/pdfservices/operations/ocr/internal.rb +6 -6
- data/lib/pdfservices/operations/ocr.rb +9 -1
- data/lib/pdfservices/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6147a7ab0e87ee51554ec2136b1f21ff3e2d4630760a6ea07432744d8a145699
|
4
|
+
data.tar.gz: 7f0f7c7f48470007c8078531598ca63dc3969628ea44a12b612d221c07703833
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32a89f111dd8b13884f7dd854c59e6e74e8dde5d77adc6035cb2af98527ddccc9cd97d9bd854a7972e33217628fa9b37bbc64bac9b8f30174f660d4bd06f38af
|
7
|
+
data.tar.gz: '028ecf12bb31bf37a3f48137a9d5ad55d18fb23b8026f121773cc36788032e9587e7fc3b75ccbd696cf2895693dd7e8c5eb32e44a63c6e3fcef5ac2562ba7a48'
|
data/CHANGELOG.md
CHANGED
@@ -22,3 +22,16 @@
|
|
22
22
|
- Allow client to be initialized with no secret_key if there's an access_token provided (useful for development)
|
23
23
|
- Asset delete request is properly formed
|
24
24
|
- Use MimeMagic to determine content-type of files
|
25
|
+
|
26
|
+
## [0.1.3] - 2024-01-27
|
27
|
+
|
28
|
+
- Fix incorrect URLs in README
|
29
|
+
- Fix incorrect URLs in gemspec
|
30
|
+
- Add support for document generation via spike
|
31
|
+
- Fix passed blocks not being called for most operations
|
32
|
+
- Add support for html_to_pdf operation via spike
|
33
|
+
- Add support for OCR operation via spike
|
34
|
+
|
35
|
+
## [0.1.4] - 2024-01-29
|
36
|
+
|
37
|
+
- Remove require for multipart parser
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# PDF Services for Ruby
|
2
2
|
|
3
|
-
### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/
|
3
|
+
### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/pdfservices-ruby-sdk)
|
4
4
|
|
5
5
|
This gem provides a Ruby wrapper for the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). It allows you to perform various PDF operations, such as extracting content from PDFs, OCR, HTML to PDF, and document generation.
|
6
6
|
|
@@ -58,13 +58,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
58
58
|
Work is in progress on getting the gem to support all of the operations available in the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). Below is a list of the operations and their current support status:
|
59
59
|
|
60
60
|
- ✅ Extract PDF
|
61
|
-
-
|
62
|
-
-
|
61
|
+
- ✅ OCR
|
62
|
+
- ✅ Internal
|
63
63
|
- ❗ EXPERIMENTAL: External
|
64
|
-
-
|
65
|
-
-
|
64
|
+
- ✅ HTML to PDF
|
65
|
+
- ✅ Internal
|
66
66
|
- ❗ EXPERIMENTAL: External
|
67
|
-
-
|
67
|
+
- ✅ Document Generation:
|
68
68
|
- ✅ Internal
|
69
69
|
- ❗ EXPERIMENTAL: External
|
70
70
|
|
data/lib/adobe_pdfservices_ruby.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'json'
|
4
|
-
require 'multipart_parser/reader'
|
5
4
|
require 'yaml'
|
6
5
|
require 'faraday'
|
7
6
|
require 'mimemagic'
|
@@ -25,9 +24,13 @@ require_relative 'pdfservices/operations/document_generation/external'
|
|
25
24
|
|
26
25
|
# OCR
|
27
26
|
require_relative 'pdfservices/operations/ocr'
|
27
|
+
require_relative 'pdfservices/operations/ocr/internal'
|
28
|
+
require_relative 'pdfservices/operations/ocr/external'
|
28
29
|
|
29
30
|
# HTML to PDF
|
30
31
|
require_relative 'pdfservices/operations/html_to_pdf'
|
32
|
+
require_relative 'pdfservices/operations/html_to_pdf/internal'
|
33
|
+
require_relative 'pdfservices/operations/html_to_pdf/external'
|
31
34
|
|
32
35
|
# Extract PDF
|
33
36
|
require_relative 'pdfservices/operations/extract_pdf'
|
data/lib/pdfservices/asset.rb
CHANGED
@@ -5,14 +5,17 @@ module PdfServices
|
|
5
5
|
attr_reader :id
|
6
6
|
|
7
7
|
def initialize(api, id = nil)
|
8
|
+
# MimeMagic can't detect docx files, and will return `application/zip` so we need to add it manually
|
9
|
+
MimeMagic.add('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
10
|
+
magic: [[0..2000, 'word/']])
|
8
11
|
raise ArgumentError, 'Api is nil' unless api
|
9
12
|
|
10
13
|
@api = api
|
11
14
|
@id = id
|
12
15
|
end
|
13
16
|
|
14
|
-
def upload(file
|
15
|
-
url = presigned_url(
|
17
|
+
def upload(file)
|
18
|
+
url = presigned_url(file:)
|
16
19
|
upload_uri = url['uploadUri']
|
17
20
|
asset_id = url['assetID']
|
18
21
|
|
@@ -39,7 +42,7 @@ module PdfServices
|
|
39
42
|
@api.get(download_uri)
|
40
43
|
end
|
41
44
|
|
42
|
-
def
|
45
|
+
def delete_asset
|
43
46
|
raise AssetError, 'Asset ID is nil' unless @id
|
44
47
|
|
45
48
|
@api.delete("#{ASSETS_ENDPOINT}/#{@id}")
|
@@ -54,9 +57,10 @@ module PdfServices
|
|
54
57
|
}
|
55
58
|
end
|
56
59
|
|
57
|
-
def presigned_url(operation = :upload,
|
60
|
+
def presigned_url(operation = :upload, file: nil)
|
58
61
|
case operation
|
59
62
|
when :upload
|
63
|
+
media_type = file ? MimeMagic.by_magic(file).type : 'application/pdf'
|
60
64
|
response = @api.post(ASSETS_ENDPOINT, body: { mediaType: media_type },
|
61
65
|
headers: { 'Content-Type' => 'application/json' })
|
62
66
|
when :download
|
data/lib/pdfservices/operations/base.rb
CHANGED
@@ -26,6 +26,15 @@ module PdfServices
|
|
26
26
|
handle_polling_result(url, json_response, original_asset, &block)
|
27
27
|
end
|
28
28
|
|
29
|
+
def handle_response(response, asset, &block)
|
30
|
+
unless response.status == 201
|
31
|
+
raise "Unexpected response status from operation endpoint: #{response.status}, #{response.body}"
|
32
|
+
end
|
33
|
+
|
34
|
+
document_url = response.headers['location']
|
35
|
+
poll_document_result document_url, asset, &block
|
36
|
+
end
|
37
|
+
|
29
38
|
private
|
30
39
|
|
31
40
|
def handle_polling_result(url, json_response, original_asset, &block)
|
@@ -64,16 +73,16 @@ module PdfServices
|
|
64
73
|
end
|
65
74
|
|
66
75
|
def handle_polling_done(_json_response, original_asset)
|
67
|
-
original_asset.
|
76
|
+
original_asset.delete_asset
|
68
77
|
end
|
69
78
|
|
70
79
|
def handle_polling_failed(json_response, original_asset)
|
71
|
-
original_asset.
|
80
|
+
original_asset.delete_asset
|
72
81
|
raise PollingError, "Document extraction failed: #{json_response['error']}"
|
73
82
|
end
|
74
83
|
|
75
84
|
def handle_polling_unexpected_status(json_response, original_asset)
|
76
|
-
original_asset.
|
85
|
+
original_asset.delete_asset
|
77
86
|
raise PollingError, "Unexpected status: #{json_response['status']}"
|
78
87
|
end
|
79
88
|
end
|
data/lib/pdfservices/operations/document_generation/internal.rb
CHANGED
@@ -5,7 +5,7 @@ module PdfServices
|
|
5
5
|
class Internal < Operation
|
6
6
|
INTERNAL_OPTIONS = %i[output_format json_data_for_merge fragments notifiers].freeze
|
7
7
|
|
8
|
-
def execute(template_path, options = {})
|
8
|
+
def execute(template_path, options = {}, &block)
|
9
9
|
validate_options(options)
|
10
10
|
asset = upload_asset(template_path)
|
11
11
|
|
@@ -13,7 +13,7 @@ module PdfServices
|
|
13
13
|
body: request_body(asset.id, options),
|
14
14
|
headers: request_headers)
|
15
15
|
|
16
|
-
handle_response(response, asset
|
16
|
+
handle_response(response, asset, &block)
|
17
17
|
end
|
18
18
|
|
19
19
|
private
|
@@ -64,7 +64,7 @@ module PdfServices
|
|
64
64
|
raise ArgumentError, "Invalid options: #{invalid_keys}" unless invalid_keys.empty?
|
65
65
|
end
|
66
66
|
|
67
|
-
def handle_polling_done(json_response, original_asset)
|
67
|
+
def handle_polling_done(json_response, original_asset, &block)
|
68
68
|
asset_id = json_response['asset']['assetID']
|
69
69
|
file = Asset.new(@api).download(asset_id).body
|
70
70
|
super
|
data/lib/pdfservices/operations/document_generation.rb
CHANGED
@@ -9,15 +9,6 @@ module PdfServices
|
|
9
9
|
{ 'Content-Type' => 'application/json' }
|
10
10
|
end
|
11
11
|
|
12
|
-
def handle_response(response, asset_id)
|
13
|
-
unless response.status == 201
|
14
|
-
raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
|
15
|
-
end
|
16
|
-
|
17
|
-
document_url = response.headers['location']
|
18
|
-
poll_document_result document_url, asset_id
|
19
|
-
end
|
20
|
-
|
21
12
|
def internal_class
|
22
13
|
Internal
|
23
14
|
end
|
data/lib/pdfservices/operations/extract_pdf.rb
CHANGED
@@ -8,7 +8,7 @@ module PdfServices
|
|
8
8
|
TABLE_OUTPUT_FORMATS = %w[csv xlsx].freeze
|
9
9
|
RENDITIONS_EXTRACTS = %w[tables figures].freeze
|
10
10
|
|
11
|
-
def execute(source_pdf_path = nil, options = {})
|
11
|
+
def execute(source_pdf_path = nil, options = {}, &block)
|
12
12
|
validate_options(options)
|
13
13
|
@download_zip = options.delete(:download_zip) || false
|
14
14
|
asset = upload_asset(source_pdf_path)
|
@@ -16,7 +16,7 @@ module PdfServices
|
|
16
16
|
response = @api.post(OPERATION_ENDPOINT,
|
17
17
|
body: extract_pdf_request_body(asset.id, options),
|
18
18
|
headers: extract_pdf_request_headers)
|
19
|
-
|
19
|
+
handle_response(response, asset, &block)
|
20
20
|
end
|
21
21
|
|
22
22
|
private
|
@@ -36,13 +36,6 @@ module PdfServices
|
|
36
36
|
{ 'Content-Type' => 'application/json' }
|
37
37
|
end
|
38
38
|
|
39
|
-
def handle_extract_pdf_response(response, asset)
|
40
|
-
raise OperationError, "Extract PDF operation failed: #{response.body}" unless response.status == 201
|
41
|
-
|
42
|
-
polling_url = response.headers['location']
|
43
|
-
poll_document_result polling_url, asset
|
44
|
-
end
|
45
|
-
|
46
39
|
def handle_polling_done(json_response, original_asset)
|
47
40
|
file_key = @download_zip ? 'resource' : 'content'
|
48
41
|
asset_id = json_response[file_key]['assetID']
|
data/lib/pdfservices/operations/html_to_pdf/internal.rb
CHANGED
@@ -6,15 +6,18 @@ module PdfServices
|
|
6
6
|
INTERNAL_OPTIONS = %i[input_url json include_header_footer page_layout notifiers].freeze
|
7
7
|
PAGE_LAYOUT_OPTIONS = %i[page_width page_height].freeze
|
8
8
|
|
9
|
-
def execute(html_file_path, options = {})
|
10
|
-
validate_options(options)
|
11
|
-
|
9
|
+
def execute(html_file_path, options = {}, &block)
|
10
|
+
validate_options(options, html_file_path)
|
11
|
+
|
12
|
+
asset = upload_asset(html_file_path) unless options[:input_url]
|
13
|
+
|
14
|
+
asset_id = asset.id if asset
|
12
15
|
|
13
16
|
response = @api.post(OPERATION_ENDPOINT,
|
14
|
-
body: request_body(
|
17
|
+
body: request_body(asset_id, options),
|
15
18
|
headers: request_headers)
|
16
19
|
|
17
|
-
handle_response(response, asset
|
20
|
+
handle_response(response, asset, &block)
|
18
21
|
end
|
19
22
|
|
20
23
|
private
|
@@ -24,13 +27,13 @@ module PdfServices
|
|
24
27
|
Asset.new(@api).download(asset_id).body
|
25
28
|
end
|
26
29
|
|
27
|
-
def request_body(asset_id, options)
|
28
|
-
body = {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
def request_body(asset_id, options) # rubocop:disable Metrics/AbcSize
|
31
|
+
body = {}
|
32
|
+
body[:includeHeaderFooter] = options[:include_header_footer] if options[:include_header_footer]
|
33
|
+
body[:pageLayout] = options[:page_layout] if options[:page_layout]
|
34
|
+
body[:json] = transform_json(options[:json]) if options[:json]
|
35
|
+
body[:assetID] = asset_id if asset_id
|
36
|
+
body[:inputUrl] = options[:input_url] if options[:input_url]
|
34
37
|
body[:notifiers] = options[:notifiers] if options[:notifiers]
|
35
38
|
body
|
36
39
|
end
|
@@ -39,22 +42,19 @@ module PdfServices
|
|
39
42
|
json.is_a?(String) ? json : json.to_json
|
40
43
|
end
|
41
44
|
|
42
|
-
def validate_options(options)
|
45
|
+
def validate_options(options, source = nil)
|
43
46
|
raise ArgumentError, 'Invalid options' unless options.is_a?(Hash)
|
44
47
|
|
45
48
|
options.each_key do |key|
|
46
49
|
raise ArgumentError, "Invalid option: #{key}" unless INTERNAL_OPTIONS.include?(key)
|
47
50
|
end
|
48
51
|
|
49
|
-
|
52
|
+
validate_source(source, options)
|
50
53
|
validate_page_layout_options(options[:page_layout]) if options[:page_layout]
|
51
54
|
end
|
52
55
|
|
53
|
-
def
|
54
|
-
|
55
|
-
required_keys.each do |key|
|
56
|
-
raise ArgumentError, "Missing required option: #{key}" unless options.key?(key)
|
57
|
-
end
|
56
|
+
def validate_source(source, options)
|
57
|
+
raise OperationError, "Cannot specify both 'input_url' and a HTML file" if options[:input_url] && source
|
58
58
|
end
|
59
59
|
|
60
60
|
def validate_page_layout_options(options)
|
data/lib/pdfservices/operations/html_to_pdf.rb
CHANGED
@@ -9,15 +9,6 @@ module PdfServices
|
|
9
9
|
{ 'Content-Type' => 'application/json' }
|
10
10
|
end
|
11
11
|
|
12
|
-
def handle_response(response, asset_id)
|
13
|
-
unless response.status == 201
|
14
|
-
raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
|
15
|
-
end
|
16
|
-
|
17
|
-
document_url = response.headers['location']
|
18
|
-
poll_document_result document_url, asset_id
|
19
|
-
end
|
20
|
-
|
21
12
|
def internal_class
|
22
13
|
Internal
|
23
14
|
end
|
data/lib/pdfservices/operations/internal_external_operation.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module PdfServices
|
2
2
|
module InternalExternalOperation
|
3
3
|
class Operation < Base::Operation
|
4
|
-
def execute(source_file_path, options = {})
|
4
|
+
def execute(source_file_path, options = {}, &block)
|
5
5
|
operation_class = switch_on_type(options)
|
6
|
-
operation_class.new(@api).execute(source_file_path, options)
|
6
|
+
operation_class.new(@api).execute(source_file_path, options, &block)
|
7
7
|
end
|
8
8
|
|
9
9
|
private
|
data/lib/pdfservices/operations/ocr/internal.rb
CHANGED
@@ -5,21 +5,21 @@ module PdfServices
|
|
5
5
|
class Internal < Operation
|
6
6
|
INTERNAL_OPTIONS = %i[ocr_lang ocr_type notifiers].freeze
|
7
7
|
|
8
|
-
def execute(
|
8
|
+
def execute(source_pdf, options = {}, &block)
|
9
9
|
validate_options(options)
|
10
|
-
asset = upload_asset(
|
10
|
+
asset = upload_asset(source_pdf)
|
11
11
|
|
12
12
|
response = @api.post(OPERATION_ENDPOINT,
|
13
|
-
body: request_body(asset.id, options),
|
14
|
-
headers: request_headers)
|
13
|
+
body: request_body(asset.id, options), headers: { 'Content-Type' => 'application/json' })
|
15
14
|
|
16
|
-
handle_response(response, asset
|
15
|
+
handle_response(response, asset, &block)
|
17
16
|
end
|
18
17
|
|
19
18
|
private
|
20
19
|
|
21
|
-
def handle_polling_done(json_response,
|
20
|
+
def handle_polling_done(json_response, _original_asset)
|
22
21
|
asset_id = json_response['asset']['assetID']
|
22
|
+
super
|
23
23
|
Asset.new(@api).download(asset_id).body
|
24
24
|
end
|
25
25
|
|
data/lib/pdfservices/operations/ocr.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
module PdfServices
|
4
4
|
module Ocr
|
5
5
|
class Operation < InternalExternalOperation::Operation
|
6
|
-
|
6
|
+
OPERATION_ENDPOINT = "#{BASE_ENDPOINT}ocr".freeze
|
7
7
|
OCR_LANGS = %w[
|
8
8
|
da-DK lt-LT sl-SI el-GR ru-RU en-US zh-HK hu-HU et-EE
|
9
9
|
pt-BR uk-UA nb-NO pl-PL lv-LV fi-FI ja-JP es-ES bg-BG
|
@@ -16,6 +16,14 @@ module PdfServices
|
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
|
+
def internal_class
|
20
|
+
Internal
|
21
|
+
end
|
22
|
+
|
23
|
+
def external_class
|
24
|
+
External
|
25
|
+
end
|
26
|
+
|
19
27
|
def validate_ocr_lang_option(ocr_lang)
|
20
28
|
raise ArgumentError, "Invalid ocr_lang option: #{ocr_lang}" unless OCR_LANGS.include?(ocr_lang)
|
21
29
|
end
|
data/lib/pdfservices/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adobe_pdfservices_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jimmy Bosse
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-01-
|
12
|
+
date: 2024-01-29 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|