adobe_pdfservices_ruby 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +20 -1
- data/README.md +6 -6
- data/adobe_pdfservices_ruby.gemspec +3 -3
- data/lib/adobe_pdfservices_ruby.rb +4 -0
- data/lib/pdfservices/api.rb +1 -1
- data/lib/pdfservices/asset.rb +8 -4
- data/lib/pdfservices/client.rb +0 -4
- data/lib/pdfservices/operations/base.rb +12 -3
- data/lib/pdfservices/operations/document_generation/internal.rb +3 -3
- data/lib/pdfservices/operations/document_generation.rb +0 -9
- data/lib/pdfservices/operations/extract_pdf.rb +2 -9
- data/lib/pdfservices/operations/html_to_pdf/internal.rb +19 -19
- data/lib/pdfservices/operations/html_to_pdf.rb +0 -9
- data/lib/pdfservices/operations/internal_external_operation.rb +2 -2
- data/lib/pdfservices/operations/ocr/internal.rb +6 -6
- data/lib/pdfservices/operations/ocr.rb +9 -1
- data/lib/pdfservices/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 88fb3c5c2291299008c81f86ec8017aa56d61f052f2b102bd26ac0957dad83bc
|
4
|
+
data.tar.gz: 40cae430768bb8e7de397e16d28a773bc6ad46d6f9e7463ac578ad8ea84b32a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4bbbfff961a25af5bbb585f298a082468c477922814b09345b2bab7ccb5bbaa896e9fd8e2ed5f0311e609ed63ea3287200b42a40f19e1594bf06062765eee93d
|
7
|
+
data.tar.gz: 3e9ab8906517b6946b8d1ef25129e3bc96e1263079909f0ed96defcbca3335ff996719044fbab8da70acaa3ffe1c5ae523f72bc7fd12a206ef448b32c8e1d0e1
|
data/CHANGELOG.md
CHANGED
@@ -11,4 +11,23 @@
|
|
11
11
|
- Change usage to use a single client object
|
12
12
|
- EXPERIMENTAL: Add support for internal and external operations (OCR, htmltopdf, documentgeneration)
|
13
13
|
- Update tests
|
14
|
-
- Update README
|
14
|
+
- Update README
|
15
|
+
|
16
|
+
## [0.1.2] - 2024-01-25
|
17
|
+
|
18
|
+
- Update URLs in gemspec
|
19
|
+
- Fix misnamed parameters in extract_pdf operation
|
20
|
+
- Remove multiple authorization headers for pre-signed URLs
|
21
|
+
- Remove push_host from gemspec
|
22
|
+
- Allow client to be initialized with no secret_key if there's an access_token provided (useful for development)
|
23
|
+
- Asset delete request is properly formed
|
24
|
+
- Use MimeMagic to determine content-type of files
|
25
|
+
|
26
|
+
## [0.1.3] - 2024-02-01
|
27
|
+
|
28
|
+
- Fix incorrect URLs in README
|
29
|
+
- Fix incorrect URLs in gemspec
|
30
|
+
- Add support for document generation via spike
|
31
|
+
- Fix passed blocks not being called for most operations
|
32
|
+
- Add support for html_to_pdf operation via spike
|
33
|
+
- Add support for OCR operation via spike
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# PDF Services for Ruby
|
2
2
|
|
3
|
-
### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/
|
3
|
+
### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/pdfservices-ruby-sdk)
|
4
4
|
|
5
5
|
This gem provides a Ruby wrapper for the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). It allows you to perform various PDF operations, such as extracting content from PDFs, OCR, HTML to PDF, and document generation.
|
6
6
|
|
@@ -58,13 +58,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
58
58
|
Work is in progress on getting the gem to support all of the operations available in the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). Below is a list of the operations and their current support status:
|
59
59
|
|
60
60
|
- ✅ Extract PDF
|
61
|
-
-
|
62
|
-
-
|
61
|
+
- ✅ OCR
|
62
|
+
- ✅ Internal
|
63
63
|
- ❗ EXPERIMENTAL: External
|
64
|
-
-
|
65
|
-
-
|
64
|
+
- ✅ HTML to PDF
|
65
|
+
- ✅ Internal
|
66
66
|
- ❗ EXPERIMENTAL: External
|
67
|
-
-
|
67
|
+
- ✅ Document Generation:
|
68
68
|
- ✅ Internal
|
69
69
|
- ❗ EXPERIMENTAL: External
|
70
70
|
|
@@ -10,13 +10,13 @@ Gem::Specification.new do |spec|
|
|
10
10
|
|
11
11
|
spec.summary = 'Adobe PDF Services Ruby'
|
12
12
|
spec.description = 'An Adobe PDF Services Ruby SDK provides APIs for creating, combining, exporting and manipulating PDFs.'
|
13
|
-
spec.homepage = 'https://github.com/benterova/
|
13
|
+
spec.homepage = 'https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md'
|
14
14
|
spec.license = 'MIT'
|
15
15
|
spec.required_ruby_version = '>= 3.0.0'
|
16
16
|
|
17
17
|
spec.metadata['homepage_uri'] = spec.homepage
|
18
|
-
spec.metadata['source_code_uri'] = 'https://github.com/benterova/
|
19
|
-
spec.metadata['changelog_uri'] = 'https://github.com/benterova/
|
18
|
+
spec.metadata['source_code_uri'] = 'https://github.com/benterova/adobe-pdfservices-ruby'
|
19
|
+
spec.metadata['changelog_uri'] = 'https://github.com/benterova/adobe-pdfservices-ruby/blob/main/CHANGELOG.md'
|
20
20
|
|
21
21
|
# Specify which files should be added to the gem when it is released.
|
22
22
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
@@ -25,9 +25,13 @@ require_relative 'pdfservices/operations/document_generation/external'
|
|
25
25
|
|
26
26
|
# OCR
|
27
27
|
require_relative 'pdfservices/operations/ocr'
|
28
|
+
require_relative 'pdfservices/operations/ocr/internal'
|
29
|
+
require_relative 'pdfservices/operations/ocr/external'
|
28
30
|
|
29
31
|
# HTML to PDF
|
30
32
|
require_relative 'pdfservices/operations/html_to_pdf'
|
33
|
+
require_relative 'pdfservices/operations/html_to_pdf/internal'
|
34
|
+
require_relative 'pdfservices/operations/html_to_pdf/external'
|
31
35
|
|
32
36
|
# Extract PDF
|
33
37
|
require_relative 'pdfservices/operations/extract_pdf'
|
data/lib/pdfservices/api.rb
CHANGED
data/lib/pdfservices/asset.rb
CHANGED
@@ -5,14 +5,17 @@ module PdfServices
|
|
5
5
|
attr_reader :id
|
6
6
|
|
7
7
|
def initialize(api, id = nil)
|
8
|
+
# MimeMagic can't detect docx files, and will return `application/zip` so we need to add it manually
|
9
|
+
MimeMagic.add('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
10
|
+
magic: [[0..2000, 'word/']])
|
8
11
|
raise ArgumentError, 'Api is nil' unless api
|
9
12
|
|
10
13
|
@api = api
|
11
14
|
@id = id
|
12
15
|
end
|
13
16
|
|
14
|
-
def upload(file
|
15
|
-
url = presigned_url(
|
17
|
+
def upload(file)
|
18
|
+
url = presigned_url(file:)
|
16
19
|
upload_uri = url['uploadUri']
|
17
20
|
asset_id = url['assetID']
|
18
21
|
|
@@ -39,7 +42,7 @@ module PdfServices
|
|
39
42
|
@api.get(download_uri)
|
40
43
|
end
|
41
44
|
|
42
|
-
def
|
45
|
+
def delete_asset
|
43
46
|
raise AssetError, 'Asset ID is nil' unless @id
|
44
47
|
|
45
48
|
@api.delete("#{ASSETS_ENDPOINT}/#{@id}")
|
@@ -54,9 +57,10 @@ module PdfServices
|
|
54
57
|
}
|
55
58
|
end
|
56
59
|
|
57
|
-
def presigned_url(operation = :upload,
|
60
|
+
def presigned_url(operation = :upload, file: nil)
|
58
61
|
case operation
|
59
62
|
when :upload
|
63
|
+
media_type = file ? MimeMagic.by_magic(file).type : 'application/pdf'
|
60
64
|
response = @api.post(ASSETS_ENDPOINT, body: { mediaType: media_type },
|
61
65
|
headers: { 'Content-Type' => 'application/json' })
|
62
66
|
when :download
|
data/lib/pdfservices/client.rb
CHANGED
@@ -53,10 +53,6 @@ module PdfServices
|
|
53
53
|
client_id: @client_id,
|
54
54
|
client_secret: @client_secret
|
55
55
|
}
|
56
|
-
unless response.status == 200
|
57
|
-
raise ClientError,
|
58
|
-
"Something went wrong when trying to refresh the token: #{response.body}"
|
59
|
-
end
|
60
56
|
end
|
61
57
|
|
62
58
|
raise "Token refresh error: #{response.status} - #{response.body}" unless response.status == 200
|
@@ -26,6 +26,15 @@ module PdfServices
|
|
26
26
|
handle_polling_result(url, json_response, original_asset, &block)
|
27
27
|
end
|
28
28
|
|
29
|
+
def handle_response(response, asset, &block)
|
30
|
+
unless response.status == 201
|
31
|
+
raise "Unexpected response status from operation endpoint: #{response.status}, #{response.body}"
|
32
|
+
end
|
33
|
+
|
34
|
+
document_url = response.headers['location']
|
35
|
+
poll_document_result document_url, asset, &block
|
36
|
+
end
|
37
|
+
|
29
38
|
private
|
30
39
|
|
31
40
|
def handle_polling_result(url, json_response, original_asset, &block)
|
@@ -64,16 +73,16 @@ module PdfServices
|
|
64
73
|
end
|
65
74
|
|
66
75
|
def handle_polling_done(_json_response, original_asset)
|
67
|
-
original_asset.
|
76
|
+
original_asset.delete_asset
|
68
77
|
end
|
69
78
|
|
70
79
|
def handle_polling_failed(json_response, original_asset)
|
71
|
-
original_asset.
|
80
|
+
original_asset.delete_asset
|
72
81
|
raise PollingError, "Document extraction failed: #{json_response['error']}"
|
73
82
|
end
|
74
83
|
|
75
84
|
def handle_polling_unexpected_status(json_response, original_asset)
|
76
|
-
original_asset.
|
85
|
+
original_asset.delete_asset
|
77
86
|
raise PollingError, "Unexpected status: #{json_response['status']}"
|
78
87
|
end
|
79
88
|
end
|
@@ -5,7 +5,7 @@ module PdfServices
|
|
5
5
|
class Internal < Operation
|
6
6
|
INTERNAL_OPTIONS = %i[output_format json_data_for_merge fragments notifiers].freeze
|
7
7
|
|
8
|
-
def execute(template_path, options = {})
|
8
|
+
def execute(template_path, options = {}, &block)
|
9
9
|
validate_options(options)
|
10
10
|
asset = upload_asset(template_path)
|
11
11
|
|
@@ -13,7 +13,7 @@ module PdfServices
|
|
13
13
|
body: request_body(asset.id, options),
|
14
14
|
headers: request_headers)
|
15
15
|
|
16
|
-
handle_response(response, asset
|
16
|
+
handle_response(response, asset, &block)
|
17
17
|
end
|
18
18
|
|
19
19
|
private
|
@@ -64,7 +64,7 @@ module PdfServices
|
|
64
64
|
raise ArgumentError, "Invalid options: #{invalid_keys}" unless invalid_keys.empty?
|
65
65
|
end
|
66
66
|
|
67
|
-
def handle_polling_done(json_response, original_asset)
|
67
|
+
def handle_polling_done(json_response, original_asset, &block)
|
68
68
|
asset_id = json_response['asset']['assetID']
|
69
69
|
file = Asset.new(@api).download(asset_id).body
|
70
70
|
super
|
@@ -9,15 +9,6 @@ module PdfServices
|
|
9
9
|
{ 'Content-Type' => 'application/json' }
|
10
10
|
end
|
11
11
|
|
12
|
-
def handle_response(response, asset_id)
|
13
|
-
unless response.status == 201
|
14
|
-
raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
|
15
|
-
end
|
16
|
-
|
17
|
-
document_url = response.headers['location']
|
18
|
-
poll_document_result document_url, asset_id
|
19
|
-
end
|
20
|
-
|
21
12
|
def internal_class
|
22
13
|
Internal
|
23
14
|
end
|
@@ -8,7 +8,7 @@ module PdfServices
|
|
8
8
|
TABLE_OUTPUT_FORMATS = %w[csv xlsx].freeze
|
9
9
|
RENDITIONS_EXTRACTS = %w[tables figures].freeze
|
10
10
|
|
11
|
-
def execute(source_pdf_path = nil, options = {})
|
11
|
+
def execute(source_pdf_path = nil, options = {}, &block)
|
12
12
|
validate_options(options)
|
13
13
|
@download_zip = options.delete(:download_zip) || false
|
14
14
|
asset = upload_asset(source_pdf_path)
|
@@ -16,7 +16,7 @@ module PdfServices
|
|
16
16
|
response = @api.post(OPERATION_ENDPOINT,
|
17
17
|
body: extract_pdf_request_body(asset.id, options),
|
18
18
|
headers: extract_pdf_request_headers)
|
19
|
-
|
19
|
+
handle_response(response, asset, &block)
|
20
20
|
end
|
21
21
|
|
22
22
|
private
|
@@ -36,13 +36,6 @@ module PdfServices
|
|
36
36
|
{ 'Content-Type' => 'application/json' }
|
37
37
|
end
|
38
38
|
|
39
|
-
def handle_extract_pdf_response(response, asset)
|
40
|
-
raise OperationError, "Extract PDF operation failed: #{response.body}" unless response.status == 201
|
41
|
-
|
42
|
-
polling_url = response.headers['location']
|
43
|
-
poll_document_result polling_url, asset
|
44
|
-
end
|
45
|
-
|
46
39
|
def handle_polling_done(json_response, original_asset)
|
47
40
|
file_key = @download_zip ? 'resource' : 'content'
|
48
41
|
asset_id = json_response[file_key]['assetID']
|
@@ -6,15 +6,18 @@ module PdfServices
|
|
6
6
|
INTERNAL_OPTIONS = %i[input_url json include_header_footer page_layout notifiers].freeze
|
7
7
|
PAGE_LAYOUT_OPTIONS = %i[page_width page_height].freeze
|
8
8
|
|
9
|
-
def execute(html_file_path, options = {})
|
10
|
-
validate_options(options)
|
11
|
-
|
9
|
+
def execute(html_file_path, options = {}, &block)
|
10
|
+
validate_options(options, html_file_path)
|
11
|
+
|
12
|
+
asset = upload_asset(html_file_path) unless options[:input_url]
|
13
|
+
|
14
|
+
asset_id = asset.id if asset
|
12
15
|
|
13
16
|
response = @api.post(OPERATION_ENDPOINT,
|
14
|
-
body: request_body(
|
17
|
+
body: request_body(asset_id, options),
|
15
18
|
headers: request_headers)
|
16
19
|
|
17
|
-
handle_response(response, asset
|
20
|
+
handle_response(response, asset, &block)
|
18
21
|
end
|
19
22
|
|
20
23
|
private
|
@@ -24,13 +27,13 @@ module PdfServices
|
|
24
27
|
Asset.new(@api).download(asset_id).body
|
25
28
|
end
|
26
29
|
|
27
|
-
def request_body(asset_id, options)
|
28
|
-
body = {
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
30
|
+
def request_body(asset_id, options) # rubocop:disable Metrics/AbcSize
|
31
|
+
body = {}
|
32
|
+
body[:includeHeaderFooter] = options[:include_header_footer] if options[:include_header_footer]
|
33
|
+
body[:pageLayout] = options[:page_layout] if options[:page_layout]
|
34
|
+
body[:json] = transform_json(options[:json]) if options[:json]
|
35
|
+
body[:assetID] = asset_id if asset_id
|
36
|
+
body[:inputUrl] = options[:input_url] if options[:input_url]
|
34
37
|
body[:notifiers] = options[:notifiers] if options[:notifiers]
|
35
38
|
body
|
36
39
|
end
|
@@ -39,22 +42,19 @@ module PdfServices
|
|
39
42
|
json.is_a?(String) ? json : json.to_json
|
40
43
|
end
|
41
44
|
|
42
|
-
def validate_options(options)
|
45
|
+
def validate_options(options, source = nil)
|
43
46
|
raise ArgumentError, 'Invalid options' unless options.is_a?(Hash)
|
44
47
|
|
45
48
|
options.each_key do |key|
|
46
49
|
raise ArgumentError, "Invalid option: #{key}" unless INTERNAL_OPTIONS.include?(key)
|
47
50
|
end
|
48
51
|
|
49
|
-
|
52
|
+
validate_source(source, options)
|
50
53
|
validate_page_layout_options(options[:page_layout]) if options[:page_layout]
|
51
54
|
end
|
52
55
|
|
53
|
-
def
|
54
|
-
|
55
|
-
required_keys.each do |key|
|
56
|
-
raise ArgumentError, "Missing required option: #{key}" unless options.key?(key)
|
57
|
-
end
|
56
|
+
def validate_source(source, options)
|
57
|
+
raise OperationError, "Cannot specify both 'input_url' and a HTML file" if options[:input_url] && source
|
58
58
|
end
|
59
59
|
|
60
60
|
def validate_page_layout_options(options)
|
@@ -9,15 +9,6 @@ module PdfServices
|
|
9
9
|
{ 'Content-Type' => 'application/json' }
|
10
10
|
end
|
11
11
|
|
12
|
-
def handle_response(response, asset_id)
|
13
|
-
unless response.status == 201
|
14
|
-
raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
|
15
|
-
end
|
16
|
-
|
17
|
-
document_url = response.headers['location']
|
18
|
-
poll_document_result document_url, asset_id
|
19
|
-
end
|
20
|
-
|
21
12
|
def internal_class
|
22
13
|
Internal
|
23
14
|
end
|
@@ -1,9 +1,9 @@
|
|
1
1
|
module PdfServices
|
2
2
|
module InternalExternalOperation
|
3
3
|
class Operation < Base::Operation
|
4
|
-
def execute(source_file_path, options = {})
|
4
|
+
def execute(source_file_path, options = {}, &block)
|
5
5
|
operation_class = switch_on_type(options)
|
6
|
-
operation_class.new(@api).execute(source_file_path, options)
|
6
|
+
operation_class.new(@api).execute(source_file_path, options, &block)
|
7
7
|
end
|
8
8
|
|
9
9
|
private
|
@@ -5,21 +5,21 @@ module PdfServices
|
|
5
5
|
class Internal < Operation
|
6
6
|
INTERNAL_OPTIONS = %i[ocr_lang ocr_type notifiers].freeze
|
7
7
|
|
8
|
-
def execute(
|
8
|
+
def execute(source_pdf, options = {}, &block)
|
9
9
|
validate_options(options)
|
10
|
-
asset = upload_asset(
|
10
|
+
asset = upload_asset(source_pdf)
|
11
11
|
|
12
12
|
response = @api.post(OPERATION_ENDPOINT,
|
13
|
-
body: request_body(asset.id, options),
|
14
|
-
headers: request_headers)
|
13
|
+
body: request_body(asset.id, options), headers: { 'Content-Type' => 'application/json' })
|
15
14
|
|
16
|
-
handle_response(response, asset
|
15
|
+
handle_response(response, asset, &block)
|
17
16
|
end
|
18
17
|
|
19
18
|
private
|
20
19
|
|
21
|
-
def handle_polling_done(json_response,
|
20
|
+
def handle_polling_done(json_response, _original_asset)
|
22
21
|
asset_id = json_response['asset']['assetID']
|
22
|
+
super
|
23
23
|
Asset.new(@api).download(asset_id).body
|
24
24
|
end
|
25
25
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
module PdfServices
|
4
4
|
module Ocr
|
5
5
|
class Operation < InternalExternalOperation::Operation
|
6
|
-
|
6
|
+
OPERATION_ENDPOINT = "#{BASE_ENDPOINT}ocr".freeze
|
7
7
|
OCR_LANGS = %w[
|
8
8
|
da-DK lt-LT sl-SI el-GR ru-RU en-US zh-HK hu-HU et-EE
|
9
9
|
pt-BR uk-UA nb-NO pl-PL lv-LV fi-FI ja-JP es-ES bg-BG
|
@@ -16,6 +16,14 @@ module PdfServices
|
|
16
16
|
|
17
17
|
private
|
18
18
|
|
19
|
+
def internal_class
|
20
|
+
Internal
|
21
|
+
end
|
22
|
+
|
23
|
+
def external_class
|
24
|
+
External
|
25
|
+
end
|
26
|
+
|
19
27
|
def validate_ocr_lang_option(ocr_lang)
|
20
28
|
raise ArgumentError, "Invalid ocr_lang option: #{ocr_lang}" unless OCR_LANGS.include?(ocr_lang)
|
21
29
|
end
|
data/lib/pdfservices/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: adobe_pdfservices_ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jimmy Bosse
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2024-01-
|
12
|
+
date: 2024-01-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: faraday
|
@@ -91,13 +91,13 @@ files:
|
|
91
91
|
- lib/pdfservices/operations/ocr/external.rb
|
92
92
|
- lib/pdfservices/operations/ocr/internal.rb
|
93
93
|
- lib/pdfservices/version.rb
|
94
|
-
homepage: https://github.com/benterova/
|
94
|
+
homepage: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md
|
95
95
|
licenses:
|
96
96
|
- MIT
|
97
97
|
metadata:
|
98
|
-
homepage_uri: https://github.com/benterova/
|
99
|
-
source_code_uri: https://github.com/benterova/
|
100
|
-
changelog_uri: https://github.com/benterova/
|
98
|
+
homepage_uri: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md
|
99
|
+
source_code_uri: https://github.com/benterova/adobe-pdfservices-ruby
|
100
|
+
changelog_uri: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/CHANGELOG.md
|
101
101
|
post_install_message:
|
102
102
|
rdoc_options: []
|
103
103
|
require_paths:
|