adobe_pdfservices_ruby 0.1.1 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 897432d0fa7438d335b7ad8d890a55be8acde8246438b8f19d6cb777e99165e2
4
- data.tar.gz: 3243b85b5629490c72938ce5f9f16681a6ad337a1114498a1f3bac327a5ec422
3
+ metadata.gz: 88fb3c5c2291299008c81f86ec8017aa56d61f052f2b102bd26ac0957dad83bc
4
+ data.tar.gz: 40cae430768bb8e7de397e16d28a773bc6ad46d6f9e7463ac578ad8ea84b32a3
5
5
  SHA512:
6
- metadata.gz: d180ada893c3c9204f098446594b2e4a48933941c2e1c0118ec3503d4968f2c712d4c3936c80c7ff79e22695292723640f39a19f723453f201f8182c411a62cb
7
- data.tar.gz: ed6ff19b9ccf5e44aefa4936e0d6ddae8107fd7901e9e62d4d7fd0b55c24ad06707143e054f269ff50fe7c82501a413dc5f0668d690d3464cd393ef3725a1abe
6
+ metadata.gz: 4bbbfff961a25af5bbb585f298a082468c477922814b09345b2bab7ccb5bbaa896e9fd8e2ed5f0311e609ed63ea3287200b42a40f19e1594bf06062765eee93d
7
+ data.tar.gz: 3e9ab8906517b6946b8d1ef25129e3bc96e1263079909f0ed96defcbca3335ff996719044fbab8da70acaa3ffe1c5ae523f72bc7fd12a206ef448b32c8e1d0e1
data/CHANGELOG.md CHANGED
@@ -11,4 +11,23 @@
11
11
  - Change usage to use a single client object
12
12
  - EXPERIMENTAL: Add support for internal and external operations (OCR, htmltopdf, documentgeneration)
13
13
  - Update tests
14
- - Update README
14
+ - Update README
15
+
16
+ ## [0.1.2] - 2024-01-25
17
+
18
+ - Update URLs in gemspec
19
+ - Fix misnamed parameters in extract_pdf operation
20
+ - Remove multiple authorization headers for pre-signed URLs
21
+ - Remove push_host from gemspec
22
+ - Allow client to be initialized with no secret_key if there's an access_token provided (useful for development)
23
+ - Asset delete request is properly formed
24
+ - Use MimeMagic to determine content-type of files
25
+
26
+ ## [0.1.3] - 2024-02-01
27
+
28
+ - Fix incorrect URLs in README
29
+ - Fix incorrect URLs in gemspec
30
+ - Add support for document generation via spike
31
+ - Fix passed blocks not being called for most operations
32
+ - Add support for html_to_pdf operation via spike
33
+ - Add support for OCR operation via spike
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PDF Services for Ruby
2
2
 
3
- ### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/adobe_pdfservices_ruby)
3
+ ### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/pdfservices-ruby-sdk)
4
4
 
5
5
  This gem provides a Ruby wrapper for the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). It allows you to perform various PDF operations, such as extracting content from PDFs, OCR, HTML to PDF, and document generation.
6
6
 
@@ -58,13 +58,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
58
58
  Work is in progress on getting the gem to support all of the operations available in the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). Below is a list of the operations and their current support status:
59
59
 
60
60
  - ✅ Extract PDF
61
- - OCR
62
- - EXPERIMENTAL: Internal
61
+ - OCR
62
+ - Internal
63
63
  - ❗ EXPERIMENTAL: External
64
- - HTML to PDF
65
- - EXPERIMENTAL: Internal
64
+ - HTML to PDF
65
+ - Internal
66
66
  - ❗ EXPERIMENTAL: External
67
- - Document Generation:
67
+ - Document Generation:
68
68
  - ✅ Internal
69
69
  - ❗ EXPERIMENTAL: External
70
70
 
@@ -10,13 +10,13 @@ Gem::Specification.new do |spec|
10
10
 
11
11
  spec.summary = 'Adobe PDF Services Ruby'
12
12
  spec.description = 'An Adobe PDF Services Ruby SDK provides APIs for creating, combining, exporting and manipulating PDFs.'
13
- spec.homepage = 'https://github.com/benterova/adobe_pdfservices_ruby/blob/main/README.md'
13
+ spec.homepage = 'https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md'
14
14
  spec.license = 'MIT'
15
15
  spec.required_ruby_version = '>= 3.0.0'
16
16
 
17
17
  spec.metadata['homepage_uri'] = spec.homepage
18
- spec.metadata['source_code_uri'] = 'https://github.com/benterova/adobe_pdfservices_ruby'
19
- spec.metadata['changelog_uri'] = 'https://github.com/benterova/adobe_pdfservices_ruby/blob/main/CHANGELOG.md'
18
+ spec.metadata['source_code_uri'] = 'https://github.com/benterova/adobe-pdfservices-ruby'
19
+ spec.metadata['changelog_uri'] = 'https://github.com/benterova/adobe-pdfservices-ruby/blob/main/CHANGELOG.md'
20
20
 
21
21
  # Specify which files should be added to the gem when it is released.
22
22
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
@@ -25,9 +25,13 @@ require_relative 'pdfservices/operations/document_generation/external'
25
25
 
26
26
  # OCR
27
27
  require_relative 'pdfservices/operations/ocr'
28
+ require_relative 'pdfservices/operations/ocr/internal'
29
+ require_relative 'pdfservices/operations/ocr/external'
28
30
 
29
31
  # HTML to PDF
30
32
  require_relative 'pdfservices/operations/html_to_pdf'
33
+ require_relative 'pdfservices/operations/html_to_pdf/internal'
34
+ require_relative 'pdfservices/operations/html_to_pdf/external'
31
35
 
32
36
  # Extract PDF
33
37
  require_relative 'pdfservices/operations/extract_pdf'
@@ -37,7 +37,7 @@ module PdfServices
37
37
 
38
38
  def delete(url, headers: {})
39
39
  response = @connection.delete(url) do |req|
40
- build_headers(req, headers)
40
+ build_request(req, headers, nil)
41
41
  end
42
42
  handle_response(response)
43
43
  end
@@ -5,14 +5,17 @@ module PdfServices
5
5
  attr_reader :id
6
6
 
7
7
  def initialize(api, id = nil)
8
+ # MimeMagic can't detect docx files, and will return `application/zip` so we need to add it manually
9
+ MimeMagic.add('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
10
+ magic: [[0..2000, 'word/']])
8
11
  raise ArgumentError, 'Api is nil' unless api
9
12
 
10
13
  @api = api
11
14
  @id = id
12
15
  end
13
16
 
14
- def upload(file, media_type: 'application/pdf')
15
- url = presigned_url(media_type:)
17
+ def upload(file)
18
+ url = presigned_url(file:)
16
19
  upload_uri = url['uploadUri']
17
20
  asset_id = url['assetID']
18
21
 
@@ -39,7 +42,7 @@ module PdfServices
39
42
  @api.get(download_uri)
40
43
  end
41
44
 
42
- def delete
45
+ def delete_asset
43
46
  raise AssetError, 'Asset ID is nil' unless @id
44
47
 
45
48
  @api.delete("#{ASSETS_ENDPOINT}/#{@id}")
@@ -54,9 +57,10 @@ module PdfServices
54
57
  }
55
58
  end
56
59
 
57
- def presigned_url(operation = :upload, media_type: 'application/pdf')
60
+ def presigned_url(operation = :upload, file: nil)
58
61
  case operation
59
62
  when :upload
63
+ media_type = file ? MimeMagic.by_magic(file).type : 'application/pdf'
60
64
  response = @api.post(ASSETS_ENDPOINT, body: { mediaType: media_type },
61
65
  headers: { 'Content-Type' => 'application/json' })
62
66
  when :download
@@ -53,10 +53,6 @@ module PdfServices
53
53
  client_id: @client_id,
54
54
  client_secret: @client_secret
55
55
  }
56
- unless response.status == 200
57
- raise ClientError,
58
- "Something went wrong when trying to refresh the token: #{response.body}"
59
- end
60
56
  end
61
57
 
62
58
  raise "Token refresh error: #{response.status} - #{response.body}" unless response.status == 200
@@ -26,6 +26,15 @@ module PdfServices
26
26
  handle_polling_result(url, json_response, original_asset, &block)
27
27
  end
28
28
 
29
+ def handle_response(response, asset, &block)
30
+ unless response.status == 201
31
+ raise "Unexpected response status from operation endpoint: #{response.status}, #{response.body}"
32
+ end
33
+
34
+ document_url = response.headers['location']
35
+ poll_document_result document_url, asset, &block
36
+ end
37
+
29
38
  private
30
39
 
31
40
  def handle_polling_result(url, json_response, original_asset, &block)
@@ -64,16 +73,16 @@ module PdfServices
64
73
  end
65
74
 
66
75
  def handle_polling_done(_json_response, original_asset)
67
- original_asset.delete
76
+ original_asset.delete_asset
68
77
  end
69
78
 
70
79
  def handle_polling_failed(json_response, original_asset)
71
- original_asset.delete
80
+ original_asset.delete_asset
72
81
  raise PollingError, "Document extraction failed: #{json_response['error']}"
73
82
  end
74
83
 
75
84
  def handle_polling_unexpected_status(json_response, original_asset)
76
- original_asset.delete
85
+ original_asset.delete_asset
77
86
  raise PollingError, "Unexpected status: #{json_response['status']}"
78
87
  end
79
88
  end
@@ -5,7 +5,7 @@ module PdfServices
5
5
  class Internal < Operation
6
6
  INTERNAL_OPTIONS = %i[output_format json_data_for_merge fragments notifiers].freeze
7
7
 
8
- def execute(template_path, options = {})
8
+ def execute(template_path, options = {}, &block)
9
9
  validate_options(options)
10
10
  asset = upload_asset(template_path)
11
11
 
@@ -13,7 +13,7 @@ module PdfServices
13
13
  body: request_body(asset.id, options),
14
14
  headers: request_headers)
15
15
 
16
- handle_response(response, asset.id)
16
+ handle_response(response, asset, &block)
17
17
  end
18
18
 
19
19
  private
@@ -64,7 +64,7 @@ module PdfServices
64
64
  raise ArgumentError, "Invalid options: #{invalid_keys}" unless invalid_keys.empty?
65
65
  end
66
66
 
67
- def handle_polling_done(json_response, original_asset)
67
+ def handle_polling_done(json_response, original_asset, &block)
68
68
  asset_id = json_response['asset']['assetID']
69
69
  file = Asset.new(@api).download(asset_id).body
70
70
  super
@@ -9,15 +9,6 @@ module PdfServices
9
9
  { 'Content-Type' => 'application/json' }
10
10
  end
11
11
 
12
- def handle_response(response, asset_id)
13
- unless response.status == 201
14
- raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
15
- end
16
-
17
- document_url = response.headers['location']
18
- poll_document_result document_url, asset_id
19
- end
20
-
21
12
  def internal_class
22
13
  Internal
23
14
  end
@@ -8,7 +8,7 @@ module PdfServices
8
8
  TABLE_OUTPUT_FORMATS = %w[csv xlsx].freeze
9
9
  RENDITIONS_EXTRACTS = %w[tables figures].freeze
10
10
 
11
- def execute(source_pdf_path = nil, options = {})
11
+ def execute(source_pdf_path = nil, options = {}, &block)
12
12
  validate_options(options)
13
13
  @download_zip = options.delete(:download_zip) || false
14
14
  asset = upload_asset(source_pdf_path)
@@ -16,7 +16,7 @@ module PdfServices
16
16
  response = @api.post(OPERATION_ENDPOINT,
17
17
  body: extract_pdf_request_body(asset.id, options),
18
18
  headers: extract_pdf_request_headers)
19
- handle_extract_pdf_response(response, asset)
19
+ handle_response(response, asset, &block)
20
20
  end
21
21
 
22
22
  private
@@ -36,13 +36,6 @@ module PdfServices
36
36
  { 'Content-Type' => 'application/json' }
37
37
  end
38
38
 
39
- def handle_extract_pdf_response(response, asset)
40
- raise OperationError, "Extract PDF operation failed: #{response.body}" unless response.status == 201
41
-
42
- polling_url = response.headers['location']
43
- poll_document_result polling_url, asset
44
- end
45
-
46
39
  def handle_polling_done(json_response, original_asset)
47
40
  file_key = @download_zip ? 'resource' : 'content'
48
41
  asset_id = json_response[file_key]['assetID']
@@ -6,15 +6,18 @@ module PdfServices
6
6
  INTERNAL_OPTIONS = %i[input_url json include_header_footer page_layout notifiers].freeze
7
7
  PAGE_LAYOUT_OPTIONS = %i[page_width page_height].freeze
8
8
 
9
- def execute(html_file_path, options = {})
10
- validate_options(options)
11
- asset = upload_asset(html_file_path)
9
+ def execute(html_file_path, options = {}, &block)
10
+ validate_options(options, html_file_path)
11
+
12
+ asset = upload_asset(html_file_path) unless options[:input_url]
13
+
14
+ asset_id = asset.id if asset
12
15
 
13
16
  response = @api.post(OPERATION_ENDPOINT,
14
- body: request_body(asset.id, options),
17
+ body: request_body(asset_id, options),
15
18
  headers: request_headers)
16
19
 
17
- handle_response(response, asset.id)
20
+ handle_response(response, asset, &block)
18
21
  end
19
22
 
20
23
  private
@@ -24,13 +27,13 @@ module PdfServices
24
27
  Asset.new(@api).download(asset_id).body
25
28
  end
26
29
 
27
- def request_body(asset_id, options)
28
- body = {
29
- assetID: asset_id,
30
- inputUrl: options.fetch(:input_url, ''),
31
- pageLayout: camelize_keys(options.fetch(:page_layout, {})),
32
- json: transform_json(options.fetch(:json, ''))
33
- }
30
+ def request_body(asset_id, options) # rubocop:disable Metrics/AbcSize
31
+ body = {}
32
+ body[:includeHeaderFooter] = options[:include_header_footer] if options[:include_header_footer]
33
+ body[:pageLayout] = options[:page_layout] if options[:page_layout]
34
+ body[:json] = transform_json(options[:json]) if options[:json]
35
+ body[:assetID] = asset_id if asset_id
36
+ body[:inputUrl] = options[:input_url] if options[:input_url]
34
37
  body[:notifiers] = options[:notifiers] if options[:notifiers]
35
38
  body
36
39
  end
@@ -39,22 +42,19 @@ module PdfServices
39
42
  json.is_a?(String) ? json : json.to_json
40
43
  end
41
44
 
42
- def validate_options(options)
45
+ def validate_options(options, source = nil)
43
46
  raise ArgumentError, 'Invalid options' unless options.is_a?(Hash)
44
47
 
45
48
  options.each_key do |key|
46
49
  raise ArgumentError, "Invalid option: #{key}" unless INTERNAL_OPTIONS.include?(key)
47
50
  end
48
51
 
49
- validate_required_keys(options)
52
+ validate_source(source, options)
50
53
  validate_page_layout_options(options[:page_layout]) if options[:page_layout]
51
54
  end
52
55
 
53
- def validate_required_keys(options)
54
- required_keys = INTERNAL_OPTIONS - %i[page_layout notifiers]
55
- required_keys.each do |key|
56
- raise ArgumentError, "Missing required option: #{key}" unless options.key?(key)
57
- end
56
+ def validate_source(source, options)
57
+ raise OperationError, "Cannot specify both 'input_url' and a HTML file" if options[:input_url] && source
58
58
  end
59
59
 
60
60
  def validate_page_layout_options(options)
@@ -9,15 +9,6 @@ module PdfServices
9
9
  { 'Content-Type' => 'application/json' }
10
10
  end
11
11
 
12
- def handle_response(response, asset_id)
13
- unless response.status == 201
14
- raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
15
- end
16
-
17
- document_url = response.headers['location']
18
- poll_document_result document_url, asset_id
19
- end
20
-
21
12
  def internal_class
22
13
  Internal
23
14
  end
@@ -1,9 +1,9 @@
1
1
  module PdfServices
2
2
  module InternalExternalOperation
3
3
  class Operation < Base::Operation
4
- def execute(source_file_path, options = {})
4
+ def execute(source_file_path, options = {}, &block)
5
5
  operation_class = switch_on_type(options)
6
- operation_class.new(@api).execute(source_file_path, options)
6
+ operation_class.new(@api).execute(source_file_path, options, &block)
7
7
  end
8
8
 
9
9
  private
@@ -5,21 +5,21 @@ module PdfServices
5
5
  class Internal < Operation
6
6
  INTERNAL_OPTIONS = %i[ocr_lang ocr_type notifiers].freeze
7
7
 
8
- def execute(html_file_path, options = {})
8
+ def execute(source_pdf, options = {}, &block)
9
9
  validate_options(options)
10
- asset = upload_asset(html_file_path)
10
+ asset = upload_asset(source_pdf)
11
11
 
12
12
  response = @api.post(OPERATION_ENDPOINT,
13
- body: request_body(asset.id, options),
14
- headers: request_headers)
13
+ body: request_body(asset.id, options), headers: { 'Content-Type' => 'application/json' })
15
14
 
16
- handle_response(response, asset.id)
15
+ handle_response(response, asset, &block)
17
16
  end
18
17
 
19
18
  private
20
19
 
21
- def handle_polling_done(json_response, _original_asset_id)
20
+ def handle_polling_done(json_response, _original_asset)
22
21
  asset_id = json_response['asset']['assetID']
22
+ super
23
23
  Asset.new(@api).download(asset_id).body
24
24
  end
25
25
 
@@ -3,7 +3,7 @@
3
3
  module PdfServices
4
4
  module Ocr
5
5
  class Operation < InternalExternalOperation::Operation
6
- OCR_ENDPOINT = 'https://pdf-services-ue1.adobe.io/operation/ocr'
6
+ OPERATION_ENDPOINT = "#{BASE_ENDPOINT}ocr".freeze
7
7
  OCR_LANGS = %w[
8
8
  da-DK lt-LT sl-SI el-GR ru-RU en-US zh-HK hu-HU et-EE
9
9
  pt-BR uk-UA nb-NO pl-PL lv-LV fi-FI ja-JP es-ES bg-BG
@@ -16,6 +16,14 @@ module PdfServices
16
16
 
17
17
  private
18
18
 
19
+ def internal_class
20
+ Internal
21
+ end
22
+
23
+ def external_class
24
+ External
25
+ end
26
+
19
27
  def validate_ocr_lang_option(ocr_lang)
20
28
  raise ArgumentError, "Invalid ocr_lang option: #{ocr_lang}" unless OCR_LANGS.include?(ocr_lang)
21
29
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PdfServices
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.3'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adobe_pdfservices_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jimmy Bosse
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2024-01-25 00:00:00.000000000 Z
12
+ date: 2024-01-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: faraday
@@ -91,13 +91,13 @@ files:
91
91
  - lib/pdfservices/operations/ocr/external.rb
92
92
  - lib/pdfservices/operations/ocr/internal.rb
93
93
  - lib/pdfservices/version.rb
94
- homepage: https://github.com/benterova/adobe_pdfservices_ruby/blob/main/README.md
94
+ homepage: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md
95
95
  licenses:
96
96
  - MIT
97
97
  metadata:
98
- homepage_uri: https://github.com/benterova/adobe_pdfservices_ruby/blob/main/README.md
99
- source_code_uri: https://github.com/benterova/adobe_pdfservices_ruby
100
- changelog_uri: https://github.com/benterova/adobe_pdfservices_ruby/blob/main/CHANGELOG.md
98
+ homepage_uri: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/README.md
99
+ source_code_uri: https://github.com/benterova/adobe-pdfservices-ruby
100
+ changelog_uri: https://github.com/benterova/adobe-pdfservices-ruby/blob/main/CHANGELOG.md
101
101
  post_install_message:
102
102
  rdoc_options: []
103
103
  require_paths: