adobe_pdfservices_ruby 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7874ed45d46a4c10b5a44574d45b49946707cebb0c05447416bcedfbc777e090
4
- data.tar.gz: 2a920b6f69c0abd16245d1f08486387993a5232c2e6bd5f005112dadec28b88d
3
+ metadata.gz: 6147a7ab0e87ee51554ec2136b1f21ff3e2d4630760a6ea07432744d8a145699
4
+ data.tar.gz: 7f0f7c7f48470007c8078531598ca63dc3969628ea44a12b612d221c07703833
5
5
  SHA512:
6
- metadata.gz: 384c4c8555dd64925398fdd6d0c5e5b46d8333893c84684a100ab84c54793f58f754dd515c9e8d426f9a46450b178eea2f4d440c93dde3f5699de9f9d2ac0092
7
- data.tar.gz: a7e0e3c4cf47c017abdb1fa6bc0d61daa83e438f2ce8eb68eeecc8bf87f7de0dca670983e24f97853158019917f4299c925953c64080dccb9a6d477b310224a5
6
+ metadata.gz: 32a89f111dd8b13884f7dd854c59e6e74e8dde5d77adc6035cb2af98527ddccc9cd97d9bd854a7972e33217628fa9b37bbc64bac9b8f30174f660d4bd06f38af
7
+ data.tar.gz: '028ecf12bb31bf37a3f48137a9d5ad55d18fb23b8026f121773cc36788032e9587e7fc3b75ccbd696cf2895693dd7e8c5eb32e44a63c6e3fcef5ac2562ba7a48'
data/CHANGELOG.md CHANGED
@@ -22,3 +22,16 @@
22
22
  - Allow client to be initialized with no secret_key if there's an access_token provided (useful for development)
23
23
  - Asset delete request is properly formed
24
24
  - Use MimeMagic to determine content-type of files
25
+
26
+ ## [0.1.3] - 2024-01-27
27
+
28
+ - Fix incorrect URLs in README
29
+ - Fix incorrect URLs in gemspec
30
+ - Add support for document generation via spike
31
+ - Fix passed blocks not being called for most operations
32
+ - Add support for html_to_pdf operation via spike
33
+ - Add support for OCR operation via spike
34
+
35
+ ## [0.1.4] - 2024-01-29
36
+
37
+ - Remove require for multipart parser
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # PDF Services for Ruby
2
2
 
3
- ### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/adobe_pdfservices_ruby)
3
+ ### Originally forked from [Adobe Document Services PDF Tools SDK for Ruby](https://github.com/arpc/pdfservices-ruby-sdk)
4
4
 
5
5
  This gem provides a Ruby wrapper for the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). It allows you to perform various PDF operations, such as extracting content from PDFs, OCR, HTML to PDF, and document generation.
6
6
 
@@ -58,13 +58,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
58
58
  Work is in progress on getting the gem to support all of the operations available in the [Adobe PDF Services API](https://developer.adobe.com/document-services/docs/overview/). Below is a list of the operations and their current support status:
59
59
 
60
60
  - ✅ Extract PDF
61
- - OCR
62
- - EXPERIMENTAL: Internal
61
+ - OCR
62
+ - Internal
63
63
  - ❗ EXPERIMENTAL: External
64
- - HTML to PDF
65
- - EXPERIMENTAL: Internal
64
+ - HTML to PDF
65
+ - Internal
66
66
  - ❗ EXPERIMENTAL: External
67
- - Document Generation:
67
+ - Document Generation:
68
68
  - ✅ Internal
69
69
  - ❗ EXPERIMENTAL: External
70
70
 
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'json'
4
- require 'multipart_parser/reader'
5
4
  require 'yaml'
6
5
  require 'faraday'
7
6
  require 'mimemagic'
@@ -25,9 +24,13 @@ require_relative 'pdfservices/operations/document_generation/external'
25
24
 
26
25
  # OCR
27
26
  require_relative 'pdfservices/operations/ocr'
27
+ require_relative 'pdfservices/operations/ocr/internal'
28
+ require_relative 'pdfservices/operations/ocr/external'
28
29
 
29
30
  # HTML to PDF
30
31
  require_relative 'pdfservices/operations/html_to_pdf'
32
+ require_relative 'pdfservices/operations/html_to_pdf/internal'
33
+ require_relative 'pdfservices/operations/html_to_pdf/external'
31
34
 
32
35
  # Extract PDF
33
36
  require_relative 'pdfservices/operations/extract_pdf'
@@ -5,14 +5,17 @@ module PdfServices
5
5
  attr_reader :id
6
6
 
7
7
  def initialize(api, id = nil)
8
+ # MimeMagic can't detect docx files, and will return `application/zip` so we need to add it manually
9
+ MimeMagic.add('application/vnd.openxmlformats-officedocument.wordprocessingml.document',
10
+ magic: [[0..2000, 'word/']])
8
11
  raise ArgumentError, 'Api is nil' unless api
9
12
 
10
13
  @api = api
11
14
  @id = id
12
15
  end
13
16
 
14
- def upload(file, media_type: 'application/pdf')
15
- url = presigned_url(media_type:)
17
+ def upload(file)
18
+ url = presigned_url(file:)
16
19
  upload_uri = url['uploadUri']
17
20
  asset_id = url['assetID']
18
21
 
@@ -39,7 +42,7 @@ module PdfServices
39
42
  @api.get(download_uri)
40
43
  end
41
44
 
42
- def delete
45
+ def delete_asset
43
46
  raise AssetError, 'Asset ID is nil' unless @id
44
47
 
45
48
  @api.delete("#{ASSETS_ENDPOINT}/#{@id}")
@@ -54,9 +57,10 @@ module PdfServices
54
57
  }
55
58
  end
56
59
 
57
- def presigned_url(operation = :upload, media_type: 'application/pdf')
60
+ def presigned_url(operation = :upload, file: nil)
58
61
  case operation
59
62
  when :upload
63
+ media_type = file ? MimeMagic.by_magic(file).type : 'application/pdf'
60
64
  response = @api.post(ASSETS_ENDPOINT, body: { mediaType: media_type },
61
65
  headers: { 'Content-Type' => 'application/json' })
62
66
  when :download
@@ -26,6 +26,15 @@ module PdfServices
26
26
  handle_polling_result(url, json_response, original_asset, &block)
27
27
  end
28
28
 
29
+ def handle_response(response, asset, &block)
30
+ unless response.status == 201
31
+ raise "Unexpected response status from operation endpoint: #{response.status}, #{response.body}"
32
+ end
33
+
34
+ document_url = response.headers['location']
35
+ poll_document_result document_url, asset, &block
36
+ end
37
+
29
38
  private
30
39
 
31
40
  def handle_polling_result(url, json_response, original_asset, &block)
@@ -64,16 +73,16 @@ module PdfServices
64
73
  end
65
74
 
66
75
  def handle_polling_done(_json_response, original_asset)
67
- original_asset.delete
76
+ original_asset.delete_asset
68
77
  end
69
78
 
70
79
  def handle_polling_failed(json_response, original_asset)
71
- original_asset.delete
80
+ original_asset.delete_asset
72
81
  raise PollingError, "Document extraction failed: #{json_response['error']}"
73
82
  end
74
83
 
75
84
  def handle_polling_unexpected_status(json_response, original_asset)
76
- original_asset.delete
85
+ original_asset.delete_asset
77
86
  raise PollingError, "Unexpected status: #{json_response['status']}"
78
87
  end
79
88
  end
@@ -5,7 +5,7 @@ module PdfServices
5
5
  class Internal < Operation
6
6
  INTERNAL_OPTIONS = %i[output_format json_data_for_merge fragments notifiers].freeze
7
7
 
8
- def execute(template_path, options = {})
8
+ def execute(template_path, options = {}, &block)
9
9
  validate_options(options)
10
10
  asset = upload_asset(template_path)
11
11
 
@@ -13,7 +13,7 @@ module PdfServices
13
13
  body: request_body(asset.id, options),
14
14
  headers: request_headers)
15
15
 
16
- handle_response(response, asset.id)
16
+ handle_response(response, asset, &block)
17
17
  end
18
18
 
19
19
  private
@@ -64,7 +64,7 @@ module PdfServices
64
64
  raise ArgumentError, "Invalid options: #{invalid_keys}" unless invalid_keys.empty?
65
65
  end
66
66
 
67
- def handle_polling_done(json_response, original_asset)
67
+ def handle_polling_done(json_response, original_asset, &block)
68
68
  asset_id = json_response['asset']['assetID']
69
69
  file = Asset.new(@api).download(asset_id).body
70
70
  super
@@ -9,15 +9,6 @@ module PdfServices
9
9
  { 'Content-Type' => 'application/json' }
10
10
  end
11
11
 
12
- def handle_response(response, asset_id)
13
- unless response.status == 201
14
- raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
15
- end
16
-
17
- document_url = response.headers['location']
18
- poll_document_result document_url, asset_id
19
- end
20
-
21
12
  def internal_class
22
13
  Internal
23
14
  end
@@ -8,7 +8,7 @@ module PdfServices
8
8
  TABLE_OUTPUT_FORMATS = %w[csv xlsx].freeze
9
9
  RENDITIONS_EXTRACTS = %w[tables figures].freeze
10
10
 
11
- def execute(source_pdf_path = nil, options = {})
11
+ def execute(source_pdf_path = nil, options = {}, &block)
12
12
  validate_options(options)
13
13
  @download_zip = options.delete(:download_zip) || false
14
14
  asset = upload_asset(source_pdf_path)
@@ -16,7 +16,7 @@ module PdfServices
16
16
  response = @api.post(OPERATION_ENDPOINT,
17
17
  body: extract_pdf_request_body(asset.id, options),
18
18
  headers: extract_pdf_request_headers)
19
- handle_extract_pdf_response(response, asset)
19
+ handle_response(response, asset, &block)
20
20
  end
21
21
 
22
22
  private
@@ -36,13 +36,6 @@ module PdfServices
36
36
  { 'Content-Type' => 'application/json' }
37
37
  end
38
38
 
39
- def handle_extract_pdf_response(response, asset)
40
- raise OperationError, "Extract PDF operation failed: #{response.body}" unless response.status == 201
41
-
42
- polling_url = response.headers['location']
43
- poll_document_result polling_url, asset
44
- end
45
-
46
39
  def handle_polling_done(json_response, original_asset)
47
40
  file_key = @download_zip ? 'resource' : 'content'
48
41
  asset_id = json_response[file_key]['assetID']
@@ -6,15 +6,18 @@ module PdfServices
6
6
  INTERNAL_OPTIONS = %i[input_url json include_header_footer page_layout notifiers].freeze
7
7
  PAGE_LAYOUT_OPTIONS = %i[page_width page_height].freeze
8
8
 
9
- def execute(html_file_path, options = {})
10
- validate_options(options)
11
- asset = upload_asset(html_file_path)
9
+ def execute(html_file_path, options = {}, &block)
10
+ validate_options(options, html_file_path)
11
+
12
+ asset = upload_asset(html_file_path) unless options[:input_url]
13
+
14
+ asset_id = asset.id if asset
12
15
 
13
16
  response = @api.post(OPERATION_ENDPOINT,
14
- body: request_body(asset.id, options),
17
+ body: request_body(asset_id, options),
15
18
  headers: request_headers)
16
19
 
17
- handle_response(response, asset.id)
20
+ handle_response(response, asset, &block)
18
21
  end
19
22
 
20
23
  private
@@ -24,13 +27,13 @@ module PdfServices
24
27
  Asset.new(@api).download(asset_id).body
25
28
  end
26
29
 
27
- def request_body(asset_id, options)
28
- body = {
29
- assetID: asset_id,
30
- inputUrl: options.fetch(:input_url, ''),
31
- pageLayout: camelize_keys(options.fetch(:page_layout, {})),
32
- json: transform_json(options.fetch(:json, ''))
33
- }
30
+ def request_body(asset_id, options) # rubocop:disable Metrics/AbcSize
31
+ body = {}
32
+ body[:includeHeaderFooter] = options[:include_header_footer] if options[:include_header_footer]
33
+ body[:pageLayout] = options[:page_layout] if options[:page_layout]
34
+ body[:json] = transform_json(options[:json]) if options[:json]
35
+ body[:assetID] = asset_id if asset_id
36
+ body[:inputUrl] = options[:input_url] if options[:input_url]
34
37
  body[:notifiers] = options[:notifiers] if options[:notifiers]
35
38
  body
36
39
  end
@@ -39,22 +42,19 @@ module PdfServices
39
42
  json.is_a?(String) ? json : json.to_json
40
43
  end
41
44
 
42
- def validate_options(options)
45
+ def validate_options(options, source = nil)
43
46
  raise ArgumentError, 'Invalid options' unless options.is_a?(Hash)
44
47
 
45
48
  options.each_key do |key|
46
49
  raise ArgumentError, "Invalid option: #{key}" unless INTERNAL_OPTIONS.include?(key)
47
50
  end
48
51
 
49
- validate_required_keys(options)
52
+ validate_source(source, options)
50
53
  validate_page_layout_options(options[:page_layout]) if options[:page_layout]
51
54
  end
52
55
 
53
- def validate_required_keys(options)
54
- required_keys = INTERNAL_OPTIONS - %i[page_layout notifiers]
55
- required_keys.each do |key|
56
- raise ArgumentError, "Missing required option: #{key}" unless options.key?(key)
57
- end
56
+ def validate_source(source, options)
57
+ raise OperationError, "Cannot specify both 'input_url' and a HTML file" if options[:input_url] && source
58
58
  end
59
59
 
60
60
  def validate_page_layout_options(options)
@@ -9,15 +9,6 @@ module PdfServices
9
9
  { 'Content-Type' => 'application/json' }
10
10
  end
11
11
 
12
- def handle_response(response, asset_id)
13
- unless response.status == 201
14
- raise "Unexpected response status from document merge endpoint: #{response.status}, asset_id: #{asset_id}"
15
- end
16
-
17
- document_url = response.headers['location']
18
- poll_document_result document_url, asset_id
19
- end
20
-
21
12
  def internal_class
22
13
  Internal
23
14
  end
@@ -1,9 +1,9 @@
1
1
  module PdfServices
2
2
  module InternalExternalOperation
3
3
  class Operation < Base::Operation
4
- def execute(source_file_path, options = {})
4
+ def execute(source_file_path, options = {}, &block)
5
5
  operation_class = switch_on_type(options)
6
- operation_class.new(@api).execute(source_file_path, options)
6
+ operation_class.new(@api).execute(source_file_path, options, &block)
7
7
  end
8
8
 
9
9
  private
@@ -5,21 +5,21 @@ module PdfServices
5
5
  class Internal < Operation
6
6
  INTERNAL_OPTIONS = %i[ocr_lang ocr_type notifiers].freeze
7
7
 
8
- def execute(html_file_path, options = {})
8
+ def execute(source_pdf, options = {}, &block)
9
9
  validate_options(options)
10
- asset = upload_asset(html_file_path)
10
+ asset = upload_asset(source_pdf)
11
11
 
12
12
  response = @api.post(OPERATION_ENDPOINT,
13
- body: request_body(asset.id, options),
14
- headers: request_headers)
13
+ body: request_body(asset.id, options), headers: { 'Content-Type' => 'application/json' })
15
14
 
16
- handle_response(response, asset.id)
15
+ handle_response(response, asset, &block)
17
16
  end
18
17
 
19
18
  private
20
19
 
21
- def handle_polling_done(json_response, _original_asset_id)
20
+ def handle_polling_done(json_response, _original_asset)
22
21
  asset_id = json_response['asset']['assetID']
22
+ super
23
23
  Asset.new(@api).download(asset_id).body
24
24
  end
25
25
 
@@ -3,7 +3,7 @@
3
3
  module PdfServices
4
4
  module Ocr
5
5
  class Operation < InternalExternalOperation::Operation
6
- OCR_ENDPOINT = 'https://pdf-services-ue1.adobe.io/operation/ocr'
6
+ OPERATION_ENDPOINT = "#{BASE_ENDPOINT}ocr".freeze
7
7
  OCR_LANGS = %w[
8
8
  da-DK lt-LT sl-SI el-GR ru-RU en-US zh-HK hu-HU et-EE
9
9
  pt-BR uk-UA nb-NO pl-PL lv-LV fi-FI ja-JP es-ES bg-BG
@@ -16,6 +16,14 @@ module PdfServices
16
16
 
17
17
  private
18
18
 
19
+ def internal_class
20
+ Internal
21
+ end
22
+
23
+ def external_class
24
+ External
25
+ end
26
+
19
27
  def validate_ocr_lang_option(ocr_lang)
20
28
  raise ArgumentError, "Invalid ocr_lang option: #{ocr_lang}" unless OCR_LANGS.include?(ocr_lang)
21
29
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PdfServices
4
- VERSION = '0.1.2'
4
+ VERSION = '0.1.4'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: adobe_pdfservices_ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jimmy Bosse
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2024-01-25 00:00:00.000000000 Z
12
+ date: 2024-01-29 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: faraday