pdfcrowd 5.8.0 → 5.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/pdfcrowd.rb +1070 -243
  3. metadata +2 -2
data/lib/pdfcrowd.rb CHANGED
@@ -530,7 +530,7 @@ end
530
530
  module Pdfcrowd
531
531
  HOST = ENV["PDFCROWD_HOST"] || 'api.pdfcrowd.com'
532
532
  MULTIPART_BOUNDARY = '----------ThIs_Is_tHe_bOUnDary_$'
533
- CLIENT_VERSION = '5.8.0'
533
+ CLIENT_VERSION = '5.10.0'
534
534
 
535
535
  class ConnectionHelper
536
536
  def initialize(user_name, api_key)
@@ -541,7 +541,7 @@ module Pdfcrowd
541
541
 
542
542
  setProxy(nil, nil, nil, nil)
543
543
  setUseHttp(false)
544
- setUserAgent('pdfcrowd_ruby_client/5.8.0 (https://pdfcrowd.com)')
544
+ setUserAgent('pdfcrowd_ruby_client/5.10.0 (https://pdfcrowd.com)')
545
545
 
546
546
  @retry_count = 1
547
547
  @converter_version = '20.10'
@@ -597,6 +597,10 @@ module Pdfcrowd
597
597
  @page_count
598
598
  end
599
599
 
600
+ def getTotalPageCount()
601
+ @total_page_count
602
+ end
603
+
600
604
  def getOutputSize()
601
605
  @output_size
602
606
  end
@@ -613,6 +617,7 @@ module Pdfcrowd
613
617
  @consumed_credits = 0
614
618
  @job_id = ''
615
619
  @page_count = 0
620
+ @total_page_count = 0
616
621
  @output_size = 0
617
622
  @retry = 0
618
623
  end
@@ -699,6 +704,7 @@ module Pdfcrowd
699
704
  @consumed_credits = (response["X-Pdfcrowd-Consumed-Credits"] || 0).to_i
700
705
  @job_id = response["X-Pdfcrowd-Job-Id"] || ''
701
706
  @page_count = (response["X-Pdfcrowd-Pages"] || 0).to_i
707
+ @total_page_count = (response["X-Pdfcrowd-Total-Pages"] || 0).to_i
702
708
  @output_size = (response["X-Pdfcrowd-Output-Size"] || 0).to_i
703
709
 
704
710
  raise Error.new('test 502', '502') \
@@ -1714,11 +1720,11 @@ module Pdfcrowd
1714
1720
 
1715
1721
  # The input HTML is automatically enhanced to improve the readability.
1716
1722
  #
1717
- # * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3.
1723
+ # * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.
1718
1724
  # * *Returns* - The converter object.
1719
1725
  def setReadabilityEnhancements(enhancements)
1720
- unless /(?i)^(none|readability-v1|readability-v2|readability-v3)$/.match(enhancements)
1721
- raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-pdf", "Allowed values are none, readability-v1, readability-v2, readability-v3.", "set_readability_enhancements"), 470);
1726
+ unless /(?i)^(none|readability-v1|readability-v2|readability-v3|readability-v4)$/.match(enhancements)
1727
+ raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-pdf", "Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.", "set_readability_enhancements"), 470);
1722
1728
  end
1723
1729
 
1724
1730
  @fields['readability_enhancements'] = enhancements
@@ -1777,11 +1783,11 @@ module Pdfcrowd
1777
1783
 
1778
1784
  # Specifies the scaling mode used for fitting the HTML contents to the print area.
1779
1785
  #
1780
- # * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.
1786
+ # * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.
1781
1787
  # * *Returns* - The converter object.
1782
1788
  def setSmartScalingMode(mode)
1783
- unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|mode1)$/.match(mode)
1784
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.", "set_smart_scaling_mode"), 470);
1789
+ unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|single-page-fit-ex|mode1)$/.match(mode)
1790
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.", "set_smart_scaling_mode"), 470);
1785
1791
  end
1786
1792
 
1787
1793
  @fields['smart_scaling_mode'] = mode
@@ -2197,12 +2203,18 @@ module Pdfcrowd
2197
2203
  return @helper.getJobId()
2198
2204
  end
2199
2205
 
2200
- # Get the total number of pages in the output document.
2206
+ # Get the number of pages in the output document.
2201
2207
  # * *Returns* - The page count.
2202
2208
  def getPageCount()
2203
2209
  return @helper.getPageCount()
2204
2210
  end
2205
2211
 
2212
+ # Get the total number of pages in the original output document, including the pages excluded by setPrintPageRange().
2213
+ # * *Returns* - The total page count.
2214
+ def getTotalPageCount()
2215
+ return @helper.getTotalPageCount()
2216
+ end
2217
+
2206
2218
  # Get the size of the output in bytes.
2207
2219
  # * *Returns* - The count of bytes.
2208
2220
  def getOutputSize()
@@ -2880,11 +2892,11 @@ module Pdfcrowd
2880
2892
 
2881
2893
  # The input HTML is automatically enhanced to improve the readability.
2882
2894
  #
2883
- # * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3.
2895
+ # * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.
2884
2896
  # * *Returns* - The converter object.
2885
2897
  def setReadabilityEnhancements(enhancements)
2886
- unless /(?i)^(none|readability-v1|readability-v2|readability-v3)$/.match(enhancements)
2887
- raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-image", "Allowed values are none, readability-v1, readability-v2, readability-v3.", "set_readability_enhancements"), 470);
2898
+ unless /(?i)^(none|readability-v1|readability-v2|readability-v3|readability-v4)$/.match(enhancements)
2899
+ raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-image", "Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.", "set_readability_enhancements"), 470);
2888
2900
  end
2889
2901
 
2890
2902
  @fields['readability_enhancements'] = enhancements
@@ -4006,7 +4018,7 @@ module Pdfcrowd
4006
4018
  return @helper.getJobId()
4007
4019
  end
4008
4020
 
4009
- # Get the total number of pages in the output document.
4021
+ # Get the number of pages in the output document.
4010
4022
  # * *Returns* - The page count.
4011
4023
  def getPageCount()
4012
4024
  return @helper.getPageCount()
@@ -4291,231 +4303,1012 @@ module Pdfcrowd
4291
4303
  self
4292
4304
  end
4293
4305
 
4294
- # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or available in conversion statistics.
4306
+ # Apply a watermark to each page of the output PDF file. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
4295
4307
  #
4296
- # * +value+ - Set to true to enable the debug logging.
4308
+ # * +watermark+ - The file path to a local file. The file must exist and not be empty.
4297
4309
  # * *Returns* - The converter object.
4298
- def setDebugLog(value)
4299
- @fields['debug_log'] = value
4310
+ def setPageWatermark(watermark)
4311
+ if (!(File.file?(watermark) && !File.zero?(watermark)))
4312
+ raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setPageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_page_watermark"), 470);
4313
+ end
4314
+
4315
+ @files['page_watermark'] = watermark
4300
4316
  self
4301
4317
  end
4302
4318
 
4303
- # Get the URL of the debug log for the last conversion.
4304
- # * *Returns* - The link to the debug log.
4305
- def getDebugLogUrl()
4306
- return @helper.getDebugLogUrl()
4307
- end
4308
-
4309
- # Get the number of conversion credits available in your account.
4310
- # This method can only be called after a call to one of the convertXtoY methods.
4311
- # The returned value can differ from the actual count if you run parallel conversions.
4312
- # The special value 999999 is returned if the information is not available.
4313
- # * *Returns* - The number of credits.
4314
- def getRemainingCreditCount()
4315
- return @helper.getRemainingCreditCount()
4316
- end
4317
-
4318
- # Get the number of credits consumed by the last conversion.
4319
- # * *Returns* - The number of credits.
4320
- def getConsumedCreditCount()
4321
- return @helper.getConsumedCreditCount()
4322
- end
4323
-
4324
- # Get the job id.
4325
- # * *Returns* - The unique job identifier.
4326
- def getJobId()
4327
- return @helper.getJobId()
4319
+ # Load a file from the specified URL and apply the file as a watermark to each page of the output PDF. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
4320
+ #
4321
+ # * +url+ - The supported protocols are http:// and https://.
4322
+ # * *Returns* - The converter object.
4323
+ def setPageWatermarkUrl(url)
4324
+ unless /(?i)^https?:\/\/.*$/.match(url)
4325
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_watermark_url"), 470);
4326
+ end
4327
+
4328
+ @fields['page_watermark_url'] = url
4329
+ self
4328
4330
  end
4329
4331
 
4330
- # Get the size of the output in bytes.
4331
- # * *Returns* - The count of bytes.
4332
- def getOutputSize()
4333
- return @helper.getOutputSize()
4332
+ # Apply each page of a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
4333
+ #
4334
+ # * +watermark+ - The file path to a local file. The file must exist and not be empty.
4335
+ # * *Returns* - The converter object.
4336
+ def setMultipageWatermark(watermark)
4337
+ if (!(File.file?(watermark) && !File.zero?(watermark)))
4338
+ raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setMultipageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_watermark"), 470);
4339
+ end
4340
+
4341
+ @files['multipage_watermark'] = watermark
4342
+ self
4334
4343
  end
4335
4344
 
4336
- # Get the version details.
4337
- # * *Returns* - API version, converter version, and client version.
4338
- def getVersion()
4339
- return "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
4345
+ # Load a file from the specified URL and apply each page of the file as a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
4346
+ #
4347
+ # * +url+ - The supported protocols are http:// and https://.
4348
+ # * *Returns* - The converter object.
4349
+ def setMultipageWatermarkUrl(url)
4350
+ unless /(?i)^https?:\/\/.*$/.match(url)
4351
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_watermark_url"), 470);
4352
+ end
4353
+
4354
+ @fields['multipage_watermark_url'] = url
4355
+ self
4340
4356
  end
4341
4357
 
4342
- # Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
4358
+ # Apply a background to each page of the output PDF file. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
4343
4359
  #
4344
- # * +tag+ - A string with the custom tag.
4360
+ # * +background+ - The file path to a local file. The file must exist and not be empty.
4345
4361
  # * *Returns* - The converter object.
4346
- def setTag(tag)
4347
- @fields['tag'] = tag
4362
+ def setPageBackground(background)
4363
+ if (!(File.file?(background) && !File.zero?(background)))
4364
+ raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setPageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_page_background"), 470);
4365
+ end
4366
+
4367
+ @files['page_background'] = background
4348
4368
  self
4349
4369
  end
4350
4370
 
4351
- # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTP scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
4371
+ # Load a file from the specified URL and apply the file as a background to each page of the output PDF. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
4352
4372
  #
4353
- # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
4373
+ # * +url+ - The supported protocols are http:// and https://.
4354
4374
  # * *Returns* - The converter object.
4355
- def setHttpProxy(proxy)
4356
- unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4357
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
4375
+ def setPageBackgroundUrl(url)
4376
+ unless /(?i)^https?:\/\/.*$/.match(url)
4377
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_background_url"), 470);
4358
4378
  end
4359
4379
 
4360
- @fields['http_proxy'] = proxy
4380
+ @fields['page_background_url'] = url
4361
4381
  self
4362
4382
  end
4363
4383
 
4364
- # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTPS scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
4384
+ # Apply each page of a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
4365
4385
  #
4366
- # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
4386
+ # * +background+ - The file path to a local file. The file must exist and not be empty.
4367
4387
  # * *Returns* - The converter object.
4368
- def setHttpsProxy(proxy)
4369
- unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4370
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
4388
+ def setMultipageBackground(background)
4389
+ if (!(File.file?(background) && !File.zero?(background)))
4390
+ raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setMultipageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_background"), 470);
4371
4391
  end
4372
4392
 
4373
- @fields['https_proxy'] = proxy
4393
+ @files['multipage_background'] = background
4374
4394
  self
4375
4395
  end
4376
4396
 
4377
- # Set the converter version. Different versions may produce different output. Choose which one provides the best output for your case.
4397
+ # Load a file from the specified URL and apply each page of the file as a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
4378
4398
  #
4379
- # * +version+ - The version identifier. Allowed values are latest, 20.10, 18.10.
4399
+ # * +url+ - The supported protocols are http:// and https://.
4380
4400
  # * *Returns* - The converter object.
4381
- def setConverterVersion(version)
4382
- unless /(?i)^(latest|20.10|18.10)$/.match(version)
4383
- raise Error.new(Pdfcrowd.create_invalid_value_message(version, "setConverterVersion", "image-to-pdf", "Allowed values are latest, 20.10, 18.10.", "set_converter_version"), 470);
4401
+ def setMultipageBackgroundUrl(url)
4402
+ unless /(?i)^https?:\/\/.*$/.match(url)
4403
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_background_url"), 470);
4384
4404
  end
4385
4405
 
4386
- @helper.setConverterVersion(version)
4406
+ @fields['multipage_background_url'] = url
4387
4407
  self
4388
4408
  end
4389
4409
 
4390
- # Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
4391
- # Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
4410
+ # Create linearized PDF. This is also known as Fast Web View.
4392
4411
  #
4393
- # * +value+ - Set to true to use HTTP.
4412
+ # * +value+ - Set to true to create linearized PDF.
4394
4413
  # * *Returns* - The converter object.
4395
- def setUseHttp(value)
4396
- @helper.setUseHttp(value)
4414
+ def setLinearize(value)
4415
+ @fields['linearize'] = value
4397
4416
  self
4398
4417
  end
4399
4418
 
4400
- # Set a custom user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
4419
+ # Encrypt the PDF. This prevents search engines from indexing the contents.
4401
4420
  #
4402
- # * +agent+ - The user agent string.
4421
+ # * +value+ - Set to true to enable PDF encryption.
4403
4422
  # * *Returns* - The converter object.
4404
- def setUserAgent(agent)
4405
- @helper.setUserAgent(agent)
4423
+ def setEncrypt(value)
4424
+ @fields['encrypt'] = value
4406
4425
  self
4407
4426
  end
4408
4427
 
4409
- # Specifies an HTTP proxy that the API client library will use to connect to the internet.
4428
+ # Protect the PDF with a user password. When a PDF has a user password, it must be supplied in order to view the document and to perform operations allowed by the access permissions.
4410
4429
  #
4411
- # * +host+ - The proxy hostname.
4412
- # * +port+ - The proxy port.
4413
- # * +user_name+ - The username.
4414
- # * +password+ - The password.
4430
+ # * +password+ - The user password.
4415
4431
  # * *Returns* - The converter object.
4416
- def setProxy(host, port, user_name, password)
4417
- @helper.setProxy(host, port, user_name, password)
4432
+ def setUserPassword(password)
4433
+ @fields['user_password'] = password
4418
4434
  self
4419
4435
  end
4420
4436
 
4421
- # Specifies the number of automatic retries when the 502 HTTP status code is received. The 502 status code indicates a temporary network issue. This feature can be disabled by setting to 0.
4437
+ # Protect the PDF with an owner password. Supplying an owner password grants unlimited access to the PDF including changing the passwords and access permissions.
4422
4438
  #
4423
- # * +count+ - Number of retries.
4439
+ # * +password+ - The owner password.
4424
4440
  # * *Returns* - The converter object.
4425
- def setRetryCount(count)
4426
- @helper.setRetryCount(count)
4441
+ def setOwnerPassword(password)
4442
+ @fields['owner_password'] = password
4427
4443
  self
4428
4444
  end
4429
4445
 
4430
- end
4431
-
4432
- # Conversion from PDF to HTML.
4433
- class PdfToHtmlClient
4434
- # Constructor for the Pdfcrowd API client.
4446
+ # Disallow printing of the output PDF.
4435
4447
  #
4436
- # * +user_name+ - Your username at Pdfcrowd.
4437
- # * +api_key+ - Your API key.
4438
- def initialize(user_name, api_key)
4439
- @helper = ConnectionHelper.new(user_name, api_key)
4440
- @fields = {
4441
- 'input_format'=>'pdf',
4442
- 'output_format'=>'html'
4443
- }
4444
- @file_id = 1
4445
- @files = {}
4446
- @raw_data = {}
4448
+ # * +value+ - Set to true to set the no-print flag in the output PDF.
4449
+ # * *Returns* - The converter object.
4450
+ def setNoPrint(value)
4451
+ @fields['no_print'] = value
4452
+ self
4447
4453
  end
4448
4454
 
4449
- # Convert a PDF.
4455
+ # Disallow modification of the output PDF.
4450
4456
  #
4451
- # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4452
- # * *Returns* - Byte array containing the conversion output.
4453
- def convertUrl(url)
4454
- unless /(?i)^https?:\/\/.*$/.match(url)
4455
- raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url"), 470);
4456
- end
4457
-
4458
- @fields['url'] = url
4459
- @helper.post(@fields, @files, @raw_data)
4457
+ # * +value+ - Set to true to set the read-only flag in the output PDF.
4458
+ # * *Returns* - The converter object.
4459
+ def setNoModify(value)
4460
+ @fields['no_modify'] = value
4461
+ self
4460
4462
  end
4461
4463
 
4462
- # Convert a PDF and write the result to an output stream.
4464
+ # Disallow text and graphics extraction from the output PDF.
4463
4465
  #
4464
- # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4465
- # * +out_stream+ - The output stream that will contain the conversion output.
4466
- def convertUrlToStream(url, out_stream)
4467
- unless /(?i)^https?:\/\/.*$/.match(url)
4468
- raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470);
4469
- end
4470
-
4471
- @fields['url'] = url
4472
- @helper.post(@fields, @files, @raw_data, out_stream)
4466
+ # * +value+ - Set to true to set the no-copy flag in the output PDF.
4467
+ # * *Returns* - The converter object.
4468
+ def setNoCopy(value)
4469
+ @fields['no_copy'] = value
4470
+ self
4473
4471
  end
4474
4472
 
4475
- # Convert a PDF and write the result to a local file.
4473
+ # Set the title of the PDF.
4476
4474
  #
4477
- # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4478
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
4479
- def convertUrlToFile(url, file_path)
4480
- if (!(!file_path.nil? && !file_path.empty?))
4481
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_url_to_file"), 470);
4482
- end
4483
-
4484
- if (!(isOutputTypeValid(file_path)))
4485
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_url_to_file"), 470);
4486
- end
4487
-
4488
- output_file = open(file_path, "wb")
4489
- begin
4490
- convertUrlToStream(url, output_file)
4491
- output_file.close()
4492
- rescue Error => why
4493
- output_file.close()
4494
- FileUtils.rm(file_path)
4495
- raise
4496
- end
4475
+ # * +title+ - The title.
4476
+ # * *Returns* - The converter object.
4477
+ def setTitle(title)
4478
+ @fields['title'] = title
4479
+ self
4497
4480
  end
4498
4481
 
4499
- # Convert a local file.
4482
+ # Set the subject of the PDF.
4500
4483
  #
4501
- # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4502
- # * *Returns* - Byte array containing the conversion output.
4503
- def convertFile(file)
4504
- if (!(File.file?(file) && !File.zero?(file)))
4505
- raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-html", "The file must exist and not be empty.", "convert_file"), 470);
4506
- end
4507
-
4508
- @files['file'] = file
4509
- @helper.post(@fields, @files, @raw_data)
4484
+ # * +subject+ - The subject.
4485
+ # * *Returns* - The converter object.
4486
+ def setSubject(subject)
4487
+ @fields['subject'] = subject
4488
+ self
4510
4489
  end
4511
4490
 
4512
- # Convert a local file and write the result to an output stream.
4491
+ # Set the author of the PDF.
4492
+ #
4493
+ # * +author+ - The author.
4494
+ # * *Returns* - The converter object.
4495
+ def setAuthor(author)
4496
+ @fields['author'] = author
4497
+ self
4498
+ end
4499
+
4500
+ # Associate keywords with the document.
4501
+ #
4502
+ # * +keywords+ - The string with the keywords.
4503
+ # * *Returns* - The converter object.
4504
+ def setKeywords(keywords)
4505
+ @fields['keywords'] = keywords
4506
+ self
4507
+ end
4508
+
4509
+ # Specify the page layout to be used when the document is opened.
4510
+ #
4511
+ # * +layout+ - Allowed values are single-page, one-column, two-column-left, two-column-right.
4512
+ # * *Returns* - The converter object.
4513
+ def setPageLayout(layout)
4514
+ unless /(?i)^(single-page|one-column|two-column-left|two-column-right)$/.match(layout)
4515
+ raise Error.new(Pdfcrowd.create_invalid_value_message(layout, "setPageLayout", "image-to-pdf", "Allowed values are single-page, one-column, two-column-left, two-column-right.", "set_page_layout"), 470);
4516
+ end
4517
+
4518
+ @fields['page_layout'] = layout
4519
+ self
4520
+ end
4521
+
4522
+ # Specify how the document should be displayed when opened.
4523
+ #
4524
+ # * +mode+ - Allowed values are full-screen, thumbnails, outlines.
4525
+ # * *Returns* - The converter object.
4526
+ def setPageMode(mode)
4527
+ unless /(?i)^(full-screen|thumbnails|outlines)$/.match(mode)
4528
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageMode", "image-to-pdf", "Allowed values are full-screen, thumbnails, outlines.", "set_page_mode"), 470);
4529
+ end
4530
+
4531
+ @fields['page_mode'] = mode
4532
+ self
4533
+ end
4534
+
4535
+ # Specify how the page should be displayed when opened.
4536
+ #
4537
+ # * +zoom_type+ - Allowed values are fit-width, fit-height, fit-page.
4538
+ # * *Returns* - The converter object.
4539
+ def setInitialZoomType(zoom_type)
4540
+ unless /(?i)^(fit-width|fit-height|fit-page)$/.match(zoom_type)
4541
+ raise Error.new(Pdfcrowd.create_invalid_value_message(zoom_type, "setInitialZoomType", "image-to-pdf", "Allowed values are fit-width, fit-height, fit-page.", "set_initial_zoom_type"), 470);
4542
+ end
4543
+
4544
+ @fields['initial_zoom_type'] = zoom_type
4545
+ self
4546
+ end
4547
+
4548
+ # Display the specified page when the document is opened.
4549
+ #
4550
+ # * +page+ - Must be a positive integer number.
4551
+ # * *Returns* - The converter object.
4552
+ def setInitialPage(page)
4553
+ if (!(Integer(page) > 0))
4554
+ raise Error.new(Pdfcrowd.create_invalid_value_message(page, "setInitialPage", "image-to-pdf", "Must be a positive integer number.", "set_initial_page"), 470);
4555
+ end
4556
+
4557
+ @fields['initial_page'] = page
4558
+ self
4559
+ end
4560
+
4561
+ # Specify the initial page zoom in percent when the document is opened.
4562
+ #
4563
+ # * +zoom+ - Must be a positive integer number.
4564
+ # * *Returns* - The converter object.
4565
+ def setInitialZoom(zoom)
4566
+ if (!(Integer(zoom) > 0))
4567
+ raise Error.new(Pdfcrowd.create_invalid_value_message(zoom, "setInitialZoom", "image-to-pdf", "Must be a positive integer number.", "set_initial_zoom"), 470);
4568
+ end
4569
+
4570
+ @fields['initial_zoom'] = zoom
4571
+ self
4572
+ end
4573
+
4574
+ # Specify whether to hide the viewer application's tool bars when the document is active.
4575
+ #
4576
+ # * +value+ - Set to true to hide tool bars.
4577
+ # * *Returns* - The converter object.
4578
+ def setHideToolbar(value)
4579
+ @fields['hide_toolbar'] = value
4580
+ self
4581
+ end
4582
+
4583
+ # Specify whether to hide the viewer application's menu bar when the document is active.
4584
+ #
4585
+ # * +value+ - Set to true to hide the menu bar.
4586
+ # * *Returns* - The converter object.
4587
+ def setHideMenubar(value)
4588
+ @fields['hide_menubar'] = value
4589
+ self
4590
+ end
4591
+
4592
+ # Specify whether to hide user interface elements in the document's window (such as scroll bars and navigation controls), leaving only the document's contents displayed.
4593
+ #
4594
+ # * +value+ - Set to true to hide UI elements.
4595
+ # * *Returns* - The converter object.
4596
+ def setHideWindowUi(value)
4597
+ @fields['hide_window_ui'] = value
4598
+ self
4599
+ end
4600
+
4601
+ # Specify whether to resize the document's window to fit the size of the first displayed page.
4602
+ #
4603
+ # * +value+ - Set to true to resize the window.
4604
+ # * *Returns* - The converter object.
4605
+ def setFitWindow(value)
4606
+ @fields['fit_window'] = value
4607
+ self
4608
+ end
4609
+
4610
+ # Specify whether to position the document's window in the center of the screen.
4611
+ #
4612
+ # * +value+ - Set to true to center the window.
4613
+ # * *Returns* - The converter object.
4614
+ def setCenterWindow(value)
4615
+ @fields['center_window'] = value
4616
+ self
4617
+ end
4618
+
4619
+ # Specify whether the window's title bar should display the document title. If false, the title bar should instead display the name of the PDF file containing the document.
4620
+ #
4621
+ # * +value+ - Set to true to display the title.
4622
+ # * *Returns* - The converter object.
4623
+ def setDisplayTitle(value)
4624
+ @fields['display_title'] = value
4625
+ self
4626
+ end
4627
+
4628
+ # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or available in conversion statistics.
4629
+ #
4630
+ # * +value+ - Set to true to enable the debug logging.
4631
+ # * *Returns* - The converter object.
4632
+ def setDebugLog(value)
4633
+ @fields['debug_log'] = value
4634
+ self
4635
+ end
4636
+
4637
+ # Get the URL of the debug log for the last conversion.
4638
+ # * *Returns* - The link to the debug log.
4639
+ def getDebugLogUrl()
4640
+ return @helper.getDebugLogUrl()
4641
+ end
4642
+
4643
+ # Get the number of conversion credits available in your account.
4644
+ # This method can only be called after a call to one of the convertXtoY methods.
4645
+ # The returned value can differ from the actual count if you run parallel conversions.
4646
+ # The special value 999999 is returned if the information is not available.
4647
+ # * *Returns* - The number of credits.
4648
+ def getRemainingCreditCount()
4649
+ return @helper.getRemainingCreditCount()
4650
+ end
4651
+
4652
+ # Get the number of credits consumed by the last conversion.
4653
+ # * *Returns* - The number of credits.
4654
+ def getConsumedCreditCount()
4655
+ return @helper.getConsumedCreditCount()
4656
+ end
4657
+
4658
+ # Get the job id.
4659
+ # * *Returns* - The unique job identifier.
4660
+ def getJobId()
4661
+ return @helper.getJobId()
4662
+ end
4663
+
4664
+ # Get the size of the output in bytes.
4665
+ # * *Returns* - The count of bytes.
4666
+ def getOutputSize()
4667
+ return @helper.getOutputSize()
4668
+ end
4669
+
4670
+ # Get the version details.
4671
+ # * *Returns* - API version, converter version, and client version.
4672
+ def getVersion()
4673
+ return "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
4674
+ end
4675
+
4676
+ # Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
4677
+ #
4678
+ # * +tag+ - A string with the custom tag.
4679
+ # * *Returns* - The converter object.
4680
+ def setTag(tag)
4681
+ @fields['tag'] = tag
4682
+ self
4683
+ end
4684
+
4685
# A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTP scheme.
# It can help to circumvent regional restrictions or provide limited access to your intranet.
#
# * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
# * *Returns* - The converter object.
# * *Raises* - Error (created with the value 470) when +proxy+ does not have the required format.
def setHttpProxy(proxy)
    # \A and \z anchor the whole string. The ^ and $ anchors only anchor a single line,
    # so a multi-line value containing one well-formed line would slip through.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470)
    end

    @fields['http_proxy'] = proxy
    self
end
4697
+
4698
# A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTPS scheme.
# It can help to circumvent regional restrictions or provide limited access to your intranet.
#
# * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
# * *Returns* - The converter object.
# * *Raises* - Error (created with the value 470) when +proxy+ does not have the required format.
def setHttpsProxy(proxy)
    # \A and \z anchor the whole string. The ^ and $ anchors only anchor a single line,
    # so a multi-line value containing one well-formed line would slip through.
    unless /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)
        raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470)
    end

    @fields['https_proxy'] = proxy
    self
end
4710
+
4711
# Set the converter version. Different versions may produce different output.
# Choose which one provides the best output for your case.
#
# * +version+ - The version identifier. Allowed values are latest, 20.10, 18.10.
# * *Returns* - The converter object.
# * *Raises* - Error (created with the value 470) when +version+ is not one of the allowed values.
def setConverterVersion(version)
    # The dots are escaped so that only a literal "." is accepted (an unescaped
    # dot matches any character, e.g. "20x10"). \A and \z anchor the whole string
    # rather than a single line of it.
    unless /\A(latest|20\.10|18\.10)\z/i.match(version)
        raise Error.new(Pdfcrowd.create_invalid_value_message(version, "setConverterVersion", "image-to-pdf", "Allowed values are latest, 20.10, 18.10.", "set_converter_version"), 470)
    end

    @helper.setConverterVersion(version)
    self
end
4723
+
4724
# Choose whether the client talks to the Pdfcrowd API over HTTP or HTTPS.
# Warning: Using HTTP is insecure as data sent over HTTP is not encrypted.
# Enable this option only if you know what you are doing.
#
# * +value+ - Set to true to use HTTP.
# * *Returns* - The converter object.
def setUseHttp(value)
    tap { @helper.setUseHttp(value) }
end
4733
+
4734
# Override the user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
#
# * +agent+ - The user agent string.
# * *Returns* - The converter object.
def setUserAgent(agent)
    tap { @helper.setUserAgent(agent) }
end
4742
+
4743
# Configure the HTTP proxy that the API client library will use to connect to the internet.
# All four values are handed to the helper object unchanged.
#
# * +host+ - The proxy hostname.
# * +port+ - The proxy port.
# * +user_name+ - The username.
# * +password+ - The password.
# * *Returns* - The converter object.
def setProxy(host, port, user_name, password)
    tap { @helper.setProxy(host, port, user_name, password) }
end
4754
+
4755
# Set how many times a request is retried automatically when the 502 HTTP status code is received.
# The 502 status code indicates a temporary network issue. This feature can be disabled by setting to 0.
#
# * +count+ - Number of retries.
# * *Returns* - The converter object.
def setRetryCount(count)
    tap { @helper.setRetryCount(count) }
end
4763
+
4764
+ end
4765
+
4766
# Conversion from PDF to HTML.
class PdfToHtmlClient
    # Validation patterns. They are anchored with \A and \z (whole string) instead
    # of ^ and $ (single line), so a multi-line value cannot pass validation just
    # because one of its lines is well-formed.
    URL_PATTERN = /\Ahttps?:\/\/.*\z/i
    PROXY_PATTERN = /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i
    STORAGE_MODE_PATTERN = /\A(embed|separate)\z/i
    PAGE_RANGE_PATTERN = /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/
    private_constant :URL_PATTERN, :PROXY_PATTERN, :STORAGE_MODE_PATTERN, :PAGE_RANGE_PATTERN

    # Constructor for the Pdfcrowd API client.
    #
    # * +user_name+ - Your username at Pdfcrowd.
    # * +api_key+ - Your API key.
    def initialize(user_name, api_key)
        @helper = ConnectionHelper.new(user_name, api_key)
        @fields = {
            'input_format'=>'pdf',
            'output_format'=>'html'
        }
        @file_id = 1
        @files = {}
        @raw_data = {}
    end

    # Convert a PDF.
    #
    # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
    # * *Returns* - Byte array containing the conversion output.
    # * *Raises* - Error when +url+ is not an http:// or https:// address.
    def convertUrl(url)
        unless URL_PATTERN.match(url)
            raise_invalid_value(url, "convertUrl", "The supported protocols are http:// and https://.", "convert_url")
        end

        @fields['url'] = url
        @helper.post(@fields, @files, @raw_data)
    end

    # Convert a PDF and write the result to an output stream.
    #
    # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
    # * +out_stream+ - The output stream that will contain the conversion output.
    # * *Raises* - Error when +url+ is not an http:// or https:// address.
    def convertUrlToStream(url, out_stream)
        unless URL_PATTERN.match(url)
            raise_invalid_value(url, "convertUrlToStream::url", "The supported protocols are http:// and https://.", "convert_url_to_stream")
        end

        @fields['url'] = url
        @helper.post(@fields, @files, @raw_data, out_stream)
    end

    # Convert a PDF and write the result to a local file.
    #
    # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
    # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
    # * *Raises* - Error when +file_path+ or +url+ is invalid, or when the conversion fails (the output file is removed in that case).
    def convertUrlToFile(url, file_path)
        validate_output_path(file_path, "convertUrlToFile::file_path", "convert_url_to_file")
        write_output_file(file_path) { |output_file| convertUrlToStream(url, output_file) }
    end

    # Convert a local file.
    #
    # * +file+ - The path to a local file to convert. The file must exist and not be empty.
    # * *Returns* - Byte array containing the conversion output.
    # * *Raises* - Error when +file+ does not exist or is empty.
    def convertFile(file)
        unless File.file?(file) && !File.zero?(file)
            raise_invalid_value(file, "convertFile", "The file must exist and not be empty.", "convert_file")
        end

        @files['file'] = file
        @helper.post(@fields, @files, @raw_data)
    end

    # Convert a local file and write the result to an output stream.
    #
    # * +file+ - The path to a local file to convert. The file must exist and not be empty.
    # * +out_stream+ - The output stream that will contain the conversion output.
    # * *Raises* - Error when +file+ does not exist or is empty.
    def convertFileToStream(file, out_stream)
        unless File.file?(file) && !File.zero?(file)
            raise_invalid_value(file, "convertFileToStream::file", "The file must exist and not be empty.", "convert_file_to_stream")
        end

        @files['file'] = file
        @helper.post(@fields, @files, @raw_data, out_stream)
    end

    # Convert a local file and write the result to a local file.
    #
    # * +file+ - The path to a local file to convert. The file must exist and not be empty.
    # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
    # * *Raises* - Error when +file_path+ or +file+ is invalid, or when the conversion fails (the output file is removed in that case).
    def convertFileToFile(file, file_path)
        validate_output_path(file_path, "convertFileToFile::file_path", "convert_file_to_file")
        write_output_file(file_path) { |output_file| convertFileToStream(file, output_file) }
    end

    # Convert raw data.
    #
    # * +data+ - The raw content to be converted.
    # * *Returns* - Byte array with the output.
    def convertRawData(data)
        @raw_data['file'] = data
        @helper.post(@fields, @files, @raw_data)
    end

    # Convert raw data and write the result to an output stream.
    #
    # * +data+ - The raw content to be converted.
    # * +out_stream+ - The output stream that will contain the conversion output.
    def convertRawDataToStream(data, out_stream)
        @raw_data['file'] = data
        @helper.post(@fields, @files, @raw_data, out_stream)
    end

    # Convert raw data to a file.
    #
    # * +data+ - The raw content to be converted.
    # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
    # * *Raises* - Error when +file_path+ is invalid, or when the conversion fails (the output file is removed in that case).
    def convertRawDataToFile(data, file_path)
        validate_output_path(file_path, "convertRawDataToFile::file_path", "convert_raw_data_to_file")
        write_output_file(file_path) { |output_file| convertRawDataToStream(data, output_file) }
    end

    # Convert the contents of an input stream.
    #
    # * +in_stream+ - The input stream with source data.
    # * *Returns* - Byte array containing the conversion output.
    def convertStream(in_stream)
        @raw_data['stream'] = in_stream.read
        @helper.post(@fields, @files, @raw_data)
    end

    # Convert the contents of an input stream and write the result to an output stream.
    #
    # * +in_stream+ - The input stream with source data.
    # * +out_stream+ - The output stream that will contain the conversion output.
    def convertStreamToStream(in_stream, out_stream)
        @raw_data['stream'] = in_stream.read
        @helper.post(@fields, @files, @raw_data, out_stream)
    end

    # Convert the contents of an input stream and write the result to a local file.
    #
    # * +in_stream+ - The input stream with source data.
    # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
    # * *Raises* - Error when +file_path+ is invalid, or when the conversion fails (the output file is removed in that case).
    def convertStreamToFile(in_stream, file_path)
        validate_output_path(file_path, "convertStreamToFile::file_path", "convert_stream_to_file")
        write_output_file(file_path) { |output_file| convertStreamToStream(in_stream, output_file) }
    end

    # Password to open the encrypted PDF file.
    #
    # * +password+ - The input PDF password.
    # * *Returns* - The converter object.
    def setPdfPassword(password)
        @fields['pdf_password'] = password
        self
    end

    # Set the scaling factor (zoom) for the main page area.
    #
    # * +factor+ - The percentage value. Must be a positive integer number.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +factor+ is not a positive integer number, including values that cannot be converted to an integer at all.
    def setScaleFactor(factor)
        unless positive_integer?(factor)
            raise_invalid_value(factor, "setScaleFactor", "Must be a positive integer number.", "set_scale_factor")
        end

        @fields['scale_factor'] = factor
        self
    end

    # Set the page range to print.
    #
    # * +pages+ - A comma separated list of page numbers or ranges.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +pages+ is not a comma separated list of page numbers or ranges.
    def setPrintPageRange(pages)
        unless PAGE_RANGE_PATTERN.match(pages)
            raise_invalid_value(pages, "setPrintPageRange", "A comma separated list of page numbers or ranges.", "set_print_page_range")
        end

        @fields['print_page_range'] = pages
        self
    end

    # Specifies where the images are stored.
    #
    # * +mode+ - The image storage mode. Allowed values are embed, separate.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +mode+ is not one of the allowed values.
    def setImageMode(mode)
        unless STORAGE_MODE_PATTERN.match(mode)
            raise_invalid_value(mode, "setImageMode", "Allowed values are embed, separate.", "set_image_mode")
        end

        @fields['image_mode'] = mode
        self
    end

    # Specifies where the style sheets are stored.
    #
    # * +mode+ - The style sheet storage mode. Allowed values are embed, separate.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +mode+ is not one of the allowed values.
    def setCssMode(mode)
        unless STORAGE_MODE_PATTERN.match(mode)
            raise_invalid_value(mode, "setCssMode", "Allowed values are embed, separate.", "set_css_mode")
        end

        @fields['css_mode'] = mode
        self
    end

    # Specifies where the fonts are stored.
    #
    # * +mode+ - The font storage mode. Allowed values are embed, separate.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +mode+ is not one of the allowed values.
    def setFontMode(mode)
        unless STORAGE_MODE_PATTERN.match(mode)
            raise_invalid_value(mode, "setFontMode", "Allowed values are embed, separate.", "set_font_mode")
        end

        @fields['font_mode'] = mode
        self
    end

    # A helper method to determine if the output file is a zip archive. The output of the conversion may be either an HTML file or a zip file containing the HTML and its external assets.
    # * *Returns* - True if the conversion output is a zip file, otherwise False.
    def isZippedOutput()
        # The mode setters accept their values case-insensitively, so the stored
        # value is compared case-insensitively here as well.
        separate_assets = %w[image_mode css_mode font_mode].any? do |key|
            @fields.fetch(key, '').to_s.casecmp('separate') == 0
        end
        separate_assets || @fields.fetch('force_zip', false) == true
    end

    # Enforces the zip output format.
    #
    # * +value+ - Set to true to get the output as a zip archive.
    # * *Returns* - The converter object.
    def setForceZip(value)
        @fields['force_zip'] = value
        self
    end

    # Set the HTML title. The title from the input PDF is used by default.
    #
    # * +title+ - The HTML title.
    # * *Returns* - The converter object.
    def setTitle(title)
        @fields['title'] = title
        self
    end

    # Set the HTML subject. The subject from the input PDF is used by default.
    #
    # * +subject+ - The HTML subject.
    # * *Returns* - The converter object.
    def setSubject(subject)
        @fields['subject'] = subject
        self
    end

    # Set the HTML author. The author from the input PDF is used by default.
    #
    # * +author+ - The HTML author.
    # * *Returns* - The converter object.
    def setAuthor(author)
        @fields['author'] = author
        self
    end

    # Associate keywords with the HTML document. Keywords from the input PDF are used by default.
    #
    # * +keywords+ - The string containing the keywords.
    # * *Returns* - The converter object.
    def setKeywords(keywords)
        @fields['keywords'] = keywords
        self
    end

    # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or available in conversion statistics.
    #
    # * +value+ - Set to true to enable the debug logging.
    # * *Returns* - The converter object.
    def setDebugLog(value)
        @fields['debug_log'] = value
        self
    end

    # Get the URL of the debug log for the last conversion.
    # * *Returns* - The link to the debug log.
    def getDebugLogUrl()
        @helper.getDebugLogUrl()
    end

    # Get the number of conversion credits available in your account.
    # This method can only be called after a call to one of the convertXtoY methods.
    # The returned value can differ from the actual count if you run parallel conversions.
    # The special value 999999 is returned if the information is not available.
    # * *Returns* - The number of credits.
    def getRemainingCreditCount()
        @helper.getRemainingCreditCount()
    end

    # Get the number of credits consumed by the last conversion.
    # * *Returns* - The number of credits.
    def getConsumedCreditCount()
        @helper.getConsumedCreditCount()
    end

    # Get the job id.
    # * *Returns* - The unique job identifier.
    def getJobId()
        @helper.getJobId()
    end

    # Get the number of pages in the output document.
    # * *Returns* - The page count.
    def getPageCount()
        @helper.getPageCount()
    end

    # Get the size of the output in bytes.
    # * *Returns* - The count of bytes.
    def getOutputSize()
        @helper.getOutputSize()
    end

    # Get the version details.
    # * *Returns* - API version, converter version, and client version.
    def getVersion()
        "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
    end

    # Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
    #
    # * +tag+ - A string with the custom tag.
    # * *Returns* - The converter object.
    def setTag(tag)
        @fields['tag'] = tag
        self
    end

    # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTP scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
    #
    # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +proxy+ does not have the required format.
    def setHttpProxy(proxy)
        unless PROXY_PATTERN.match(proxy)
            raise_invalid_value(proxy, "setHttpProxy", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy")
        end

        @fields['http_proxy'] = proxy
        self
    end

    # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTPS scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
    #
    # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
    # * *Returns* - The converter object.
    # * *Raises* - Error when +proxy+ does not have the required format.
    def setHttpsProxy(proxy)
        unless PROXY_PATTERN.match(proxy)
            raise_invalid_value(proxy, "setHttpsProxy", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy")
        end

        @fields['https_proxy'] = proxy
        self
    end

    # Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
    # Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
    #
    # * +value+ - Set to true to use HTTP.
    # * *Returns* - The converter object.
    def setUseHttp(value)
        @helper.setUseHttp(value)
        self
    end

    # Set a custom user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
    #
    # * +agent+ - The user agent string.
    # * *Returns* - The converter object.
    def setUserAgent(agent)
        @helper.setUserAgent(agent)
        self
    end

    # Specifies an HTTP proxy that the API client library will use to connect to the internet.
    #
    # * +host+ - The proxy hostname.
    # * +port+ - The proxy port.
    # * +user_name+ - The username.
    # * +password+ - The password.
    # * *Returns* - The converter object.
    def setProxy(host, port, user_name, password)
        @helper.setProxy(host, port, user_name, password)
        self
    end

    # Specifies the number of automatic retries when the 502 HTTP status code is received. The 502 status code indicates a temporary network issue. This feature can be disabled by setting to 0.
    #
    # * +count+ - Number of retries.
    # * *Returns* - The converter object.
    def setRetryCount(count)
        @helper.setRetryCount(count)
        self
    end

    private

    # True when the ".zip" extension of +file_path+ agrees with isZippedOutput.
    def isOutputTypeValid(file_path)
        extension = File.extname(file_path).downcase
        (extension == '.zip') == isZippedOutput()
    end

    # Raise the library Error for a rejected argument of this ("pdf-to-html") converter.
    def raise_invalid_value(value, field, hint, api_method)
        raise Error.new(Pdfcrowd.create_invalid_value_message(value, field, "pdf-to-html", hint, api_method), 470)
    end

    # True when +value+ converts to an integer greater than zero. Values that
    # Integer() cannot convert count as invalid instead of raising.
    def positive_integer?(value)
        Integer(value) > 0
    rescue ArgumentError, TypeError, RangeError
        false
    end

    # Check an output path shared by all convert*ToFile methods: it must be
    # non-empty and its extension must match the expected output type.
    def validate_output_path(file_path, field, api_method)
        if file_path.nil? || file_path.empty?
            raise_invalid_value(file_path, field, "The string must not be empty.", api_method)
        end

        unless isOutputTypeValid(file_path)
            raise_invalid_value(file_path, field, "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", api_method)
        end
    end

    # Open +file_path+ for binary writing and yield the handle to the block.
    # On Error the partial file is removed and the error is re-raised; the
    # handle is closed on every exit path. Returns nil.
    def write_output_file(file_path)
        # File.open, unlike Kernel#open, never interprets a leading "|" as a command.
        output_file = File.open(file_path, "wb")
        begin
            yield output_file
        rescue Error
            # Close before removing so the delete also works where open files are locked.
            output_file.close
            FileUtils.rm(file_path)
            raise
        ensure
            output_file.close unless output_file.closed?
        end
        nil
    end
end
5228
+
5229
+ # Conversion from PDF to text.
5230
+ class PdfToTextClient
5231
# Create a client for the PDF to text conversion.
#
# * +user_name+ - Your username at Pdfcrowd.
# * +api_key+ - Your API key.
def initialize(user_name, api_key)
    @helper = ConnectionHelper.new(user_name, api_key)
    # Request fields; the input and output formats are fixed for this converter.
    @fields = { 'input_format' => 'pdf', 'output_format' => 'txt' }
    @file_id = 1
    @files = {}
    @raw_data = {}
end
5245
+
5246
# Convert a PDF.
#
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
# * *Returns* - Byte array containing the conversion output.
# * *Raises* - Error (created with the value 470) when +url+ is not an http:// or https:// address.
def convertUrl(url)
    # \A and \z anchor the whole string. With ^ and $ a multi-line value would
    # pass as soon as any one of its lines looked like a URL.
    unless /\Ahttps?:\/\/.*\z/i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data)
end
5258
+
5259
# Convert a PDF and write the result to an output stream.
#
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
# * +out_stream+ - The output stream that will contain the conversion output.
# * *Raises* - Error (created with the value 470) when +url+ is not an http:// or https:// address.
def convertUrlToStream(url, out_stream)
    # \A and \z anchor the whole string. With ^ and $ a multi-line value would
    # pass as soon as any one of its lines looked like a URL.
    unless /\Ahttps?:\/\/.*\z/i.match(url)
        raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
    end

    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data, out_stream)
end
5271
+
5272
# Convert a PDF and write the result to a local file.
#
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
# * +file_path+ - The output file path. The string must not be empty.
# * *Raises* - Error when +file_path+ is empty, or when convertUrlToStream raises Error (the output file is removed in that case).
def convertUrlToFile(url, file_path)
    if file_path.nil? || file_path.empty?
        raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
    end

    # File.open, unlike Kernel#open, never interprets a leading "|" as a command.
    output_file = File.open(file_path, "wb")
    begin
        convertUrlToStream(url, output_file)
    rescue Error
        # Close before removing so the delete also works where open files are locked.
        output_file.close
        FileUtils.rm(file_path)
        raise
    ensure
        # Release the handle on every exit path, including unexpected exceptions.
        output_file.close unless output_file.closed?
    end
    nil
end
5291
+
5292
# Convert a file stored on the local disk.
#
# * +file+ - The path to a local file to convert. The file must exist and not be empty.
# * *Returns* - Byte array containing the conversion output.
def convertFile(file)
    if !File.file?(file) || File.zero?(file)
        raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470)
    end

    @files.store('file', file)
    @helper.post(@fields, @files, @raw_data)
end
5304
+
5305
+ # Convert a local file and write the result to an output stream.
4513
5306
  #
4514
5307
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4515
5308
  # * +out_stream+ - The output stream that will contain the conversion output.
4516
5309
  def convertFileToStream(file, out_stream)
4517
5310
  if (!(File.file?(file) && !File.zero?(file)))
4518
- raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-html", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
5311
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
4519
5312
  end
4520
5313
 
4521
5314
  @files['file'] = file
@@ -4525,14 +5318,10 @@ module Pdfcrowd
4525
5318
  # Convert a local file and write the result to a local file.
4526
5319
  #
4527
5320
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4528
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5321
+ # * +file_path+ - The output file path. The string must not be empty.
4529
5322
  def convertFileToFile(file, file_path)
4530
5323
  if (!(!file_path.nil? && !file_path.empty?))
4531
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_file_to_file"), 470);
4532
- end
4533
-
4534
- if (!(isOutputTypeValid(file_path)))
4535
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_file_to_file"), 470);
5324
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470);
4536
5325
  end
4537
5326
 
4538
5327
  output_file = open(file_path, "wb")
@@ -4567,14 +5356,10 @@ module Pdfcrowd
4567
5356
  # Convert raw data to a file.
4568
5357
  #
4569
5358
  # * +data+ - The raw content to be converted.
4570
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5359
+ # * +file_path+ - The output file path. The string must not be empty.
4571
5360
  def convertRawDataToFile(data, file_path)
4572
5361
  if (!(!file_path.nil? && !file_path.empty?))
4573
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_raw_data_to_file"), 470);
4574
- end
4575
-
4576
- if (!(isOutputTypeValid(file_path)))
4577
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_raw_data_to_file"), 470);
5362
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470);
4578
5363
  end
4579
5364
 
4580
5365
  output_file = open(file_path, "wb")
@@ -4609,14 +5394,10 @@ module Pdfcrowd
4609
5394
  # Convert the contents of an input stream and write the result to a local file.
4610
5395
  #
4611
5396
  # * +in_stream+ - The input stream with source data.
4612
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5397
+ # * +file_path+ - The output file path. The string must not be empty.
4613
5398
  def convertStreamToFile(in_stream, file_path)
4614
5399
  if (!(!file_path.nil? && !file_path.empty?))
4615
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_stream_to_file"), 470);
4616
- end
4617
-
4618
- if (!(isOutputTypeValid(file_path)))
4619
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_stream_to_file"), 470);
5400
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470);
4620
5401
  end
4621
5402
 
4622
5403
  output_file = open(file_path, "wb")
@@ -4630,7 +5411,7 @@ module Pdfcrowd
4630
5411
  end
4631
5412
  end
4632
5413
 
4633
- # Password to open the encrypted PDF file.
5414
+ # The password to open the encrypted PDF file.
4634
5415
  #
4635
5416
  # * +password+ - The input PDF password.
4636
5417
  # * *Returns* - The converter object.
@@ -4639,119 +5420,171 @@ module Pdfcrowd
4639
5420
  self
4640
5421
  end
4641
5422
 
4642
- # Set the scaling factor (zoom) for the main page area.
5423
+ # Set the page range to print.
4643
5424
  #
4644
- # * +factor+ - The percentage value. Must be a positive integer number.
5425
+ # * +pages+ - A comma separated list of page numbers or ranges.
4645
5426
  # * *Returns* - The converter object.
4646
- def setScaleFactor(factor)
4647
- if (!(Integer(factor) > 0))
4648
- raise Error.new(Pdfcrowd.create_invalid_value_message(factor, "setScaleFactor", "pdf-to-html", "Must be a positive integer number.", "set_scale_factor"), 470);
5427
+ def setPrintPageRange(pages)
5428
+ unless /^(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*$/.match(pages)
5429
+ raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470);
4649
5430
  end
4650
5431
 
4651
- @fields['scale_factor'] = factor
5432
+ @fields['print_page_range'] = pages
4652
5433
  self
4653
5434
  end
4654
5435
 
4655
- # Set the page range to print.
5436
+ # Ignore the original PDF layout.
4656
5437
  #
4657
- # * +pages+ - A comma separated list of page numbers or ranges.
5438
+ # * +value+ - Set to true to ignore the layout.
4658
5439
  # * *Returns* - The converter object.
4659
- def setPrintPageRange(pages)
4660
- unless /^(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*$/.match(pages)
4661
- raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-html", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470);
5440
+ def setNoLayout(value)
5441
+ @fields['no_layout'] = value
5442
+ self
5443
+ end
5444
+
5445
+ # The end-of-line convention for the text output.
5446
+ #
5447
+ # * +eol+ - Allowed values are unix, dos, mac.
5448
+ # * *Returns* - The converter object.
5449
+ def setEol(eol)
5450
+ unless /(?i)^(unix|dos|mac)$/.match(eol)
5451
+ raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470);
4662
5452
  end
4663
5453
 
4664
- @fields['print_page_range'] = pages
5454
+ @fields['eol'] = eol
4665
5455
  self
4666
5456
  end
4667
5457
 
4668
- # Specifies where the images are stored.
5458
+ # Specify the page break mode for the text output.
4669
5459
  #
4670
- # * +mode+ - The image storage mode. Allowed values are embed, separate.
5460
+ # * +mode+ - Allowed values are none, default, custom.
4671
5461
  # * *Returns* - The converter object.
4672
- def setImageMode(mode)
4673
- unless /(?i)^(embed|separate)$/.match(mode)
4674
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setImageMode", "pdf-to-html", "Allowed values are embed, separate.", "set_image_mode"), 470);
5462
+ def setPageBreakMode(mode)
5463
+ unless /(?i)^(none|default|custom)$/.match(mode)
5464
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470);
4675
5465
  end
4676
5466
 
4677
- @fields['image_mode'] = mode
5467
+ @fields['page_break_mode'] = mode
4678
5468
  self
4679
5469
  end
4680
5470
 
4681
- # Specifies where the style sheets are stored.
5471
+ # Specify the custom page break.
4682
5472
  #
4683
- # * +mode+ - The style sheet storage mode. Allowed values are embed, separate.
5473
+ # * +page_break+ - String to insert between the pages.
4684
5474
  # * *Returns* - The converter object.
4685
- def setCssMode(mode)
4686
- unless /(?i)^(embed|separate)$/.match(mode)
4687
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setCssMode", "pdf-to-html", "Allowed values are embed, separate.", "set_css_mode"), 470);
5475
+ def setCustomPageBreak(page_break)
5476
+ @fields['custom_page_break'] = page_break
5477
+ self
5478
+ end
5479
+
5480
+ # Specify the paragraph detection mode.
5481
+ #
5482
+ # * +mode+ - Allowed values are none, bounding-box, characters.
5483
+ # * *Returns* - The converter object.
5484
+ def setParagraphMode(mode)
5485
+ unless /(?i)^(none|bounding-box|characters)$/.match(mode)
5486
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470);
4688
5487
  end
4689
5488
 
4690
- @fields['css_mode'] = mode
5489
+ @fields['paragraph_mode'] = mode
4691
5490
  self
4692
5491
  end
4693
5492
 
4694
- # Specifies where the fonts are stored.
5493
+ # Set the maximum line spacing when the paragraph detection mode is enabled.
4695
5494
  #
4696
- # * +mode+ - The font storage mode. Allowed values are embed, separate.
5495
+ # * +threshold+ - The value must be a positive integer percentage.
4697
5496
  # * *Returns* - The converter object.
4698
- def setFontMode(mode)
4699
- unless /(?i)^(embed|separate)$/.match(mode)
4700
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setFontMode", "pdf-to-html", "Allowed values are embed, separate.", "set_font_mode"), 470);
5497
+ def setLineSpacingThreshold(threshold)
5498
+ unless /(?i)^0$|^[0-9]+%$/.match(threshold)
5499
+ raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470);
4701
5500
  end
4702
5501
 
4703
- @fields['font_mode'] = mode
5502
+ @fields['line_spacing_threshold'] = threshold
4704
5503
  self
4705
5504
  end
4706
5505
 
4707
- # A helper method to determine if the output file is a zip archive. The output of the conversion may be either an HTML file or a zip file containing the HTML and its external assets.
4708
- # * *Returns* - True if the conversion output is a zip file, otherwise False.
4709
- def isZippedOutput()
4710
- @fields.fetch('image_mode', '') == 'separate' || @fields.fetch('css_mode', '') == 'separate' || @fields.fetch('font_mode', '') == 'separate' || @fields.fetch('force_zip', false) == true
5506
+ # Remove the hyphen character from the end of lines.
5507
+ #
5508
+ # * +value+ - Set to true to remove hyphens.
5509
+ # * *Returns* - The converter object.
5510
+ def setRemoveHyphenation(value)
5511
+ @fields['remove_hyphenation'] = value
5512
+ self
4711
5513
  end
4712
5514
 
4713
- # Enforces the zip output format.
5515
+ # Remove empty lines from the text output.
4714
5516
  #
4715
- # * +value+ - Set to true to get the output as a zip archive.
5517
+ # * +value+ - Set to true to remove empty lines.
4716
5518
  # * *Returns* - The converter object.
4717
- def setForceZip(value)
4718
- @fields['force_zip'] = value
5519
+ def setRemoveEmptyLines(value)
5520
+ @fields['remove_empty_lines'] = value
4719
5521
  self
4720
5522
  end
4721
5523
 
4722
- # Set the HTML title. The title from the input PDF is used by default.
5524
+ # Set the top left X coordinate of the crop area in points.
4723
5525
  #
4724
- # * +title+ - The HTML title.
5526
+ # * +x+ - Must be a positive integer number or 0.
4725
5527
  # * *Returns* - The converter object.
4726
- def setTitle(title)
4727
- @fields['title'] = title
5528
+ def setCropAreaX(x)
5529
+ if (!(Integer(x) >= 0))
5530
+ raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_x"), 470);
5531
+ end
5532
+
5533
+ @fields['crop_area_x'] = x
4728
5534
  self
4729
5535
  end
4730
5536
 
4731
- # Set the HTML subject. The subject from the input PDF is used by default.
5537
+ # Set the top left Y coordinate of the crop area in points.
4732
5538
  #
4733
- # * +subject+ - The HTML subject.
5539
+ # * +y+ - Must be a positive integer number or 0.
4734
5540
  # * *Returns* - The converter object.
4735
- def setSubject(subject)
4736
- @fields['subject'] = subject
5541
+ def setCropAreaY(y)
5542
+ if (!(Integer(y) >= 0))
5543
+ raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_y"), 470);
5544
+ end
5545
+
5546
+ @fields['crop_area_y'] = y
4737
5547
  self
4738
5548
  end
4739
5549
 
4740
- # Set the HTML author. The author from the input PDF is used by default.
5550
+ # Set the width of the crop area in points.
4741
5551
  #
4742
- # * +author+ - The HTML author.
5552
+ # * +width+ - Must be a positive integer number or 0.
4743
5553
  # * *Returns* - The converter object.
4744
- def setAuthor(author)
4745
- @fields['author'] = author
5554
+ def setCropAreaWidth(width)
5555
+ if (!(Integer(width) >= 0))
5556
+ raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_width"), 470);
5557
+ end
5558
+
5559
+ @fields['crop_area_width'] = width
4746
5560
  self
4747
5561
  end
4748
5562
 
4749
- # Associate keywords with the HTML document. Keywords from the input PDF are used by default.
5563
+ # Set the height of the crop area in points.
4750
5564
  #
4751
- # * +keywords+ - The string containing the keywords.
5565
+ # * +height+ - Must be a positive integer number or 0.
4752
5566
  # * *Returns* - The converter object.
4753
- def setKeywords(keywords)
4754
- @fields['keywords'] = keywords
5567
+ def setCropAreaHeight(height)
5568
+ if (!(Integer(height) >= 0))
5569
+ raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_height"), 470);
5570
+ end
5571
+
5572
+ @fields['crop_area_height'] = height
5573
+ self
5574
+ end
5575
+
5576
+ # Set the crop area. It allows to extract just a part of a PDF page.
5577
+ #
5578
+ # * +x+ - Set the top left X coordinate of the crop area in points. Must be a positive integer number or 0.
5579
+ # * +y+ - Set the top left Y coordinate of the crop area in points. Must be a positive integer number or 0.
5580
+ # * +width+ - Set the width of the crop area in points. Must be a positive integer number or 0.
5581
+ # * +height+ - Set the height of the crop area in points. Must be a positive integer number or 0.
5582
+ # * *Returns* - The converter object.
5583
+ def setCropArea(x, y, width, height)
5584
+ setCropAreaX(x)
5585
+ setCropAreaY(y)
5586
+ setCropAreaWidth(width)
5587
+ setCropAreaHeight(height)
4755
5588
  self
4756
5589
  end
4757
5590
 
@@ -4791,7 +5624,7 @@ module Pdfcrowd
4791
5624
  return @helper.getJobId()
4792
5625
  end
4793
5626
 
4794
- # Get the total number of pages in the output document.
5627
+ # Get the number of pages in the output document.
4795
5628
  # * *Returns* - The page count.
4796
5629
  def getPageCount()
4797
5630
  return @helper.getPageCount()
@@ -4824,7 +5657,7 @@ module Pdfcrowd
4824
5657
  # * *Returns* - The converter object.
4825
5658
  def setHttpProxy(proxy)
4826
5659
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4827
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
5660
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
4828
5661
  end
4829
5662
 
4830
5663
  @fields['http_proxy'] = proxy
@@ -4837,7 +5670,7 @@ module Pdfcrowd
4837
5670
  # * *Returns* - The converter object.
4838
5671
  def setHttpsProxy(proxy)
4839
5672
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4840
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
5673
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
4841
5674
  end
4842
5675
 
4843
5676
  @fields['https_proxy'] = proxy
@@ -4884,12 +5717,6 @@ module Pdfcrowd
4884
5717
  self
4885
5718
  end
4886
5719
 
4887
- private
4888
-
4889
- def isOutputTypeValid(file_path)
4890
- extension = File.extname(file_path).downcase
4891
- (extension == '.zip') == isZippedOutput()
4892
- end
4893
5720
  end
4894
5721
 
4895
5722
  end