pdfcrowd 5.8.0 → 5.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pdfcrowd.rb +1070 -243
- metadata +2 -2
data/lib/pdfcrowd.rb
CHANGED
@@ -530,7 +530,7 @@ end
|
|
530
530
|
module Pdfcrowd
|
531
531
|
HOST = ENV["PDFCROWD_HOST"] || 'api.pdfcrowd.com'
|
532
532
|
MULTIPART_BOUNDARY = '----------ThIs_Is_tHe_bOUnDary_$'
|
533
|
-
CLIENT_VERSION = '5.
|
533
|
+
CLIENT_VERSION = '5.10.0'
|
534
534
|
|
535
535
|
class ConnectionHelper
|
536
536
|
def initialize(user_name, api_key)
|
@@ -541,7 +541,7 @@ module Pdfcrowd
|
|
541
541
|
|
542
542
|
setProxy(nil, nil, nil, nil)
|
543
543
|
setUseHttp(false)
|
544
|
-
setUserAgent('pdfcrowd_ruby_client/5.
|
544
|
+
setUserAgent('pdfcrowd_ruby_client/5.10.0 (https://pdfcrowd.com)')
|
545
545
|
|
546
546
|
@retry_count = 1
|
547
547
|
@converter_version = '20.10'
|
@@ -597,6 +597,10 @@ module Pdfcrowd
|
|
597
597
|
@page_count
|
598
598
|
end
|
599
599
|
|
600
|
+
def getTotalPageCount()
|
601
|
+
@total_page_count
|
602
|
+
end
|
603
|
+
|
600
604
|
def getOutputSize()
|
601
605
|
@output_size
|
602
606
|
end
|
@@ -613,6 +617,7 @@ module Pdfcrowd
|
|
613
617
|
@consumed_credits = 0
|
614
618
|
@job_id = ''
|
615
619
|
@page_count = 0
|
620
|
+
@total_page_count = 0
|
616
621
|
@output_size = 0
|
617
622
|
@retry = 0
|
618
623
|
end
|
@@ -699,6 +704,7 @@ module Pdfcrowd
|
|
699
704
|
@consumed_credits = (response["X-Pdfcrowd-Consumed-Credits"] || 0).to_i
|
700
705
|
@job_id = response["X-Pdfcrowd-Job-Id"] || ''
|
701
706
|
@page_count = (response["X-Pdfcrowd-Pages"] || 0).to_i
|
707
|
+
@total_page_count = (response["X-Pdfcrowd-Total-Pages"] || 0).to_i
|
702
708
|
@output_size = (response["X-Pdfcrowd-Output-Size"] || 0).to_i
|
703
709
|
|
704
710
|
raise Error.new('test 502', '502') \
|
@@ -1714,11 +1720,11 @@ module Pdfcrowd
|
|
1714
1720
|
|
1715
1721
|
# The input HTML is automatically enhanced to improve the readability.
|
1716
1722
|
#
|
1717
|
-
# * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3.
|
1723
|
+
# * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.
|
1718
1724
|
# * *Returns* - The converter object.
|
1719
1725
|
def setReadabilityEnhancements(enhancements)
|
1720
|
-
unless /(?i)^(none|readability-v1|readability-v2|readability-v3)$/.match(enhancements)
|
1721
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-pdf", "Allowed values are none, readability-v1, readability-v2, readability-v3.", "set_readability_enhancements"), 470);
|
1726
|
+
unless /(?i)^(none|readability-v1|readability-v2|readability-v3|readability-v4)$/.match(enhancements)
|
1727
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-pdf", "Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.", "set_readability_enhancements"), 470);
|
1722
1728
|
end
|
1723
1729
|
|
1724
1730
|
@fields['readability_enhancements'] = enhancements
|
@@ -1777,11 +1783,11 @@ module Pdfcrowd
|
|
1777
1783
|
|
1778
1784
|
# Specifies the scaling mode used for fitting the HTML contents to the print area.
|
1779
1785
|
#
|
1780
|
-
# * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.
|
1786
|
+
# * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.
|
1781
1787
|
# * *Returns* - The converter object.
|
1782
1788
|
def setSmartScalingMode(mode)
|
1783
|
-
unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|mode1)$/.match(mode)
|
1784
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.", "set_smart_scaling_mode"), 470);
|
1789
|
+
unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|single-page-fit-ex|mode1)$/.match(mode)
|
1790
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.", "set_smart_scaling_mode"), 470);
|
1785
1791
|
end
|
1786
1792
|
|
1787
1793
|
@fields['smart_scaling_mode'] = mode
|
@@ -2197,12 +2203,18 @@ module Pdfcrowd
|
|
2197
2203
|
return @helper.getJobId()
|
2198
2204
|
end
|
2199
2205
|
|
2200
|
-
# Get the
|
2206
|
+
# Get the number of pages in the output document.
|
2201
2207
|
# * *Returns* - The page count.
|
2202
2208
|
def getPageCount()
|
2203
2209
|
return @helper.getPageCount()
|
2204
2210
|
end
|
2205
2211
|
|
2212
|
+
# Get the total number of pages in the original output document, including the pages excluded by setPrintPageRange().
|
2213
|
+
# * *Returns* - The total page count.
|
2214
|
+
def getTotalPageCount()
|
2215
|
+
return @helper.getTotalPageCount()
|
2216
|
+
end
|
2217
|
+
|
2206
2218
|
# Get the size of the output in bytes.
|
2207
2219
|
# * *Returns* - The count of bytes.
|
2208
2220
|
def getOutputSize()
|
@@ -2880,11 +2892,11 @@ module Pdfcrowd
|
|
2880
2892
|
|
2881
2893
|
# The input HTML is automatically enhanced to improve the readability.
|
2882
2894
|
#
|
2883
|
-
# * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3.
|
2895
|
+
# * +enhancements+ - Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.
|
2884
2896
|
# * *Returns* - The converter object.
|
2885
2897
|
def setReadabilityEnhancements(enhancements)
|
2886
|
-
unless /(?i)^(none|readability-v1|readability-v2|readability-v3)$/.match(enhancements)
|
2887
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-image", "Allowed values are none, readability-v1, readability-v2, readability-v3.", "set_readability_enhancements"), 470);
|
2898
|
+
unless /(?i)^(none|readability-v1|readability-v2|readability-v3|readability-v4)$/.match(enhancements)
|
2899
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(enhancements, "setReadabilityEnhancements", "html-to-image", "Allowed values are none, readability-v1, readability-v2, readability-v3, readability-v4.", "set_readability_enhancements"), 470);
|
2888
2900
|
end
|
2889
2901
|
|
2890
2902
|
@fields['readability_enhancements'] = enhancements
|
@@ -4006,7 +4018,7 @@ module Pdfcrowd
|
|
4006
4018
|
return @helper.getJobId()
|
4007
4019
|
end
|
4008
4020
|
|
4009
|
-
# Get the
|
4021
|
+
# Get the number of pages in the output document.
|
4010
4022
|
# * *Returns* - The page count.
|
4011
4023
|
def getPageCount()
|
4012
4024
|
return @helper.getPageCount()
|
@@ -4291,231 +4303,1012 @@ module Pdfcrowd
|
|
4291
4303
|
self
|
4292
4304
|
end
|
4293
4305
|
|
4294
|
-
#
|
4306
|
+
# Apply a watermark to each page of the output PDF file. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
|
4295
4307
|
#
|
4296
|
-
# * +
|
4308
|
+
# * +watermark+ - The file path to a local file. The file must exist and not be empty.
|
4297
4309
|
# * *Returns* - The converter object.
|
4298
|
-
def
|
4299
|
-
|
4310
|
+
def setPageWatermark(watermark)
|
4311
|
+
if (!(File.file?(watermark) && !File.zero?(watermark)))
|
4312
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setPageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_page_watermark"), 470);
|
4313
|
+
end
|
4314
|
+
|
4315
|
+
@files['page_watermark'] = watermark
|
4300
4316
|
self
|
4301
4317
|
end
|
4302
4318
|
|
4303
|
-
#
|
4304
|
-
#
|
4305
|
-
|
4306
|
-
|
4307
|
-
|
4308
|
-
|
4309
|
-
|
4310
|
-
|
4311
|
-
|
4312
|
-
|
4313
|
-
|
4314
|
-
def getRemainingCreditCount()
|
4315
|
-
return @helper.getRemainingCreditCount()
|
4316
|
-
end
|
4317
|
-
|
4318
|
-
# Get the number of credits consumed by the last conversion.
|
4319
|
-
# * *Returns* - The number of credits.
|
4320
|
-
def getConsumedCreditCount()
|
4321
|
-
return @helper.getConsumedCreditCount()
|
4322
|
-
end
|
4323
|
-
|
4324
|
-
# Get the job id.
|
4325
|
-
# * *Returns* - The unique job identifier.
|
4326
|
-
def getJobId()
|
4327
|
-
return @helper.getJobId()
|
4319
|
+
# Load a file from the specified URL and apply the file as a watermark to each page of the output PDF. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
|
4320
|
+
#
|
4321
|
+
# * +url+ - The supported protocols are http:// and https://.
|
4322
|
+
# * *Returns* - The converter object.
|
4323
|
+
def setPageWatermarkUrl(url)
|
4324
|
+
unless /(?i)^https?:\/\/.*$/.match(url)
|
4325
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_watermark_url"), 470);
|
4326
|
+
end
|
4327
|
+
|
4328
|
+
@fields['page_watermark_url'] = url
|
4329
|
+
self
|
4328
4330
|
end
|
4329
4331
|
|
4330
|
-
#
|
4331
|
-
#
|
4332
|
-
|
4333
|
-
|
4332
|
+
# Apply each page of a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
|
4333
|
+
#
|
4334
|
+
# * +watermark+ - The file path to a local file. The file must exist and not be empty.
|
4335
|
+
# * *Returns* - The converter object.
|
4336
|
+
def setMultipageWatermark(watermark)
|
4337
|
+
if (!(File.file?(watermark) && !File.zero?(watermark)))
|
4338
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setMultipageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_watermark"), 470);
|
4339
|
+
end
|
4340
|
+
|
4341
|
+
@files['multipage_watermark'] = watermark
|
4342
|
+
self
|
4334
4343
|
end
|
4335
4344
|
|
4336
|
-
#
|
4337
|
-
#
|
4338
|
-
|
4339
|
-
|
4345
|
+
# Load a file from the specified URL and apply each page of the file as a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
|
4346
|
+
#
|
4347
|
+
# * +url+ - The supported protocols are http:// and https://.
|
4348
|
+
# * *Returns* - The converter object.
|
4349
|
+
def setMultipageWatermarkUrl(url)
|
4350
|
+
unless /(?i)^https?:\/\/.*$/.match(url)
|
4351
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_watermark_url"), 470);
|
4352
|
+
end
|
4353
|
+
|
4354
|
+
@fields['multipage_watermark_url'] = url
|
4355
|
+
self
|
4340
4356
|
end
|
4341
4357
|
|
4342
|
-
#
|
4358
|
+
# Apply a background to each page of the output PDF file. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
|
4343
4359
|
#
|
4344
|
-
# * +
|
4360
|
+
# * +background+ - The file path to a local file. The file must exist and not be empty.
|
4345
4361
|
# * *Returns* - The converter object.
|
4346
|
-
def
|
4347
|
-
|
4362
|
+
def setPageBackground(background)
|
4363
|
+
if (!(File.file?(background) && !File.zero?(background)))
|
4364
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setPageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_page_background"), 470);
|
4365
|
+
end
|
4366
|
+
|
4367
|
+
@files['page_background'] = background
|
4348
4368
|
self
|
4349
4369
|
end
|
4350
4370
|
|
4351
|
-
#
|
4371
|
+
# Load a file from the specified URL and apply the file as a background to each page of the output PDF. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
|
4352
4372
|
#
|
4353
|
-
# * +
|
4373
|
+
# * +url+ - The supported protocols are http:// and https://.
|
4354
4374
|
# * *Returns* - The converter object.
|
4355
|
-
def
|
4356
|
-
unless /(?i)^
|
4357
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(
|
4375
|
+
def setPageBackgroundUrl(url)
|
4376
|
+
unless /(?i)^https?:\/\/.*$/.match(url)
|
4377
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_background_url"), 470);
|
4358
4378
|
end
|
4359
4379
|
|
4360
|
-
@fields['
|
4380
|
+
@fields['page_background_url'] = url
|
4361
4381
|
self
|
4362
4382
|
end
|
4363
4383
|
|
4364
|
-
#
|
4384
|
+
# Apply each page of a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
|
4365
4385
|
#
|
4366
|
-
# * +
|
4386
|
+
# * +background+ - The file path to a local file. The file must exist and not be empty.
|
4367
4387
|
# * *Returns* - The converter object.
|
4368
|
-
def
|
4369
|
-
|
4370
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(
|
4388
|
+
def setMultipageBackground(background)
|
4389
|
+
if (!(File.file?(background) && !File.zero?(background)))
|
4390
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setMultipageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_background"), 470);
|
4371
4391
|
end
|
4372
4392
|
|
4373
|
-
@
|
4393
|
+
@files['multipage_background'] = background
|
4374
4394
|
self
|
4375
4395
|
end
|
4376
4396
|
|
4377
|
-
#
|
4397
|
+
# Load a file from the specified URL and apply each page of the file as a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
|
4378
4398
|
#
|
4379
|
-
# * +
|
4399
|
+
# * +url+ - The supported protocols are http:// and https://.
|
4380
4400
|
# * *Returns* - The converter object.
|
4381
|
-
def
|
4382
|
-
unless /(?i)^
|
4383
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(
|
4401
|
+
def setMultipageBackgroundUrl(url)
|
4402
|
+
unless /(?i)^https?:\/\/.*$/.match(url)
|
4403
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_background_url"), 470);
|
4384
4404
|
end
|
4385
4405
|
|
4386
|
-
@
|
4406
|
+
@fields['multipage_background_url'] = url
|
4387
4407
|
self
|
4388
4408
|
end
|
4389
4409
|
|
4390
|
-
#
|
4391
|
-
# Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
|
4410
|
+
# Create linearized PDF. This is also known as Fast Web View.
|
4392
4411
|
#
|
4393
|
-
# * +value+ - Set to true to
|
4412
|
+
# * +value+ - Set to true to create linearized PDF.
|
4394
4413
|
# * *Returns* - The converter object.
|
4395
|
-
def
|
4396
|
-
@
|
4414
|
+
def setLinearize(value)
|
4415
|
+
@fields['linearize'] = value
|
4397
4416
|
self
|
4398
4417
|
end
|
4399
4418
|
|
4400
|
-
#
|
4419
|
+
# Encrypt the PDF. This prevents search engines from indexing the contents.
|
4401
4420
|
#
|
4402
|
-
# * +
|
4421
|
+
# * +value+ - Set to true to enable PDF encryption.
|
4403
4422
|
# * *Returns* - The converter object.
|
4404
|
-
def
|
4405
|
-
@
|
4423
|
+
def setEncrypt(value)
|
4424
|
+
@fields['encrypt'] = value
|
4406
4425
|
self
|
4407
4426
|
end
|
4408
4427
|
|
4409
|
-
#
|
4428
|
+
# Protect the PDF with a user password. When a PDF has a user password, it must be supplied in order to view the document and to perform operations allowed by the access permissions.
|
4410
4429
|
#
|
4411
|
-
# * +
|
4412
|
-
# * +port+ - The proxy port.
|
4413
|
-
# * +user_name+ - The username.
|
4414
|
-
# * +password+ - The password.
|
4430
|
+
# * +password+ - The user password.
|
4415
4431
|
# * *Returns* - The converter object.
|
4416
|
-
def
|
4417
|
-
@
|
4432
|
+
def setUserPassword(password)
|
4433
|
+
@fields['user_password'] = password
|
4418
4434
|
self
|
4419
4435
|
end
|
4420
4436
|
|
4421
|
-
#
|
4437
|
+
# Protect the PDF with an owner password. Supplying an owner password grants unlimited access to the PDF including changing the passwords and access permissions.
|
4422
4438
|
#
|
4423
|
-
# * +
|
4439
|
+
# * +password+ - The owner password.
|
4424
4440
|
# * *Returns* - The converter object.
|
4425
|
-
def
|
4426
|
-
@
|
4441
|
+
def setOwnerPassword(password)
|
4442
|
+
@fields['owner_password'] = password
|
4427
4443
|
self
|
4428
4444
|
end
|
4429
4445
|
|
4430
|
-
|
4431
|
-
|
4432
|
-
# Conversion from PDF to HTML.
|
4433
|
-
class PdfToHtmlClient
|
4434
|
-
# Constructor for the Pdfcrowd API client.
|
4446
|
+
# Disallow printing of the output PDF.
|
4435
4447
|
#
|
4436
|
-
# * +
|
4437
|
-
# *
|
4438
|
-
def
|
4439
|
-
@
|
4440
|
-
|
4441
|
-
'input_format'=>'pdf',
|
4442
|
-
'output_format'=>'html'
|
4443
|
-
}
|
4444
|
-
@file_id = 1
|
4445
|
-
@files = {}
|
4446
|
-
@raw_data = {}
|
4448
|
+
# * +value+ - Set to true to set the no-print flag in the output PDF.
|
4449
|
+
# * *Returns* - The converter object.
|
4450
|
+
def setNoPrint(value)
|
4451
|
+
@fields['no_print'] = value
|
4452
|
+
self
|
4447
4453
|
end
|
4448
4454
|
|
4449
|
-
#
|
4455
|
+
# Disallow modification of the output PDF.
|
4450
4456
|
#
|
4451
|
-
# * +
|
4452
|
-
# * *Returns* -
|
4453
|
-
def
|
4454
|
-
|
4455
|
-
|
4456
|
-
end
|
4457
|
-
|
4458
|
-
@fields['url'] = url
|
4459
|
-
@helper.post(@fields, @files, @raw_data)
|
4457
|
+
# * +value+ - Set to true to set the read-only flag in the output PDF.
|
4458
|
+
# * *Returns* - The converter object.
|
4459
|
+
def setNoModify(value)
|
4460
|
+
@fields['no_modify'] = value
|
4461
|
+
self
|
4460
4462
|
end
|
4461
4463
|
|
4462
|
-
#
|
4464
|
+
# Disallow text and graphics extraction from the output PDF.
|
4463
4465
|
#
|
4464
|
-
# * +
|
4465
|
-
# *
|
4466
|
-
def
|
4467
|
-
|
4468
|
-
|
4469
|
-
end
|
4470
|
-
|
4471
|
-
@fields['url'] = url
|
4472
|
-
@helper.post(@fields, @files, @raw_data, out_stream)
|
4466
|
+
# * +value+ - Set to true to set the no-copy flag in the output PDF.
|
4467
|
+
# * *Returns* - The converter object.
|
4468
|
+
def setNoCopy(value)
|
4469
|
+
@fields['no_copy'] = value
|
4470
|
+
self
|
4473
4471
|
end
|
4474
4472
|
|
4475
|
-
#
|
4473
|
+
# Set the title of the PDF.
|
4476
4474
|
#
|
4477
|
-
# * +
|
4478
|
-
# *
|
4479
|
-
def
|
4480
|
-
|
4481
|
-
|
4482
|
-
end
|
4483
|
-
|
4484
|
-
if (!(isOutputTypeValid(file_path)))
|
4485
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_url_to_file"), 470);
|
4486
|
-
end
|
4487
|
-
|
4488
|
-
output_file = open(file_path, "wb")
|
4489
|
-
begin
|
4490
|
-
convertUrlToStream(url, output_file)
|
4491
|
-
output_file.close()
|
4492
|
-
rescue Error => why
|
4493
|
-
output_file.close()
|
4494
|
-
FileUtils.rm(file_path)
|
4495
|
-
raise
|
4496
|
-
end
|
4475
|
+
# * +title+ - The title.
|
4476
|
+
# * *Returns* - The converter object.
|
4477
|
+
def setTitle(title)
|
4478
|
+
@fields['title'] = title
|
4479
|
+
self
|
4497
4480
|
end
|
4498
4481
|
|
4499
|
-
#
|
4482
|
+
# Set the subject of the PDF.
|
4500
4483
|
#
|
4501
|
-
# * +
|
4502
|
-
# * *Returns* -
|
4503
|
-
def
|
4504
|
-
|
4505
|
-
|
4506
|
-
end
|
4507
|
-
|
4508
|
-
@files['file'] = file
|
4509
|
-
@helper.post(@fields, @files, @raw_data)
|
4484
|
+
# * +subject+ - The subject.
|
4485
|
+
# * *Returns* - The converter object.
|
4486
|
+
def setSubject(subject)
|
4487
|
+
@fields['subject'] = subject
|
4488
|
+
self
|
4510
4489
|
end
|
4511
4490
|
|
4512
|
-
#
|
4491
|
+
# Set the author of the PDF.
|
4492
|
+
#
|
4493
|
+
# * +author+ - The author.
|
4494
|
+
# * *Returns* - The converter object.
|
4495
|
+
def setAuthor(author)
|
4496
|
+
@fields['author'] = author
|
4497
|
+
self
|
4498
|
+
end
|
4499
|
+
|
4500
|
+
# Associate keywords with the document.
|
4501
|
+
#
|
4502
|
+
# * +keywords+ - The string with the keywords.
|
4503
|
+
# * *Returns* - The converter object.
|
4504
|
+
def setKeywords(keywords)
|
4505
|
+
@fields['keywords'] = keywords
|
4506
|
+
self
|
4507
|
+
end
|
4508
|
+
|
4509
|
+
# Specify the page layout to be used when the document is opened.
|
4510
|
+
#
|
4511
|
+
# * +layout+ - Allowed values are single-page, one-column, two-column-left, two-column-right.
|
4512
|
+
# * *Returns* - The converter object.
|
4513
|
+
def setPageLayout(layout)
|
4514
|
+
unless /(?i)^(single-page|one-column|two-column-left|two-column-right)$/.match(layout)
|
4515
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(layout, "setPageLayout", "image-to-pdf", "Allowed values are single-page, one-column, two-column-left, two-column-right.", "set_page_layout"), 470);
|
4516
|
+
end
|
4517
|
+
|
4518
|
+
@fields['page_layout'] = layout
|
4519
|
+
self
|
4520
|
+
end
|
4521
|
+
|
4522
|
+
# Specify how the document should be displayed when opened.
|
4523
|
+
#
|
4524
|
+
# * +mode+ - Allowed values are full-screen, thumbnails, outlines.
|
4525
|
+
# * *Returns* - The converter object.
|
4526
|
+
def setPageMode(mode)
|
4527
|
+
unless /(?i)^(full-screen|thumbnails|outlines)$/.match(mode)
|
4528
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageMode", "image-to-pdf", "Allowed values are full-screen, thumbnails, outlines.", "set_page_mode"), 470);
|
4529
|
+
end
|
4530
|
+
|
4531
|
+
@fields['page_mode'] = mode
|
4532
|
+
self
|
4533
|
+
end
|
4534
|
+
|
4535
|
+
# Specify how the page should be displayed when opened.
|
4536
|
+
#
|
4537
|
+
# * +zoom_type+ - Allowed values are fit-width, fit-height, fit-page.
|
4538
|
+
# * *Returns* - The converter object.
|
4539
|
+
def setInitialZoomType(zoom_type)
|
4540
|
+
unless /(?i)^(fit-width|fit-height|fit-page)$/.match(zoom_type)
|
4541
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(zoom_type, "setInitialZoomType", "image-to-pdf", "Allowed values are fit-width, fit-height, fit-page.", "set_initial_zoom_type"), 470);
|
4542
|
+
end
|
4543
|
+
|
4544
|
+
@fields['initial_zoom_type'] = zoom_type
|
4545
|
+
self
|
4546
|
+
end
|
4547
|
+
|
4548
|
+
# Display the specified page when the document is opened.
|
4549
|
+
#
|
4550
|
+
# * +page+ - Must be a positive integer number.
|
4551
|
+
# * *Returns* - The converter object.
|
4552
|
+
def setInitialPage(page)
|
4553
|
+
if (!(Integer(page) > 0))
|
4554
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(page, "setInitialPage", "image-to-pdf", "Must be a positive integer number.", "set_initial_page"), 470);
|
4555
|
+
end
|
4556
|
+
|
4557
|
+
@fields['initial_page'] = page
|
4558
|
+
self
|
4559
|
+
end
|
4560
|
+
|
4561
|
+
# Specify the initial page zoom in percent when the document is opened.
|
4562
|
+
#
|
4563
|
+
# * +zoom+ - Must be a positive integer number.
|
4564
|
+
# * *Returns* - The converter object.
|
4565
|
+
def setInitialZoom(zoom)
|
4566
|
+
if (!(Integer(zoom) > 0))
|
4567
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(zoom, "setInitialZoom", "image-to-pdf", "Must be a positive integer number.", "set_initial_zoom"), 470);
|
4568
|
+
end
|
4569
|
+
|
4570
|
+
@fields['initial_zoom'] = zoom
|
4571
|
+
self
|
4572
|
+
end
|
4573
|
+
|
4574
|
+
# Specify whether to hide the viewer application's tool bars when the document is active.
|
4575
|
+
#
|
4576
|
+
# * +value+ - Set to true to hide tool bars.
|
4577
|
+
# * *Returns* - The converter object.
|
4578
|
+
def setHideToolbar(value)
|
4579
|
+
@fields['hide_toolbar'] = value
|
4580
|
+
self
|
4581
|
+
end
|
4582
|
+
|
4583
|
+
# Specify whether to hide the viewer application's menu bar when the document is active.
|
4584
|
+
#
|
4585
|
+
# * +value+ - Set to true to hide the menu bar.
|
4586
|
+
# * *Returns* - The converter object.
|
4587
|
+
def setHideMenubar(value)
|
4588
|
+
@fields['hide_menubar'] = value
|
4589
|
+
self
|
4590
|
+
end
|
4591
|
+
|
4592
|
+
# Specify whether to hide user interface elements in the document's window (such as scroll bars and navigation controls), leaving only the document's contents displayed.
|
4593
|
+
#
|
4594
|
+
# * +value+ - Set to true to hide UI elements.
|
4595
|
+
# * *Returns* - The converter object.
|
4596
|
+
def setHideWindowUi(value)
|
4597
|
+
@fields['hide_window_ui'] = value
|
4598
|
+
self
|
4599
|
+
end
|
4600
|
+
|
4601
|
+
# Specify whether to resize the document's window to fit the size of the first displayed page.
|
4602
|
+
#
|
4603
|
+
# * +value+ - Set to true to resize the window.
|
4604
|
+
# * *Returns* - The converter object.
|
4605
|
+
def setFitWindow(value)
|
4606
|
+
@fields['fit_window'] = value
|
4607
|
+
self
|
4608
|
+
end
|
4609
|
+
|
4610
|
+
# Specify whether to position the document's window in the center of the screen.
|
4611
|
+
#
|
4612
|
+
# * +value+ - Set to true to center the window.
|
4613
|
+
# * *Returns* - The converter object.
|
4614
|
+
def setCenterWindow(value)
|
4615
|
+
@fields['center_window'] = value
|
4616
|
+
self
|
4617
|
+
end
|
4618
|
+
|
4619
|
+
# Specify whether the window's title bar should display the document title. If false, the title bar should instead display the name of the PDF file containing the document.
|
4620
|
+
#
|
4621
|
+
# * +value+ - Set to true to display the title.
|
4622
|
+
# * *Returns* - The converter object.
|
4623
|
+
def setDisplayTitle(value)
|
4624
|
+
@fields['display_title'] = value
|
4625
|
+
self
|
4626
|
+
end
|
4627
|
+
|
4628
|
+
# Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method and is also available in the conversion statistics.
|
4629
|
+
#
|
4630
|
+
# * +value+ - Set to true to enable the debug logging.
|
4631
|
+
# * *Returns* - The converter object.
|
4632
|
+
def setDebugLog(value)
|
4633
|
+
@fields['debug_log'] = value
|
4634
|
+
self
|
4635
|
+
end
|
4636
|
+
|
4637
|
+
# Get the URL of the debug log for the last conversion.
|
4638
|
+
# * *Returns* - The link to the debug log.
|
4639
|
+
def getDebugLogUrl()
|
4640
|
+
return @helper.getDebugLogUrl()
|
4641
|
+
end
|
4642
|
+
|
4643
|
+
# Get the number of conversion credits available in your account.
|
4644
|
+
# This method can only be called after a call to one of the convertXtoY methods.
|
4645
|
+
# The returned value can differ from the actual count if you run parallel conversions.
|
4646
|
+
# The special value 999999 is returned if the information is not available.
|
4647
|
+
# * *Returns* - The number of credits.
|
4648
|
+
def getRemainingCreditCount()
|
4649
|
+
return @helper.getRemainingCreditCount()
|
4650
|
+
end
|
4651
|
+
|
4652
|
+
# Get the number of credits consumed by the last conversion.
|
4653
|
+
# * *Returns* - The number of credits.
|
4654
|
+
def getConsumedCreditCount()
|
4655
|
+
return @helper.getConsumedCreditCount()
|
4656
|
+
end
|
4657
|
+
|
4658
|
+
# Get the job id.
|
4659
|
+
# * *Returns* - The unique job identifier.
|
4660
|
+
def getJobId()
|
4661
|
+
return @helper.getJobId()
|
4662
|
+
end
|
4663
|
+
|
4664
|
+
# Get the size of the output in bytes.
|
4665
|
+
# * *Returns* - The count of bytes.
|
4666
|
+
def getOutputSize()
|
4667
|
+
return @helper.getOutputSize()
|
4668
|
+
end
|
4669
|
+
|
4670
|
+
# Get the version details.
|
4671
|
+
# * *Returns* - API version, converter version, and client version.
|
4672
|
+
def getVersion()
|
4673
|
+
return "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
|
4674
|
+
end
|
4675
|
+
|
4676
|
+
# Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
|
4677
|
+
#
|
4678
|
+
# * +tag+ - A string with the custom tag.
|
4679
|
+
# * *Returns* - The converter object.
|
4680
|
+
def setTag(tag)
|
4681
|
+
@fields['tag'] = tag
|
4682
|
+
self
|
4683
|
+
end
|
4684
|
+
|
4685
|
+
# A proxy server used by the Pdfcrowd conversion process for accessing source URLs with the HTTP scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
|
4686
|
+
#
|
4687
|
+
# * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
|
4688
|
+
# * *Returns* - The converter object.
|
4689
|
+
def setHttpProxy(proxy)
|
4690
|
+
unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
|
4691
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
|
4692
|
+
end
|
4693
|
+
|
4694
|
+
@fields['http_proxy'] = proxy
|
4695
|
+
self
|
4696
|
+
end
|
4697
|
+
|
4698
|
+
# A proxy server used by the Pdfcrowd conversion process for accessing source URLs with the HTTPS scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
|
4699
|
+
#
|
4700
|
+
# * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
|
4701
|
+
# * *Returns* - The converter object.
|
4702
|
+
def setHttpsProxy(proxy)
|
4703
|
+
unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
|
4704
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
|
4705
|
+
end
|
4706
|
+
|
4707
|
+
@fields['https_proxy'] = proxy
|
4708
|
+
self
|
4709
|
+
end
|
4710
|
+
|
4711
|
+
# Set the converter version. Different versions may produce different output. Choose which one provides the best output for your case.
|
4712
|
+
#
|
4713
|
+
# * +version+ - The version identifier. Allowed values are latest, 20.10, 18.10.
|
4714
|
+
# * *Returns* - The converter object.
|
4715
|
+
def setConverterVersion(version)
|
4716
|
+
unless /(?i)^(latest|20.10|18.10)$/.match(version)
|
4717
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(version, "setConverterVersion", "image-to-pdf", "Allowed values are latest, 20.10, 18.10.", "set_converter_version"), 470);
|
4718
|
+
end
|
4719
|
+
|
4720
|
+
@helper.setConverterVersion(version)
|
4721
|
+
self
|
4722
|
+
end
|
4723
|
+
|
4724
|
+
# Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
|
4725
|
+
# Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
|
4726
|
+
#
|
4727
|
+
# * +value+ - Set to true to use HTTP.
|
4728
|
+
# * *Returns* - The converter object.
|
4729
|
+
def setUseHttp(value)
|
4730
|
+
@helper.setUseHttp(value)
|
4731
|
+
self
|
4732
|
+
end
|
4733
|
+
|
4734
|
+
# Set a custom user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
|
4735
|
+
#
|
4736
|
+
# * +agent+ - The user agent string.
|
4737
|
+
# * *Returns* - The converter object.
|
4738
|
+
def setUserAgent(agent)
|
4739
|
+
@helper.setUserAgent(agent)
|
4740
|
+
self
|
4741
|
+
end
|
4742
|
+
|
4743
|
+
# Specifies an HTTP proxy that the API client library will use to connect to the internet.
|
4744
|
+
#
|
4745
|
+
# * +host+ - The proxy hostname.
|
4746
|
+
# * +port+ - The proxy port.
|
4747
|
+
# * +user_name+ - The username.
|
4748
|
+
# * +password+ - The password.
|
4749
|
+
# * *Returns* - The converter object.
|
4750
|
+
def setProxy(host, port, user_name, password)
|
4751
|
+
@helper.setProxy(host, port, user_name, password)
|
4752
|
+
self
|
4753
|
+
end
|
4754
|
+
|
4755
|
+
# Specifies the number of automatic retries when the 502 HTTP status code is received. The 502 status code indicates a temporary network issue. This feature can be disabled by setting the count to 0.
|
4756
|
+
#
|
4757
|
+
# * +count+ - Number of retries.
|
4758
|
+
# * *Returns* - The converter object.
|
4759
|
+
def setRetryCount(count)
|
4760
|
+
@helper.setRetryCount(count)
|
4761
|
+
self
|
4762
|
+
end
|
4763
|
+
|
4764
|
+
end
|
4765
|
+
|
4766
|
+
# Conversion from PDF to HTML.
#
# Conversion options are accumulated in @fields by the chainable set*
# methods. Every convert* method passes @fields, @files and @raw_data to
# ConnectionHelper#post.
class PdfToHtmlClient
  # Constructor for the Pdfcrowd API client.
  #
  # * +user_name+ - Your username at Pdfcrowd.
  # * +api_key+ - Your API key.
  def initialize(user_name, api_key)
    @helper = ConnectionHelper.new(user_name, api_key)
    @fields = {
      'input_format' => 'pdf',
      'output_format' => 'html'
    }
    @file_id = 1
    @files = {}
    @raw_data = {}
  end

  # Convert a PDF.
  #
  # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
  # * *Returns* - Byte array containing the conversion output.
  # * *Raises* - Error (code 470) if +url+ does not start with http:// or https://.
  def convertUrl(url)
    validateUrl(url, "convertUrl", "convert_url")
    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data)
  end

  # Convert a PDF and write the result to an output stream.
  #
  # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
  # * +out_stream+ - The output stream that will contain the conversion output.
  # * *Raises* - Error (code 470) if +url+ does not start with http:// or https://.
  def convertUrlToStream(url, out_stream)
    validateUrl(url, "convertUrlToStream::url", "convert_url_to_stream")
    @fields['url'] = url
    @helper.post(@fields, @files, @raw_data, out_stream)
  end

  # Convert a PDF and write the result to a local file.
  #
  # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
  # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
  # * *Raises* - Error (code 470) if +file_path+ is empty or its extension does not match the output type.
  def convertUrlToFile(url, file_path)
    validateOutputPath(file_path, "convertUrlToFile::file_path", "convert_url_to_file")
    writeOutputFile(file_path) { |output_file| convertUrlToStream(url, output_file) }
  end

  # Convert a local file.
  #
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
  # * *Returns* - Byte array containing the conversion output.
  # * *Raises* - Error (code 470) if +file+ is not an existing, non-empty regular file.
  def convertFile(file)
    validateInputFile(file, "convertFile", "convert_file")
    @files['file'] = file
    @helper.post(@fields, @files, @raw_data)
  end

  # Convert a local file and write the result to an output stream.
  #
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
  # * +out_stream+ - The output stream that will contain the conversion output.
  # * *Raises* - Error (code 470) if +file+ is not an existing, non-empty regular file.
  def convertFileToStream(file, out_stream)
    validateInputFile(file, "convertFileToStream::file", "convert_file_to_stream")
    @files['file'] = file
    @helper.post(@fields, @files, @raw_data, out_stream)
  end

  # Convert a local file and write the result to a local file.
  #
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
  # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
  # * *Raises* - Error (code 470) if +file_path+ is empty or its extension does not match the output type.
  def convertFileToFile(file, file_path)
    validateOutputPath(file_path, "convertFileToFile::file_path", "convert_file_to_file")
    writeOutputFile(file_path) { |output_file| convertFileToStream(file, output_file) }
  end

  # Convert raw data.
  #
  # * +data+ - The raw content to be converted.
  # * *Returns* - Byte array with the output.
  def convertRawData(data)
    @raw_data['file'] = data
    @helper.post(@fields, @files, @raw_data)
  end

  # Convert raw data and write the result to an output stream.
  #
  # * +data+ - The raw content to be converted.
  # * +out_stream+ - The output stream that will contain the conversion output.
  def convertRawDataToStream(data, out_stream)
    @raw_data['file'] = data
    @helper.post(@fields, @files, @raw_data, out_stream)
  end

  # Convert raw data to a file.
  #
  # * +data+ - The raw content to be converted.
  # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
  # * *Raises* - Error (code 470) if +file_path+ is empty or its extension does not match the output type.
  def convertRawDataToFile(data, file_path)
    validateOutputPath(file_path, "convertRawDataToFile::file_path", "convert_raw_data_to_file")
    writeOutputFile(file_path) { |output_file| convertRawDataToStream(data, output_file) }
  end

  # Convert the contents of an input stream.
  #
  # * +in_stream+ - The input stream with source data.
  # * *Returns* - Byte array containing the conversion output.
  def convertStream(in_stream)
    @raw_data['stream'] = in_stream.read
    @helper.post(@fields, @files, @raw_data)
  end

  # Convert the contents of an input stream and write the result to an output stream.
  #
  # * +in_stream+ - The input stream with source data.
  # * +out_stream+ - The output stream that will contain the conversion output.
  def convertStreamToStream(in_stream, out_stream)
    @raw_data['stream'] = in_stream.read
    @helper.post(@fields, @files, @raw_data, out_stream)
  end

  # Convert the contents of an input stream and write the result to a local file.
  #
  # * +in_stream+ - The input stream with source data.
  # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
  # * *Raises* - Error (code 470) if +file_path+ is empty or its extension does not match the output type.
  def convertStreamToFile(in_stream, file_path)
    validateOutputPath(file_path, "convertStreamToFile::file_path", "convert_stream_to_file")
    writeOutputFile(file_path) { |output_file| convertStreamToStream(in_stream, output_file) }
  end

  # Password to open the encrypted PDF file.
  #
  # * +password+ - The input PDF password.
  # * *Returns* - The converter object.
  def setPdfPassword(password)
    @fields['pdf_password'] = password
    self
  end

  # Set the scaling factor (zoom) for the main page area.
  #
  # * +factor+ - The percentage value. Must be a positive integer number.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +factor+ cannot be read as a positive integer.
  def setScaleFactor(factor)
    # Integer() raises for values such as "abc" or nil; report those through
    # the same Error as any other invalid value instead of leaking
    # ArgumentError/TypeError to the caller.
    parsed = begin
      Integer(factor)
    rescue ArgumentError, TypeError, RangeError
      nil
    end
    unless parsed && parsed > 0
      raise Error.new(Pdfcrowd.create_invalid_value_message(factor, "setScaleFactor", "pdf-to-html", "Must be a positive integer number.", "set_scale_factor"), 470)
    end

    @fields['scale_factor'] = factor
    self
  end

  # Set the page range to print.
  #
  # * +pages+ - A comma separated list of page numbers or ranges.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +pages+ is not such a list.
  def setPrintPageRange(pages)
    # \A and \z anchor the whole string; ^ and $ would accept a value in
    # which only one line is a valid range list.
    unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
      raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-html", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
    end

    @fields['print_page_range'] = pages
    self
  end

  # Specifies where the images are stored.
  #
  # * +mode+ - The image storage mode. Allowed values are embed, separate.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +mode+ is not an allowed value.
  def setImageMode(mode)
    unless /\A(embed|separate)\z/i.match(mode)
      raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setImageMode", "pdf-to-html", "Allowed values are embed, separate.", "set_image_mode"), 470)
    end

    @fields['image_mode'] = mode
    self
  end

  # Specifies where the style sheets are stored.
  #
  # * +mode+ - The style sheet storage mode. Allowed values are embed, separate.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +mode+ is not an allowed value.
  def setCssMode(mode)
    unless /\A(embed|separate)\z/i.match(mode)
      raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setCssMode", "pdf-to-html", "Allowed values are embed, separate.", "set_css_mode"), 470)
    end

    @fields['css_mode'] = mode
    self
  end

  # Specifies where the fonts are stored.
  #
  # * +mode+ - The font storage mode. Allowed values are embed, separate.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +mode+ is not an allowed value.
  def setFontMode(mode)
    unless /\A(embed|separate)\z/i.match(mode)
      raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setFontMode", "pdf-to-html", "Allowed values are embed, separate.", "set_font_mode"), 470)
    end

    @fields['font_mode'] = mode
    self
  end

  # A helper method to determine if the output file is a zip archive. The output of the conversion may be either an HTML file or a zip file containing the HTML and its external assets.
  # * *Returns* - True if the conversion output is a zip file, otherwise False.
  def isZippedOutput()
    # NOTE(review): the mode setters validate case-insensitively while these
    # comparisons are case-sensitive, so a value such as 'SEPARATE' is not
    # detected here - confirm this is intended.
    @fields.fetch('image_mode', '') == 'separate' || @fields.fetch('css_mode', '') == 'separate' || @fields.fetch('font_mode', '') == 'separate' || @fields.fetch('force_zip', false) == true
  end

  # Enforces the zip output format.
  #
  # * +value+ - Set to true to get the output as a zip archive.
  # * *Returns* - The converter object.
  def setForceZip(value)
    @fields['force_zip'] = value
    self
  end

  # Set the HTML title. The title from the input PDF is used by default.
  #
  # * +title+ - The HTML title.
  # * *Returns* - The converter object.
  def setTitle(title)
    @fields['title'] = title
    self
  end

  # Set the HTML subject. The subject from the input PDF is used by default.
  #
  # * +subject+ - The HTML subject.
  # * *Returns* - The converter object.
  def setSubject(subject)
    @fields['subject'] = subject
    self
  end

  # Set the HTML author. The author from the input PDF is used by default.
  #
  # * +author+ - The HTML author.
  # * *Returns* - The converter object.
  def setAuthor(author)
    @fields['author'] = author
    self
  end

  # Associate keywords with the HTML document. Keywords from the input PDF are used by default.
  #
  # * +keywords+ - The string containing the keywords.
  # * *Returns* - The converter object.
  def setKeywords(keywords)
    @fields['keywords'] = keywords
    self
  end

  # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or available in conversion statistics.
  #
  # * +value+ - Set to true to enable the debug logging.
  # * *Returns* - The converter object.
  def setDebugLog(value)
    @fields['debug_log'] = value
    self
  end

  # Get the URL of the debug log for the last conversion.
  # * *Returns* - The link to the debug log.
  def getDebugLogUrl()
    @helper.getDebugLogUrl()
  end

  # Get the number of conversion credits available in your account.
  # This method can only be called after a call to one of the convertXtoY methods.
  # The returned value can differ from the actual count if you run parallel conversions.
  # The special value 999999 is returned if the information is not available.
  # * *Returns* - The number of credits.
  def getRemainingCreditCount()
    @helper.getRemainingCreditCount()
  end

  # Get the number of credits consumed by the last conversion.
  # * *Returns* - The number of credits.
  def getConsumedCreditCount()
    @helper.getConsumedCreditCount()
  end

  # Get the job id.
  # * *Returns* - The unique job identifier.
  def getJobId()
    @helper.getJobId()
  end

  # Get the number of pages in the output document.
  # * *Returns* - The page count.
  def getPageCount()
    @helper.getPageCount()
  end

  # Get the size of the output in bytes.
  # * *Returns* - The count of bytes.
  def getOutputSize()
    @helper.getOutputSize()
  end

  # Get the version details.
  # * *Returns* - API version, converter version, and client version.
  def getVersion()
    "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
  end

  # Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
  #
  # * +tag+ - A string with the custom tag.
  # * *Returns* - The converter object.
  def setTag(tag)
    @fields['tag'] = tag
    self
  end

  # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTP scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
  #
  # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +proxy+ does not have that format.
  def setHttpProxy(proxy)
    validateProxy(proxy, "setHttpProxy", "set_http_proxy")
    @fields['http_proxy'] = proxy
    self
  end

  # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTPS scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
  #
  # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
  # * *Returns* - The converter object.
  # * *Raises* - Error (code 470) if +proxy+ does not have that format.
  def setHttpsProxy(proxy)
    validateProxy(proxy, "setHttpsProxy", "set_https_proxy")
    @fields['https_proxy'] = proxy
    self
  end

  # Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
  # Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
  #
  # * +value+ - Set to true to use HTTP.
  # * *Returns* - The converter object.
  def setUseHttp(value)
    @helper.setUseHttp(value)
    self
  end

  # Set a custom user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
  #
  # * +agent+ - The user agent string.
  # * *Returns* - The converter object.
  def setUserAgent(agent)
    @helper.setUserAgent(agent)
    self
  end

  # Specifies an HTTP proxy that the API client library will use to connect to the internet.
  #
  # * +host+ - The proxy hostname.
  # * +port+ - The proxy port.
  # * +user_name+ - The username.
  # * +password+ - The password.
  # * *Returns* - The converter object.
  def setProxy(host, port, user_name, password)
    @helper.setProxy(host, port, user_name, password)
    self
  end

  # Specifies the number of automatic retries when the 502 HTTP status code is received. The 502 status code indicates a temporary network issue. This feature can be disabled by setting to 0.
  #
  # * +count+ - Number of retries.
  # * *Returns* - The converter object.
  def setRetryCount(count)
    @helper.setRetryCount(count)
    self
  end

  private

  # True when the extension of +file_path+ agrees with the output type:
  # a .zip extension (any letter case) if and only if isZippedOutput is true.
  def isOutputTypeValid(file_path)
    extension = File.extname(file_path).downcase
    (extension == '.zip') == isZippedOutput()
  end

  # Raises Error (code 470) unless +url+ starts with http:// or https://.
  # +label+ and +api_name+ identify the calling method in the message.
  def validateUrl(url, label, api_name)
    # \A anchors at the start of the whole string; ^ would accept any value
    # that merely contains a line beginning with the scheme.
    return if /\Ahttps?:\/\//i.match(url)

    raise Error.new(Pdfcrowd.create_invalid_value_message(url, label, "pdf-to-html", "The supported protocols are http:// and https://.", api_name), 470)
  end

  # Raises Error (code 470) unless +file+ is an existing, non-empty regular file.
  def validateInputFile(file, label, api_name)
    return if File.file?(file) && !File.zero?(file)

    raise Error.new(Pdfcrowd.create_invalid_value_message(file, label, "pdf-to-html", "The file must exist and not be empty.", api_name), 470)
  end

  # Raises Error (code 470) unless +proxy+ has the DOMAIN_OR_IP_ADDRESS:PORT format.
  def validateProxy(proxy, label, api_name)
    return if /\A([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+\z/i.match(proxy)

    raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, label, "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", api_name), 470)
  end

  # Raises Error (code 470) if +file_path+ is nil/empty or if its extension
  # does not match the output type selected by the current options.
  def validateOutputPath(file_path, label, api_name)
    if file_path.nil? || file_path.empty?
      raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, label, "pdf-to-html", "The string must not be empty.", api_name), 470)
    end

    return if isOutputTypeValid(file_path)

    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, label, "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", api_name), 470)
  end

  # Opens +file_path+ for binary writing and yields the handle to the block.
  # The handle is closed on every exit path; when the block raises Error the
  # partially written file is removed before the error is re-raised.
  # Returns nil.
  def writeOutputFile(file_path)
    # File.open is used instead of Kernel#open, which can treat a path
    # beginning with "|" as a command to run.
    output_file = File.open(file_path, "wb")
    begin
      yield output_file
    rescue Error
      # Close before deleting so the removal also works on platforms that
      # refuse to delete open files; rm_f keeps a cleanup failure from
      # masking the conversion error.
      output_file.close
      FileUtils.rm_f(file_path)
      raise
    ensure
      output_file.close unless output_file.closed?
    end
    nil
  end
end
|
5228
|
+
|
5229
|
+
# Conversion from PDF to text.
|
5230
|
+
class PdfToTextClient
|
5231
|
+
# Constructor for the Pdfcrowd API client.
|
5232
|
+
#
|
5233
|
+
# * +user_name+ - Your username at Pdfcrowd.
|
5234
|
+
# * +api_key+ - Your API key.
|
5235
|
+
def initialize(user_name, api_key)
  # Request state starts empty; the input/output formats are fixed for
  # this client (PDF in, plain text out).
  @raw_data = {}
  @files = {}
  @file_id = 1
  @fields = { 'input_format'=>'pdf', 'output_format'=>'txt' }
  @helper = ConnectionHelper.new(user_name, api_key)
end
|
5245
|
+
|
5246
|
+
# Convert a PDF.
|
5247
|
+
#
|
5248
|
+
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
|
5249
|
+
# * *Returns* - Byte array containing the conversion output.
|
5250
|
+
def convertUrl(url)
  # Raises Error (code 470) unless url starts with http:// or https://.
  # \A anchors at the start of the whole string; ^ would accept any value
  # that merely contains a line beginning with the scheme.
  unless /\Ahttps?:\/\//i.match(url)
    raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data)
end
|
5258
|
+
|
5259
|
+
# Convert a PDF and write the result to an output stream.
|
5260
|
+
#
|
5261
|
+
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
|
5262
|
+
# * +out_stream+ - The output stream that will contain the conversion output.
|
5263
|
+
def convertUrlToStream(url, out_stream)
  # Raises Error (code 470) unless url starts with http:// or https://.
  # \A anchors at the start of the whole string; ^ would accept any value
  # that merely contains a line beginning with the scheme.
  unless /\Ahttps?:\/\//i.match(url)
    raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470)
  end

  @fields['url'] = url
  @helper.post(@fields, @files, @raw_data, out_stream)
end
|
5271
|
+
|
5272
|
+
# Convert a PDF and write the result to a local file.
|
5273
|
+
#
|
5274
|
+
# * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
|
5275
|
+
# * +file_path+ - The output file path. The string must not be empty.
|
5276
|
+
def convertUrlToFile(url, file_path)
  # Raises Error (code 470) when file_path is nil or empty.
  if file_path.nil? || file_path.empty?
    raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470)
  end

  # File.open is used instead of Kernel#open, which can treat a path
  # beginning with "|" as a command to run.
  output_file = File.open(file_path, "wb")
  begin
    convertUrlToStream(url, output_file)
  rescue Error
    # Close before deleting so the removal also works on platforms that
    # refuse to delete open files; rm_f keeps a cleanup failure from
    # masking the conversion error.
    output_file.close
    FileUtils.rm_f(file_path)
    raise
  ensure
    # Also reached for exceptions other than Error, so the handle never leaks.
    output_file.close unless output_file.closed?
  end
  nil
end
|
5291
|
+
|
5292
|
+
# Convert a local file.
|
5293
|
+
#
|
5294
|
+
# * +file+ - The path to a local file to convert. The file must exist and not be empty.
|
5295
|
+
# * *Returns* - Byte array containing the conversion output.
|
5296
|
+
def convertFile(file)
  # Reject anything that is not an existing regular file with content.
  usable = File.file?(file) && !File.zero?(file)
  raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470) unless usable

  # Register the path under the 'file' key and hand the request state to the helper.
  @files.store('file', file)
  @helper.post(@fields, @files, @raw_data)
end
|
5304
|
+
|
5305
|
+
# Convert a local file and write the result to an output stream.
|
4513
5306
|
#
|
4514
5307
|
# * +file+ - The path to a local file to convert. The file must exist and not be empty.
|
4515
5308
|
# * +out_stream+ - The output stream that will contain the conversion output.
|
4516
5309
|
def convertFileToStream(file, out_stream)
|
4517
5310
|
if (!(File.file?(file) && !File.zero?(file)))
|
4518
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-
|
5311
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
|
4519
5312
|
end
|
4520
5313
|
|
4521
5314
|
@files['file'] = file
|
@@ -4525,14 +5318,10 @@ module Pdfcrowd
|
|
4525
5318
|
# Convert a local file and write the result to a local file.
|
4526
5319
|
#
|
4527
5320
|
# * +file+ - The path to a local file to convert. The file must exist and not be empty.
|
4528
|
-
# * +file_path+ - The output file path. The string must not be empty.
|
5321
|
+
# * +file_path+ - The output file path. The string must not be empty.
|
4529
5322
|
def convertFileToFile(file, file_path)
|
4530
5323
|
if (!(!file_path.nil? && !file_path.empty?))
|
4531
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-
|
4532
|
-
end
|
4533
|
-
|
4534
|
-
if (!(isOutputTypeValid(file_path)))
|
4535
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_file_to_file"), 470);
|
5324
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470);
|
4536
5325
|
end
|
4537
5326
|
|
4538
5327
|
output_file = open(file_path, "wb")
|
@@ -4567,14 +5356,10 @@ module Pdfcrowd
|
|
4567
5356
|
# Convert raw data to a file.
|
4568
5357
|
#
|
4569
5358
|
# * +data+ - The raw content to be converted.
|
4570
|
-
# * +file_path+ - The output file path. The string must not be empty.
|
5359
|
+
# * +file_path+ - The output file path. The string must not be empty.
|
4571
5360
|
def convertRawDataToFile(data, file_path)
|
4572
5361
|
if (!(!file_path.nil? && !file_path.empty?))
|
4573
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-
|
4574
|
-
end
|
4575
|
-
|
4576
|
-
if (!(isOutputTypeValid(file_path)))
|
4577
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_raw_data_to_file"), 470);
|
5362
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470);
|
4578
5363
|
end
|
4579
5364
|
|
4580
5365
|
output_file = open(file_path, "wb")
|
@@ -4609,14 +5394,10 @@ module Pdfcrowd
|
|
4609
5394
|
# Convert the contents of an input stream and write the result to a local file.
|
4610
5395
|
#
|
4611
5396
|
# * +in_stream+ - The input stream with source data.
|
4612
|
-
# * +file_path+ - The output file path. The string must not be empty.
|
5397
|
+
# * +file_path+ - The output file path. The string must not be empty.
|
4613
5398
|
def convertStreamToFile(in_stream, file_path)
|
4614
5399
|
if (!(!file_path.nil? && !file_path.empty?))
|
4615
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-
|
4616
|
-
end
|
4617
|
-
|
4618
|
-
if (!(isOutputTypeValid(file_path)))
|
4619
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_stream_to_file"), 470);
|
5400
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470);
|
4620
5401
|
end
|
4621
5402
|
|
4622
5403
|
output_file = open(file_path, "wb")
|
@@ -4630,7 +5411,7 @@ module Pdfcrowd
|
|
4630
5411
|
end
|
4631
5412
|
end
|
4632
5413
|
|
4633
|
-
#
|
5414
|
+
# The password to open the encrypted PDF file.
|
4634
5415
|
#
|
4635
5416
|
# * +password+ - The input PDF password.
|
4636
5417
|
# * *Returns* - The converter object.
|
@@ -4639,119 +5420,171 @@ module Pdfcrowd
|
|
4639
5420
|
self
|
4640
5421
|
end
|
4641
5422
|
|
4642
|
-
# Set the
|
5423
|
+
# Set the page range to print.
|
4643
5424
|
#
|
4644
|
-
# * +
|
5425
|
+
# * +pages+ - A comma separated list of page numbers or ranges.
|
4645
5426
|
# * *Returns* - The converter object.
|
4646
|
-
def
|
4647
|
-
|
4648
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(
|
5427
|
+
def setPrintPageRange(pages)
  # Raises Error (code 470) unless pages is a comma separated list of page
  # numbers or ranges. \A and \z anchor the whole string; ^ and $ would
  # accept a value in which only one line is a valid list.
  unless /\A(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*\z/.match(pages)
    raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470)
  end

  @fields['print_page_range'] = pages
  self
end
|
4654
5435
|
|
4655
|
-
#
|
5436
|
+
# Ignore the original PDF layout.
|
4656
5437
|
#
|
4657
|
-
# * +
|
5438
|
+
# * +value+ - Set to true to ignore the layout.
|
4658
5439
|
# * *Returns* - The converter object.
|
4659
|
-
def
|
4660
|
-
|
4661
|
-
|
5440
|
+
def setNoLayout(value)
  # Record the flag under the 'no_layout' request field; the converter
  # object is returned so calls can be chained.
  @fields.store('no_layout', value)
  self
end
|
5444
|
+
|
5445
|
+
# The end-of-line convention for the text output.
|
5446
|
+
#
|
5447
|
+
# * +eol+ - Allowed values are unix, dos, mac.
|
5448
|
+
# * *Returns* - The converter object.
|
5449
|
+
def setEol(eol)
  # Raises Error (code 470) unless eol is unix, dos or mac (any letter case).
  # \A and \z anchor the whole string; ^ and $ would accept a multi-line
  # value in which only one line is an allowed word.
  unless /\A(unix|dos|mac)\z/i.match(eol)
    raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470)
  end

  @fields['eol'] = eol
  self
end
|
4667
5457
|
|
4668
|
-
#
|
5458
|
+
# Specify the page break mode for the text output.
|
4669
5459
|
#
|
4670
|
-
# * +mode+ -
|
5460
|
+
# * +mode+ - Allowed values are none, default, custom.
|
4671
5461
|
# * *Returns* - The converter object.
|
4672
|
-
def
|
4673
|
-
unless /(?i)^(
|
4674
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "
|
5462
|
+
def setPageBreakMode(mode)
|
5463
|
+
unless /(?i)^(none|default|custom)$/.match(mode)
|
5464
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470);
|
4675
5465
|
end
|
4676
5466
|
|
4677
|
-
@fields['
|
5467
|
+
@fields['page_break_mode'] = mode
|
4678
5468
|
self
|
4679
5469
|
end
|
4680
5470
|
|
4681
|
-
#
|
5471
|
+
# Specify the custom page break.
|
4682
5472
|
#
|
4683
|
-
# * +
|
5473
|
+
# * +page_break+ - String to insert between the pages.
|
4684
5474
|
# * *Returns* - The converter object.
|
4685
|
-
def
|
4686
|
-
|
4687
|
-
|
5475
|
+
def setCustomPageBreak(page_break)
|
5476
|
+
@fields['custom_page_break'] = page_break
|
5477
|
+
self
|
5478
|
+
end
|
5479
|
+
|
5480
|
+
# Specify the paragraph detection mode.
|
5481
|
+
#
|
5482
|
+
# * +mode+ - Allowed values are none, bounding-box, characters.
|
5483
|
+
# * *Returns* - The converter object.
|
5484
|
+
def setParagraphMode(mode)
|
5485
|
+
unless /(?i)^(none|bounding-box|characters)$/.match(mode)
|
5486
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470);
|
4688
5487
|
end
|
4689
5488
|
|
4690
|
-
@fields['
|
5489
|
+
@fields['paragraph_mode'] = mode
|
4691
5490
|
self
|
4692
5491
|
end
|
4693
5492
|
|
4694
|
-
#
|
5493
|
+
# Set the maximum line spacing when the paragraph detection mode is enabled.
|
4695
5494
|
#
|
4696
|
-
# * +
|
5495
|
+
# * +threshold+ - The value must be a positive integer percentage.
|
4697
5496
|
# * *Returns* - The converter object.
|
4698
|
-
def
|
4699
|
-
unless /(?i)^
|
4700
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(
|
5497
|
+
def setLineSpacingThreshold(threshold)
|
5498
|
+
unless /(?i)^0$|^[0-9]+%$/.match(threshold)
|
5499
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470);
|
4701
5500
|
end
|
4702
5501
|
|
4703
|
-
@fields['
|
5502
|
+
@fields['line_spacing_threshold'] = threshold
|
4704
5503
|
self
|
4705
5504
|
end
|
4706
5505
|
|
4707
|
-
#
|
4708
|
-
#
|
4709
|
-
|
4710
|
-
|
5506
|
+
# Remove the hyphen character from the end of lines.
|
5507
|
+
#
|
5508
|
+
# * +value+ - Set to true to remove hyphens.
|
5509
|
+
# * *Returns* - The converter object.
|
5510
|
+
def setRemoveHyphenation(value)
|
5511
|
+
@fields['remove_hyphenation'] = value
|
5512
|
+
self
|
4711
5513
|
end
|
4712
5514
|
|
4713
|
-
#
|
5515
|
+
# Remove empty lines from the text output.
|
4714
5516
|
#
|
4715
|
-
# * +value+ - Set to true to
|
5517
|
+
# * +value+ - Set to true to remove empty lines.
|
4716
5518
|
# * *Returns* - The converter object.
|
4717
|
-
def
|
4718
|
-
@fields['
|
5519
|
+
def setRemoveEmptyLines(value)
|
5520
|
+
@fields['remove_empty_lines'] = value
|
4719
5521
|
self
|
4720
5522
|
end
|
4721
5523
|
|
4722
|
-
# Set the
|
5524
|
+
# Set the top left X coordinate of the crop area in points.
|
4723
5525
|
#
|
4724
|
-
# * +
|
5526
|
+
# * +x+ - Must be a positive integer number or 0.
|
4725
5527
|
# * *Returns* - The converter object.
|
4726
|
-
def
|
4727
|
-
|
5528
|
+
def setCropAreaX(x)
|
5529
|
+
if (!(Integer(x) >= 0))
|
5530
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_x"), 470);
|
5531
|
+
end
|
5532
|
+
|
5533
|
+
@fields['crop_area_x'] = x
|
4728
5534
|
self
|
4729
5535
|
end
|
4730
5536
|
|
4731
|
-
# Set the
|
5537
|
+
# Set the top left Y coordinate of the crop area in points.
|
4732
5538
|
#
|
4733
|
-
# * +
|
5539
|
+
# * +y+ - Must be a positive integer number or 0.
|
4734
5540
|
# * *Returns* - The converter object.
|
4735
|
-
def
|
4736
|
-
|
5541
|
+
def setCropAreaY(y)
|
5542
|
+
if (!(Integer(y) >= 0))
|
5543
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_y"), 470);
|
5544
|
+
end
|
5545
|
+
|
5546
|
+
@fields['crop_area_y'] = y
|
4737
5547
|
self
|
4738
5548
|
end
|
4739
5549
|
|
4740
|
-
# Set the
|
5550
|
+
# Set the width of the crop area in points.
|
4741
5551
|
#
|
4742
|
-
# * +
|
5552
|
+
# * +width+ - Must be a positive integer number or 0.
|
4743
5553
|
# * *Returns* - The converter object.
|
4744
|
-
def
|
4745
|
-
|
5554
|
+
def setCropAreaWidth(width)
|
5555
|
+
if (!(Integer(width) >= 0))
|
5556
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_width"), 470);
|
5557
|
+
end
|
5558
|
+
|
5559
|
+
@fields['crop_area_width'] = width
|
4746
5560
|
self
|
4747
5561
|
end
|
4748
5562
|
|
4749
|
-
#
|
5563
|
+
# Set the height of the crop area in points.
|
4750
5564
|
#
|
4751
|
-
# * +
|
5565
|
+
# * +height+ - Must be a positive integer number or 0.
|
4752
5566
|
# * *Returns* - The converter object.
|
4753
|
-
def
|
4754
|
-
|
5567
|
+
def setCropAreaHeight(height)
|
5568
|
+
if (!(Integer(height) >= 0))
|
5569
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_height"), 470);
|
5570
|
+
end
|
5571
|
+
|
5572
|
+
@fields['crop_area_height'] = height
|
5573
|
+
self
|
5574
|
+
end
|
5575
|
+
|
5576
|
+
# Set the crop area. It allows extracting just a part of a PDF page.
|
5577
|
+
#
|
5578
|
+
# * +x+ - Set the top left X coordinate of the crop area in points. Must be a positive integer number or 0.
|
5579
|
+
# * +y+ - Set the top left Y coordinate of the crop area in points. Must be a positive integer number or 0.
|
5580
|
+
# * +width+ - Set the width of the crop area in points. Must be a positive integer number or 0.
|
5581
|
+
# * +height+ - Set the height of the crop area in points. Must be a positive integer number or 0.
|
5582
|
+
# * *Returns* - The converter object.
|
5583
|
+
def setCropArea(x, y, width, height)
|
5584
|
+
setCropAreaX(x)
|
5585
|
+
setCropAreaY(y)
|
5586
|
+
setCropAreaWidth(width)
|
5587
|
+
setCropAreaHeight(height)
|
4755
5588
|
self
|
4756
5589
|
end
|
4757
5590
|
|
@@ -4791,7 +5624,7 @@ module Pdfcrowd
|
|
4791
5624
|
return @helper.getJobId()
|
4792
5625
|
end
|
4793
5626
|
|
4794
|
-
# Get the
|
5627
|
+
# Get the number of pages in the output document.
|
4795
5628
|
# * *Returns* - The page count.
|
4796
5629
|
def getPageCount()
|
4797
5630
|
return @helper.getPageCount()
|
@@ -4824,7 +5657,7 @@ module Pdfcrowd
|
|
4824
5657
|
# * *Returns* - The converter object.
|
4825
5658
|
def setHttpProxy(proxy)
|
4826
5659
|
unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
|
4827
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-
|
5660
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
|
4828
5661
|
end
|
4829
5662
|
|
4830
5663
|
@fields['http_proxy'] = proxy
|
@@ -4837,7 +5670,7 @@ module Pdfcrowd
|
|
4837
5670
|
# * *Returns* - The converter object.
|
4838
5671
|
def setHttpsProxy(proxy)
|
4839
5672
|
unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
|
4840
|
-
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-
|
5673
|
+
raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
|
4841
5674
|
end
|
4842
5675
|
|
4843
5676
|
@fields['https_proxy'] = proxy
|
@@ -4884,12 +5717,6 @@ module Pdfcrowd
|
|
4884
5717
|
self
|
4885
5718
|
end
|
4886
5719
|
|
4887
|
-
private
|
4888
|
-
|
4889
|
-
def isOutputTypeValid(file_path)
|
4890
|
-
extension = File.extname(file_path).downcase
|
4891
|
-
(extension == '.zip') == isZippedOutput()
|
4892
|
-
end
|
4893
5720
|
end
|
4894
5721
|
|
4895
5722
|
end
|