pdfcrowd 5.9.0 → 5.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/pdfcrowd.rb +929 -114
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1a5acbb283c6b75918ea9f34e667cbf48c1a6fb04527ddfb7635094306c40645
4
- data.tar.gz: f4932f2c8cbe494a14c42dee45212d0eae455f1aadd560765e0aca046f8dfe8d
3
+ metadata.gz: 26cd6f43af08ebb5a8181d034a73d17b884e5deef10c8ee92750690f5e35d62f
4
+ data.tar.gz: 193dd80310673a5783d9f25642f6f09d1eed40f68762fb51c232036ba7543316
5
5
  SHA512:
6
- metadata.gz: 9bc88c22538231821ac6ad2e99b8acdb3df27f5c0da3ff7339aa6e8bdde66cb970bbb4c4884b2abf90ba655a97dc1b656e502fad4637278a432cec390f96a252
7
- data.tar.gz: 81937c8b7d2d32683b2f807d0da6f1b592d96607587fcbfd0c64f79a649fda1436462d2c7a2c58d2539a3fc2125a248908842ab853e4bdfd8503ff3dd0ee76ad
6
+ metadata.gz: 963f817840495dcca5869a5150c059a6d8bc14a94e378dd977cbeeb8a4ad0b9092d683e03eef37cb214654603c0bf4926505b877b3fddf82df44b7c2347cf866
7
+ data.tar.gz: 324747ca72b05e865906855c1b4b4333a9f679f691379685e8db501176ae3fe732a38ef9c5d2c0583dc2c4720515ebf222ef978f8a4ae93928e4cc424a72bfb0
data/lib/pdfcrowd.rb CHANGED
@@ -530,7 +530,7 @@ end
530
530
  module Pdfcrowd
531
531
  HOST = ENV["PDFCROWD_HOST"] || 'api.pdfcrowd.com'
532
532
  MULTIPART_BOUNDARY = '----------ThIs_Is_tHe_bOUnDary_$'
533
- CLIENT_VERSION = '5.9.0'
533
+ CLIENT_VERSION = '5.10.0'
534
534
 
535
535
  class ConnectionHelper
536
536
  def initialize(user_name, api_key)
@@ -541,7 +541,7 @@ module Pdfcrowd
541
541
 
542
542
  setProxy(nil, nil, nil, nil)
543
543
  setUseHttp(false)
544
- setUserAgent('pdfcrowd_ruby_client/5.9.0 (https://pdfcrowd.com)')
544
+ setUserAgent('pdfcrowd_ruby_client/5.10.0 (https://pdfcrowd.com)')
545
545
 
546
546
  @retry_count = 1
547
547
  @converter_version = '20.10'
@@ -1783,11 +1783,11 @@ module Pdfcrowd
1783
1783
 
1784
1784
  # Specifies the scaling mode used for fitting the HTML contents to the print area.
1785
1785
  #
1786
- # * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.
1786
+ # * +mode+ - The smart scaling mode. Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.
1787
1787
  # * *Returns* - The converter object.
1788
1788
  def setSmartScalingMode(mode)
1789
- unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|mode1)$/.match(mode)
1790
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, mode1.", "set_smart_scaling_mode"), 470);
1789
+ unless /(?i)^(default|disabled|viewport-fit|content-fit|single-page-fit|single-page-fit-ex|mode1)$/.match(mode)
1790
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setSmartScalingMode", "html-to-pdf", "Allowed values are default, disabled, viewport-fit, content-fit, single-page-fit, single-page-fit-ex, mode1.", "set_smart_scaling_mode"), 470);
1791
1791
  end
1792
1792
 
1793
1793
  @fields['smart_scaling_mode'] = mode
@@ -4303,6 +4303,792 @@ module Pdfcrowd
4303
4303
  self
4304
4304
  end
4305
4305
 
4306
+ # Apply a watermark to each page of the output PDF file. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
4307
+ #
4308
+ # * +watermark+ - The file path to a local file. The file must exist and not be empty.
4309
+ # * *Returns* - The converter object.
4310
+ def setPageWatermark(watermark)
4311
+ if (!(File.file?(watermark) && !File.zero?(watermark)))
4312
+ raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setPageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_page_watermark"), 470);
4313
+ end
4314
+
4315
+ @files['page_watermark'] = watermark
4316
+ self
4317
+ end
4318
+
4319
+ # Load a file from the specified URL and apply the file as a watermark to each page of the output PDF. A watermark can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the watermark.
4320
+ #
4321
+ # * +url+ - The supported protocols are http:// and https://.
4322
+ # * *Returns* - The converter object.
4323
+ def setPageWatermarkUrl(url)
4324
+ unless /(?i)^https?:\/\/.*$/.match(url)
4325
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_watermark_url"), 470);
4326
+ end
4327
+
4328
+ @fields['page_watermark_url'] = url
4329
+ self
4330
+ end
4331
+
4332
+ # Apply each page of a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
4333
+ #
4334
+ # * +watermark+ - The file path to a local file. The file must exist and not be empty.
4335
+ # * *Returns* - The converter object.
4336
+ def setMultipageWatermark(watermark)
4337
+ if (!(File.file?(watermark) && !File.zero?(watermark)))
4338
+ raise Error.new(Pdfcrowd.create_invalid_value_message(watermark, "setMultipageWatermark", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_watermark"), 470);
4339
+ end
4340
+
4341
+ @files['multipage_watermark'] = watermark
4342
+ self
4343
+ end
4344
+
4345
+ # Load a file from the specified URL and apply each page of the file as a watermark to the corresponding page of the output PDF. A watermark can be either a PDF or an image.
4346
+ #
4347
+ # * +url+ - The supported protocols are http:// and https://.
4348
+ # * *Returns* - The converter object.
4349
+ def setMultipageWatermarkUrl(url)
4350
+ unless /(?i)^https?:\/\/.*$/.match(url)
4351
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageWatermarkUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_watermark_url"), 470);
4352
+ end
4353
+
4354
+ @fields['multipage_watermark_url'] = url
4355
+ self
4356
+ end
4357
+
4358
+ # Apply a background to each page of the output PDF file. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
4359
+ #
4360
+ # * +background+ - The file path to a local file. The file must exist and not be empty.
4361
+ # * *Returns* - The converter object.
4362
+ def setPageBackground(background)
4363
+ if (!(File.file?(background) && !File.zero?(background)))
4364
+ raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setPageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_page_background"), 470);
4365
+ end
4366
+
4367
+ @files['page_background'] = background
4368
+ self
4369
+ end
4370
+
4371
+ # Load a file from the specified URL and apply the file as a background to each page of the output PDF. A background can be either a PDF or an image. If a multi-page file (PDF or TIFF) is used, the first page is used as the background.
4372
+ #
4373
+ # * +url+ - The supported protocols are http:// and https://.
4374
+ # * *Returns* - The converter object.
4375
+ def setPageBackgroundUrl(url)
4376
+ unless /(?i)^https?:\/\/.*$/.match(url)
4377
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setPageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_page_background_url"), 470);
4378
+ end
4379
+
4380
+ @fields['page_background_url'] = url
4381
+ self
4382
+ end
4383
+
4384
+ # Apply each page of a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
4385
+ #
4386
+ # * +background+ - The file path to a local file. The file must exist and not be empty.
4387
+ # * *Returns* - The converter object.
4388
+ def setMultipageBackground(background)
4389
+ if (!(File.file?(background) && !File.zero?(background)))
4390
+ raise Error.new(Pdfcrowd.create_invalid_value_message(background, "setMultipageBackground", "image-to-pdf", "The file must exist and not be empty.", "set_multipage_background"), 470);
4391
+ end
4392
+
4393
+ @files['multipage_background'] = background
4394
+ self
4395
+ end
4396
+
4397
+ # Load a file from the specified URL and apply each page of the file as a background to the corresponding page of the output PDF. A background can be either a PDF or an image.
4398
+ #
4399
+ # * +url+ - The supported protocols are http:// and https://.
4400
+ # * *Returns* - The converter object.
4401
+ def setMultipageBackgroundUrl(url)
4402
+ unless /(?i)^https?:\/\/.*$/.match(url)
4403
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "setMultipageBackgroundUrl", "image-to-pdf", "The supported protocols are http:// and https://.", "set_multipage_background_url"), 470);
4404
+ end
4405
+
4406
+ @fields['multipage_background_url'] = url
4407
+ self
4408
+ end
4409
+
4410
+ # Create linearized PDF. This is also known as Fast Web View.
4411
+ #
4412
+ # * +value+ - Set to true to create linearized PDF.
4413
+ # * *Returns* - The converter object.
4414
+ def setLinearize(value)
4415
+ @fields['linearize'] = value
4416
+ self
4417
+ end
4418
+
4419
+ # Encrypt the PDF. This prevents search engines from indexing the contents.
4420
+ #
4421
+ # * +value+ - Set to true to enable PDF encryption.
4422
+ # * *Returns* - The converter object.
4423
+ def setEncrypt(value)
4424
+ @fields['encrypt'] = value
4425
+ self
4426
+ end
4427
+
4428
+ # Protect the PDF with a user password. When a PDF has a user password, it must be supplied in order to view the document and to perform operations allowed by the access permissions.
4429
+ #
4430
+ # * +password+ - The user password.
4431
+ # * *Returns* - The converter object.
4432
+ def setUserPassword(password)
4433
+ @fields['user_password'] = password
4434
+ self
4435
+ end
4436
+
4437
+ # Protect the PDF with an owner password. Supplying an owner password grants unlimited access to the PDF including changing the passwords and access permissions.
4438
+ #
4439
+ # * +password+ - The owner password.
4440
+ # * *Returns* - The converter object.
4441
+ def setOwnerPassword(password)
4442
+ @fields['owner_password'] = password
4443
+ self
4444
+ end
4445
+
4446
+ # Disallow printing of the output PDF.
4447
+ #
4448
+ # * +value+ - Set to true to set the no-print flag in the output PDF.
4449
+ # * *Returns* - The converter object.
4450
+ def setNoPrint(value)
4451
+ @fields['no_print'] = value
4452
+ self
4453
+ end
4454
+
4455
+ # Disallow modification of the output PDF.
4456
+ #
4457
+ # * +value+ - Set to true to set the read-only flag in the output PDF.
4458
+ # * *Returns* - The converter object.
4459
+ def setNoModify(value)
4460
+ @fields['no_modify'] = value
4461
+ self
4462
+ end
4463
+
4464
+ # Disallow text and graphics extraction from the output PDF.
4465
+ #
4466
+ # * +value+ - Set to true to set the no-copy flag in the output PDF.
4467
+ # * *Returns* - The converter object.
4468
+ def setNoCopy(value)
4469
+ @fields['no_copy'] = value
4470
+ self
4471
+ end
4472
+
4473
+ # Set the title of the PDF.
4474
+ #
4475
+ # * +title+ - The title.
4476
+ # * *Returns* - The converter object.
4477
+ def setTitle(title)
4478
+ @fields['title'] = title
4479
+ self
4480
+ end
4481
+
4482
+ # Set the subject of the PDF.
4483
+ #
4484
+ # * +subject+ - The subject.
4485
+ # * *Returns* - The converter object.
4486
+ def setSubject(subject)
4487
+ @fields['subject'] = subject
4488
+ self
4489
+ end
4490
+
4491
+ # Set the author of the PDF.
4492
+ #
4493
+ # * +author+ - The author.
4494
+ # * *Returns* - The converter object.
4495
+ def setAuthor(author)
4496
+ @fields['author'] = author
4497
+ self
4498
+ end
4499
+
4500
+ # Associate keywords with the document.
4501
+ #
4502
+ # * +keywords+ - The string with the keywords.
4503
+ # * *Returns* - The converter object.
4504
+ def setKeywords(keywords)
4505
+ @fields['keywords'] = keywords
4506
+ self
4507
+ end
4508
+
4509
+ # Specify the page layout to be used when the document is opened.
4510
+ #
4511
+ # * +layout+ - Allowed values are single-page, one-column, two-column-left, two-column-right.
4512
+ # * *Returns* - The converter object.
4513
+ def setPageLayout(layout)
4514
+ unless /(?i)^(single-page|one-column|two-column-left|two-column-right)$/.match(layout)
4515
+ raise Error.new(Pdfcrowd.create_invalid_value_message(layout, "setPageLayout", "image-to-pdf", "Allowed values are single-page, one-column, two-column-left, two-column-right.", "set_page_layout"), 470);
4516
+ end
4517
+
4518
+ @fields['page_layout'] = layout
4519
+ self
4520
+ end
4521
+
4522
+ # Specify how the document should be displayed when opened.
4523
+ #
4524
+ # * +mode+ - Allowed values are full-screen, thumbnails, outlines.
4525
+ # * *Returns* - The converter object.
4526
+ def setPageMode(mode)
4527
+ unless /(?i)^(full-screen|thumbnails|outlines)$/.match(mode)
4528
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageMode", "image-to-pdf", "Allowed values are full-screen, thumbnails, outlines.", "set_page_mode"), 470);
4529
+ end
4530
+
4531
+ @fields['page_mode'] = mode
4532
+ self
4533
+ end
4534
+
4535
+ # Specify how the page should be displayed when opened.
4536
+ #
4537
+ # * +zoom_type+ - Allowed values are fit-width, fit-height, fit-page.
4538
+ # * *Returns* - The converter object.
4539
+ def setInitialZoomType(zoom_type)
4540
+ unless /(?i)^(fit-width|fit-height|fit-page)$/.match(zoom_type)
4541
+ raise Error.new(Pdfcrowd.create_invalid_value_message(zoom_type, "setInitialZoomType", "image-to-pdf", "Allowed values are fit-width, fit-height, fit-page.", "set_initial_zoom_type"), 470);
4542
+ end
4543
+
4544
+ @fields['initial_zoom_type'] = zoom_type
4545
+ self
4546
+ end
4547
+
4548
+ # Display the specified page when the document is opened.
4549
+ #
4550
+ # * +page+ - Must be a positive integer number.
4551
+ # * *Returns* - The converter object.
4552
+ def setInitialPage(page)
4553
+ if (!(Integer(page) > 0))
4554
+ raise Error.new(Pdfcrowd.create_invalid_value_message(page, "setInitialPage", "image-to-pdf", "Must be a positive integer number.", "set_initial_page"), 470);
4555
+ end
4556
+
4557
+ @fields['initial_page'] = page
4558
+ self
4559
+ end
4560
+
4561
+ # Specify the initial page zoom in percent when the document is opened.
4562
+ #
4563
+ # * +zoom+ - Must be a positive integer number.
4564
+ # * *Returns* - The converter object.
4565
+ def setInitialZoom(zoom)
4566
+ if (!(Integer(zoom) > 0))
4567
+ raise Error.new(Pdfcrowd.create_invalid_value_message(zoom, "setInitialZoom", "image-to-pdf", "Must be a positive integer number.", "set_initial_zoom"), 470);
4568
+ end
4569
+
4570
+ @fields['initial_zoom'] = zoom
4571
+ self
4572
+ end
4573
+
4574
+ # Specify whether to hide the viewer application's tool bars when the document is active.
4575
+ #
4576
+ # * +value+ - Set to true to hide tool bars.
4577
+ # * *Returns* - The converter object.
4578
+ def setHideToolbar(value)
4579
+ @fields['hide_toolbar'] = value
4580
+ self
4581
+ end
4582
+
4583
+ # Specify whether to hide the viewer application's menu bar when the document is active.
4584
+ #
4585
+ # * +value+ - Set to true to hide the menu bar.
4586
+ # * *Returns* - The converter object.
4587
+ def setHideMenubar(value)
4588
+ @fields['hide_menubar'] = value
4589
+ self
4590
+ end
4591
+
4592
+ # Specify whether to hide user interface elements in the document's window (such as scroll bars and navigation controls), leaving only the document's contents displayed.
4593
+ #
4594
+ # * +value+ - Set to true to hide ui elements.
4595
+ # * *Returns* - The converter object.
4596
+ def setHideWindowUi(value)
4597
+ @fields['hide_window_ui'] = value
4598
+ self
4599
+ end
4600
+
4601
+ # Specify whether to resize the document's window to fit the size of the first displayed page.
4602
+ #
4603
+ # * +value+ - Set to true to resize the window.
4604
+ # * *Returns* - The converter object.
4605
+ def setFitWindow(value)
4606
+ @fields['fit_window'] = value
4607
+ self
4608
+ end
4609
+
4610
+ # Specify whether to position the document's window in the center of the screen.
4611
+ #
4612
+ # * +value+ - Set to true to center the window.
4613
+ # * *Returns* - The converter object.
4614
+ def setCenterWindow(value)
4615
+ @fields['center_window'] = value
4616
+ self
4617
+ end
4618
+
4619
+ # Specify whether the window's title bar should display the document title. If false, the title bar should instead display the name of the PDF file containing the document.
4620
+ #
4621
+ # * +value+ - Set to true to display the title.
4622
+ # * *Returns* - The converter object.
4623
+ def setDisplayTitle(value)
4624
+ @fields['display_title'] = value
4625
+ self
4626
+ end
4627
+
4628
+ # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or is available in conversion statistics.
4629
+ #
4630
+ # * +value+ - Set to true to enable the debug logging.
4631
+ # * *Returns* - The converter object.
4632
+ def setDebugLog(value)
4633
+ @fields['debug_log'] = value
4634
+ self
4635
+ end
4636
+
4637
+ # Get the URL of the debug log for the last conversion.
4638
+ # * *Returns* - The link to the debug log.
4639
+ def getDebugLogUrl()
4640
+ return @helper.getDebugLogUrl()
4641
+ end
4642
+
4643
+ # Get the number of conversion credits available in your account.
4644
+ # This method can only be called after a call to one of the convertXtoY methods.
4645
+ # The returned value can differ from the actual count if you run parallel conversions.
4646
+ # The special value 999999 is returned if the information is not available.
4647
+ # * *Returns* - The number of credits.
4648
+ def getRemainingCreditCount()
4649
+ return @helper.getRemainingCreditCount()
4650
+ end
4651
+
4652
+ # Get the number of credits consumed by the last conversion.
4653
+ # * *Returns* - The number of credits.
4654
+ def getConsumedCreditCount()
4655
+ return @helper.getConsumedCreditCount()
4656
+ end
4657
+
4658
+ # Get the job id.
4659
+ # * *Returns* - The unique job identifier.
4660
+ def getJobId()
4661
+ return @helper.getJobId()
4662
+ end
4663
+
4664
+ # Get the size of the output in bytes.
4665
+ # * *Returns* - The count of bytes.
4666
+ def getOutputSize()
4667
+ return @helper.getOutputSize()
4668
+ end
4669
+
4670
+ # Get the version details.
4671
+ # * *Returns* - API version, converter version, and client version.
4672
+ def getVersion()
4673
+ return "client " + CLIENT_VERSION + ", API v2, converter " + @helper.getConverterVersion()
4674
+ end
4675
+
4676
+ # Tag the conversion with a custom value. The tag is used in conversion statistics. A value longer than 32 characters is cut off.
4677
+ #
4678
+ # * +tag+ - A string with the custom tag.
4679
+ # * *Returns* - The converter object.
4680
+ def setTag(tag)
4681
+ @fields['tag'] = tag
4682
+ self
4683
+ end
4684
+
4685
+ # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTP scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
4686
+ #
4687
+ # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
4688
+ # * *Returns* - The converter object.
4689
+ def setHttpProxy(proxy)
4690
+ unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4691
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
4692
+ end
4693
+
4694
+ @fields['http_proxy'] = proxy
4695
+ self
4696
+ end
4697
+
4698
+ # A proxy server used by Pdfcrowd conversion process for accessing the source URLs with HTTPS scheme. It can help to circumvent regional restrictions or provide limited access to your intranet.
4699
+ #
4700
+ # * +proxy+ - The value must have format DOMAIN_OR_IP_ADDRESS:PORT.
4701
+ # * *Returns* - The converter object.
4702
+ def setHttpsProxy(proxy)
4703
+ unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4704
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
4705
+ end
4706
+
4707
+ @fields['https_proxy'] = proxy
4708
+ self
4709
+ end
4710
+
4711
+ # Set the converter version. Different versions may produce different output. Choose which one provides the best output for your case.
4712
+ #
4713
+ # * +version+ - The version identifier. Allowed values are latest, 20.10, 18.10.
4714
+ # * *Returns* - The converter object.
4715
+ def setConverterVersion(version)
4716
+ unless /(?i)^(latest|20.10|18.10)$/.match(version)
4717
+ raise Error.new(Pdfcrowd.create_invalid_value_message(version, "setConverterVersion", "image-to-pdf", "Allowed values are latest, 20.10, 18.10.", "set_converter_version"), 470);
4718
+ end
4719
+
4720
+ @helper.setConverterVersion(version)
4721
+ self
4722
+ end
4723
+
4724
+ # Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
4725
+ # Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
4726
+ #
4727
+ # * +value+ - Set to true to use HTTP.
4728
+ # * *Returns* - The converter object.
4729
+ def setUseHttp(value)
4730
+ @helper.setUseHttp(value)
4731
+ self
4732
+ end
4733
+
4734
+ # Set a custom user agent HTTP header. It can be useful if you are behind a proxy or a firewall.
4735
+ #
4736
+ # * +agent+ - The user agent string.
4737
+ # * *Returns* - The converter object.
4738
+ def setUserAgent(agent)
4739
+ @helper.setUserAgent(agent)
4740
+ self
4741
+ end
4742
+
4743
+ # Specifies an HTTP proxy that the API client library will use to connect to the internet.
4744
+ #
4745
+ # * +host+ - The proxy hostname.
4746
+ # * +port+ - The proxy port.
4747
+ # * +user_name+ - The username.
4748
+ # * +password+ - The password.
4749
+ # * *Returns* - The converter object.
4750
+ def setProxy(host, port, user_name, password)
4751
+ @helper.setProxy(host, port, user_name, password)
4752
+ self
4753
+ end
4754
+
4755
+ # Specifies the number of automatic retries when the 502 HTTP status code is received. The 502 status code indicates a temporary network issue. This feature can be disabled by setting it to 0.
4756
+ #
4757
+ # * +count+ - Number of retries.
4758
+ # * *Returns* - The converter object.
4759
+ def setRetryCount(count)
4760
+ @helper.setRetryCount(count)
4761
+ self
4762
+ end
4763
+
4764
+ end
4765
+
4766
+ # Conversion from PDF to HTML.
4767
+ class PdfToHtmlClient
4768
+ # Constructor for the Pdfcrowd API client.
4769
+ #
4770
+ # * +user_name+ - Your username at Pdfcrowd.
4771
+ # * +api_key+ - Your API key.
4772
+ def initialize(user_name, api_key)
4773
+ @helper = ConnectionHelper.new(user_name, api_key)
4774
+ @fields = {
4775
+ 'input_format'=>'pdf',
4776
+ 'output_format'=>'html'
4777
+ }
4778
+ @file_id = 1
4779
+ @files = {}
4780
+ @raw_data = {}
4781
+ end
4782
+
4783
+ # Convert a PDF.
4784
+ #
4785
+ # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4786
+ # * *Returns* - Byte array containing the conversion output.
4787
+ def convertUrl(url)
4788
+ unless /(?i)^https?:\/\/.*$/.match(url)
4789
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url"), 470);
4790
+ end
4791
+
4792
+ @fields['url'] = url
4793
+ @helper.post(@fields, @files, @raw_data)
4794
+ end
4795
+
4796
+ # Convert a PDF and write the result to an output stream.
4797
+ #
4798
+ # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4799
+ # * +out_stream+ - The output stream that will contain the conversion output.
4800
+ def convertUrlToStream(url, out_stream)
4801
+ unless /(?i)^https?:\/\/.*$/.match(url)
4802
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470);
4803
+ end
4804
+
4805
+ @fields['url'] = url
4806
+ @helper.post(@fields, @files, @raw_data, out_stream)
4807
+ end
4808
+
4809
+ # Convert a PDF and write the result to a local file.
4810
+ #
4811
+ # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4812
+ # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
4813
+ def convertUrlToFile(url, file_path)
4814
+ if (!(!file_path.nil? && !file_path.empty?))
4815
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_url_to_file"), 470);
4816
+ end
4817
+
4818
+ if (!(isOutputTypeValid(file_path)))
4819
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_url_to_file"), 470);
4820
+ end
4821
+
4822
+ output_file = open(file_path, "wb")
4823
+ begin
4824
+ convertUrlToStream(url, output_file)
4825
+ output_file.close()
4826
+ rescue Error => why
4827
+ output_file.close()
4828
+ FileUtils.rm(file_path)
4829
+ raise
4830
+ end
4831
+ end
4832
+
4833
+ # Convert a local file.
4834
+ #
4835
+ # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4836
+ # * *Returns* - Byte array containing the conversion output.
4837
+ def convertFile(file)
4838
+ if (!(File.file?(file) && !File.zero?(file)))
4839
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-html", "The file must exist and not be empty.", "convert_file"), 470);
4840
+ end
4841
+
4842
+ @files['file'] = file
4843
+ @helper.post(@fields, @files, @raw_data)
4844
+ end
4845
+
4846
+ # Convert a local file and write the result to an output stream.
4847
+ #
4848
+ # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4849
+ # * +out_stream+ - The output stream that will contain the conversion output.
4850
+ def convertFileToStream(file, out_stream)
4851
+ if (!(File.file?(file) && !File.zero?(file)))
4852
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-html", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
4853
+ end
4854
+
4855
+ @files['file'] = file
4856
+ @helper.post(@fields, @files, @raw_data, out_stream)
4857
+ end
4858
+
4859
+ # Convert a local file and write the result to a local file.
4860
+ #
4861
+ # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4862
+ # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
4863
+ def convertFileToFile(file, file_path)
4864
+ if (!(!file_path.nil? && !file_path.empty?))
4865
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_file_to_file"), 470);
4866
+ end
4867
+
4868
+ if (!(isOutputTypeValid(file_path)))
4869
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_file_to_file"), 470);
4870
+ end
4871
+
4872
+ output_file = open(file_path, "wb")
4873
+ begin
4874
+ convertFileToStream(file, output_file)
4875
+ output_file.close()
4876
+ rescue Error => why
4877
+ output_file.close()
4878
+ FileUtils.rm(file_path)
4879
+ raise
4880
+ end
4881
+ end
4882
+
4883
+ # Convert raw data.
4884
+ #
4885
+ # * +data+ - The raw content to be converted.
4886
+ # * *Returns* - Byte array with the output.
4887
+ def convertRawData(data)
4888
+ @raw_data['file'] = data
4889
+ @helper.post(@fields, @files, @raw_data)
4890
+ end
4891
+
4892
+ # Convert raw data and write the result to an output stream.
4893
+ #
4894
+ # * +data+ - The raw content to be converted.
4895
+ # * +out_stream+ - The output stream that will contain the conversion output.
4896
+ def convertRawDataToStream(data, out_stream)
4897
+ @raw_data['file'] = data
4898
+ @helper.post(@fields, @files, @raw_data, out_stream)
4899
+ end
4900
+
4901
+ # Convert raw data to a file.
4902
+ #
4903
+ # * +data+ - The raw content to be converted.
4904
+ # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
4905
+ def convertRawDataToFile(data, file_path)
4906
+ if (!(!file_path.nil? && !file_path.empty?))
4907
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_raw_data_to_file"), 470);
4908
+ end
4909
+
4910
+ if (!(isOutputTypeValid(file_path)))
4911
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_raw_data_to_file"), 470);
4912
+ end
4913
+
4914
+ output_file = open(file_path, "wb")
4915
+ begin
4916
+ convertRawDataToStream(data, output_file)
4917
+ output_file.close()
4918
+ rescue Error => why
4919
+ output_file.close()
4920
+ FileUtils.rm(file_path)
4921
+ raise
4922
+ end
4923
+ end
4924
+
4925
+ # Convert the contents of an input stream.
4926
+ #
4927
+ # * +in_stream+ - The input stream with source data.
4928
+ # * *Returns* - Byte array containing the conversion output.
4929
+ def convertStream(in_stream)
4930
+ @raw_data['stream'] = in_stream.read
4931
+ @helper.post(@fields, @files, @raw_data)
4932
+ end
4933
+
4934
+ # Convert the contents of an input stream and write the result to an output stream.
4935
+ #
4936
+ # * +in_stream+ - The input stream with source data.
4937
+ # * +out_stream+ - The output stream that will contain the conversion output.
4938
+ def convertStreamToStream(in_stream, out_stream)
4939
+ @raw_data['stream'] = in_stream.read
4940
+ @helper.post(@fields, @files, @raw_data, out_stream)
4941
+ end
4942
+
4943
+ # Convert the contents of an input stream and write the result to a local file.
4944
+ #
4945
+ # * +in_stream+ - The input stream with source data.
4946
+ # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
4947
+ def convertStreamToFile(in_stream, file_path)
4948
+ if (!(!file_path.nil? && !file_path.empty?))
4949
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_stream_to_file"), 470);
4950
+ end
4951
+
4952
+ if (!(isOutputTypeValid(file_path)))
4953
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_stream_to_file"), 470);
4954
+ end
4955
+
4956
+ output_file = open(file_path, "wb")
4957
+ begin
4958
+ convertStreamToStream(in_stream, output_file)
4959
+ output_file.close()
4960
+ rescue Error => why
4961
+ output_file.close()
4962
+ FileUtils.rm(file_path)
4963
+ raise
4964
+ end
4965
+ end
4966
+
4967
+ # Password to open the encrypted PDF file.
4968
+ #
4969
+ # * +password+ - The input PDF password.
4970
+ # * *Returns* - The converter object.
4971
+ def setPdfPassword(password)
4972
+ @fields['pdf_password'] = password
4973
+ self
4974
+ end
4975
+
4976
+ # Set the scaling factor (zoom) for the main page area.
4977
+ #
4978
+ # * +factor+ - The percentage value. Must be a positive integer number.
4979
+ # * *Returns* - The converter object.
4980
+ def setScaleFactor(factor)
4981
+ if (!(Integer(factor) > 0))
4982
+ raise Error.new(Pdfcrowd.create_invalid_value_message(factor, "setScaleFactor", "pdf-to-html", "Must be a positive integer number.", "set_scale_factor"), 470);
4983
+ end
4984
+
4985
+ @fields['scale_factor'] = factor
4986
+ self
4987
+ end
4988
+
4989
+ # Set the page range to print.
4990
+ #
4991
+ # * +pages+ - A comma separated list of page numbers or ranges.
4992
+ # * *Returns* - The converter object.
4993
+ def setPrintPageRange(pages)
4994
+ unless /^(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*$/.match(pages)
4995
+ raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-html", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470);
4996
+ end
4997
+
4998
+ @fields['print_page_range'] = pages
4999
+ self
5000
+ end
5001
+
5002
+ # Specifies where the images are stored.
5003
+ #
5004
+ # * +mode+ - The image storage mode. Allowed values are embed, separate.
5005
+ # * *Returns* - The converter object.
5006
+ def setImageMode(mode)
5007
+ unless /(?i)^(embed|separate)$/.match(mode)
5008
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setImageMode", "pdf-to-html", "Allowed values are embed, separate.", "set_image_mode"), 470);
5009
+ end
5010
+
5011
+ @fields['image_mode'] = mode
5012
+ self
5013
+ end
5014
+
5015
+ # Specifies where the style sheets are stored.
5016
+ #
5017
+ # * +mode+ - The style sheet storage mode. Allowed values are embed, separate.
5018
+ # * *Returns* - The converter object.
5019
+ def setCssMode(mode)
5020
+ unless /(?i)^(embed|separate)$/.match(mode)
5021
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setCssMode", "pdf-to-html", "Allowed values are embed, separate.", "set_css_mode"), 470);
5022
+ end
5023
+
5024
+ @fields['css_mode'] = mode
5025
+ self
5026
+ end
5027
+
5028
+ # Specifies where the fonts are stored.
5029
+ #
5030
+ # * +mode+ - The font storage mode. Allowed values are embed, separate.
5031
+ # * *Returns* - The converter object.
5032
+ def setFontMode(mode)
5033
+ unless /(?i)^(embed|separate)$/.match(mode)
5034
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setFontMode", "pdf-to-html", "Allowed values are embed, separate.", "set_font_mode"), 470);
5035
+ end
5036
+
5037
+ @fields['font_mode'] = mode
5038
+ self
5039
+ end
5040
+
5041
+ # A helper method to determine if the output file is a zip archive. The output of the conversion may be either an HTML file or a zip file containing the HTML and its external assets.
5042
+ # * *Returns* - True if the conversion output is a zip file, otherwise False.
5043
+ def isZippedOutput()
5044
+ @fields.fetch('image_mode', '') == 'separate' || @fields.fetch('css_mode', '') == 'separate' || @fields.fetch('font_mode', '') == 'separate' || @fields.fetch('force_zip', false) == true
5045
+ end
5046
+
5047
+ # Enforces the zip output format.
5048
+ #
5049
+ # * +value+ - Set to true to get the output as a zip archive.
5050
+ # * *Returns* - The converter object.
5051
+ def setForceZip(value)
5052
+ @fields['force_zip'] = value
5053
+ self
5054
+ end
5055
+
5056
+ # Set the HTML title. The title from the input PDF is used by default.
5057
+ #
5058
+ # * +title+ - The HTML title.
5059
+ # * *Returns* - The converter object.
5060
+ def setTitle(title)
5061
+ @fields['title'] = title
5062
+ self
5063
+ end
5064
+
5065
+ # Set the HTML subject. The subject from the input PDF is used by default.
5066
+ #
5067
+ # * +subject+ - The HTML subject.
5068
+ # * *Returns* - The converter object.
5069
+ def setSubject(subject)
5070
+ @fields['subject'] = subject
5071
+ self
5072
+ end
5073
+
5074
+ # Set the HTML author. The author from the input PDF is used by default.
5075
+ #
5076
+ # * +author+ - The HTML author.
5077
+ # * *Returns* - The converter object.
5078
+ def setAuthor(author)
5079
+ @fields['author'] = author
5080
+ self
5081
+ end
5082
+
5083
+ # Associate keywords with the HTML document. Keywords from the input PDF are used by default.
5084
+ #
5085
+ # * +keywords+ - The string containing the keywords.
5086
+ # * *Returns* - The converter object.
5087
+ def setKeywords(keywords)
5088
+ @fields['keywords'] = keywords
5089
+ self
5090
+ end
5091
+
4306
5092
  # Turn on the debug logging. Details about the conversion are stored in the debug log. The URL of the log can be obtained from the getDebugLogUrl method or is available in conversion statistics.
4307
5093
  #
4308
5094
  # * +value+ - Set to true to enable the debug logging.
@@ -4339,6 +5125,12 @@ module Pdfcrowd
4339
5125
  return @helper.getJobId()
4340
5126
  end
4341
5127
 
5128
+ # Get the number of pages in the output document.
5129
+ # * *Returns* - The page count.
5130
+ def getPageCount()
5131
+ return @helper.getPageCount()
5132
+ end
5133
+
4342
5134
  # Get the size of the output in bytes.
4343
5135
  # * *Returns* - The count of bytes.
4344
5136
  def getOutputSize()
@@ -4366,7 +5158,7 @@ module Pdfcrowd
4366
5158
  # * *Returns* - The converter object.
4367
5159
  def setHttpProxy(proxy)
4368
5160
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4369
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
5161
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
4370
5162
  end
4371
5163
 
4372
5164
  @fields['http_proxy'] = proxy
@@ -4379,26 +5171,13 @@ module Pdfcrowd
4379
5171
  # * *Returns* - The converter object.
4380
5172
  def setHttpsProxy(proxy)
4381
5173
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4382
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "image-to-pdf", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
5174
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
4383
5175
  end
4384
5176
 
4385
5177
  @fields['https_proxy'] = proxy
4386
5178
  self
4387
5179
  end
4388
5180
 
4389
- # Set the converter version. Different versions may produce different output. Choose which one provides the best output for your case.
4390
- #
4391
- # * +version+ - The version identifier. Allowed values are latest, 20.10, 18.10.
4392
- # * *Returns* - The converter object.
4393
- def setConverterVersion(version)
4394
- unless /(?i)^(latest|20.10|18.10)$/.match(version)
4395
- raise Error.new(Pdfcrowd.create_invalid_value_message(version, "setConverterVersion", "image-to-pdf", "Allowed values are latest, 20.10, 18.10.", "set_converter_version"), 470);
4396
- end
4397
-
4398
- @helper.setConverterVersion(version)
4399
- self
4400
- end
4401
-
4402
5181
  # Specifies if the client communicates over HTTP or HTTPS with Pdfcrowd API.
4403
5182
  # Warning: Using HTTP is insecure as data sent over HTTP is not encrypted. Enable this option only if you know what you are doing.
4404
5183
  #
@@ -4439,10 +5218,16 @@ module Pdfcrowd
4439
5218
  self
4440
5219
  end
4441
5220
 
5221
+ private
5222
+
5223
+ def isOutputTypeValid(file_path)
5224
+ extension = File.extname(file_path).downcase
5225
+ (extension == '.zip') == isZippedOutput()
5226
+ end
4442
5227
  end
4443
5228
 
4444
- # Conversion from PDF to HTML.
4445
- class PdfToHtmlClient
5229
+ # Conversion from PDF to text.
5230
+ class PdfToTextClient
4446
5231
  # Constructor for the Pdfcrowd API client.
4447
5232
  #
4448
5233
  # * +user_name+ - Your username at Pdfcrowd.
@@ -4451,7 +5236,7 @@ module Pdfcrowd
4451
5236
  @helper = ConnectionHelper.new(user_name, api_key)
4452
5237
  @fields = {
4453
5238
  'input_format'=>'pdf',
4454
- 'output_format'=>'html'
5239
+ 'output_format'=>'txt'
4455
5240
  }
4456
5241
  @file_id = 1
4457
5242
  @files = {}
@@ -4464,7 +5249,7 @@ module Pdfcrowd
4464
5249
  # * *Returns* - Byte array containing the conversion output.
4465
5250
  def convertUrl(url)
4466
5251
  unless /(?i)^https?:\/\/.*$/.match(url)
4467
- raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url"), 470);
5252
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrl", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url"), 470);
4468
5253
  end
4469
5254
 
4470
5255
  @fields['url'] = url
@@ -4477,7 +5262,7 @@ module Pdfcrowd
4477
5262
  # * +out_stream+ - The output stream that will contain the conversion output.
4478
5263
  def convertUrlToStream(url, out_stream)
4479
5264
  unless /(?i)^https?:\/\/.*$/.match(url)
4480
- raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-html", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470);
5265
+ raise Error.new(Pdfcrowd.create_invalid_value_message(url, "convertUrlToStream::url", "pdf-to-text", "The supported protocols are http:// and https://.", "convert_url_to_stream"), 470);
4481
5266
  end
4482
5267
 
4483
5268
  @fields['url'] = url
@@ -4487,14 +5272,10 @@ module Pdfcrowd
4487
5272
  # Convert a PDF and write the result to a local file.
4488
5273
  #
4489
5274
  # * +url+ - The address of the PDF to convert. The supported protocols are http:// and https://.
4490
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5275
+ # * +file_path+ - The output file path. The string must not be empty.
4491
5276
  def convertUrlToFile(url, file_path)
4492
5277
  if (!(!file_path.nil? && !file_path.empty?))
4493
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_url_to_file"), 470);
4494
- end
4495
-
4496
- if (!(isOutputTypeValid(file_path)))
4497
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_url_to_file"), 470);
5278
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertUrlToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_url_to_file"), 470);
4498
5279
  end
4499
5280
 
4500
5281
  output_file = open(file_path, "wb")
@@ -4514,7 +5295,7 @@ module Pdfcrowd
4514
5295
  # * *Returns* - Byte array containing the conversion output.
4515
5296
  def convertFile(file)
4516
5297
  if (!(File.file?(file) && !File.zero?(file)))
4517
- raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-html", "The file must exist and not be empty.", "convert_file"), 470);
5298
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFile", "pdf-to-text", "The file must exist and not be empty.", "convert_file"), 470);
4518
5299
  end
4519
5300
 
4520
5301
  @files['file'] = file
@@ -4527,7 +5308,7 @@ module Pdfcrowd
4527
5308
  # * +out_stream+ - The output stream that will contain the conversion output.
4528
5309
  def convertFileToStream(file, out_stream)
4529
5310
  if (!(File.file?(file) && !File.zero?(file)))
4530
- raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-html", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
5311
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file, "convertFileToStream::file", "pdf-to-text", "The file must exist and not be empty.", "convert_file_to_stream"), 470);
4531
5312
  end
4532
5313
 
4533
5314
  @files['file'] = file
@@ -4537,14 +5318,10 @@ module Pdfcrowd
4537
5318
  # Convert a local file and write the result to a local file.
4538
5319
  #
4539
5320
  # * +file+ - The path to a local file to convert. The file must exist and not be empty.
4540
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5321
+ # * +file_path+ - The output file path. The string must not be empty.
4541
5322
  def convertFileToFile(file, file_path)
4542
5323
  if (!(!file_path.nil? && !file_path.empty?))
4543
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_file_to_file"), 470);
4544
- end
4545
-
4546
- if (!(isOutputTypeValid(file_path)))
4547
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_file_to_file"), 470);
5324
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertFileToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_file_to_file"), 470);
4548
5325
  end
4549
5326
 
4550
5327
  output_file = open(file_path, "wb")
@@ -4579,14 +5356,10 @@ module Pdfcrowd
4579
5356
  # Convert raw data to a file.
4580
5357
  #
4581
5358
  # * +data+ - The raw content to be converted.
4582
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5359
+ # * +file_path+ - The output file path. The string must not be empty.
4583
5360
  def convertRawDataToFile(data, file_path)
4584
5361
  if (!(!file_path.nil? && !file_path.empty?))
4585
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_raw_data_to_file"), 470);
4586
- end
4587
-
4588
- if (!(isOutputTypeValid(file_path)))
4589
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_raw_data_to_file"), 470);
5362
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertRawDataToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_raw_data_to_file"), 470);
4590
5363
  end
4591
5364
 
4592
5365
  output_file = open(file_path, "wb")
@@ -4621,14 +5394,10 @@ module Pdfcrowd
4621
5394
  # Convert the contents of an input stream and write the result to a local file.
4622
5395
  #
4623
5396
  # * +in_stream+ - The input stream with source data.
4624
- # * +file_path+ - The output file path. The string must not be empty. The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.
5397
+ # * +file_path+ - The output file path. The string must not be empty.
4625
5398
  def convertStreamToFile(in_stream, file_path)
4626
5399
  if (!(!file_path.nil? && !file_path.empty?))
4627
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The string must not be empty.", "convert_stream_to_file"), 470);
4628
- end
4629
-
4630
- if (!(isOutputTypeValid(file_path)))
4631
- raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-html", "The converter generates an HTML or ZIP file. If ZIP file is generated, the file path must have a ZIP or zip extension.", "convert_stream_to_file"), 470);
5400
+ raise Error.new(Pdfcrowd.create_invalid_value_message(file_path, "convertStreamToFile::file_path", "pdf-to-text", "The string must not be empty.", "convert_stream_to_file"), 470);
4632
5401
  end
4633
5402
 
4634
5403
  output_file = open(file_path, "wb")
@@ -4642,7 +5411,7 @@ module Pdfcrowd
4642
5411
  end
4643
5412
  end
4644
5413
 
4645
- # Password to open the encrypted PDF file.
5414
+ # The password to open the encrypted PDF file.
4646
5415
  #
4647
5416
  # * +password+ - The input PDF password.
4648
5417
  # * *Returns* - The converter object.
@@ -4651,119 +5420,171 @@ module Pdfcrowd
4651
5420
  self
4652
5421
  end
4653
5422
 
4654
- # Set the scaling factor (zoom) for the main page area.
5423
+ # Set the page range to print.
4655
5424
  #
4656
- # * +factor+ - The percentage value. Must be a positive integer number.
5425
+ # * +pages+ - A comma separated list of page numbers or ranges.
4657
5426
  # * *Returns* - The converter object.
4658
- def setScaleFactor(factor)
4659
- if (!(Integer(factor) > 0))
4660
- raise Error.new(Pdfcrowd.create_invalid_value_message(factor, "setScaleFactor", "pdf-to-html", "Must be a positive integer number.", "set_scale_factor"), 470);
5427
+ def setPrintPageRange(pages)
5428
+ unless /^(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*$/.match(pages)
5429
+ raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-text", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470);
4661
5430
  end
4662
5431
 
4663
- @fields['scale_factor'] = factor
5432
+ @fields['print_page_range'] = pages
4664
5433
  self
4665
5434
  end
4666
5435
 
4667
- # Set the page range to print.
5436
+ # Ignore the original PDF layout.
4668
5437
  #
4669
- # * +pages+ - A comma separated list of page numbers or ranges.
5438
+ # * +value+ - Set to true to ignore the layout.
4670
5439
  # * *Returns* - The converter object.
4671
- def setPrintPageRange(pages)
4672
- unless /^(?:\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*,\s*)*\s*(?:\d+|(?:\d*\s*\-\s*\d+)|(?:\d+\s*\-\s*\d*))\s*$/.match(pages)
4673
- raise Error.new(Pdfcrowd.create_invalid_value_message(pages, "setPrintPageRange", "pdf-to-html", "A comma separated list of page numbers or ranges.", "set_print_page_range"), 470);
5440
+ def setNoLayout(value)
5441
+ @fields['no_layout'] = value
5442
+ self
5443
+ end
5444
+
5445
+ # The end-of-line convention for the text output.
5446
+ #
5447
+ # * +eol+ - Allowed values are unix, dos, mac.
5448
+ # * *Returns* - The converter object.
5449
+ def setEol(eol)
5450
+ unless /(?i)^(unix|dos|mac)$/.match(eol)
5451
+ raise Error.new(Pdfcrowd.create_invalid_value_message(eol, "setEol", "pdf-to-text", "Allowed values are unix, dos, mac.", "set_eol"), 470);
4674
5452
  end
4675
5453
 
4676
- @fields['print_page_range'] = pages
5454
+ @fields['eol'] = eol
4677
5455
  self
4678
5456
  end
4679
5457
 
4680
- # Specifies where the images are stored.
5458
+ # Specify the page break mode for the text output.
4681
5459
  #
4682
- # * +mode+ - The image storage mode. Allowed values are embed, separate.
5460
+ # * +mode+ - Allowed values are none, default, custom.
4683
5461
  # * *Returns* - The converter object.
4684
- def setImageMode(mode)
4685
- unless /(?i)^(embed|separate)$/.match(mode)
4686
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setImageMode", "pdf-to-html", "Allowed values are embed, separate.", "set_image_mode"), 470);
5462
+ def setPageBreakMode(mode)
5463
+ unless /(?i)^(none|default|custom)$/.match(mode)
5464
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setPageBreakMode", "pdf-to-text", "Allowed values are none, default, custom.", "set_page_break_mode"), 470);
4687
5465
  end
4688
5466
 
4689
- @fields['image_mode'] = mode
5467
+ @fields['page_break_mode'] = mode
4690
5468
  self
4691
5469
  end
4692
5470
 
4693
- # Specifies where the style sheets are stored.
5471
+ # Specify the custom page break.
4694
5472
  #
4695
- # * +mode+ - The style sheet storage mode. Allowed values are embed, separate.
5473
+ # * +page_break+ - String to insert between the pages.
4696
5474
  # * *Returns* - The converter object.
4697
- def setCssMode(mode)
4698
- unless /(?i)^(embed|separate)$/.match(mode)
4699
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setCssMode", "pdf-to-html", "Allowed values are embed, separate.", "set_css_mode"), 470);
5475
+ def setCustomPageBreak(page_break)
5476
+ @fields['custom_page_break'] = page_break
5477
+ self
5478
+ end
5479
+
5480
+ # Specify the paragraph detection mode.
5481
+ #
5482
+ # * +mode+ - Allowed values are none, bounding-box, characters.
5483
+ # * *Returns* - The converter object.
5484
+ def setParagraphMode(mode)
5485
+ unless /(?i)^(none|bounding-box|characters)$/.match(mode)
5486
+ raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setParagraphMode", "pdf-to-text", "Allowed values are none, bounding-box, characters.", "set_paragraph_mode"), 470);
4700
5487
  end
4701
5488
 
4702
- @fields['css_mode'] = mode
5489
+ @fields['paragraph_mode'] = mode
4703
5490
  self
4704
5491
  end
4705
5492
 
4706
- # Specifies where the fonts are stored.
5493
+ # Set the maximum line spacing when the paragraph detection mode is enabled.
4707
5494
  #
4708
- # * +mode+ - The font storage mode. Allowed values are embed, separate.
5495
+ # * +threshold+ - The value must be a positive integer percentage.
4709
5496
  # * *Returns* - The converter object.
4710
- def setFontMode(mode)
4711
- unless /(?i)^(embed|separate)$/.match(mode)
4712
- raise Error.new(Pdfcrowd.create_invalid_value_message(mode, "setFontMode", "pdf-to-html", "Allowed values are embed, separate.", "set_font_mode"), 470);
5497
+ def setLineSpacingThreshold(threshold)
5498
+ unless /(?i)^0$|^[0-9]+%$/.match(threshold)
5499
+ raise Error.new(Pdfcrowd.create_invalid_value_message(threshold, "setLineSpacingThreshold", "pdf-to-text", "The value must be a positive integer percentage.", "set_line_spacing_threshold"), 470);
4713
5500
  end
4714
5501
 
4715
- @fields['font_mode'] = mode
5502
+ @fields['line_spacing_threshold'] = threshold
4716
5503
  self
4717
5504
  end
4718
5505
 
4719
- # A helper method to determine if the output file is a zip archive. The output of the conversion may be either an HTML file or a zip file containing the HTML and its external assets.
4720
- # * *Returns* - True if the conversion output is a zip file, otherwise False.
4721
- def isZippedOutput()
4722
- @fields.fetch('image_mode', '') == 'separate' || @fields.fetch('css_mode', '') == 'separate' || @fields.fetch('font_mode', '') == 'separate' || @fields.fetch('force_zip', false) == true
5506
+ # Remove the hyphen character from the end of lines.
5507
+ #
5508
+ # * +value+ - Set to true to remove hyphens.
5509
+ # * *Returns* - The converter object.
5510
+ def setRemoveHyphenation(value)
5511
+ @fields['remove_hyphenation'] = value
5512
+ self
4723
5513
  end
4724
5514
 
4725
- # Enforces the zip output format.
5515
+ # Remove empty lines from the text output.
4726
5516
  #
4727
- # * +value+ - Set to true to get the output as a zip archive.
5517
+ # * +value+ - Set to true to remove empty lines.
4728
5518
  # * *Returns* - The converter object.
4729
- def setForceZip(value)
4730
- @fields['force_zip'] = value
5519
+ def setRemoveEmptyLines(value)
5520
+ @fields['remove_empty_lines'] = value
4731
5521
  self
4732
5522
  end
4733
5523
 
4734
- # Set the HTML title. The title from the input PDF is used by default.
5524
+ # Set the top left X coordinate of the crop area in points.
4735
5525
  #
4736
- # * +title+ - The HTML title.
5526
+ # * +x+ - Must be a positive integer number or 0.
4737
5527
  # * *Returns* - The converter object.
4738
- def setTitle(title)
4739
- @fields['title'] = title
5528
+ def setCropAreaX(x)
5529
+ if (!(Integer(x) >= 0))
5530
+ raise Error.new(Pdfcrowd.create_invalid_value_message(x, "setCropAreaX", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_x"), 470);
5531
+ end
5532
+
5533
+ @fields['crop_area_x'] = x
4740
5534
  self
4741
5535
  end
4742
5536
 
4743
- # Set the HTML subject. The subject from the input PDF is used by default.
5537
+ # Set the top left Y coordinate of the crop area in points.
4744
5538
  #
4745
- # * +subject+ - The HTML subject.
5539
+ # * +y+ - Must be a positive integer number or 0.
4746
5540
  # * *Returns* - The converter object.
4747
- def setSubject(subject)
4748
- @fields['subject'] = subject
5541
+ def setCropAreaY(y)
5542
+ if (!(Integer(y) >= 0))
5543
+ raise Error.new(Pdfcrowd.create_invalid_value_message(y, "setCropAreaY", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_y"), 470);
5544
+ end
5545
+
5546
+ @fields['crop_area_y'] = y
4749
5547
  self
4750
5548
  end
4751
5549
 
4752
- # Set the HTML author. The author from the input PDF is used by default.
5550
+ # Set the width of the crop area in points.
4753
5551
  #
4754
- # * +author+ - The HTML author.
5552
+ # * +width+ - Must be a positive integer number or 0.
4755
5553
  # * *Returns* - The converter object.
4756
- def setAuthor(author)
4757
- @fields['author'] = author
5554
+ def setCropAreaWidth(width)
5555
+ if (!(Integer(width) >= 0))
5556
+ raise Error.new(Pdfcrowd.create_invalid_value_message(width, "setCropAreaWidth", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_width"), 470);
5557
+ end
5558
+
5559
+ @fields['crop_area_width'] = width
4758
5560
  self
4759
5561
  end
4760
5562
 
4761
- # Associate keywords with the HTML document. Keywords from the input PDF are used by default.
5563
+ # Set the height of the crop area in points.
4762
5564
  #
4763
- # * +keywords+ - The string containing the keywords.
5565
+ # * +height+ - Must be a positive integer number or 0.
4764
5566
  # * *Returns* - The converter object.
4765
- def setKeywords(keywords)
4766
- @fields['keywords'] = keywords
5567
+ def setCropAreaHeight(height)
5568
+ if (!(Integer(height) >= 0))
5569
+ raise Error.new(Pdfcrowd.create_invalid_value_message(height, "setCropAreaHeight", "pdf-to-text", "Must be a positive integer number or 0.", "set_crop_area_height"), 470);
5570
+ end
5571
+
5572
+ @fields['crop_area_height'] = height
5573
+ self
5574
+ end
5575
+
5576
+ # Set the crop area. It allows extracting just a part of a PDF page.
5577
+ #
5578
+ # * +x+ - Set the top left X coordinate of the crop area in points. Must be a positive integer number or 0.
5579
+ # * +y+ - Set the top left Y coordinate of the crop area in points. Must be a positive integer number or 0.
5580
+ # * +width+ - Set the width of the crop area in points. Must be a positive integer number or 0.
5581
+ # * +height+ - Set the height of the crop area in points. Must be a positive integer number or 0.
5582
+ # * *Returns* - The converter object.
5583
+ def setCropArea(x, y, width, height)
5584
+ setCropAreaX(x)
5585
+ setCropAreaY(y)
5586
+ setCropAreaWidth(width)
5587
+ setCropAreaHeight(height)
4767
5588
  self
4768
5589
  end
4769
5590
 
@@ -4836,7 +5657,7 @@ module Pdfcrowd
4836
5657
  # * *Returns* - The converter object.
4837
5658
  def setHttpProxy(proxy)
4838
5659
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4839
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
5660
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_http_proxy"), 470);
4840
5661
  end
4841
5662
 
4842
5663
  @fields['http_proxy'] = proxy
@@ -4849,7 +5670,7 @@ module Pdfcrowd
4849
5670
  # * *Returns* - The converter object.
4850
5671
  def setHttpsProxy(proxy)
4851
5672
  unless /(?i)^([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{1,}:\d+$/.match(proxy)
4852
- raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-html", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
5673
+ raise Error.new(Pdfcrowd.create_invalid_value_message(proxy, "setHttpsProxy", "pdf-to-text", "The value must have format DOMAIN_OR_IP_ADDRESS:PORT.", "set_https_proxy"), 470);
4853
5674
  end
4854
5675
 
4855
5676
  @fields['https_proxy'] = proxy
@@ -4896,12 +5717,6 @@ module Pdfcrowd
4896
5717
  self
4897
5718
  end
4898
5719
 
4899
- private
4900
-
4901
- def isOutputTypeValid(file_path)
4902
- extension = File.extname(file_path).downcase
4903
- (extension == '.zip') == isZippedOutput()
4904
- end
4905
5720
  end
4906
5721
 
4907
5722
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfcrowd
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.9.0
4
+ version: 5.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pdfcrowd Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-10-03 00:00:00.000000000 Z
11
+ date: 2022-11-29 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: The Pdfcrowd API lets you easily convert between HTML, PDF and various
14
14
  image formats.