selectpdf 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +79 -2
- data/lib/selectpdf.rb +1099 -4
- data/samples/pdf_merge.rb +40 -0
- data/samples/pdf_to_text.rb +45 -0
- data/samples/search_pdf.rb +38 -0
- metadata +8 -3
data/lib/selectpdf.rb
CHANGED
@@ -5,16 +5,172 @@ require 'json'
|
|
5
5
|
require 'fileutils'
|
6
6
|
|
7
7
|
#
|
8
|
-
# SelectPdf Online REST API Ruby client library. Contains
|
8
|
+
# SelectPdf Online REST API Ruby client library. Contains HTML to PDF converter, PDF merge, PDF to text extractor, search PDF.
|
9
9
|
#
|
10
10
|
#
|
11
11
|
# Convert HTML to PDF
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# require 'selectpdf'
|
14
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
15
|
+
#
|
16
|
+
# url = 'https://selectpdf.com'
|
17
|
+
# local_file = 'Test.pdf'
|
18
|
+
# api_key = 'Your API key here'
|
19
|
+
#
|
20
|
+
# begin
|
21
|
+
# api = SelectPdf::HtmlToPdfClient.new(api_key)
|
22
|
+
#
|
23
|
+
# api.page_size = SelectPdf::PageSize::A4
|
24
|
+
# api.margins = 0
|
25
|
+
# api.page_numbers = FALSE
|
26
|
+
# api.page_breaks_enhanced_algorithm = TRUE
|
27
|
+
#
|
28
|
+
# api.convert_url_to_file(url, local_file)
|
29
|
+
# rescue SelectPdf::ApiException => e
|
30
|
+
# print("An error occurred: #{e}")
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# Merge PDFs from local disk or public url and save result into a file on disk.
|
34
|
+
#
|
35
|
+
# require 'selectpdf'
|
36
|
+
#
|
37
|
+
# $stdout.sync = true
|
38
|
+
#
|
39
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
40
|
+
#
|
41
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
42
|
+
# test_pdf = 'Input.pdf'
|
43
|
+
# local_file = 'Result.pdf'
|
44
|
+
# api_key = 'Your API key here'
|
45
|
+
#
|
46
|
+
# begin
|
47
|
+
# client = SelectPdf::PdfMergeClient.new(api_key)
|
48
|
+
#
|
49
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
|
50
|
+
#
|
51
|
+
# # specify the pdf files that will be merged (order will be preserved in the final pdf)
|
52
|
+
# client.add_file(test_pdf) # add PDF from local file
|
53
|
+
# client.add_url_file(test_url) # add PDF from public url
|
54
|
+
# # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
|
55
|
+
# # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
|
56
|
+
#
|
57
|
+
# print "Starting pdf merge ...\n"
|
58
|
+
#
|
59
|
+
# # merge pdfs to local file
|
60
|
+
# client.save_to_file(local_file)
|
61
|
+
#
|
62
|
+
# # merge pdfs to memory
|
63
|
+
# # pdf = client.save
|
64
|
+
#
|
65
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
66
|
+
#
|
67
|
+
# # get API usage
|
68
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
69
|
+
# usage = usage_client.get_usage(FALSE)
|
70
|
+
# print("Usage: #{usage}\n")
|
71
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
72
|
+
# rescue SelectPdf::ApiException => e
|
73
|
+
# print("An error occurred: #{e}")
|
74
|
+
# end
|
75
|
+
#
|
76
|
+
# Extract text from PDF
|
77
|
+
#
|
78
|
+
# require 'selectpdf'
|
79
|
+
#
|
80
|
+
# $stdout.sync = true
|
81
|
+
#
|
82
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
83
|
+
#
|
84
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
85
|
+
# test_pdf = 'Input.pdf'
|
86
|
+
# local_file = 'Result.txt'
|
87
|
+
# api_key = 'Your API key here'
|
88
|
+
#
|
89
|
+
# begin
|
90
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
91
|
+
#
|
92
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
93
|
+
# client.start_page = 1 # start page (processing starts from here)
|
94
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
95
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
96
|
+
#
|
97
|
+
# print "Starting pdf to text ...\n"
|
98
|
+
#
|
99
|
+
# # convert local pdf to local text file
|
100
|
+
# client.text_from_file_to_file(test_pdf, local_file)
|
101
|
+
#
|
102
|
+
# # extract text from local pdf to memory
|
103
|
+
# # text = client.text_from_file(test_pdf)
|
104
|
+
# # print text
|
105
|
+
#
|
106
|
+
# # convert pdf from public url to local text file
|
107
|
+
# # client.text_from_url_to_file(test_url, local_file)
|
108
|
+
#
|
109
|
+
# # extract text from pdf from public url to memory
|
110
|
+
# # text = client.text_from_url(test_url)
|
111
|
+
# # print text
|
112
|
+
#
|
113
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
114
|
+
#
|
115
|
+
# # get API usage
|
116
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
117
|
+
# usage = usage_client.get_usage(FALSE)
|
118
|
+
# print("Usage: #{usage}\n")
|
119
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
120
|
+
# rescue SelectPdf::ApiException => e
|
121
|
+
# print("An error occurred: #{e}")
|
122
|
+
# end
|
123
|
+
#
|
124
|
+
# Search Pdf
|
125
|
+
#
|
126
|
+
# require 'selectpdf'
|
127
|
+
#
|
128
|
+
# $stdout.sync = true
|
129
|
+
#
|
130
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
131
|
+
#
|
132
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
133
|
+
# test_pdf = 'Input.pdf'
|
134
|
+
# api_key = 'Your API key here'
|
135
|
+
#
|
136
|
+
# begin
|
137
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
138
|
+
#
|
139
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
140
|
+
# client.start_page = 1 # start page (processing starts from here)
|
141
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
142
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
143
|
+
#
|
144
|
+
# print "Starting search pdf ...\n"
|
145
|
+
#
|
146
|
+
# # search local pdf
|
147
|
+
# results = client.search_file(test_pdf, 'pdf')
|
148
|
+
#
|
149
|
+
# # search pdf from public url
|
150
|
+
# # results = client.search_url(test_url, 'pdf')
|
151
|
+
#
|
152
|
+
# print "Search results: #{results}.\nSearch results count: #{results.length}\n"
|
153
|
+
#
|
154
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
155
|
+
#
|
156
|
+
# # get API usage
|
157
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
158
|
+
# usage = usage_client.get_usage(FALSE)
|
159
|
+
# print("Usage: #{usage}\n")
|
160
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
161
|
+
# rescue SelectPdf::ApiException => e
|
162
|
+
# print("An error occurred: #{e}")
|
163
|
+
# end
|
164
|
+
#
|
14
165
|
module SelectPdf
|
166
|
+
# Multipart/form-data boundary
|
15
167
|
MULTIPART_FORM_DATA_BOUNDARY = '------------SelectPdf_Api_Boundry_$'
|
168
|
+
|
169
|
+
# New line
|
16
170
|
NEW_LINE = "\r\n"
|
17
|
-
|
171
|
+
|
172
|
+
# Library version
|
173
|
+
CLIENT_VERSION = '1.4.0'
|
18
174
|
|
19
175
|
attr_reader :code, :message
|
20
176
|
#
|
@@ -470,7 +626,70 @@ module SelectPdf
|
|
470
626
|
#
|
471
627
|
# Code sample:
|
472
628
|
#
|
473
|
-
#
|
629
|
+
# require 'selectpdf'
|
630
|
+
#
|
631
|
+
# $stdout.sync = true
|
632
|
+
#
|
633
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
634
|
+
#
|
635
|
+
# url = 'https://selectpdf.com'
|
636
|
+
# local_file = 'Test.pdf'
|
637
|
+
# api_key = 'Your API key here'
|
638
|
+
#
|
639
|
+
# begin
|
640
|
+
# client = SelectPdf::HtmlToPdfClient.new(api_key)
|
641
|
+
#
|
642
|
+
# # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
|
643
|
+
#
|
644
|
+
# client.page_size = SelectPdf::PageSize::A4 # PDF page size
|
645
|
+
# client.page_orientation = SelectPdf::PageOrientation::PORTRAIT # PDF page orientation
|
646
|
+
# client.margins = 0 # PDF page margins
|
647
|
+
# client.rendering_engine = SelectPdf::RenderingEngine::WEBKIT # rendering engine
|
648
|
+
# client.conversion_delay = 1 # conversion delay
|
649
|
+
# client.navigation_timeout = 30 # navigation timeout
|
650
|
+
# client.page_numbers = FALSE # page numbers
|
651
|
+
# client.page_breaks_enhanced_algorithm = TRUE # enhanced page break algorithm
|
652
|
+
#
|
653
|
+
# # additional properties
|
654
|
+
#
|
655
|
+
# # client.use_css_print = TRUE # enable CSS media print
|
656
|
+
# # client.disable_javascript = TRUE # disable javascript
|
657
|
+
# # client.disable_internal_links = TRUE # disable internal links
|
658
|
+
# # client.disable_external_links = TRUE # disable external links
|
659
|
+
# # client.keep_images_together = TRUE # keep images together
|
660
|
+
# # client.scale_images = TRUE # scale images to create smaller pdfs
|
661
|
+
# # client.single_page_pdf = TRUE # generate a single page PDF
|
662
|
+
# # client.user_password = 'password' # secure the PDF with a password
|
663
|
+
#
|
664
|
+
# # generate automatic bookmarks
|
665
|
+
#
|
666
|
+
# # client.pdf_bookmarks_selectors = 'H1, H2' # create outlines (bookmarks) for the specified elements
|
667
|
+
# # client.viewer_page_mode = SelectPdf::PageMode::USE_OUTLINES # display outlines (bookmarks) in viewer
|
668
|
+
#
|
669
|
+
# print "Starting conversion ...\n"
|
670
|
+
#
|
671
|
+
# # convert url to file
|
672
|
+
# client.convert_url_to_file(url, local_file)
|
673
|
+
#
|
674
|
+
# # convert url to memory
|
675
|
+
# # pdf = client.convert_url(url)
|
676
|
+
#
|
677
|
+
# # convert html string to file
|
678
|
+
# # client.convert_html_string_to_file('This is some <b>html</b>.', local_file)
|
679
|
+
#
|
680
|
+
# # convert html string to memory
|
681
|
+
# # pdf = client.convert_html_string('This is some <b>html</b>.')
|
682
|
+
#
|
683
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
684
|
+
#
|
685
|
+
# # get API usage
|
686
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
687
|
+
# usage = usage_client.get_usage(FALSE)
|
688
|
+
# print("Usage: #{usage}\n")
|
689
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
690
|
+
# rescue SelectPdf::ApiException => e
|
691
|
+
# print("An error occurred: #{e}")
|
692
|
+
# end
|
474
693
|
class HtmlToPdfClient < ApiClient
|
475
694
|
# Construct the Html To Pdf Client.
|
476
695
|
#
|
@@ -1525,4 +1744,880 @@ module SelectPdf
|
|
1525
1744
|
return nil
|
1526
1745
|
end
|
1527
1746
|
end
|
1747
|
+
|
1748
|
+
# Pdf Merge with SelectPdf Online API.
|
1749
|
+
#
|
1750
|
+
# Code sample:
|
1751
|
+
#
|
1752
|
+
# require 'selectpdf'
|
1753
|
+
#
|
1754
|
+
# $stdout.sync = true
|
1755
|
+
#
|
1756
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
1757
|
+
#
|
1758
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
1759
|
+
# test_pdf = 'Input.pdf'
|
1760
|
+
# local_file = 'Result.pdf'
|
1761
|
+
# api_key = 'Your API key here'
|
1762
|
+
#
|
1763
|
+
# begin
|
1764
|
+
# client = SelectPdf::PdfMergeClient.new(api_key)
|
1765
|
+
#
|
1766
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
|
1767
|
+
#
|
1768
|
+
# # specify the pdf files that will be merged (order will be preserved in the final pdf)
|
1769
|
+
# client.add_file(test_pdf) # add PDF from local file
|
1770
|
+
# client.add_url_file(test_url) # add PDF from public url
|
1771
|
+
# # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
|
1772
|
+
# # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
|
1773
|
+
#
|
1774
|
+
# print "Starting pdf merge ...\n"
|
1775
|
+
#
|
1776
|
+
# # merge pdfs to local file
|
1777
|
+
# client.save_to_file(local_file)
|
1778
|
+
#
|
1779
|
+
# # merge pdfs to memory
|
1780
|
+
# # pdf = client.save
|
1781
|
+
#
|
1782
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
1783
|
+
#
|
1784
|
+
# # get API usage
|
1785
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
1786
|
+
# usage = usage_client.get_usage(FALSE)
|
1787
|
+
# print("Usage: #{usage}\n")
|
1788
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
1789
|
+
# rescue SelectPdf::ApiException => e
|
1790
|
+
# print("An error occurred: #{e}")
|
1791
|
+
# end
|
1792
|
+
class PdfMergeClient < ApiClient
|
1793
|
+
# Construct the Pdf Merge Client.
|
1794
|
+
#
|
1795
|
+
# @param api_key API Key.
|
1796
|
+
def initialize(api_key)
|
1797
|
+
super()
|
1798
|
+
@api_endpoint = 'https://selectpdf.com/api2/pdfmerge/'
|
1799
|
+
@parameters['key'] = api_key
|
1800
|
+
|
1801
|
+
@file_idx = 0
|
1802
|
+
end
|
1803
|
+
|
1804
|
+
# Add local PDF document to the list of input files.
|
1805
|
+
#
|
1806
|
+
# @param input_pdf Path to a local PDF file.
|
1807
|
+
# @param user_password User password for the PDF document (optional).
|
1808
|
+
def add_file(input_pdf, user_password = nil)
|
1809
|
+
@file_idx += 1
|
1810
|
+
|
1811
|
+
@files["file_#{@file_idx}"] = input_pdf
|
1812
|
+
@parameters.delete("url_#{@file_idx}")
|
1813
|
+
|
1814
|
+
if user_password.nil? || user_password.empty?
|
1815
|
+
@parameters.delete("password_#{@file_idx}")
|
1816
|
+
else
|
1817
|
+
@parameters["password_#{@file_idx}"] = user_password
|
1818
|
+
end
|
1819
|
+
end
|
1820
|
+
|
1821
|
+
# Add remote PDF document to the list of input files.
|
1822
|
+
#
|
1823
|
+
# @param input_url Url of a remote PDF file.
|
1824
|
+
# @param user_password User password for the PDF document (optional).
|
1825
|
+
def add_url_file(input_url, user_password = nil)
|
1826
|
+
@file_idx += 1
|
1827
|
+
|
1828
|
+
@parameters["url_#{@file_idx}"] = input_url
|
1829
|
+
@files.delete("file_#{@file_idx}")
|
1830
|
+
|
1831
|
+
if user_password.nil? || user_password.empty?
|
1832
|
+
@parameters.delete("password_#{@file_idx}")
|
1833
|
+
else
|
1834
|
+
@parameters["password_#{@file_idx}"] = user_password
|
1835
|
+
end
|
1836
|
+
end
|
1837
|
+
|
1838
|
+
# Merge all specified input pdfs and return the resulting PDF.
|
1839
|
+
#
|
1840
|
+
# @return Byte array containing the resulting PDF.
|
1841
|
+
def save
|
1842
|
+
@parameters['async'] = 'False'
|
1843
|
+
@parameters['files_no'] = @file_idx
|
1844
|
+
|
1845
|
+
result = perform_post_as_multipart_formdata
|
1846
|
+
|
1847
|
+
@file_idx = 0
|
1848
|
+
@files = {}
|
1849
|
+
|
1850
|
+
result
|
1851
|
+
end
|
1852
|
+
|
1853
|
+
# Merge all specified input pdfs and write the resulting PDF to a specified stream.
|
1854
|
+
#
|
1855
|
+
# @param stream The output stream where the resulting PDF will be written.
|
1856
|
+
def save_to_stream(stream)
|
1857
|
+
result = save
|
1858
|
+
stream.write(result)
|
1859
|
+
end
|
1860
|
+
|
1861
|
+
# Merge all specified input pdfs and write the resulting PDF to a local file.
|
1862
|
+
#
|
1863
|
+
# @param file_path Local file including path if necessary.
|
1864
|
+
def save_to_file(file_path)
|
1865
|
+
result = save
|
1866
|
+
File.open(file_path, 'wb') do |file|
|
1867
|
+
file.write(result)
|
1868
|
+
end
|
1869
|
+
rescue ApiException
|
1870
|
+
FileUtils.rm(file_path) if File.exist?(file_path)
|
1871
|
+
raise
|
1872
|
+
end
|
1873
|
+
|
1874
|
+
# Merge all specified input pdfs and return the resulting PDF. An asynchronous call is used.
|
1875
|
+
#
|
1876
|
+
# @return Byte array containing the resulting PDF.
|
1877
|
+
def save_async
|
1878
|
+
@parameters['files_no'] = @file_idx
|
1879
|
+
|
1880
|
+
job_id = start_async_job_multipart_form_data
|
1881
|
+
|
1882
|
+
if job_id.nil? || job_id.empty?
|
1883
|
+
raise ApiException.new('An error occurred launching the asynchronous call.'), 'An error occurred launching the asynchronous call.'
|
1884
|
+
end
|
1885
|
+
|
1886
|
+
no_pings = 0
|
1887
|
+
|
1888
|
+
while no_pings < @async_calls_max_pings
|
1889
|
+
no_pings += 1
|
1890
|
+
|
1891
|
+
# sleep for a few seconds before next ping
|
1892
|
+
sleep(@async_calls_ping_interval)
|
1893
|
+
|
1894
|
+
async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
|
1895
|
+
async_job_client.api_endpoint = @api_async_endpoint
|
1896
|
+
|
1897
|
+
result = async_job_client.result
|
1898
|
+
|
1899
|
+
next if result.nil?
|
1900
|
+
|
1901
|
+
@number_of_pages = async_job_client.number_of_pages
|
1902
|
+
@file_idx = 0
|
1903
|
+
@files = {}
|
1904
|
+
|
1905
|
+
return result
|
1906
|
+
end
|
1907
|
+
|
1908
|
+
@file_idx = 0
|
1909
|
+
@files = {}
|
1910
|
+
|
1911
|
+
raise ApiException.new('Asynchronous call did not finish in expected timeframe.'), 'Asynchronous call did not finish in expected timeframe.'
|
1912
|
+
end
|
1913
|
+
|
1914
|
+
# Merge all specified input pdfs and write the resulting PDF to a specified stream. An asynchronous call is used.
|
1915
|
+
#
|
1916
|
+
# @param stream The output stream where the resulting PDF will be written.
|
1917
|
+
def save_to_stream_async(stream)
|
1918
|
+
result = save_async
|
1919
|
+
stream.write(result)
|
1920
|
+
end
|
1921
|
+
|
1922
|
+
# Merge all specified input pdfs and write the resulting PDF to a local file. An asynchronous call is used.
|
1923
|
+
#
|
1924
|
+
# @param file_path Local file including path if necessary.
|
1925
|
+
def save_to_file_async(file_path)
|
1926
|
+
result = save_async
|
1927
|
+
File.open(file_path, 'wb') do |file|
|
1928
|
+
file.write(result)
|
1929
|
+
end
|
1930
|
+
rescue ApiException
|
1931
|
+
FileUtils.rm(file_path) if File.exist?(file_path)
|
1932
|
+
raise
|
1933
|
+
end
|
1934
|
+
|
1935
|
+
# Set the PDF document title.
|
1936
|
+
#
|
1937
|
+
# @param doc_title Document title.
|
1938
|
+
def doc_title=(doc_title)
|
1939
|
+
@parameters['doc_title'] = doc_title
|
1940
|
+
end
|
1941
|
+
|
1942
|
+
# Set the subject of the PDF document.
|
1943
|
+
#
|
1944
|
+
# @param doc_subject Document subject.
|
1945
|
+
def doc_subject=(doc_subject)
|
1946
|
+
@parameters['doc_subject'] = doc_subject
|
1947
|
+
end
|
1948
|
+
|
1949
|
+
# Set the PDF document keywords.
|
1950
|
+
#
|
1951
|
+
# @param doc_keywords Document keywords.
|
1952
|
+
def doc_keywords=(doc_keywords)
|
1953
|
+
@parameters['doc_keywords'] = doc_keywords
|
1954
|
+
end
|
1955
|
+
|
1956
|
+
# Set the name of the PDF document author.
|
1957
|
+
#
|
1958
|
+
# @param doc_author Document author.
|
1959
|
+
def doc_author=(doc_author)
|
1960
|
+
@parameters['doc_author'] = doc_author
|
1961
|
+
end
|
1962
|
+
|
1963
|
+
# Add the date and time when the PDF document was created to the PDF document information. The default value is False.
|
1964
|
+
#
|
1965
|
+
# @param doc_add_creation_date Add creation date to the document metadata or not.
|
1966
|
+
def doc_add_creation_date=(doc_add_creation_date)
|
1967
|
+
@parameters['doc_add_creation_date'] = doc_add_creation_date
|
1968
|
+
end
|
1969
|
+
|
1970
|
+
# Set the page layout to be used when the document is opened in a PDF viewer. The default value is SelectPdf::PageLayout::ONE_COLUMN.
|
1971
|
+
#
|
1972
|
+
# @param viewer_page_layout Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).
|
1973
|
+
# Use constants from SelectPdf::PageLayout class.
|
1974
|
+
def viewer_page_layout=(viewer_page_layout)
|
1975
|
+
unless [0, 1, 2, 3].include?(viewer_page_layout)
|
1976
|
+
raise ApiException.new('Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).'), 'Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).'
|
1977
|
+
end
|
1978
|
+
|
1979
|
+
@parameters['viewer_page_layout'] = viewer_page_layout
|
1980
|
+
end
|
1981
|
+
|
1982
|
+
# Set the document page mode when the pdf document is opened in a PDF viewer. The default value is SelectPdf::PageMode::USE_NONE.
|
1983
|
+
#
|
1984
|
+
# @param viewer_page_mode Page mode. Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).
|
1985
|
+
# Use constants from SelectPdf::PageMode class.
|
1986
|
+
def viewer_page_mode=(viewer_page_mode)
|
1987
|
+
unless [0, 1, 2, 3, 4, 5].include?(viewer_page_mode)
|
1988
|
+
raise ApiException.new('Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).'),
|
1989
|
+
'Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).'
|
1990
|
+
end
|
1991
|
+
|
1992
|
+
@parameters['viewer_page_mode'] = viewer_page_mode
|
1993
|
+
end
|
1994
|
+
|
1995
|
+
# Set a flag specifying whether to position the document's window in the center of the screen. The default value is False.
|
1996
|
+
#
|
1997
|
+
# @param viewer_center_window Center window or not.
|
1998
|
+
def viewer_center_window=(viewer_center_window)
|
1999
|
+
@parameters['viewer_center_window'] = viewer_center_window
|
2000
|
+
end
|
2001
|
+
|
2002
|
+
# Set a flag specifying whether the window's title bar should display the document title taken from document information. The default value is False.
|
2003
|
+
#
|
2004
|
+
# @param viewer_display_doc_title Display title or not.
|
2005
|
+
def viewer_display_doc_title=(viewer_display_doc_title)
|
2006
|
+
@parameters['viewer_display_doc_title'] = viewer_display_doc_title
|
2007
|
+
end
|
2008
|
+
|
2009
|
+
# Set a flag specifying whether to resize the document's window to fit the size of the first displayed page. The default value is False.
|
2010
|
+
#
|
2011
|
+
# @param viewer_fit_window Fit window or not.
|
2012
|
+
def viewer_fit_window=(viewer_fit_window)
|
2013
|
+
@parameters['viewer_fit_window'] = viewer_fit_window
|
2014
|
+
end
|
2015
|
+
|
2016
|
+
# Set a flag specifying whether to hide the pdf viewer application's menu bar when the document is active. The default value is False.
|
2017
|
+
#
|
2018
|
+
# @param viewer_hide_menu_bar Hide menu bar or not.
|
2019
|
+
def viewer_hide_menu_bar=(viewer_hide_menu_bar)
|
2020
|
+
@parameters['viewer_hide_menu_bar'] = viewer_hide_menu_bar
|
2021
|
+
end
|
2022
|
+
|
2023
|
+
# Set a flag specifying whether to hide the pdf viewer application's tool bars when the document is active. The default value is False.
|
2024
|
+
#
|
2025
|
+
# @param viewer_hide_toolbar Hide tool bars or not.
|
2026
|
+
def viewer_hide_toolbar=(viewer_hide_toolbar)
|
2027
|
+
@parameters['viewer_hide_toolbar'] = viewer_hide_toolbar
|
2028
|
+
end
|
2029
|
+
|
2030
|
+
# Set a flag specifying whether to hide user interface elements in the document's window (such as scroll bars and navigation controls), leaving only the document's contents displayed.
|
2031
|
+
#
|
2032
|
+
# @param viewer_hide_window_ui Hide window UI or not.
|
2033
|
+
def viewer_hide_window_ui=(viewer_hide_window_ui)
|
2034
|
+
@parameters['viewer_hide_window_ui'] = viewer_hide_window_ui
|
2035
|
+
end
|
2036
|
+
|
2037
|
+
# Set PDF user password.
|
2038
|
+
#
|
2039
|
+
# @param user_password PDF user password.
|
2040
|
+
def user_password=(user_password)
|
2041
|
+
@parameters['user_password'] = user_password
|
2042
|
+
end
|
2043
|
+
|
2044
|
+
# Set PDF owner password.
|
2045
|
+
#
|
2046
|
+
# @param owner_password PDF owner password.
|
2047
|
+
def owner_password=(owner_password)
|
2048
|
+
@parameters['owner_password'] = owner_password
|
2049
|
+
end
|
2050
|
+
|
2051
|
+
# Set the maximum amount of time (in seconds) for this job.
|
2052
|
+
# The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
|
2053
|
+
#
|
2054
|
+
# @param timeout Timeout in seconds.
|
2055
|
+
def timeout=(timeout)
|
2056
|
+
@parameters['timeout'] = timeout
|
2057
|
+
end
|
2058
|
+
|
2059
|
+
# Set a custom parameter. Do not use this method unless advised by SelectPdf.
|
2060
|
+
#
|
2061
|
+
# @param parameter_name Parameter name.
|
2062
|
+
# @param parameter_value Parameter value.
|
2063
|
+
def set_custom_parameter(parameter_name, parameter_value)
|
2064
|
+
@parameters[parameter_name] = parameter_value
|
2065
|
+
end
|
2066
|
+
end
|
2067
|
+
|
2068
|
+
# Pdf To Text Conversion with SelectPdf Online API.
|
2069
|
+
#
|
2070
|
+
# Code Sample for PDF To Text
|
2071
|
+
#
|
2072
|
+
# require 'selectpdf'
|
2073
|
+
#
|
2074
|
+
# $stdout.sync = true
|
2075
|
+
#
|
2076
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
2077
|
+
#
|
2078
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
2079
|
+
# test_pdf = 'Input.pdf'
|
2080
|
+
# local_file = 'Result.txt'
|
2081
|
+
# api_key = 'Your API key here'
|
2082
|
+
#
|
2083
|
+
# begin
|
2084
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
2085
|
+
#
|
2086
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
2087
|
+
# client.start_page = 1 # start page (processing starts from here)
|
2088
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
2089
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
2090
|
+
#
|
2091
|
+
# print "Starting pdf to text ...\n"
|
2092
|
+
#
|
2093
|
+
# # convert local pdf to local text file
|
2094
|
+
# client.text_from_file_to_file(test_pdf, local_file)
|
2095
|
+
#
|
2096
|
+
# # extract text from local pdf to memory
|
2097
|
+
# # text = client.text_from_file(test_pdf)
|
2098
|
+
# # print text
|
2099
|
+
#
|
2100
|
+
# # convert pdf from public url to local text file
|
2101
|
+
# # client.text_from_url_to_file(test_url, local_file)
|
2102
|
+
#
|
2103
|
+
# # extract text from pdf from public url to memory
|
2104
|
+
# # text = client.text_from_url(test_url)
|
2105
|
+
# # print text
|
2106
|
+
#
|
2107
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
2108
|
+
#
|
2109
|
+
# # get API usage
|
2110
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
2111
|
+
# usage = usage_client.get_usage(FALSE)
|
2112
|
+
# print("Usage: #{usage}\n")
|
2113
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
2114
|
+
# rescue SelectPdf::ApiException => e
|
2115
|
+
# print("An error occurred: #{e}")
|
2116
|
+
# end
|
2117
|
+
#
|
2118
|
+
# Code Sample for Search Pdf
|
2119
|
+
#
|
2120
|
+
# require 'selectpdf'
|
2121
|
+
#
|
2122
|
+
# $stdout.sync = true
|
2123
|
+
#
|
2124
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
2125
|
+
#
|
2126
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
2127
|
+
# test_pdf = 'Input.pdf'
|
2128
|
+
# api_key = 'Your API key here'
|
2129
|
+
#
|
2130
|
+
# begin
|
2131
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
2132
|
+
#
|
2133
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
2134
|
+
# client.start_page = 1 # start page (processing starts from here)
|
2135
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
2136
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
2137
|
+
#
|
2138
|
+
# print "Starting search pdf ...\n"
|
2139
|
+
#
|
2140
|
+
# # search local pdf
|
2141
|
+
# results = client.search_file(test_pdf, 'pdf')
|
2142
|
+
#
|
2143
|
+
# # search pdf from public url
|
2144
|
+
# # results = client.search_url(test_url, 'pdf')
|
2145
|
+
#
|
2146
|
+
# print "Search results: #{results}.\nSearch results count: #{results.length}\n"
|
2147
|
+
#
|
2148
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
2149
|
+
#
|
2150
|
+
# # get API usage
|
2151
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
2152
|
+
# usage = usage_client.get_usage(FALSE)
|
2153
|
+
# print("Usage: #{usage}\n")
|
2154
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
2155
|
+
# rescue SelectPdf::ApiException => e
|
2156
|
+
# print("An error occurred: #{e}")
|
2157
|
+
# end
|
2158
|
+
#
|
2159
|
+
class PdfToTextClient < ApiClient
|
2160
|
+
# Construct the Pdf To Text Client.
|
2161
|
+
#
|
2162
|
+
# @param api_key API Key.
|
2163
|
+
def initialize(api_key)
|
2164
|
+
super()
|
2165
|
+
@api_endpoint = 'https://selectpdf.com/api2/pdftotext/'
|
2166
|
+
@parameters['key'] = api_key
|
2167
|
+
|
2168
|
+
@file_idx = 0
|
2169
|
+
end
|
2170
|
+
|
2171
|
+
# Get the text from the specified pdf.
|
2172
|
+
#
|
2173
|
+
# @param input_pdf Path to a local PDF file.
|
2174
|
+
# @return Extracted text.
|
2175
|
+
def text_from_file(input_pdf)
|
2176
|
+
@parameters['async'] = 'False'
|
2177
|
+
@parameters['action'] = 'Convert'
|
2178
|
+
@parameters.delete('url')
|
2179
|
+
|
2180
|
+
@files = {}
|
2181
|
+
@files['inputPdf'] = input_pdf
|
2182
|
+
|
2183
|
+
perform_post_as_multipart_formdata
|
2184
|
+
end
|
2185
|
+
|
2186
|
+
# Get the text from the specified pdf and write it to the specified text file.
|
2187
|
+
#
|
2188
|
+
# @param input_pdf Path to a local PDF file.
|
2189
|
+
# @param output_file_path The output file where the resulting text will be written.
|
2190
|
+
def text_from_file_to_file(input_pdf, output_file_path)
|
2191
|
+
result = text_from_file(input_pdf)
|
2192
|
+
File.open(output_file_path, 'wb') do |file|
|
2193
|
+
file.write(result)
|
2194
|
+
end
|
2195
|
+
rescue ApiException
|
2196
|
+
FileUtils.rm(output_file_path) if File.exist?(output_file_path)
|
2197
|
+
raise
|
2198
|
+
end
|
2199
|
+
|
2200
|
+
# Get the text from the specified pdf and write it to the specified stream.
|
2201
|
+
#
|
2202
|
+
# @param input_pdf Path to a local PDF file.
|
2203
|
+
# @param stream The output stream where the resulting text will be written.
|
2204
|
+
def text_from_file_to_stream(input_pdf, stream)
|
2205
|
+
result = text_from_file(input_pdf)
|
2206
|
+
stream.write(result)
|
2207
|
+
end
|
2208
|
+
|
2209
|
+
# Get the text from the specified pdf with an asynchronous call.
|
2210
|
+
#
|
2211
|
+
# @param input_pdf Path to a local PDF file.
|
2212
|
+
# @return Extracted text.
|
2213
|
+
def text_from_file_async(input_pdf)
|
2214
|
+
@parameters['action'] = 'Convert'
|
2215
|
+
@parameters.delete('url')
|
2216
|
+
|
2217
|
+
@files = {}
|
2218
|
+
@files['inputPdf'] = input_pdf
|
2219
|
+
|
2220
|
+
job_id = start_async_job_multipart_form_data
|
2221
|
+
|
2222
|
+
if job_id.nil? || job_id.empty?
|
2223
|
+
raise ApiException.new('An error occurred launching the asynchronous call.'), 'An error occurred launching the asynchronous call.'
|
2224
|
+
end
|
2225
|
+
|
2226
|
+
no_pings = 0
|
2227
|
+
|
2228
|
+
while no_pings < @async_calls_max_pings
|
2229
|
+
no_pings += 1
|
2230
|
+
|
2231
|
+
# sleep for a few seconds before next ping
|
2232
|
+
sleep(@async_calls_ping_interval)
|
2233
|
+
|
2234
|
+
async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
|
2235
|
+
async_job_client.api_endpoint = @api_async_endpoint
|
2236
|
+
|
2237
|
+
result = async_job_client.result
|
2238
|
+
|
2239
|
+
next if result.nil?
|
2240
|
+
|
2241
|
+
@number_of_pages = async_job_client.number_of_pages
|
2242
|
+
|
2243
|
+
return result
|
2244
|
+
end
|
2245
|
+
|
2246
|
+
raise ApiException.new('Asynchronous call did not finish in expected timeframe.'), 'Asynchronous call did not finish in expected timeframe.'
|
2247
|
+
end
|
2248
|
+
|
2249
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
|
2250
|
+
#
|
2251
|
+
# @param input_pdf Path to a local PDF file.
|
2252
|
+
# @param output_file_path The output file where the resulting text will be written.
|
2253
|
+
def text_from_file_to_file_async(input_pdf, output_file_path)
|
2254
|
+
result = text_from_file_async(input_pdf)
|
2255
|
+
File.open(output_file_path, 'wb') do |file|
|
2256
|
+
file.write(result)
|
2257
|
+
end
|
2258
|
+
rescue ApiException
|
2259
|
+
FileUtils.rm(output_file_path) if File.exist?(output_file_path)
|
2260
|
+
raise
|
2261
|
+
end
|
2262
|
+
|
2263
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
|
2264
|
+
#
|
2265
|
+
# @param input_pdf Path to a local PDF file.
|
2266
|
+
# @param stream The output stream where the resulting text will be written.
|
2267
|
+
def text_from_file_to_stream_async(input_pdf, stream)
  # Hand the asynchronously extracted text straight to the caller's stream;
  # the value returned by stream.write is passed back unchanged.
  stream.write(text_from_file_async(input_pdf))
end
|
2271
|
+
|
2272
|
+
# Get the text from the specified pdf.
|
2273
|
+
#
|
2274
|
+
# @param url Address of the PDF file.
|
2275
|
+
# @return Extracted text.
|
2276
|
+
def text_from_url(url)
  # Scheme and host checks are done on a lowercased copy; the original url is what gets sent.
  lowered_url = url.downcase

  unless lowered_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if lowered_url.start_with?('http://localhost')
    # The exception object and the raise message share one string. Previously the
    # object was built with a reference to "getTextFromFile", which is not a method
    # of this Ruby client.
    message = 'Cannot convert local urls via this method. Use text_from_file instead.'
    raise ApiException.new(message), message
  end

  # Synchronous text extraction request.
  @parameters['async'] = 'False'
  @parameters['action'] = 'Convert'

  # The source is a URL, so no local files are attached to the request.
  @files = {}
  @parameters['url'] = url

  # The value returned by the multipart POST helper is the method's result.
  perform_post_as_multipart_formdata
end
|
2295
|
+
|
2296
|
+
# Get the text from the specified pdf and write it to the specified text file.
|
2297
|
+
#
|
2298
|
+
# @param url Address of the PDF file.
|
2299
|
+
# @param output_file_path The output file where the resulted text will be written.
|
2300
|
+
def text_from_url_to_file(url, output_file_path)
  # The extraction runs before the output file is touched.
  extracted = text_from_url(url)

  # Binary write, so the bytes are stored exactly as received.
  File.binwrite(output_file_path, extracted)
rescue ApiException
  # On an API failure remove whatever exists at the output path, then re-raise the same exception.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
|
2309
|
+
|
2310
|
+
# Get the text from the specified pdf and write it to the specified stream.
|
2311
|
+
#
|
2312
|
+
# @param url Address of the PDF file.
|
2313
|
+
# @param stream The output stream where the resulting text will be written.
|
2314
|
+
def text_from_url_to_stream(url, stream)
  # Hand the extracted text straight to the caller's stream;
  # the value returned by stream.write is passed back unchanged.
  stream.write(text_from_url(url))
end
|
2318
|
+
|
2319
|
+
# Get the text from the specified pdf with an asynchronous call.
|
2320
|
+
#
|
2321
|
+
# @param url Address of the PDF file.
|
2322
|
+
# @return Extracted text.
|
2323
|
+
def text_from_url_async(url)
  # Scheme and host checks are done on a lowercased copy; the original url is what gets sent.
  lowered_url = url.downcase

  unless lowered_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if lowered_url.start_with?('http://localhost')
    # The exception object and the raise message share one string. Previously the
    # object was built with a reference to "getTextFromFile", which is not a method
    # of this Ruby client.
    message = 'Cannot convert local urls via this method. Use text_from_file_async instead.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Convert'

  # The source is a URL, so no local files are attached to the request.
  @files = {}
  @parameters['url'] = url

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  pings_done = 0

  while pings_done < @async_calls_max_pings
    pings_done += 1

    # Wait before every ping, including the first one.
    sleep(@async_calls_ping_interval)

    # NOTE(review): the poll uses @job_id, not the local job_id; presumably
    # start_async_job_multipart_form_data stores the id there - confirm.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has nothing to return yet: ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2368
|
+
|
2369
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
|
2370
|
+
#
|
2371
|
+
# @param url Address of the PDF file.
|
2372
|
+
# @param output_file_path The output file where the resulted text will be written.
|
2373
|
+
def text_from_url_to_file_async(url, output_file_path)
  # The extraction runs before the output file is touched.
  extracted = text_from_url_async(url)

  # Binary write, so the bytes are stored exactly as received.
  File.binwrite(output_file_path, extracted)
rescue ApiException
  # On an API failure remove whatever exists at the output path, then re-raise the same exception.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
|
2382
|
+
|
2383
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
|
2384
|
+
#
|
2385
|
+
# @param url Address of the PDF file.
|
2386
|
+
# @param stream The output stream where the resulting text will be written.
|
2387
|
+
def text_from_url_to_stream_async(url, stream)
  # Hand the asynchronously extracted text straight to the caller's stream;
  # the value returned by stream.write is passed back unchanged.
  stream.write(text_from_url_async(url))
end
|
2391
|
+
|
2392
|
+
# Search for a specific text in a PDF document.
|
2393
|
+
# Pages that participate in this operation are specified by the start_page and end_page methods.
|
2394
|
+
#
|
2395
|
+
# @param input_pdf Path to a local PDF file.
|
2396
|
+
# @param text_to_search Text to search.
|
2397
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2398
|
+
# @param whole_words_only If the search works on whole words or not.
|
2399
|
+
# @return List with text positions in the current PDF document.
|
2400
|
+
def search_file(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the FALSE constant is deprecated and
  # undefined on newer Ruby versions, where evaluating the default would raise NameError.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  # Synchronous search request.
  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  # The document comes from a local file, so drop any url left by an earlier call.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  response = perform_post_as_multipart_formdata

  # A missing or empty response is reported as "no matches"; otherwise the JSON body is parsed.
  return [] if response.nil? || response.empty?

  JSON.parse(response)
end
|
2422
|
+
|
2423
|
+
# Search for a specific text in a PDF document with an asynchronous call.
|
2424
|
+
# Pages that participate in this operation are specified by the start_page and end_page methods.
|
2425
|
+
#
|
2426
|
+
# @param input_pdf Path to a local PDF file.
|
2427
|
+
# @param text_to_search Text to search.
|
2428
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2429
|
+
# @param whole_words_only If the search works on whole words or not.
|
2430
|
+
# @return List with text positions in the current PDF document.
|
2431
|
+
def search_file_async(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the FALSE constant is deprecated and
  # undefined on newer Ruby versions, where evaluating the default would raise NameError.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Search'
  # The document comes from a local file, so drop any url left by an earlier call.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  pings_done = 0

  while pings_done < @async_calls_max_pings
    pings_done += 1

    # Wait before every ping, including the first one.
    sleep(@async_calls_ping_interval)

    # NOTE(review): the poll uses @job_id, not the local job_id; presumably
    # start_async_job_multipart_form_data stores the id there - confirm.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has nothing to return yet: ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    # An empty body is reported as "no matches"; otherwise the JSON body is parsed.
    return [] if result.empty?

    return JSON.parse(result)
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2478
|
+
|
2479
|
+
# Search for a specific text in a PDF document.
|
2480
|
+
# Pages that participate in this operation are specified by the start_page and end_page methods.
|
2481
|
+
#
|
2482
|
+
# @param url Address of the PDF file.
|
2483
|
+
# @param text_to_search Text to search.
|
2484
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2485
|
+
# @param whole_words_only If the search works on whole words or not.
|
2486
|
+
# @return List with text positions in the current PDF document.
|
2487
|
+
def search_url(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the FALSE constant is deprecated and
  # undefined on newer Ruby versions, where evaluating the default would raise NameError.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  # Synchronous search request.
  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The source is a URL, so no local files are attached to the request.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  response = perform_post_as_multipart_formdata

  # A missing or empty response is reported as "no matches"; otherwise the JSON body is parsed.
  return [] if response.nil? || response.empty?

  JSON.parse(response)
end
|
2508
|
+
|
2509
|
+
# Search for a specific text in a PDF document with an asynchronous call.
|
2510
|
+
# Pages that participate in this operation are specified by the start_page and end_page methods.
|
2511
|
+
#
|
2512
|
+
# @param url Address of the PDF file.
|
2513
|
+
# @param text_to_search Text to search.
|
2514
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2515
|
+
# @param whole_words_only If the search works on whole words or not.
|
2516
|
+
# @return List with text positions in the current PDF document.
|
2517
|
+
def search_url_async(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the FALSE constant is deprecated and
  # undefined on newer Ruby versions, where evaluating the default would raise NameError.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The source is a URL, so no local files are attached to the request.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  pings_done = 0

  while pings_done < @async_calls_max_pings
    pings_done += 1

    # Wait before every ping, including the first one.
    sleep(@async_calls_ping_interval)

    # NOTE(review): the poll uses @job_id, not the local job_id; presumably
    # start_async_job_multipart_form_data stores the id there - confirm.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has nothing to return yet: ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    # An empty body is reported as "no matches"; otherwise the JSON body is parsed.
    return [] if result.empty?

    return JSON.parse(result)
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2563
|
+
|
2564
|
+
# Set Start Page number. Default value is 1 (first page of the document).
|
2565
|
+
#
|
2566
|
+
# @param start_page Start page number (1-based).
|
2567
|
+
def start_page=(start_page)
  # Recorded under the 'start_page' key of the request parameters; no validation is done here.
  @parameters.store('start_page', start_page)
end
|
2570
|
+
|
2571
|
+
# Set End Page number. Default value is 0 (process till the last page of the document).
|
2572
|
+
#
|
2573
|
+
# @param end_page End page number (1-based).
|
2574
|
+
def end_page=(end_page)
  # Recorded under the 'end_page' key of the request parameters; no validation is done here.
  @parameters.store('end_page', end_page)
end
|
2577
|
+
|
2578
|
+
# Set PDF user password.
|
2579
|
+
#
|
2580
|
+
# @param user_password PDF user password.
|
2581
|
+
def user_password=(user_password)
  # Recorded under the 'user_password' key of the request parameters.
  @parameters.store('user_password', user_password)
end
|
2584
|
+
|
2585
|
+
# Set the text layout. The default value is SelectPdf::TextLayout::ORIGINAL.
|
2586
|
+
#
|
2587
|
+
# @param text_layout The text layout. Possible values: Original, Reading. Use constants from SelectPdf::TextLayout class.
|
2588
|
+
def text_layout=(text_layout)
  # Only the two numeric layout codes named in the error message are accepted.
  if [0, 1].include?(text_layout)
    @parameters['text_layout'] = text_layout
  else
    message = 'Allowed values for Text Layout: 0 (Original), 1 (Reading).'
    raise ApiException.new(message), message
  end
end
|
2595
|
+
|
2596
|
+
# Set the output format. The default value is SelectPdf::OutputFormat::TEXT.
|
2597
|
+
#
|
2598
|
+
# @param output_format The output format. Possible values: Text, Html. Use constants from SelectPdf::OutputFormat class.
|
2599
|
+
def output_format=(output_format)
  # Only the two numeric format codes named in the error message are accepted.
  if [0, 1].include?(output_format)
    @parameters['output_format'] = output_format
  else
    message = 'Allowed values for Output Format: 0 (Text), 1 (Html).'
    raise ApiException.new(message), message
  end
end
|
2606
|
+
|
2607
|
+
# Set the maximum amount of time (in seconds) for this job.
|
2608
|
+
# The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
|
2609
|
+
#
|
2610
|
+
# @param timeout Timeout in seconds.
|
2611
|
+
def timeout=(timeout)
  # Recorded under the 'timeout' key of the request parameters; the value is not range-checked here.
  @parameters.store('timeout', timeout)
end
|
2614
|
+
|
2615
|
+
# Set a custom parameter. Do not use this method unless advised by SelectPdf.
|
2616
|
+
#
|
2617
|
+
# @param parameter_name Parameter name.
|
2618
|
+
# @param parameter_value Parameter value.
|
2619
|
+
def set_custom_parameter(parameter_name, parameter_value)
  # Stored as-is in the request parameters; an existing entry with the same name is overwritten.
  @parameters.store(parameter_name, parameter_value)
end
|
2622
|
+
end
|
1528
2623
|
end
|