selectpdf 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/README.md +79 -2
- data/lib/selectpdf.rb +1099 -4
- data/samples/pdf_merge.rb +40 -0
- data/samples/pdf_to_text.rb +45 -0
- data/samples/search_pdf.rb +38 -0
- metadata +8 -3
data/lib/selectpdf.rb
CHANGED
@@ -5,16 +5,172 @@ require 'json'
|
|
5
5
|
require 'fileutils'
|
6
6
|
|
7
7
|
#
|
8
|
-
# SelectPdf Online REST API Ruby client library. Contains
|
8
|
+
# SelectPdf Online REST API Ruby client library. Contains HTML to PDF converter, PDF merge, PDF to text extractor, search PDF.
|
9
9
|
#
|
10
10
|
#
|
11
11
|
# Convert HTML to PDF
|
12
12
|
#
|
13
|
-
#
|
13
|
+
# require 'selectpdf'
|
14
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
15
|
+
#
|
16
|
+
# url = 'https://selectpdf.com'
|
17
|
+
# local_file = 'Test.pdf'
|
18
|
+
# api_key = 'Your API key here'
|
19
|
+
#
|
20
|
+
# begin
|
21
|
+
# api = SelectPdf::HtmlToPdfClient.new(api_key)
|
22
|
+
#
|
23
|
+
# api.page_size = SelectPdf::PageSize::A4
|
24
|
+
# api.margins = 0
|
25
|
+
# api.page_numbers = FALSE
|
26
|
+
# api.page_breaks_enhanced_algorithm = TRUE
|
27
|
+
#
|
28
|
+
# api.convert_url_to_file(url, local_file)
|
29
|
+
# rescue SelectPdf::ApiException => e
|
30
|
+
# print("An error occurred: #{e}")
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# Merge PDFs from local disk or public url and save result into a file on disk.
|
34
|
+
#
|
35
|
+
# require 'selectpdf'
|
36
|
+
#
|
37
|
+
# $stdout.sync = true
|
38
|
+
#
|
39
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
40
|
+
#
|
41
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
42
|
+
# test_pdf = 'Input.pdf'
|
43
|
+
# local_file = 'Result.pdf'
|
44
|
+
# api_key = 'Your API key here'
|
45
|
+
#
|
46
|
+
# begin
|
47
|
+
# client = SelectPdf::PdfMergeClient.new(api_key)
|
48
|
+
#
|
49
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
|
50
|
+
#
|
51
|
+
# # specify the pdf files that will be merged (order will be preserved in the final pdf)
|
52
|
+
# client.add_file(test_pdf) # add PDF from local file
|
53
|
+
# client.add_url_file(test_url) # add PDF from public url
|
54
|
+
# # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
|
55
|
+
# # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
|
56
|
+
#
|
57
|
+
# print "Starting pdf merge ...\n"
|
58
|
+
#
|
59
|
+
# # merge pdfs to local file
|
60
|
+
# client.save_to_file(local_file)
|
61
|
+
#
|
62
|
+
# # merge pdfs to memory
|
63
|
+
# # pdf = client.save
|
64
|
+
#
|
65
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
66
|
+
#
|
67
|
+
# # get API usage
|
68
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
69
|
+
# usage = usage_client.get_usage(FALSE)
|
70
|
+
# print("Usage: #{usage}\n")
|
71
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
72
|
+
# rescue SelectPdf::ApiException => e
|
73
|
+
# print("An error occurred: #{e}")
|
74
|
+
# end
|
75
|
+
#
|
76
|
+
# Extract text from PDF
|
77
|
+
#
|
78
|
+
# require 'selectpdf'
|
79
|
+
#
|
80
|
+
# $stdout.sync = true
|
81
|
+
#
|
82
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
83
|
+
#
|
84
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
85
|
+
# test_pdf = 'Input.pdf'
|
86
|
+
# local_file = 'Result.txt'
|
87
|
+
# api_key = 'Your API key here'
|
88
|
+
#
|
89
|
+
# begin
|
90
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
91
|
+
#
|
92
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
93
|
+
# client.start_page = 1 # start page (processing starts from here)
|
94
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
95
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
96
|
+
#
|
97
|
+
# print "Starting pdf to text ...\n"
|
98
|
+
#
|
99
|
+
# # convert local pdf to local text file
|
100
|
+
# client.text_from_file_to_file(test_pdf, local_file)
|
101
|
+
#
|
102
|
+
# # extract text from local pdf to memory
|
103
|
+
# # text = client.text_from_file(test_pdf)
|
104
|
+
# # print text
|
105
|
+
#
|
106
|
+
# # convert pdf from public url to local text file
|
107
|
+
# # client.text_from_url_to_file(test_url, local_file)
|
108
|
+
#
|
109
|
+
# # extract text from pdf from public url to memory
|
110
|
+
# # text = client.text_from_url(test_url)
|
111
|
+
# # print text
|
112
|
+
#
|
113
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
114
|
+
#
|
115
|
+
# # get API usage
|
116
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
117
|
+
# usage = usage_client.get_usage(FALSE)
|
118
|
+
# print("Usage: #{usage}\n")
|
119
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
120
|
+
# rescue SelectPdf::ApiException => e
|
121
|
+
# print("An error occurred: #{e}")
|
122
|
+
# end
|
123
|
+
#
|
124
|
+
# Search Pdf
|
125
|
+
#
|
126
|
+
# require 'selectpdf'
|
127
|
+
#
|
128
|
+
# $stdout.sync = true
|
129
|
+
#
|
130
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
131
|
+
#
|
132
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
133
|
+
# test_pdf = 'Input.pdf'
|
134
|
+
# api_key = 'Your API key here'
|
135
|
+
#
|
136
|
+
# begin
|
137
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
138
|
+
#
|
139
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
140
|
+
# client.start_page = 1 # start page (processing starts from here)
|
141
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
142
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
143
|
+
#
|
144
|
+
# print "Starting search pdf ...\n"
|
145
|
+
#
|
146
|
+
# # search local pdf
|
147
|
+
# results = client.search_file(test_pdf, 'pdf')
|
148
|
+
#
|
149
|
+
# # search pdf from public url
|
150
|
+
# # results = client.search_url(test_url, 'pdf')
|
151
|
+
#
|
152
|
+
# print "Search results: #{results}.\nSearch results count: #{results.length}\n"
|
153
|
+
#
|
154
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
155
|
+
#
|
156
|
+
# # get API usage
|
157
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
158
|
+
# usage = usage_client.get_usage(FALSE)
|
159
|
+
# print("Usage: #{usage}\n")
|
160
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
161
|
+
# rescue SelectPdf::ApiException => e
|
162
|
+
# print("An error occurred: #{e}")
|
163
|
+
# end
|
164
|
+
#
|
14
165
|
module SelectPdf
|
166
|
+
# Multipart/form-data boundary
|
15
167
|
MULTIPART_FORM_DATA_BOUNDARY = '------------SelectPdf_Api_Boundry_$'
|
168
|
+
|
169
|
+
# New line
|
16
170
|
NEW_LINE = "\r\n"
|
17
|
-
|
171
|
+
|
172
|
+
# Library version
|
173
|
+
CLIENT_VERSION = '1.4.0'
|
18
174
|
|
19
175
|
attr_reader :code, :message
|
20
176
|
#
|
@@ -470,7 +626,70 @@ module SelectPdf
|
|
470
626
|
#
|
471
627
|
# Code sample:
|
472
628
|
#
|
473
|
-
#
|
629
|
+
# require 'selectpdf'
|
630
|
+
#
|
631
|
+
# $stdout.sync = true
|
632
|
+
#
|
633
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
634
|
+
#
|
635
|
+
# url = 'https://selectpdf.com'
|
636
|
+
# local_file = 'Test.pdf'
|
637
|
+
# api_key = 'Your API key here'
|
638
|
+
#
|
639
|
+
# begin
|
640
|
+
# client = SelectPdf::HtmlToPdfClient.new(api_key)
|
641
|
+
#
|
642
|
+
# # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
|
643
|
+
#
|
644
|
+
# client.page_size = SelectPdf::PageSize::A4 # PDF page size
|
645
|
+
# client.page_orientation = SelectPdf::PageOrientation::PORTRAIT # PDF page orientation
|
646
|
+
# client.margins = 0 # PDF page margins
|
647
|
+
# client.rendering_engine = SelectPdf::RenderingEngine::WEBKIT # rendering engine
|
648
|
+
# client.conversion_delay = 1 # conversion delay
|
649
|
+
# client.navigation_timeout = 30 # navigation timeout
|
650
|
+
# client.page_numbers = FALSE # page numbers
|
651
|
+
# client.page_breaks_enhanced_algorithm = TRUE # enhanced page break algorithm
|
652
|
+
#
|
653
|
+
# # additional properties
|
654
|
+
#
|
655
|
+
# # client.use_css_print = TRUE # enable CSS media print
|
656
|
+
# # client.disable_javascript = TRUE # disable javascript
|
657
|
+
# # client.disable_internal_links = TRUE # disable internal links
|
658
|
+
# # client.disable_external_links = TRUE # disable external links
|
659
|
+
# # client.keep_images_together = TRUE # keep images together
|
660
|
+
# # client.scale_images = TRUE # scale images to create smaller pdfs
|
661
|
+
# # client.single_page_pdf = TRUE # generate a single page PDF
|
662
|
+
# # client.user_password = 'password' # secure the PDF with a password
|
663
|
+
#
|
664
|
+
# # generate automatic bookmarks
|
665
|
+
#
|
666
|
+
# # client.pdf_bookmarks_selectors = 'H1, H2' # create outlines (bookmarks) for the specified elements
|
667
|
+
# # client.viewer_page_mode = SelectPdf::PageMode::USE_OUTLINES # display outlines (bookmarks) in viewer
|
668
|
+
#
|
669
|
+
# print "Starting conversion ...\n"
|
670
|
+
#
|
671
|
+
# # convert url to file
|
672
|
+
# client.convert_url_to_file(url, local_file)
|
673
|
+
#
|
674
|
+
# # convert url to memory
|
675
|
+
# # pdf = client.convert_url(url)
|
676
|
+
#
|
677
|
+
# # convert html string to file
|
678
|
+
# # client.convert_html_string_to_file('This is some <b>html</b>.', local_file)
|
679
|
+
#
|
680
|
+
# # convert html string to memory
|
681
|
+
# # pdf = client.convert_html_string('This is some <b>html</b>.')
|
682
|
+
#
|
683
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
684
|
+
#
|
685
|
+
# # get API usage
|
686
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
687
|
+
# usage = usage_client.get_usage(FALSE)
|
688
|
+
# print("Usage: #{usage}\n")
|
689
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
690
|
+
# rescue SelectPdf::ApiException => e
|
691
|
+
# print("An error occurred: #{e}")
|
692
|
+
# end
|
474
693
|
class HtmlToPdfClient < ApiClient
|
475
694
|
# Construct the Html To Pdf Client.
|
476
695
|
#
|
@@ -1525,4 +1744,880 @@ module SelectPdf
|
|
1525
1744
|
return nil
|
1526
1745
|
end
|
1527
1746
|
end
|
1747
|
+
|
1748
|
+
# Pdf Merge with SelectPdf Online API.
|
1749
|
+
#
|
1750
|
+
# Code sample:
|
1751
|
+
#
|
1752
|
+
# require 'selectpdf'
|
1753
|
+
#
|
1754
|
+
# $stdout.sync = true
|
1755
|
+
#
|
1756
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
1757
|
+
#
|
1758
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
1759
|
+
# test_pdf = 'Input.pdf'
|
1760
|
+
# local_file = 'Result.pdf'
|
1761
|
+
# api_key = 'Your API key here'
|
1762
|
+
#
|
1763
|
+
# begin
|
1764
|
+
# client = SelectPdf::PdfMergeClient.new(api_key)
|
1765
|
+
#
|
1766
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
|
1767
|
+
#
|
1768
|
+
# # specify the pdf files that will be merged (order will be preserved in the final pdf)
|
1769
|
+
# client.add_file(test_pdf) # add PDF from local file
|
1770
|
+
# client.add_url_file(test_url) # add PDF from public url
|
1771
|
+
# # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
|
1772
|
+
# # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
|
1773
|
+
#
|
1774
|
+
# print "Starting pdf merge ...\n"
|
1775
|
+
#
|
1776
|
+
# # merge pdfs to local file
|
1777
|
+
# client.save_to_file(local_file)
|
1778
|
+
#
|
1779
|
+
# # merge pdfs to memory
|
1780
|
+
# # pdf = client.save
|
1781
|
+
#
|
1782
|
+
# print "Finished! Number of pages: #{client.number_of_pages}.\n"
|
1783
|
+
#
|
1784
|
+
# # get API usage
|
1785
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
1786
|
+
# usage = usage_client.get_usage(FALSE)
|
1787
|
+
# print("Usage: #{usage}\n")
|
1788
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
1789
|
+
# rescue SelectPdf::ApiException => e
|
1790
|
+
# print("An error occurred: #{e}")
|
1791
|
+
# end
|
1792
|
+
class PdfMergeClient < ApiClient
|
1793
|
+
# Create a client bound to the SelectPdf PDF merge endpoint.
#
# @param api_key API Key.
def initialize(api_key)
  super()
  # Counter used to number the inputs added through add_file / add_url_file.
  @file_idx = 0
  @parameters.store('key', api_key)
  @api_endpoint = 'https://selectpdf.com/api2/pdfmerge/'
end
|
1803
|
+
|
1804
|
+
# Append a PDF stored on local disk to the list of documents to merge.
#
# The document takes the next input slot; any url or password previously
# registered under that slot is discarded.
#
# @param input_pdf Path to a local PDF file.
# @param user_password User password for the PDF document (optional).
def add_file(input_pdf, user_password = nil)
  @file_idx += 1
  slot = @file_idx

  @parameters.delete("url_#{slot}")
  @files["file_#{slot}"] = input_pdf

  password_key = "password_#{slot}"
  # No (or blank) password: make sure no stale password is sent for this slot.
  return @parameters.delete(password_key) if user_password.nil? || user_password.empty?

  @parameters[password_key] = user_password
end
|
1820
|
+
|
1821
|
+
# Append a PDF available at a url to the list of documents to merge.
#
# The document takes the next input slot; any local file or password previously
# registered under that slot is discarded.
#
# @param input_url Url of a remote PDF file.
# @param user_password User password for the PDF document (optional).
def add_url_file(input_url, user_password = nil)
  @file_idx += 1
  slot = @file_idx

  @files.delete("file_#{slot}")
  @parameters["url_#{slot}"] = input_url

  password_key = "password_#{slot}"
  # No (or blank) password: make sure no stale password is sent for this slot.
  return @parameters.delete(password_key) if user_password.nil? || user_password.empty?

  @parameters[password_key] = user_password
end
|
1837
|
+
|
1838
|
+
# Merge every registered input with a synchronous API call.
#
# After a successful call the list of input files and the input counter are
# cleared; they are left untouched when the call raises.
#
# @return Byte array containing the resulted PDF.
def save
  @parameters.update('async' => 'False', 'files_no' => @file_idx)

  perform_post_as_multipart_formdata.tap do
    @file_idx = 0
    @files = {}
  end
end
|
1852
|
+
|
1853
|
+
# Merge every registered input and write the merged PDF to a stream.
#
# @param stream The output stream where the resulted PDF will be written.
def save_to_stream(stream)
  stream.write(save)
end
|
1860
|
+
|
1861
|
+
# Merge every registered input and store the merged PDF in a local file.
#
# If the API reports an error, any file found at file_path is removed and the
# error is re-raised.
#
# @param file_path Local file including path if necessary.
def save_to_file(file_path)
  begin
    # Run the merge first so the output file is only opened once data is available.
    pdf = save
    File.binwrite(file_path, pdf)
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end
end
|
1873
|
+
|
1874
|
+
# Merge every registered input with an asynchronous API call.
#
# Starts the job, then polls for its result up to @async_calls_max_pings times,
# sleeping @async_calls_ping_interval seconds before each poll. The list of input
# files and the input counter are cleared once a result arrives or the polling
# budget is exhausted.
#
# @return Byte array containing the resulted PDF.
# @raise ApiException if the job cannot be started or does not finish within the allowed pings.
def save_async
  @parameters['files_no'] = @file_idx

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    raise ApiException.new('An error occurred launching the asynchronous call.'), 'An error occurred launching the asynchronous call.'
  end

  pings = 0

  while pings < @async_calls_max_pings
    pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the id that was just validated rather than an instance variable
    # this method never assigns.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil result: poll again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    @file_idx = 0
    @files = {}

    return result
  end

  @file_idx = 0
  @files = {}

  raise ApiException.new('Asynchronous call did not finish in expected timeframe.'), 'Asynchronous call did not finish in expected timeframe.'
end
|
1913
|
+
|
1914
|
+
# Merge every registered input with an asynchronous call and write the merged PDF to a stream.
#
# @param stream The output stream where the resulted PDF will be written.
def save_to_stream_async(stream)
  stream.write(save_async)
end
|
1921
|
+
|
1922
|
+
# Merge every registered input with an asynchronous call and store the merged PDF in a local file.
#
# If the API reports an error, any file found at file_path is removed and the
# error is re-raised.
#
# @param file_path Local file including path if necessary.
def save_to_file_async(file_path)
  begin
    # Run the merge first so the output file is only opened once data is available.
    pdf = save_async
    File.binwrite(file_path, pdf)
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end
end
|
1934
|
+
|
1935
|
+
# Title to store in the information of the merged PDF document.
#
# @param doc_title Document title.
def doc_title=(doc_title)
  @parameters.store('doc_title', doc_title)
end
|
1941
|
+
|
1942
|
+
# Subject to store in the information of the merged PDF document.
#
# @param doc_subject Document subject.
def doc_subject=(doc_subject)
  @parameters.store('doc_subject', doc_subject)
end
|
1948
|
+
|
1949
|
+
# Keywords to store in the information of the merged PDF document.
#
# @param doc_keywords Document keywords.
def doc_keywords=(doc_keywords)
  @parameters.store('doc_keywords', doc_keywords)
end
|
1955
|
+
|
1956
|
+
# Author name to store in the information of the merged PDF document.
#
# @param doc_author Document author.
def doc_author=(doc_author)
  @parameters.store('doc_author', doc_author)
end
|
1962
|
+
|
1963
|
+
# Choose whether the creation date and time is added to the PDF document information.
# The default value is False.
#
# @param doc_add_creation_date Add creation date to the document metadata or not.
def doc_add_creation_date=(doc_add_creation_date)
  @parameters.store('doc_add_creation_date', doc_add_creation_date)
end
|
1969
|
+
|
1970
|
+
# Page layout a PDF viewer should use when it opens the document.
# The default value is SelectPdf::PageLayout::ONE_COLUMN.
#
# @param viewer_page_layout Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).
# Use constants from SelectPdf::PageLayout class.
# @raise ApiException if the value is not one of the allowed layouts.
def viewer_page_layout=(viewer_page_layout)
  if [0, 1, 2, 3].include?(viewer_page_layout)
    @parameters['viewer_page_layout'] = viewer_page_layout
  else
    message = 'Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).'
    raise ApiException.new(message), message
  end
end
|
1981
|
+
|
1982
|
+
# Page mode a PDF viewer should use when it opens the document.
# The default value is SelectPdf::PageMode::USE_NONE.
#
# @param viewer_page_mode Page mode. Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).
# Use constants from SelectPdf::PageMode class.
# @raise ApiException if the value is not one of the allowed modes.
def viewer_page_mode=(viewer_page_mode)
  if [0, 1, 2, 3, 4, 5].include?(viewer_page_mode)
    @parameters['viewer_page_mode'] = viewer_page_mode
  else
    message = 'Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).'
    raise ApiException.new(message), message
  end
end
|
1994
|
+
|
1995
|
+
# Choose whether the document's window is positioned in the center of the screen.
# The default value is False.
#
# @param viewer_center_window Center window or not.
def viewer_center_window=(viewer_center_window)
  @parameters.store('viewer_center_window', viewer_center_window)
end
|
2001
|
+
|
2002
|
+
# Choose whether the window's title bar displays the document title taken from the
# document information. The default value is False.
#
# @param viewer_display_doc_title Display title or not.
def viewer_display_doc_title=(viewer_display_doc_title)
  @parameters.store('viewer_display_doc_title', viewer_display_doc_title)
end
|
2008
|
+
|
2009
|
+
# Choose whether the document's window is resized to fit the first displayed page.
# The default value is False.
#
# @param viewer_fit_window Fit window or not.
def viewer_fit_window=(viewer_fit_window)
  @parameters.store('viewer_fit_window', viewer_fit_window)
end
|
2015
|
+
|
2016
|
+
# Choose whether the pdf viewer application's menu bar is hidden while the document is active.
# The default value is False.
#
# @param viewer_hide_menu_bar Hide menu bar or not.
def viewer_hide_menu_bar=(viewer_hide_menu_bar)
  @parameters.store('viewer_hide_menu_bar', viewer_hide_menu_bar)
end
|
2022
|
+
|
2023
|
+
# Choose whether the pdf viewer application's tool bars are hidden while the document is active.
# The default value is False.
#
# @param viewer_hide_toolbar Hide tool bars or not.
def viewer_hide_toolbar=(viewer_hide_toolbar)
  @parameters.store('viewer_hide_toolbar', viewer_hide_toolbar)
end
|
2029
|
+
|
2030
|
+
# Choose whether user interface elements in the document's window (such as scroll bars
# and navigation controls) are hidden, leaving only the document's contents displayed.
#
# @param viewer_hide_window_ui Hide window UI or not.
def viewer_hide_window_ui=(viewer_hide_window_ui)
  @parameters.store('viewer_hide_window_ui', viewer_hide_window_ui)
end
|
2036
|
+
|
2037
|
+
# User password for the PDF, sent to the API as the 'user_password' parameter.
#
# @param user_password PDF user password.
def user_password=(user_password)
  @parameters.store('user_password', user_password)
end
|
2043
|
+
|
2044
|
+
# Owner password for the PDF, sent to the API as the 'owner_password' parameter.
#
# @param owner_password PDF owner password.
def owner_password=(owner_password)
  @parameters.store('owner_password', owner_password)
end
|
2050
|
+
|
2051
|
+
# Maximum amount of time (in seconds) allowed for this job.
# The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
#
# @param timeout Timeout in seconds.
def timeout=(timeout)
  @parameters.store('timeout', timeout)
end
|
2058
|
+
|
2059
|
+
# Send an arbitrary extra parameter with the request. Do not use this method unless advised by SelectPdf.
#
# @param parameter_name Parameter name.
# @param parameter_value Parameter value.
def set_custom_parameter(parameter_name, parameter_value)
  @parameters.store(parameter_name, parameter_value)
end
|
2066
|
+
end
|
2067
|
+
|
2068
|
+
# Pdf To Text Conversion with SelectPdf Online API.
|
2069
|
+
#
|
2070
|
+
# Code Sample for PDF To Text
|
2071
|
+
#
|
2072
|
+
# require 'selectpdf'
|
2073
|
+
#
|
2074
|
+
# $stdout.sync = true
|
2075
|
+
#
|
2076
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
2077
|
+
#
|
2078
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
2079
|
+
# test_pdf = 'Input.pdf'
|
2080
|
+
# local_file = 'Result.txt'
|
2081
|
+
# api_key = 'Your API key here'
|
2082
|
+
#
|
2083
|
+
# begin
|
2084
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
2085
|
+
#
|
2086
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
2087
|
+
# client.start_page = 1 # start page (processing starts from here)
|
2088
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
2089
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
2090
|
+
#
|
2091
|
+
# print "Starting pdf to text ...\n"
|
2092
|
+
#
|
2093
|
+
# # convert local pdf to local text file
|
2094
|
+
# client.text_from_file_to_file(test_pdf, local_file)
|
2095
|
+
#
|
2096
|
+
# # extract text from local pdf to memory
|
2097
|
+
# # text = client.text_from_file(test_pdf)
|
2098
|
+
# # print text
|
2099
|
+
#
|
2100
|
+
# # convert pdf from public url to local text file
|
2101
|
+
# # client.text_from_url_to_file(test_url, local_file)
|
2102
|
+
#
|
2103
|
+
# # extract text from pdf from public url to memory
|
2104
|
+
# # text = client.text_from_url(test_url)
|
2105
|
+
# # print text
|
2106
|
+
#
|
2107
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
2108
|
+
#
|
2109
|
+
# # get API usage
|
2110
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
2111
|
+
# usage = usage_client.get_usage(FALSE)
|
2112
|
+
# print("Usage: #{usage}\n")
|
2113
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
2114
|
+
# rescue SelectPdf::ApiException => e
|
2115
|
+
# print("An error occurred: #{e}")
|
2116
|
+
# end
|
2117
|
+
#
|
2118
|
+
# Code Sample for Search Pdf
|
2119
|
+
#
|
2120
|
+
# require 'selectpdf'
|
2121
|
+
#
|
2122
|
+
# $stdout.sync = true
|
2123
|
+
#
|
2124
|
+
# print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
|
2125
|
+
#
|
2126
|
+
# test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
|
2127
|
+
# test_pdf = 'Input.pdf'
|
2128
|
+
# api_key = 'Your API key here'
|
2129
|
+
#
|
2130
|
+
# begin
|
2131
|
+
# client = SelectPdf::PdfToTextClient.new(api_key)
|
2132
|
+
#
|
2133
|
+
# # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
|
2134
|
+
# client.start_page = 1 # start page (processing starts from here)
|
2135
|
+
# client.end_page = 0 # end page (set 0 to process file until the end)
|
2136
|
+
# client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
|
2137
|
+
#
|
2138
|
+
# print "Starting search pdf ...\n"
|
2139
|
+
#
|
2140
|
+
# # search local pdf
|
2141
|
+
# results = client.search_file(test_pdf, 'pdf')
|
2142
|
+
#
|
2143
|
+
# # search pdf from public url
|
2144
|
+
# # results = client.search_url(test_url, 'pdf')
|
2145
|
+
#
|
2146
|
+
# print "Search results: #{results}.\nSearch results count: #{results.length}\n"
|
2147
|
+
#
|
2148
|
+
# print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
|
2149
|
+
#
|
2150
|
+
# # get API usage
|
2151
|
+
# usage_client = SelectPdf::UsageClient.new(api_key)
|
2152
|
+
# usage = usage_client.get_usage(FALSE)
|
2153
|
+
# print("Usage: #{usage}\n")
|
2154
|
+
# print('Conversions remained this month: ', usage['available'], "\n")
|
2155
|
+
# rescue SelectPdf::ApiException => e
|
2156
|
+
# print("An error occurred: #{e}")
|
2157
|
+
# end
|
2158
|
+
#
|
2159
|
+
class PdfToTextClient < ApiClient
|
2160
|
+
# Create a client bound to the SelectPdf PDF to text endpoint.
#
# @param api_key API Key.
def initialize(api_key)
  super()
  @file_idx = 0
  @parameters.store('key', api_key)
  @api_endpoint = 'https://selectpdf.com/api2/pdftotext/'
end
|
2170
|
+
|
2171
|
+
# Extract the text of a local PDF with a synchronous API call.
#
# Any previously set 'url' parameter is removed and the input file list is
# replaced by this single document.
#
# @param input_pdf Path to a local PDF file.
# @return Extracted text.
def text_from_file(input_pdf)
  @parameters.update('async' => 'False', 'action' => 'Convert')
  @parameters.delete('url')

  @files = { 'inputPdf' => input_pdf }

  perform_post_as_multipart_formdata
end
|
2185
|
+
|
2186
|
+
# Extract the text of a local PDF and store it in a local text file.
#
# If the API reports an error, any file found at output_file_path is removed and
# the error is re-raised.
#
# @param input_pdf Path to a local PDF file.
# @param output_file_path The output file where the resulted text will be written.
def text_from_file_to_file(input_pdf, output_file_path)
  begin
    # Run the extraction first so the output file is only opened once data is available.
    text = text_from_file(input_pdf)
    File.binwrite(output_file_path, text)
  rescue ApiException
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
    raise
  end
end
|
2199
|
+
|
2200
|
+
# Extract the text of a local PDF and write it to a stream.
#
# @param input_pdf Path to a local PDF file.
# @param stream The output stream where the resulted text will be written.
def text_from_file_to_stream(input_pdf, stream)
  stream.write(text_from_file(input_pdf))
end
|
2208
|
+
|
2209
|
+
# Extract the text of a local PDF with an asynchronous API call.
#
# Starts the job, then polls for its result up to @async_calls_max_pings times,
# sleeping @async_calls_ping_interval seconds before each poll.
#
# @param input_pdf Path to a local PDF file.
# @return Extracted text.
# @raise ApiException if the job cannot be started or does not finish within the allowed pings.
def text_from_file_async(input_pdf)
  @parameters['action'] = 'Convert'
  @parameters.delete('url')

  @files = { 'inputPdf' => input_pdf }

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    raise ApiException.new('An error occurred launching the asynchronous call.'), 'An error occurred launching the asynchronous call.'
  end

  pings = 0

  while pings < @async_calls_max_pings
    pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the id that was just validated rather than an instance variable
    # this method never assigns.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil result: poll again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  raise ApiException.new('Asynchronous call did not finish in expected timeframe.'), 'Asynchronous call did not finish in expected timeframe.'
end
|
2248
|
+
|
2249
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
|
2250
|
+
#
|
2251
|
+
# @param input_pdf Path to a local PDF file.
|
2252
|
+
# @param output_file_path The output file where the resulted text will be written.
|
2253
|
+
def text_from_file_to_file_async(input_pdf, output_file_path)
  begin
    # Binary write of whatever the asynchronous extraction returned.
    extracted_text = text_from_file_async(input_pdf)
    File.binwrite(output_file_path, extracted_text)
  rescue ApiException
    # On an API failure make sure no file is left at the output path, then
    # let the caller see the original exception.
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
    raise
  end
end
|
2262
|
+
|
2263
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
|
2264
|
+
#
|
2265
|
+
# @param input_pdf Path to a local PDF file.
|
2266
|
+
# @param stream The output stream where the resulted text will be written.
|
2267
|
+
def text_from_file_to_stream_async(input_pdf, stream)
  # Run the asynchronous extraction for the local PDF and forward its result
  # to the caller-supplied stream; the value of stream.write is returned.
  stream.write(text_from_file_async(input_pdf))
end
|
2271
|
+
|
2272
|
+
# Get the text from the specified pdf.
|
2273
|
+
#
|
2274
|
+
# @param url Address of the PDF file.
|
2275
|
+
# @return Extracted text.
|
2276
|
+
def text_from_url(url)
  # Extract the text of a PDF available at an http:// or https:// address using
  # a synchronous call.
  #
  # Returns the value produced by perform_post_as_multipart_formdata; raises
  # ApiException for unsupported protocols and for http://localhost addresses.
  normalized_url = url.downcase

  unless normalized_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if normalized_url.start_with?('http://localhost')
    # The exception object and the message passed to raise now carry the same
    # text, naming the Ruby method (text_from_file).
    message = 'Cannot convert local urls via this method. Use text_from_file instead.'
    raise ApiException.new(message), message
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Convert'

  # The document is referenced by url, so no file is uploaded.
  @files = {}
  @parameters['url'] = url

  perform_post_as_multipart_formdata
end
|
2295
|
+
|
2296
|
+
# Get the text from the specified pdf and write it to the specified text file.
|
2297
|
+
#
|
2298
|
+
# @param url Address of the PDF file.
|
2299
|
+
# @param output_file_path The output file where the resulted text will be written.
|
2300
|
+
def text_from_url_to_file(url, output_file_path)
  begin
    # Binary write of whatever the extraction returned.
    extracted_text = text_from_url(url)
    File.binwrite(output_file_path, extracted_text)
  rescue ApiException
    # On an API failure make sure no file is left at the output path, then
    # let the caller see the original exception.
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
    raise
  end
end
|
2309
|
+
|
2310
|
+
# Get the text from the specified pdf and write it to the specified stream.
|
2311
|
+
#
|
2312
|
+
# @param url Address of the PDF file.
|
2313
|
+
# @param stream The output stream where the resulted text will be written.
|
2314
|
+
def text_from_url_to_stream(url, stream)
  # Extract the text of the PDF at the given url via text_from_url and forward
  # it to the caller-supplied stream; the value of stream.write is returned.
  stream.write(text_from_url(url))
end
|
2318
|
+
|
2319
|
+
# Get the text from the specified pdf with an asynchronous call.
|
2320
|
+
#
|
2321
|
+
# @param url Address of the PDF file.
|
2322
|
+
# @return Extracted text.
|
2323
|
+
def text_from_url_async(url)
  # Extract the text of a PDF available at an http:// or https:// address
  # through an asynchronous job: start the job, then poll for its result up to
  # @async_calls_max_pings times, sleeping @async_calls_ping_interval seconds
  # before each ping.
  #
  # Returns the job result; raises ApiException for unsupported or
  # http://localhost addresses, when the job cannot be started, or when no
  # result arrives within the allowed number of pings.
  normalized_url = url.downcase

  unless normalized_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if normalized_url.start_with?('http://localhost')
    # The exception object and the message passed to raise now carry the same
    # text, naming the Ruby method (text_from_file_async).
    message = 'Cannot convert local urls via this method. Use text_from_file_async instead.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Convert'

  # The document is referenced by url, so no file is uploaded.
  @files = {}
  @parameters['url'] = url

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the id that was just returned and validated above, rather than
    # an instance variable this method never assigns.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil is treated as "no result yet" - ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2368
|
+
|
2369
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
|
2370
|
+
#
|
2371
|
+
# @param url Address of the PDF file.
|
2372
|
+
# @param output_file_path The output file where the resulted text will be written.
|
2373
|
+
def text_from_url_to_file_async(url, output_file_path)
  begin
    # Binary write of whatever the asynchronous extraction returned.
    extracted_text = text_from_url_async(url)
    File.binwrite(output_file_path, extracted_text)
  rescue ApiException
    # On an API failure make sure no file is left at the output path, then
    # let the caller see the original exception.
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
    raise
  end
end
|
2382
|
+
|
2383
|
+
# Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
|
2384
|
+
#
|
2385
|
+
# @param url Address of the PDF file.
|
2386
|
+
# @param stream The output stream where the resulted text will be written.
|
2387
|
+
def text_from_url_to_stream_async(url, stream)
  # Run the asynchronous extraction for the PDF at the given url and forward
  # its result to the caller-supplied stream; the value of stream.write is
  # returned.
  stream.write(text_from_url_async(url))
end
|
2391
|
+
|
2392
|
+
# Search for a specific text in a PDF document.
|
2393
|
+
# Pages that participate in this operation are specified by the start_page and end_page setters.
|
2394
|
+
#
|
2395
|
+
# @param input_pdf Path to a local PDF file.
|
2396
|
+
# @param text_to_search Text to search.
|
2397
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2398
|
+
# @param whole_words_only If the search works on whole words or not.
|
2399
|
+
# @return List with text positions in the current PDF document.
|
2400
|
+
def search_file(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # Search a local PDF for text_to_search using a synchronous call.
  #
  # The defaults are the literal `false`: the FALSE constant is deprecated and
  # is not defined on newer Ruby versions.
  #
  # Returns the parsed JSON response, or [] when the response is nil or empty;
  # raises ApiException when text_to_search is nil or empty.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  # The document is uploaded as a file, so a url left from an earlier call is dropped.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
|
2422
|
+
|
2423
|
+
# Search for a specific text in a PDF document with an asynchronous call.
|
2424
|
+
# Pages that participate in this operation are specified by the start_page and end_page setters.
|
2425
|
+
#
|
2426
|
+
# @param input_pdf Path to a local PDF file.
|
2427
|
+
# @param text_to_search Text to search.
|
2428
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2429
|
+
# @param whole_words_only If the search works on whole words or not.
|
2430
|
+
# @return List with text positions in the current PDF document.
|
2431
|
+
def search_file_async(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # Search a local PDF for text_to_search through an asynchronous job: start
  # the job, then poll for its result up to @async_calls_max_pings times,
  # sleeping @async_calls_ping_interval seconds before each ping.
  #
  # The defaults are the literal `false`: the FALSE constant is deprecated and
  # is not defined on newer Ruby versions.
  #
  # Returns the parsed JSON result, or [] when the result is empty; raises
  # ApiException when text_to_search is nil or empty, when the job cannot be
  # started, or when no result arrives within the allowed number of pings.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Search'
  # The document is uploaded as a file, so a url left from an earlier call is dropped.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the id that was just returned and validated above, rather than
    # an instance variable this method never assigns.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil is treated as "no result yet" - ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    return [] if result.empty?

    return JSON.parse(result)
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2478
|
+
|
2479
|
+
# Search for a specific text in a PDF document.
|
2480
|
+
# Pages that participate in this operation are specified by the start_page and end_page setters.
|
2481
|
+
#
|
2482
|
+
# @param url Address of the PDF file.
|
2483
|
+
# @param text_to_search Text to search.
|
2484
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2485
|
+
# @param whole_words_only If the search works on whole words or not.
|
2486
|
+
# @return List with text positions in the current PDF document.
|
2487
|
+
def search_url(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # Search the PDF at the given url for text_to_search using a synchronous call.
  #
  # The defaults are the literal `false`: the FALSE constant is deprecated and
  # is not defined on newer Ruby versions.
  #
  # Returns the parsed JSON response, or [] when the response is nil or empty;
  # raises ApiException when text_to_search is nil or empty.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The document is referenced by url, so no file is uploaded.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
|
2508
|
+
|
2509
|
+
# Search for a specific text in a PDF document with an asynchronous call.
|
2510
|
+
# Pages that participate in this operation are specified by the start_page and end_page setters.
|
2511
|
+
#
|
2512
|
+
# @param url Address of the PDF file.
|
2513
|
+
# @param text_to_search Text to search.
|
2514
|
+
# @param case_sensitive If the search is case sensitive or not.
|
2515
|
+
# @param whole_words_only If the search works on whole words or not.
|
2516
|
+
# @return List with text positions in the current PDF document.
|
2517
|
+
def search_url_async(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # Search the PDF at the given url for text_to_search through an asynchronous
  # job: start the job, then poll for its result up to @async_calls_max_pings
  # times, sleeping @async_calls_ping_interval seconds before each ping.
  #
  # The defaults are the literal `false`: the FALSE constant is deprecated and
  # is not defined on newer Ruby versions.
  #
  # Returns the parsed JSON result, or [] when the result is empty; raises
  # ApiException when text_to_search is nil or empty, when the job cannot be
  # started, or when no result arrives within the allowed number of pings.
  if text_to_search.nil? || text_to_search.empty?
    message = 'Search text cannot be empty.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The document is referenced by url, so no file is uploaded.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the id that was just returned and validated above, rather than
    # an instance variable this method never assigns.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil is treated as "no result yet" - ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    return [] if result.empty?

    return JSON.parse(result)
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
|
2563
|
+
|
2564
|
+
# Set Start Page number. Default value is 1 (first page of the document).
|
2565
|
+
#
|
2566
|
+
# @param start_page Start page number (1-based).
|
2567
|
+
def start_page=(start_page)
  # Record the first page to process under the 'start_page' request parameter.
  @parameters.store('start_page', start_page)
end
|
2570
|
+
|
2571
|
+
# Set End Page number. Default value is 0 (process till the last page of the document).
|
2572
|
+
#
|
2573
|
+
# @param end_page End page number (1-based).
|
2574
|
+
def end_page=(end_page)
  # Record the last page to process under the 'end_page' request parameter.
  @parameters.store('end_page', end_page)
end
|
2577
|
+
|
2578
|
+
# Set PDF user password.
|
2579
|
+
#
|
2580
|
+
# @param user_password PDF user password.
|
2581
|
+
def user_password=(user_password)
  # Record the PDF user password under the 'user_password' request parameter.
  @parameters.store('user_password', user_password)
end
|
2584
|
+
|
2585
|
+
# Set the text layout. The default value is SelectPdf::TextLayout::ORIGINAL.
|
2586
|
+
#
|
2587
|
+
# @param text_layout The text layout. Possible values: Original, Reading. Use constants from SelectPdf::TextLayout class.
|
2588
|
+
def text_layout=(text_layout)
  # Only 0 (Original) and 1 (Reading) are accepted; anything else is rejected
  # before the 'text_layout' request parameter is touched.
  if [0, 1].none? { |allowed| allowed == text_layout }
    message = 'Allowed values for Text Layout: 0 (Original), 1 (Reading).'
    raise ApiException.new(message), message
  end

  @parameters.store('text_layout', text_layout)
end
|
2595
|
+
|
2596
|
+
# Set the output format. The default value is SelectPdf::OutputFormat::TEXT.
|
2597
|
+
#
|
2598
|
+
# @param output_format The output format. Possible values: Text, Html. Use constants from SelectPdf::OutputFormat class.
|
2599
|
+
def output_format=(output_format)
  # Only 0 (Text) and 1 (Html) are accepted; anything else is rejected before
  # the 'output_format' request parameter is touched.
  if [0, 1].none? { |allowed| allowed == output_format }
    message = 'Allowed values for Output Format: 0 (Text), 1 (Html).'
    raise ApiException.new(message), message
  end

  @parameters.store('output_format', output_format)
end
|
2606
|
+
|
2607
|
+
# Set the maximum amount of time (in seconds) for this job.
|
2608
|
+
# The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
|
2609
|
+
#
|
2610
|
+
# @param timeout Timeout in seconds.
|
2611
|
+
def timeout=(timeout)
  # Record the job time limit under the 'timeout' request parameter.
  @parameters.store('timeout', timeout)
end
|
2614
|
+
|
2615
|
+
# Set a custom parameter. Do not use this method unless advised by SelectPdf.
|
2616
|
+
#
|
2617
|
+
# @param parameter_name Parameter name.
|
2618
|
+
# @param parameter_value Parameter value.
|
2619
|
+
def set_custom_parameter(parameter_name, parameter_value)
  # Store an arbitrary name/value pair in the request parameters, replacing
  # any value already present under that name.
  @parameters.store(parameter_name, parameter_value)
end
|
2622
|
+
end
|
1528
2623
|
end
|