selectpdf 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/selectpdf.rb CHANGED
@@ -5,16 +5,172 @@ require 'json'
5
5
  require 'fileutils'
6
6
 
7
7
  #
8
- # SelectPdf Online REST API Ruby client library. Contains a powerful HTML to PDF converter.
8
+ # SelectPdf Online REST API Ruby client library. Contains an HTML to PDF converter, a PDF merge tool, a PDF to text extractor and a PDF search tool.
9
9
  #
10
10
  #
11
11
  # Convert HTML to PDF
12
12
  #
13
- # {include:file:samples/simple_url_to_pdf.rb}
13
+ # require 'selectpdf'
14
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
15
+ #
16
+ # url = 'https://selectpdf.com'
17
+ # local_file = 'Test.pdf'
18
+ # api_key = 'Your API key here'
19
+ #
20
+ # begin
21
+ # api = SelectPdf::HtmlToPdfClient.new(api_key)
22
+ #
23
+ # api.page_size = SelectPdf::PageSize::A4
24
+ # api.margins = 0
25
+ # api.page_numbers = FALSE
26
+ # api.page_breaks_enhanced_algorithm = TRUE
27
+ #
28
+ # api.convert_url_to_file(url, local_file)
29
+ # rescue SelectPdf::ApiException => e
30
+ # print("An error occurred: #{e}")
31
+ # end
32
+ #
33
+ # Merge PDFs from local disk or public url and save result into a file on disk.
34
+ #
35
+ # require 'selectpdf'
36
+ #
37
+ # $stdout.sync = true
38
+ #
39
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
40
+ #
41
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
42
+ # test_pdf = 'Input.pdf'
43
+ # local_file = 'Result.pdf'
44
+ # api_key = 'Your API key here'
45
+ #
46
+ # begin
47
+ # client = SelectPdf::PdfMergeClient.new(api_key)
48
+ #
49
+ # # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
50
+ #
51
+ # # specify the pdf files that will be merged (order will be preserved in the final pdf)
52
+ # client.add_file(test_pdf) # add PDF from local file
53
+ # client.add_url_file(test_url) # add PDF from public url
54
+ # # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
55
+ # # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
56
+ #
57
+ # print "Starting pdf merge ...\n"
58
+ #
59
+ # # merge pdfs to local file
60
+ # client.save_to_file(local_file)
61
+ #
62
+ # # merge pdfs to memory
63
+ # # pdf = client.save
64
+ #
65
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
66
+ #
67
+ # # get API usage
68
+ # usage_client = SelectPdf::UsageClient.new(api_key)
69
+ # usage = usage_client.get_usage(FALSE)
70
+ # print("Usage: #{usage}\n")
71
+ # print('Conversions remained this month: ', usage['available'], "\n")
72
+ # rescue SelectPdf::ApiException => e
73
+ # print("An error occurred: #{e}")
74
+ # end
75
+ #
76
+ # Extract text from PDF
77
+ #
78
+ # require 'selectpdf'
79
+ #
80
+ # $stdout.sync = true
81
+ #
82
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
83
+ #
84
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
85
+ # test_pdf = 'Input.pdf'
86
+ # local_file = 'Result.txt'
87
+ # api_key = 'Your API key here'
88
+ #
89
+ # begin
90
+ # client = SelectPdf::PdfToTextClient.new(api_key)
91
+ #
92
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
93
+ # client.start_page = 1 # start page (processing starts from here)
94
+ # client.end_page = 0 # end page (set 0 to process the file until the end)
95
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
96
+ #
97
+ # print "Starting pdf to text ...\n"
98
+ #
99
+ # # convert local pdf to local text file
100
+ # client.text_from_file_to_file(test_pdf, local_file)
101
+ #
102
+ # # extract text from local pdf to memory
103
+ # # text = client.text_from_file(test_pdf)
104
+ # # print text
105
+ #
106
+ # # convert pdf from public url to local text file
107
+ # # client.text_from_url_to_file(test_url, local_file)
108
+ #
109
+ # # extract text from pdf from public url to memory
110
+ # # text = client.text_from_url(test_url)
111
+ # # print text
112
+ #
113
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
114
+ #
115
+ # # get API usage
116
+ # usage_client = SelectPdf::UsageClient.new(api_key)
117
+ # usage = usage_client.get_usage(FALSE)
118
+ # print("Usage: #{usage}\n")
119
+ # print('Conversions remained this month: ', usage['available'], "\n")
120
+ # rescue SelectPdf::ApiException => e
121
+ # print("An error occurred: #{e}")
122
+ # end
123
+ #
124
+ # Search Pdf
125
+ #
126
+ # require 'selectpdf'
127
+ #
128
+ # $stdout.sync = true
129
+ #
130
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
131
+ #
132
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
133
+ # test_pdf = 'Input.pdf'
134
+ # api_key = 'Your API key here'
135
+ #
136
+ # begin
137
+ # client = SelectPdf::PdfToTextClient.new(api_key)
138
+ #
139
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
140
+ # client.start_page = 1 # start page (processing starts from here)
141
+ # client.end_page = 0 # end page (set 0 to process the file until the end)
142
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
143
+ #
144
+ # print "Starting search pdf ...\n"
145
+ #
146
+ # # search local pdf
147
+ # results = client.search_file(test_pdf, 'pdf')
148
+ #
149
+ # # search pdf from public url
150
+ # # results = client.search_url(test_url, 'pdf')
151
+ #
152
+ # print "Search results: #{results}.\nSearch results count: #{results.length}\n"
153
+ #
154
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
155
+ #
156
+ # # get API usage
157
+ # usage_client = SelectPdf::UsageClient.new(api_key)
158
+ # usage = usage_client.get_usage(FALSE)
159
+ # print("Usage: #{usage}\n")
160
+ # print('Conversions remained this month: ', usage['available'], "\n")
161
+ # rescue SelectPdf::ApiException => e
162
+ # print("An error occurred: #{e}")
163
+ # end
164
+ #
14
165
  module SelectPdf
166
+ # Multipart/form-data boundary
15
167
  MULTIPART_FORM_DATA_BOUNDARY = '------------SelectPdf_Api_Boundry_$'
168
+
169
+ # New line
16
170
  NEW_LINE = "\r\n"
17
- CLIENT_VERSION = '1.3.0'
171
+
172
+ # Library version
173
+ CLIENT_VERSION = '1.4.0'
18
174
 
19
175
  attr_reader :code, :message
20
176
  #
@@ -470,7 +626,70 @@ module SelectPdf
470
626
  #
471
627
  # Code sample:
472
628
  #
473
- # {include:file:samples/html_to_pdf_main.rb}
629
+ # require 'selectpdf'
630
+ #
631
+ # $stdout.sync = true
632
+ #
633
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
634
+ #
635
+ # url = 'https://selectpdf.com'
636
+ # local_file = 'Test.pdf'
637
+ # api_key = 'Your API key here'
638
+ #
639
+ # begin
640
+ # client = SelectPdf::HtmlToPdfClient.new(api_key)
641
+ #
642
+ # # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
643
+ #
644
+ # client.page_size = SelectPdf::PageSize::A4 # PDF page size
645
+ # client.page_orientation = SelectPdf::PageOrientation::PORTRAIT # PDF page orientation
646
+ # client.margins = 0 # PDF page margins
647
+ # client.rendering_engine = SelectPdf::RenderingEngine::WEBKIT # rendering engine
648
+ # client.conversion_delay = 1 # conversion delay
649
+ # client.navigation_timeout = 30 # navigation timeout
650
+ # client.page_numbers = FALSE # page numbers
651
+ # client.page_breaks_enhanced_algorithm = TRUE # enhanced page break algorithm
652
+ #
653
+ # # additional properties
654
+ #
655
+ # # client.use_css_print = TRUE # enable CSS media print
656
+ # # client.disable_javascript = TRUE # disable javascript
657
+ # # client.disable_internal_links = TRUE # disable internal links
658
+ # # client.disable_external_links = TRUE # disable external links
659
+ # # client.keep_images_together = TRUE # keep images together
660
+ # # client.scale_images = TRUE # scale images to create smaller pdfs
661
+ # # client.single_page_pdf = TRUE # generate a single page PDF
662
+ # # client.user_password = 'password' # secure the PDF with a password
663
+ #
664
+ # # generate automatic bookmarks
665
+ #
666
+ # # client.pdf_bookmarks_selectors = 'H1, H2' # create outlines (bookmarks) for the specified elements
667
+ # # client.viewer_page_mode = SelectPdf::PageMode::USE_OUTLINES # display outlines (bookmarks) in viewer
668
+ #
669
+ # print "Starting conversion ...\n"
670
+ #
671
+ # # convert url to file
672
+ # client.convert_url_to_file(url, local_file)
673
+ #
674
+ # # convert url to memory
675
+ # # pdf = client.convert_url(url)
676
+ #
677
+ # # convert html string to file
678
+ # # client.convert_html_string_to_file('This is some <b>html</b>.', local_file)
679
+ #
680
+ # # convert html string to memory
681
+ # # pdf = client.convert_html_string('This is some <b>html</b>.')
682
+ #
683
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
684
+ #
685
+ # # get API usage
686
+ # usage_client = SelectPdf::UsageClient.new(api_key)
687
+ # usage = usage_client.get_usage(FALSE)
688
+ # print("Usage: #{usage}\n")
689
+ # print('Conversions remained this month: ', usage['available'], "\n")
690
+ # rescue SelectPdf::ApiException => e
691
+ # print("An error occurred: #{e}")
692
+ # end
474
693
  class HtmlToPdfClient < ApiClient
475
694
  # Construct the Html To Pdf Client.
476
695
  #
@@ -1525,4 +1744,880 @@ module SelectPdf
1525
1744
  return nil
1526
1745
  end
1527
1746
  end
1747
+
1748
+ # Pdf Merge with SelectPdf Online API.
1749
+ #
1750
+ # Code sample:
1751
+ #
1752
+ # require 'selectpdf'
1753
+ #
1754
+ # $stdout.sync = true
1755
+ #
1756
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
1757
+ #
1758
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
1759
+ # test_pdf = 'Input.pdf'
1760
+ # local_file = 'Result.pdf'
1761
+ # api_key = 'Your API key here'
1762
+ #
1763
+ # begin
1764
+ # client = SelectPdf::PdfMergeClient.new(api_key)
1765
+ #
1766
+ # # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
1767
+ #
1768
+ # # specify the pdf files that will be merged (order will be preserved in the final pdf)
1769
+ # client.add_file(test_pdf) # add PDF from local file
1770
+ # client.add_url_file(test_url) # add PDF from public url
1771
+ # # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
1772
+ # # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
1773
+ #
1774
+ # print "Starting pdf merge ...\n"
1775
+ #
1776
+ # # merge pdfs to local file
1777
+ # client.save_to_file(local_file)
1778
+ #
1779
+ # # merge pdfs to memory
1780
+ # # pdf = client.save
1781
+ #
1782
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
1783
+ #
1784
+ # # get API usage
1785
+ # usage_client = SelectPdf::UsageClient.new(api_key)
1786
+ # usage = usage_client.get_usage(FALSE)
1787
+ # print("Usage: #{usage}\n")
1788
+ # print('Conversions remained this month: ', usage['available'], "\n")
1789
+ # rescue SelectPdf::ApiException => e
1790
+ # print("An error occurred: #{e}")
1791
+ # end
1792
class PdfMergeClient < ApiClient
  # Create a client bound to the PDF merge endpoint.
  #
  # @param api_key API Key.
  def initialize(api_key)
    super()
    @parameters['key'] = api_key
    @api_endpoint = 'https://selectpdf.com/api2/pdfmerge/'

    # Count of inputs queued so far; it also numbers the per-input keys
    # ("file_N", "url_N", "password_N").
    @file_idx = 0
  end

  # Queue a PDF stored on the local disk as the next merge input.
  #
  # @param input_pdf Path to a local PDF file.
  # @param user_password User password for the PDF document (optional).
  def add_file(input_pdf, user_password = nil)
    slot = (@file_idx += 1)

    # A slot holds either a local file or a url, never both.
    @files["file_#{slot}"] = input_pdf
    @parameters.delete("url_#{slot}")

    password_key = "password_#{slot}"
    return @parameters.delete(password_key) if user_password.nil? || user_password.empty?

    @parameters[password_key] = user_password
  end

  # Queue a PDF reachable through a url as the next merge input.
  #
  # @param input_url Url of a remote PDF file.
  # @param user_password User password for the PDF document (optional).
  def add_url_file(input_url, user_password = nil)
    slot = (@file_idx += 1)

    # A slot holds either a url or a local file, never both.
    @parameters["url_#{slot}"] = input_url
    @files.delete("file_#{slot}")

    password_key = "password_#{slot}"
    return @parameters.delete(password_key) if user_password.nil? || user_password.empty?

    @parameters[password_key] = user_password
  end

  # Merge every queued input with a synchronous request.
  # The input queue is emptied only after the request returns.
  #
  # @return Byte array containing the resulted PDF.
  def save
    @parameters['async'] = 'False'
    @parameters['files_no'] = @file_idx

    perform_post_as_multipart_formdata.tap do
      @file_idx = 0
      @files = {}
    end
  end

  # Merge every queued input and write the merged PDF to a stream.
  #
  # @param stream The output stream where the resulted PDF will be written.
  def save_to_stream(stream)
    stream.write(save)
  end

  # Merge every queued input and write the merged PDF to a local file.
  # When an ApiException is raised the target file is removed (if it exists)
  # and the exception is re-raised.
  #
  # @param file_path Local file including path if necessary.
  def save_to_file(file_path)
    merged_pdf = save
    File.open(file_path, 'wb') { |out| out.write(merged_pdf) }
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end

  # Merge every queued input using an asynchronous job that is polled until
  # a result is available or the configured number of pings is exhausted.
  #
  # @return Byte array containing the resulted PDF.
  def save_async
    @parameters['files_no'] = @file_idx

    job_id = start_async_job_multipart_form_data
    if job_id.nil? || job_id.empty?
      launch_error = 'An error occurred launching the asynchronous call.'
      raise ApiException.new(launch_error), launch_error
    end

    pings_done = 0
    until pings_done >= @async_calls_max_pings
      pings_done += 1

      # wait before asking for the job result again
      sleep(@async_calls_ping_interval)

      # NOTE(review): the poller receives @job_id, not the local job_id -
      # presumably the start call stores it; confirm in ApiClient.
      poller = AsyncJobClient.new(@parameters['key'], @job_id)
      poller.api_endpoint = @api_async_endpoint

      merged_pdf = poller.result
      next if merged_pdf.nil?

      @number_of_pages = poller.number_of_pages
      @file_idx = 0
      @files = {}

      return merged_pdf
    end

    # No result within the allowed pings: empty the queue and report it.
    @file_idx = 0
    @files = {}

    timeout_error = 'Asynchronous call did not finish in expected timeframe.'
    raise ApiException.new(timeout_error), timeout_error
  end

  # Asynchronous variant of save_to_stream.
  #
  # @param stream The output stream where the resulted PDF will be written.
  def save_to_stream_async(stream)
    stream.write(save_async)
  end

  # Asynchronous variant of save_to_file.
  # When an ApiException is raised the target file is removed (if it exists)
  # and the exception is re-raised.
  #
  # @param file_path Local file including path if necessary.
  def save_to_file_async(file_path)
    merged_pdf = save_async
    File.open(file_path, 'wb') { |out| out.write(merged_pdf) }
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end

  # Title stored in the PDF document information.
  #
  # @param doc_title Document title.
  def doc_title=(doc_title)
    @parameters['doc_title'] = doc_title
  end

  # Subject stored in the PDF document information.
  #
  # @param doc_subject Document subject.
  def doc_subject=(doc_subject)
    @parameters['doc_subject'] = doc_subject
  end

  # Keywords stored in the PDF document information.
  #
  # @param doc_keywords Document keywords.
  def doc_keywords=(doc_keywords)
    @parameters['doc_keywords'] = doc_keywords
  end

  # Author name stored in the PDF document information.
  #
  # @param doc_author Document author.
  def doc_author=(doc_author)
    @parameters['doc_author'] = doc_author
  end

  # Whether the creation date and time are added to the PDF document information.
  # The default value is False.
  #
  # @param doc_add_creation_date Add creation date to the document metadata or not.
  def doc_add_creation_date=(doc_add_creation_date)
    @parameters['doc_add_creation_date'] = doc_add_creation_date
  end

  # Page layout used when the document is opened in a PDF viewer.
  # The default value is SelectPdf::PageLayout::ONE_COLUMN.
  #
  # @param viewer_page_layout Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).
  #   Use constants from SelectPdf::PageLayout class.
  def viewer_page_layout=(viewer_page_layout)
    allowed_layouts = [0, 1, 2, 3]
    unless allowed_layouts.include?(viewer_page_layout)
      layout_error = 'Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).'
      raise ApiException.new(layout_error), layout_error
    end

    @parameters['viewer_page_layout'] = viewer_page_layout
  end

  # Page mode used when the document is opened in a PDF viewer.
  # The default value is SelectPdf::PageMode::USE_NONE.
  #
  # @param viewer_page_mode Page mode. Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).
  #   Use constants from SelectPdf::PageMode class.
  def viewer_page_mode=(viewer_page_mode)
    allowed_modes = [0, 1, 2, 3, 4, 5]
    unless allowed_modes.include?(viewer_page_mode)
      mode_error = 'Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).'
      raise ApiException.new(mode_error), mode_error
    end

    @parameters['viewer_page_mode'] = viewer_page_mode
  end

  # Whether the document's window is positioned in the center of the screen.
  # The default value is False.
  #
  # @param viewer_center_window Center window or not.
  def viewer_center_window=(viewer_center_window)
    @parameters['viewer_center_window'] = viewer_center_window
  end

  # Whether the window's title bar shows the document title taken from the
  # document information. The default value is False.
  #
  # @param viewer_display_doc_title Display title or not.
  def viewer_display_doc_title=(viewer_display_doc_title)
    @parameters['viewer_display_doc_title'] = viewer_display_doc_title
  end

  # Whether the document's window is resized to fit the first displayed page.
  # The default value is False.
  #
  # @param viewer_fit_window Fit window or not.
  def viewer_fit_window=(viewer_fit_window)
    @parameters['viewer_fit_window'] = viewer_fit_window
  end

  # Whether the pdf viewer application's menu bar is hidden while the document
  # is active. The default value is False.
  #
  # @param viewer_hide_menu_bar Hide menu bar or not.
  def viewer_hide_menu_bar=(viewer_hide_menu_bar)
    @parameters['viewer_hide_menu_bar'] = viewer_hide_menu_bar
  end

  # Whether the pdf viewer application's tool bars are hidden while the document
  # is active. The default value is False.
  #
  # @param viewer_hide_toolbar Hide tool bars or not.
  def viewer_hide_toolbar=(viewer_hide_toolbar)
    @parameters['viewer_hide_toolbar'] = viewer_hide_toolbar
  end

  # Whether user interface elements of the document's window (such as scroll bars
  # and navigation controls) are hidden, leaving only the document's contents.
  #
  # @param viewer_hide_window_ui Hide window UI or not.
  def viewer_hide_window_ui=(viewer_hide_window_ui)
    @parameters['viewer_hide_window_ui'] = viewer_hide_window_ui
  end

  # User password of the PDF.
  #
  # @param user_password PDF user password.
  def user_password=(user_password)
    @parameters['user_password'] = user_password
  end

  # Owner password of the PDF.
  #
  # @param owner_password PDF owner password.
  def owner_password=(owner_password)
    @parameters['owner_password'] = owner_password
  end

  # Maximum amount of time (in seconds) for this job.
  # The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
  #
  # @param timeout Timeout in seconds.
  def timeout=(timeout)
    @parameters['timeout'] = timeout
  end

  # Store an arbitrary request parameter. Do not use this method unless advised by SelectPdf.
  #
  # @param parameter_name Parameter name.
  # @param parameter_value Parameter value.
  def set_custom_parameter(parameter_name, parameter_value)
    @parameters[parameter_name] = parameter_value
  end
end
2067
+
2068
+ # Pdf To Text Conversion with SelectPdf Online API.
2069
+ #
2070
+ # Code Sample for PDF To Text
2071
+ #
2072
+ # require 'selectpdf'
2073
+ #
2074
+ # $stdout.sync = true
2075
+ #
2076
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
2077
+ #
2078
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
2079
+ # test_pdf = 'Input.pdf'
2080
+ # local_file = 'Result.txt'
2081
+ # api_key = 'Your API key here'
2082
+ #
2083
+ # begin
2084
+ # client = SelectPdf::PdfToTextClient.new(api_key)
2085
+ #
2086
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
2087
+ # client.start_page = 1 # start page (processing starts from here)
2088
+ # client.end_page = 0 # end page (set 0 to process the file until the end)
2089
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
2090
+ #
2091
+ # print "Starting pdf to text ...\n"
2092
+ #
2093
+ # # convert local pdf to local text file
2094
+ # client.text_from_file_to_file(test_pdf, local_file)
2095
+ #
2096
+ # # extract text from local pdf to memory
2097
+ # # text = client.text_from_file(test_pdf)
2098
+ # # print text
2099
+ #
2100
+ # # convert pdf from public url to local text file
2101
+ # # client.text_from_url_to_file(test_url, local_file)
2102
+ #
2103
+ # # extract text from pdf from public url to memory
2104
+ # # text = client.text_from_url(test_url)
2105
+ # # print text
2106
+ #
2107
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
2108
+ #
2109
+ # # get API usage
2110
+ # usage_client = SelectPdf::UsageClient.new(api_key)
2111
+ # usage = usage_client.get_usage(FALSE)
2112
+ # print("Usage: #{usage}\n")
2113
+ # print('Conversions remained this month: ', usage['available'], "\n")
2114
+ # rescue SelectPdf::ApiException => e
2115
+ # print("An error occurred: #{e}")
2116
+ # end
2117
+ #
2118
+ # Code Sample for Search Pdf
2119
+ #
2120
+ # require 'selectpdf'
2121
+ #
2122
+ # $stdout.sync = true
2123
+ #
2124
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
2125
+ #
2126
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
2127
+ # test_pdf = 'Input.pdf'
2128
+ # api_key = 'Your API key here'
2129
+ #
2130
+ # begin
2131
+ # client = SelectPdf::PdfToTextClient.new(api_key)
2132
+ #
2133
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
2134
+ # client.start_page = 1 # start page (processing starts from here)
2135
+ # client.end_page = 0 # end page (set 0 to process the file until the end)
2136
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
2137
+ #
2138
+ # print "Starting search pdf ...\n"
2139
+ #
2140
+ # # search local pdf
2141
+ # results = client.search_file(test_pdf, 'pdf')
2142
+ #
2143
+ # # search pdf from public url
2144
+ # # results = client.search_url(test_url, 'pdf')
2145
+ #
2146
+ # print "Search results: #{results}.\nSearch results count: #{results.length}\n"
2147
+ #
2148
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
2149
+ #
2150
+ # # get API usage
2151
+ # usage_client = SelectPdf::UsageClient.new(api_key)
2152
+ # usage = usage_client.get_usage(FALSE)
2153
+ # print("Usage: #{usage}\n")
2154
+ # print('Conversions remained this month: ', usage['available'], "\n")
2155
+ # rescue SelectPdf::ApiException => e
2156
+ # print("An error occurred: #{e}")
2157
+ # end
2158
+ #
2159
+ class PdfToTextClient < ApiClient
2160
+ # Construct the Pdf To Text Client.
2161
+ #
2162
+ # @param api_key API Key.
2163
def initialize(api_key)
  # Parent setup runs first; @parameters is used right after it
  # (presumably created there - confirm in ApiClient).
  super()
  @parameters['key'] = api_key
  @api_endpoint = 'https://selectpdf.com/api2/pdftotext/'

  @file_idx = 0
end
2170
+
2171
+ # Get the text from the specified pdf.
2172
+ #
2173
+ # @param input_pdf Path to a local PDF file.
2174
+ # @return Extracted text.
2175
def text_from_file(input_pdf)
  # Synchronous 'Convert' request for a local PDF.
  @parameters['async'] = 'False'
  @parameters['action'] = 'Convert'

  # The local file is the only input: drop any url left from an earlier call.
  @parameters.delete('url')
  @files = { 'inputPdf' => input_pdf }

  perform_post_as_multipart_formdata
end
2185
+
2186
+ # Get the text from the specified pdf and write it to the specified text file.
2187
+ #
2188
+ # @param input_pdf Path to a local PDF file.
2189
+ # @param output_file_path The output file where the resulted text will be written.
2190
def text_from_file_to_file(input_pdf, output_file_path)
  # Extract first, then write in binary mode.
  extracted = text_from_file(input_pdf)
  File.open(output_file_path, 'wb') { |out| out.write(extracted) }
rescue ApiException
  # On an API failure the output file is removed if it exists, then the error is re-raised.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2199
+
2200
+ # Get the text from the specified pdf and write it to the specified stream.
2201
+ #
2202
+ # @param input_pdf Path to a local PDF file.
2203
+ # @param stream The output stream where the extracted text will be written.
2204
def text_from_file_to_stream(input_pdf, stream)
  # Forward the extracted text straight into the caller's stream.
  stream.write(text_from_file(input_pdf))
end
2208
+
2209
+ # Get the text from the specified pdf with an asynchronous call.
2210
+ #
2211
+ # @param input_pdf Path to a local PDF file.
2212
+ # @return Extracted text.
2213
def text_from_file_async(input_pdf)
  # Asynchronous 'Convert' request for a local PDF.
  @parameters['action'] = 'Convert'
  @parameters.delete('url')
  @files = { 'inputPdf' => input_pdf }

  job_id = start_async_job_multipart_form_data
  if job_id.nil? || job_id.empty?
    launch_error = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(launch_error), launch_error
  end

  pings_done = 0
  until pings_done >= @async_calls_max_pings
    pings_done += 1

    # wait before asking for the job result again
    sleep(@async_calls_ping_interval)

    # NOTE(review): the poller receives @job_id, not the local job_id -
    # presumably the start call stores it; confirm in ApiClient.
    poller = AsyncJobClient.new(@parameters['key'], @job_id)
    poller.api_endpoint = @api_async_endpoint

    extracted = poller.result
    next if extracted.nil?

    @number_of_pages = poller.number_of_pages
    return extracted
  end

  timeout_error = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(timeout_error), timeout_error
end
2248
+
2249
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
2250
+ #
2251
+ # @param input_pdf Path to a local PDF file.
2252
+ # @param output_file_path The output file where the resulted text will be written.
2253
def text_from_file_to_file_async(input_pdf, output_file_path)
  # Extract through the asynchronous call, then write in binary mode.
  extracted = text_from_file_async(input_pdf)
  File.open(output_file_path, 'wb') { |out| out.write(extracted) }
rescue ApiException
  # On an API failure the output file is removed if it exists, then the error is re-raised.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2262
+
2263
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
2264
+ #
2265
+ # @param input_pdf Path to a local PDF file.
2266
+ # @param stream The output stream where the extracted text will be written.
2267
def text_from_file_to_stream_async(input_pdf, stream)
  # Forward the asynchronously extracted text straight into the caller's stream.
  stream.write(text_from_file_async(input_pdf))
end
2271
+
2272
+ # Get the text from the specified pdf.
2273
+ #
2274
+ # @param url Address of the PDF file.
2275
+ # @return Extracted text.
2276
def text_from_url(url)
  # Synchronous 'Convert' request for a PDF reachable through a public url.
  # Raises ApiException when the url is not http(s) or starts with http://localhost.
  lowered = url.downcase

  unless lowered.start_with?('http://', 'https://')
    protocol_error = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(protocol_error), protocol_error
  end

  if lowered.start_with?('http://localhost')
    # The exception object and the raise message now carry the same text and
    # name the Ruby method (the object used to say "getTextFromFile").
    local_error = 'Cannot convert local urls via this method. Use text_from_file instead.'
    raise ApiException.new(local_error), local_error
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Convert'

  # The url is the only input: drop any local file left from an earlier call.
  @files = {}
  @parameters['url'] = url

  perform_post_as_multipart_formdata
end
2295
+
2296
+ # Get the text from the specified pdf and write it to the specified text file.
2297
+ #
2298
+ # @param url Address of the PDF file.
2299
+ # @param output_file_path The output file where the resulted text will be written.
2300
def text_from_url_to_file(url, output_file_path)
  # Extract first, then write in binary mode.
  extracted = text_from_url(url)
  File.open(output_file_path, 'wb') { |out| out.write(extracted) }
rescue ApiException
  # On an API failure the output file is removed if it exists, then the error is re-raised.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2309
+
2310
+ # Get the text from the specified pdf and write it to the specified stream.
2311
+ #
2312
+ # @param url Address of the PDF file.
2313
+ # @param stream The output stream where the extracted text will be written.
2314
def text_from_url_to_stream(url, stream)
  # Forward the extracted text straight into the caller's stream.
  stream.write(text_from_url(url))
end
2318
+
2319
+ # Get the text from the specified pdf with an asynchronous call.
2320
+ #
2321
+ # @param url Address of the PDF file.
2322
+ # @return Extracted text.
2323
def text_from_url_async(url)
  # Asynchronous 'Convert' request for a PDF reachable through a public url.
  # Raises ApiException when the url is not http(s), starts with http://localhost,
  # the job cannot be launched, or no result arrives within the allowed pings.
  lowered = url.downcase

  unless lowered.start_with?('http://', 'https://')
    protocol_error = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(protocol_error), protocol_error
  end

  if lowered.start_with?('http://localhost')
    # The exception object and the raise message now carry the same text and
    # name the Ruby method (the object used to say "getTextFromFile").
    local_error = 'Cannot convert local urls via this method. Use text_from_file_async instead.'
    raise ApiException.new(local_error), local_error
  end

  @parameters['action'] = 'Convert'

  # The url is the only input: drop any local file left from an earlier call.
  @files = {}
  @parameters['url'] = url

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    launch_error = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(launch_error), launch_error
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # NOTE(review): the poller receives @job_id, not the local job_id -
    # presumably the start call stores it; confirm in ApiClient.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  timeout_error = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(timeout_error), timeout_error
end
2368
+
2369
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
2370
+ #
2371
+ # @param url Address of the PDF file.
2372
+ # @param output_file_path The output file where the resulted text will be written.
2373
def text_from_url_to_file_async(url, output_file_path)
  begin
    # Extract first: the output file is only opened once text_from_url_async has returned.
    extracted_text = text_from_url_async(url)
    File.open(output_file_path, 'wb') { |out| out.write(extracted_text) }
  rescue ApiException
    # On an API failure, remove whatever exists at the output path and let the caller see the error.
    FileUtils.rm(output_file_path) if File.exist?(output_file_path)
    raise
  end
end
2382
+
2383
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
2384
+ #
2385
+ # @param url Address of the PDF file.
2386
+ # @param stream The output stream where the resulting text will be written.
2387
def text_from_url_to_stream_async(url, stream)
  # Run the asynchronous extraction and pass its whole result to the caller's stream in one write.
  stream.write(text_from_url_async(url))
end
2391
+
2392
+ # Search for a specific text in a PDF document.
2393
+ # Pages that participate in this operation are specified by the start_page and end_page setters.
2394
+ #
2395
+ # @param input_pdf Path to a local PDF file.
2396
+ # @param text_to_search Text to search.
2397
+ # @param case_sensitive If the search is case sensitive or not.
2398
+ # @param whole_words_only If the search works on whole words or not.
2399
+ # @return List with text positions in the current PDF document.
2400
def search_file(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the top-level FALSE constant is deprecated since
  # Ruby 2.4 and no longer defined in Ruby 3.0+, where it raised NameError as soon as a
  # caller omitted the optional flags.
  if text_to_search.nil? || text_to_search.empty?
    raise ApiException.new('Search text cannot be empty.'), 'Search text cannot be empty.'
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  # The document is sent as a file, so drop any 'url' parameter left in @parameters.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = {}
  @files['inputPdf'] = input_pdf

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  # A nil or empty response yields an empty list instead of being parsed.
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
2422
+
2423
+ # Search for a specific text in a PDF document with an asynchronous call.
2424
+ # Pages that participate in this operation are specified by the start_page and end_page setters.
2425
+ #
2426
+ # @param input_pdf Path to a local PDF file.
2427
+ # @param text_to_search Text to search.
2428
+ # @param case_sensitive If the search is case sensitive or not.
2429
+ # @param whole_words_only If the search works on whole words or not.
2430
+ # @return List with text positions in the current PDF document.
2431
def search_file_async(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the top-level FALSE constant is deprecated since
  # Ruby 2.4 and no longer defined in Ruby 3.0+, where it raised NameError as soon as a
  # caller omitted the optional flags.
  if text_to_search.nil? || text_to_search.empty?
    raise ApiException.new('Search text cannot be empty.'), 'Search text cannot be empty.'
  end

  @parameters['action'] = 'Search'
  # The document is sent as a file, so drop any 'url' parameter left in @parameters.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = {}
  @files['inputPdf'] = input_pdf

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    raise ApiException.new('An error occurred launching the asynchronous call.'),
          'An error occurred launching the asynchronous call.'
  end

  no_pings = 0

  # Poll for the result at most @async_calls_max_pings times.
  while no_pings < @async_calls_max_pings
    no_pings += 1

    # Wait @async_calls_ping_interval before the next ping.
    sleep(@async_calls_ping_interval)

    # NOTE(review): polling reads @job_id rather than the local job_id; presumably
    # start_async_job_multipart_form_data stores the id there - confirm.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means nothing is available yet: ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    # An empty (non-nil) result yields an empty list instead of being parsed.
    return [] if result.empty?

    return JSON.parse(result)
  end

  raise ApiException.new('Asynchronous call did not finish in expected timeframe.'),
        'Asynchronous call did not finish in expected timeframe.'
end
2478
+
2479
+ # Search for a specific text in a PDF document.
2480
+ # Pages that participate in this operation are specified by the start_page and end_page setters.
2481
+ #
2482
+ # @param url Address of the PDF file.
2483
+ # @param text_to_search Text to search.
2484
+ # @param case_sensitive If the search is case sensitive or not.
2485
+ # @param whole_words_only If the search works on whole words or not.
2486
+ # @return List with text positions in the current PDF document.
2487
def search_url(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the top-level FALSE constant is deprecated since
  # Ruby 2.4 and no longer defined in Ruby 3.0+, where it raised NameError as soon as a
  # caller omitted the optional flags.
  if text_to_search.nil? || text_to_search.empty?
    raise ApiException.new('Search text cannot be empty.'), 'Search text cannot be empty.'
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The document is referenced by address, so the file list is reset to empty.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  # A nil or empty response yields an empty list instead of being parsed.
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
2508
+
2509
+ # Search for a specific text in a PDF document with an asynchronous call.
2510
+ # Pages that participate in this operation are specified by the start_page and end_page setters.
2511
+ #
2512
+ # @param url Address of the PDF file.
2513
+ # @param text_to_search Text to search.
2514
+ # @param case_sensitive If the search is case sensitive or not.
2515
+ # @param whole_words_only If the search works on whole words or not.
2516
+ # @return List with text positions in the current PDF document.
2517
def search_url_async(url, text_to_search, case_sensitive = false, whole_words_only = false)
  # The defaults use the `false` literal: the top-level FALSE constant is deprecated since
  # Ruby 2.4 and no longer defined in Ruby 3.0+, where it raised NameError as soon as a
  # caller omitted the optional flags.
  if text_to_search.nil? || text_to_search.empty?
    raise ApiException.new('Search text cannot be empty.'), 'Search text cannot be empty.'
  end

  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The document is referenced by address, so the file list is reset to empty.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    raise ApiException.new('An error occurred launching the asynchronous call.'),
          'An error occurred launching the asynchronous call.'
  end

  no_pings = 0

  # Poll for the result at most @async_calls_max_pings times.
  while no_pings < @async_calls_max_pings
    no_pings += 1

    # Wait @async_calls_ping_interval before the next ping.
    sleep(@async_calls_ping_interval)

    # NOTE(review): polling reads @job_id rather than the local job_id; presumably
    # start_async_job_multipart_form_data stores the id there - confirm.
    async_job_client = AsyncJobClient.new(@parameters['key'], @job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means nothing is available yet: ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    # An empty (non-nil) result yields an empty list instead of being parsed.
    return [] if result.empty?

    return JSON.parse(result)
  end

  raise ApiException.new('Asynchronous call did not finish in expected timeframe.'),
        'Asynchronous call did not finish in expected timeframe.'
end
2563
+
2564
+ # Set Start Page number. Default value is 1 (first page of the document).
2565
+ #
2566
+ # @param start_page Start page number (1-based).
2567
def start_page=(start_page)
  # Record the first page to process under the 'start_page' request parameter.
  @parameters.store('start_page', start_page)
end
2570
+
2571
+ # Set End Page number. Default value is 0 (process till the last page of the document).
2572
+ #
2573
+ # @param end_page End page number (1-based).
2574
def end_page=(end_page)
  # Record the last page to process under the 'end_page' request parameter.
  @parameters.store('end_page', end_page)
end
2577
+
2578
+ # Set PDF user password.
2579
+ #
2580
+ # @param user_password PDF user password.
2581
def user_password=(user_password)
  # Record the PDF user password under the 'user_password' request parameter.
  @parameters.store('user_password', user_password)
end
2584
+
2585
+ # Set the text layout. The default value is SelectPdf::TextLayout::ORIGINAL.
2586
+ #
2587
+ # @param text_layout The text layout. Possible values: Original, Reading. Use constants from SelectPdf::TextLayout class.
2588
def text_layout=(text_layout)
  # Only the two layout codes named in the error message (0 and 1) are accepted.
  if [0, 1].include?(text_layout)
    @parameters['text_layout'] = text_layout
  else
    message = 'Allowed values for Text Layout: 0 (Original), 1 (Reading).'
    raise ApiException.new(message), message
  end
end
2595
+
2596
+ # Set the output format. The default value is SelectPdf::OutputFormat::TEXT.
2597
+ #
2598
+ # @param output_format The output format. Possible values: Text, Html. Use constants from SelectPdf::OutputFormat class.
2599
def output_format=(output_format)
  # Only the two format codes named in the error message (0 and 1) are accepted.
  if [0, 1].include?(output_format)
    @parameters['output_format'] = output_format
  else
    message = 'Allowed values for Output Format: 0 (Text), 1 (Html).'
    raise ApiException.new(message), message
  end
end
2606
+
2607
+ # Set the maximum amount of time (in seconds) for this job.
2608
+ # The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
2609
+ #
2610
+ # @param timeout Timeout in seconds.
2611
def timeout=(timeout)
  # Record the job time limit under the 'timeout' request parameter; the value is not range-checked here.
  @parameters.store('timeout', timeout)
end
2614
+
2615
+ # Set a custom parameter. Do not use this method unless advised by SelectPdf.
2616
+ #
2617
+ # @param parameter_name Parameter name.
2618
+ # @param parameter_value Parameter value.
2619
def set_custom_parameter(parameter_name, parameter_value)
  # Store the pair in the request parameters as given, replacing any existing value for that name.
  @parameters.store(parameter_name, parameter_value)
end
2622
+ end
1528
2623
  end