selectpdf 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/selectpdf.rb CHANGED
@@ -5,16 +5,172 @@ require 'json'
5
5
  require 'fileutils'
6
6
 
7
7
  #
8
- # SelectPdf Online REST API Ruby client library. Contains a powerful HTML to PDF converter.
8
+ # SelectPdf Online REST API Ruby client library. Contains an HTML to PDF converter, a PDF merge tool, a PDF to text extractor and a PDF search tool.
9
9
  #
10
10
  #
11
11
  # Convert HTML to PDF
12
12
  #
13
- # {include:file:samples/simple_url_to_pdf.rb}
13
+ # require 'selectpdf'
14
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
15
+ #
16
+ # url = 'https://selectpdf.com'
17
+ # local_file = 'Test.pdf'
18
+ # api_key = 'Your API key here'
19
+ #
20
+ # begin
21
+ # api = SelectPdf::HtmlToPdfClient.new(api_key)
22
+ #
23
+ # api.page_size = SelectPdf::PageSize::A4
24
+ # api.margins = 0
25
+ # api.page_numbers = false
26
+ # api.page_breaks_enhanced_algorithm = true
27
+ #
28
+ # api.convert_url_to_file(url, local_file)
29
+ # rescue SelectPdf::ApiException => e
30
+ # print("An error occurred: #{e}")
31
+ # end
32
+ #
33
+ # Merge PDFs from local disk or public url and save result into a file on disk.
34
+ #
35
+ # require 'selectpdf'
36
+ #
37
+ # $stdout.sync = true
38
+ #
39
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
40
+ #
41
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
42
+ # test_pdf = 'Input.pdf'
43
+ # local_file = 'Result.pdf'
44
+ # api_key = 'Your API key here'
45
+ #
46
+ # begin
47
+ # client = SelectPdf::PdfMergeClient.new(api_key)
48
+ #
49
+ # # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
50
+ #
51
+ # # specify the pdf files that will be merged (order will be preserved in the final pdf)
52
+ # client.add_file(test_pdf) # add PDF from local file
53
+ # client.add_url_file(test_url) # add PDF from public url
54
+ # # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
55
+ # # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
56
+ #
57
+ # print "Starting pdf merge ...\n"
58
+ #
59
+ # # merge pdfs to local file
60
+ # client.save_to_file(local_file)
61
+ #
62
+ # # merge pdfs to memory
63
+ # # pdf = client.save
64
+ #
65
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
66
+ #
67
+ # # get API usage
68
+ # usage_client = SelectPdf::UsageClient.new(api_key)
69
+ # usage = usage_client.get_usage(false)
70
+ # print("Usage: #{usage}\n")
71
+ # print('Conversions remained this month: ', usage['available'], "\n")
72
+ # rescue SelectPdf::ApiException => e
73
+ # print("An error occurred: #{e}")
74
+ # end
75
+ #
76
+ # Extract text from PDF
77
+ #
78
+ # require 'selectpdf'
79
+ #
80
+ # $stdout.sync = true
81
+ #
82
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
83
+ #
84
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
85
+ # test_pdf = 'Input.pdf'
86
+ # local_file = 'Result.txt'
87
+ # api_key = 'Your API key here'
88
+ #
89
+ # begin
90
+ # client = SelectPdf::PdfToTextClient.new(api_key)
91
+ #
92
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
93
+ # client.start_page = 1 # start page (processing starts from here)
94
+ # client.end_page = 0 # end page (set 0 to process file til the end)
95
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
96
+ #
97
+ # print "Starting pdf to text ...\n"
98
+ #
99
+ # # convert local pdf to local text file
100
+ # client.text_from_file_to_file(test_pdf, local_file)
101
+ #
102
+ # # extract text from local pdf to memory
103
+ # # text = client.text_from_file(test_pdf)
104
+ # # print text
105
+ #
106
+ # # convert pdf from public url to local text file
107
+ # # client.text_from_url_to_file(test_url, local_file)
108
+ #
109
+ # # extract text from pdf from public url to memory
110
+ # # text = client.text_from_url(test_url)
111
+ # # print text
112
+ #
113
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
114
+ #
115
+ # # get API usage
116
+ # usage_client = SelectPdf::UsageClient.new(api_key)
117
+ # usage = usage_client.get_usage(false)
118
+ # print("Usage: #{usage}\n")
119
+ # print('Conversions remained this month: ', usage['available'], "\n")
120
+ # rescue SelectPdf::ApiException => e
121
+ # print("An error occurred: #{e}")
122
+ # end
123
+ #
124
+ # Search Pdf
125
+ #
126
+ # require 'selectpdf'
127
+ #
128
+ # $stdout.sync = true
129
+ #
130
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
131
+ #
132
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
133
+ # test_pdf = 'Input.pdf'
134
+ # api_key = 'Your API key here'
135
+ #
136
+ # begin
137
+ # client = SelectPdf::PdfToTextClient.new(api_key)
138
+ #
139
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
140
+ # client.start_page = 1 # start page (processing starts from here)
141
+ # client.end_page = 0 # end page (set 0 to process file til the end)
142
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
143
+ #
144
+ # print "Starting search pdf ...\n"
145
+ #
146
+ # # search local pdf
147
+ # results = client.search_file(test_pdf, 'pdf')
148
+ #
149
+ # # search pdf from public url
150
+ # # results = client.search_url(test_url, 'pdf')
151
+ #
152
+ # print "Search results: #{results}.\nSearch results count: #{results.length}\n"
153
+ #
154
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
155
+ #
156
+ # # get API usage
157
+ # usage_client = SelectPdf::UsageClient.new(api_key)
158
+ # usage = usage_client.get_usage(false)
159
+ # print("Usage: #{usage}\n")
160
+ # print('Conversions remained this month: ', usage['available'], "\n")
161
+ # rescue SelectPdf::ApiException => e
162
+ # print("An error occurred: #{e}")
163
+ # end
164
+ #
14
165
  module SelectPdf
166
# Multipart/form-data boundary
MULTIPART_FORM_DATA_BOUNDARY = '------------SelectPdf_Api_Boundry_$'.freeze

# New line
NEW_LINE = "\r\n".freeze

# Library version
CLIENT_VERSION = '1.4.0'.freeze
18
174
 
19
175
  attr_reader :code, :message
20
176
  #
@@ -470,7 +626,70 @@ module SelectPdf
470
626
  #
471
627
  # Code sample:
472
628
  #
473
- # {include:file:samples/html_to_pdf_main.rb}
629
+ # require 'selectpdf'
630
+ #
631
+ # $stdout.sync = true
632
+ #
633
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
634
+ #
635
+ # url = 'https://selectpdf.com'
636
+ # local_file = 'Test.pdf'
637
+ # api_key = 'Your API key here'
638
+ #
639
+ # begin
640
+ # client = SelectPdf::HtmlToPdfClient.new(api_key)
641
+ #
642
+ # # set parameters - see full list at https://selectpdf.com/html-to-pdf-api/
643
+ #
644
+ # client.page_size = SelectPdf::PageSize::A4 # PDF page size
645
+ # client.page_orientation = SelectPdf::PageOrientation::PORTRAIT # PDF page orientation
646
+ # client.margins = 0 # PDF page margins
647
+ # client.rendering_engine = SelectPdf::RenderingEngine::WEBKIT # rendering engine
648
+ # client.conversion_delay = 1 # conversion delay
649
+ # client.navigation_timeout = 30 # navigation timeout
650
+ # client.page_numbers = false # page numbers
651
+ # client.page_breaks_enhanced_algorithm = true # enhanced page break algorithm
652
+ #
653
+ # # additional properties
654
+ #
655
+ # # client.use_css_print = true # enable CSS media print
656
+ # # client.disable_javascript = true # disable javascript
657
+ # # client.disable_internal_links = true # disable internal links
658
+ # # client.disable_external_links = true # disable external links
659
+ # # client.keep_images_together = true # keep images together
660
+ # # client.scale_images = true # scale images to create smaller pdfs
661
+ # # client.single_page_pdf = true # generate a single page PDF
662
+ # # client.user_password = 'password' # secure the PDF with a password
663
+ #
664
+ # # generate automatic bookmarks
665
+ #
666
+ # # client.pdf_bookmarks_selectors = 'H1, H2' # create outlines (bookmarks) for the specified elements
667
+ # # client.viewer_page_mode = SelectPdf::PageMode::USE_OUTLINES # display outlines (bookmarks) in viewer
668
+ #
669
+ # print "Starting conversion ...\n"
670
+ #
671
+ # # convert url to file
672
+ # client.convert_url_to_file(url, local_file)
673
+ #
674
+ # # convert url to memory
675
+ # # pdf = client.convert_url(url)
676
+ #
677
+ # # convert html string to file
678
+ # # client.convert_html_string_to_file('This is some <b>html</b>.', local_file)
679
+ #
680
+ # # convert html string to memory
681
+ # # pdf = client.convert_html_string('This is some <b>html</b>.')
682
+ #
683
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
684
+ #
685
+ # # get API usage
686
+ # usage_client = SelectPdf::UsageClient.new(api_key)
687
+ # usage = usage_client.get_usage(false)
688
+ # print("Usage: #{usage}\n")
689
+ # print('Conversions remained this month: ', usage['available'], "\n")
690
+ # rescue SelectPdf::ApiException => e
691
+ # print("An error occurred: #{e}")
692
+ # end
474
693
  class HtmlToPdfClient < ApiClient
475
694
  # Construct the Html To Pdf Client.
476
695
  #
@@ -1525,4 +1744,880 @@ module SelectPdf
1525
1744
  return nil
1526
1745
  end
1527
1746
  end
1747
+
1748
+ # Pdf Merge with SelectPdf Online API.
1749
+ #
1750
+ # Code sample:
1751
+ #
1752
+ # require 'selectpdf'
1753
+ #
1754
+ # $stdout.sync = true
1755
+ #
1756
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
1757
+ #
1758
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
1759
+ # test_pdf = 'Input.pdf'
1760
+ # local_file = 'Result.pdf'
1761
+ # api_key = 'Your API key here'
1762
+ #
1763
+ # begin
1764
+ # client = SelectPdf::PdfMergeClient.new(api_key)
1765
+ #
1766
+ # # set parameters - see full list at https://selectpdf.com/pdf-merge-api/
1767
+ #
1768
+ # # specify the pdf files that will be merged (order will be preserved in the final pdf)
1769
+ # client.add_file(test_pdf) # add PDF from local file
1770
+ # client.add_url_file(test_url) # add PDF from public url
1771
+ # # client.add_file(test_pdf, 'pdf_password') # add PDF (that requires a password) from local file
1772
+ # # client.add_url_file(test_url, 'pdf_password') # add PDF (that requires a password) from public url
1773
+ #
1774
+ # print "Starting pdf merge ...\n"
1775
+ #
1776
+ # # merge pdfs to local file
1777
+ # client.save_to_file(local_file)
1778
+ #
1779
+ # # merge pdfs to memory
1780
+ # # pdf = client.save
1781
+ #
1782
+ # print "Finished! Number of pages: #{client.number_of_pages}.\n"
1783
+ #
1784
+ # # get API usage
1785
+ # usage_client = SelectPdf::UsageClient.new(api_key)
1786
+ # usage = usage_client.get_usage(false)
1787
+ # print("Usage: #{usage}\n")
1788
+ # print('Conversions remained this month: ', usage['available'], "\n")
1789
+ # rescue SelectPdf::ApiException => e
1790
+ # print("An error occurred: #{e}")
1791
+ # end
1792
class PdfMergeClient < ApiClient
  # Construct the Pdf Merge Client.
  #
  # @param api_key API Key.
  def initialize(api_key)
    super()
    @api_endpoint = 'https://selectpdf.com/api2/pdfmerge/'
    @parameters['key'] = api_key

    # Number of input documents queued so far. It is also the numeric suffix of
    # the file_N / url_N / password_N request fields of the latest input.
    @file_idx = 0
  end

  # Add local PDF document to the list of input files.
  #
  # @param input_pdf Path to a local PDF file.
  # @param user_password User password for the PDF document (optional).
  def add_file(input_pdf, user_password = nil)
    @file_idx += 1

    @files["file_#{@file_idx}"] = input_pdf
    # an index holds either a local file or a url, never both
    @parameters.delete("url_#{@file_idx}")

    store_input_password(user_password)
  end

  # Add remote PDF document to the list of input files.
  #
  # @param input_url Url of a remote PDF file.
  # @param user_password User password for the PDF document (optional).
  def add_url_file(input_url, user_password = nil)
    @file_idx += 1

    @parameters["url_#{@file_idx}"] = input_url
    # an index holds either a local file or a url, never both
    @files.delete("file_#{@file_idx}")

    store_input_password(user_password)
  end

  # Merge all specified input pdfs and return the resulted PDF.
  # After a successful call the list of queued input files is cleared.
  #
  # @return Byte array containing the resulted PDF.
  def save
    @parameters['async'] = 'False'
    @parameters['files_no'] = @file_idx

    result = perform_post_as_multipart_formdata

    @file_idx = 0
    @files = {}

    result
  end

  # Merge all specified input pdfs and writes the resulted PDF to a specified stream.
  #
  # @param stream The output stream where the resulted PDF will be written.
  def save_to_stream(stream)
    stream.write(save)
  end

  # Merge all specified input pdfs and writes the resulted PDF to a local file.
  # If the API call fails with an ApiException, any file found at file_path is
  # removed before the exception is re-raised.
  #
  # @param file_path Local file including path if necessary.
  def save_to_file(file_path)
    File.binwrite(file_path, save)
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end

  # Merge all specified input pdfs and return the resulted PDF. An asynchronous call is used.
  #
  # The job is started, then polled up to @async_calls_max_pings times, sleeping
  # @async_calls_ping_interval seconds before each poll.
  #
  # @return Byte array containing the resulted PDF.
  # @raise [ApiException] if the job cannot be started or does not finish within the allowed pings.
  def save_async
    @parameters['files_no'] = @file_idx

    job_id = start_async_job_multipart_form_data

    if job_id.nil? || job_id.empty?
      message = 'An error occurred launching the asynchronous call.'
      raise ApiException.new(message), message
    end

    no_pings = 0

    while no_pings < @async_calls_max_pings
      no_pings += 1

      # sleep for a few seconds before next ping
      sleep(@async_calls_ping_interval)

      # poll with the job id that was validated above
      async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
      async_job_client.api_endpoint = @api_async_endpoint

      result = async_job_client.result

      # nil means there is no result yet; ping again
      next if result.nil?

      @number_of_pages = async_job_client.number_of_pages
      @file_idx = 0
      @files = {}

      return result
    end

    @file_idx = 0
    @files = {}

    message = 'Asynchronous call did not finish in expected timeframe.'
    raise ApiException.new(message), message
  end

  # Merge all specified input pdfs and writes the resulted PDF to a specified stream. An asynchronous call is used.
  #
  # @param stream The output stream where the resulted PDF will be written.
  def save_to_stream_async(stream)
    stream.write(save_async)
  end

  # Merge all specified input pdfs and writes the resulted PDF to a local file. An asynchronous call is used.
  # If the API call fails with an ApiException, any file found at file_path is
  # removed before the exception is re-raised.
  #
  # @param file_path Local file including path if necessary.
  def save_to_file_async(file_path)
    File.binwrite(file_path, save_async)
  rescue ApiException
    FileUtils.rm(file_path) if File.exist?(file_path)
    raise
  end

  # Set the PDF document title.
  #
  # @param doc_title Document title.
  def doc_title=(doc_title)
    @parameters['doc_title'] = doc_title
  end

  # Set the subject of the PDF document.
  #
  # @param doc_subject Document subject.
  def doc_subject=(doc_subject)
    @parameters['doc_subject'] = doc_subject
  end

  # Set the PDF document keywords.
  #
  # @param doc_keywords Document keywords.
  def doc_keywords=(doc_keywords)
    @parameters['doc_keywords'] = doc_keywords
  end

  # Set the name of the PDF document author.
  #
  # @param doc_author Document author.
  def doc_author=(doc_author)
    @parameters['doc_author'] = doc_author
  end

  # Add the date and time when the PDF document was created to the PDF document information. The default value is False.
  #
  # @param doc_add_creation_date Add creation date to the document metadata or not.
  def doc_add_creation_date=(doc_add_creation_date)
    @parameters['doc_add_creation_date'] = doc_add_creation_date
  end

  # Set the page layout to be used when the document is opened in a PDF viewer. The default value is SelectPdf::PageLayout::ONE_COLUMN.
  #
  # @param viewer_page_layout Page layout. Possible values: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).
  # Use constants from SelectPdf::PageLayout class.
  # @raise [ApiException] if the value is not one of the allowed values.
  def viewer_page_layout=(viewer_page_layout)
    unless [0, 1, 2, 3].include?(viewer_page_layout)
      message = 'Allowed values for Page Layout: 0 (Single Page), 1 (One Column), 2 (Two Column Left), 3 (Two Column Right).'
      raise ApiException.new(message), message
    end

    @parameters['viewer_page_layout'] = viewer_page_layout
  end

  # Set the document page mode when the pdf document is opened in a PDF viewer. The default value is SelectPdf::PageMode::USE_NONE.
  #
  # @param viewer_page_mode Page mode. Possible values: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).
  # Use constants from SelectPdf::PageMode class.
  # @raise [ApiException] if the value is not one of the allowed values.
  def viewer_page_mode=(viewer_page_mode)
    unless [0, 1, 2, 3, 4, 5].include?(viewer_page_mode)
      message = 'Allowed values for Page Mode: 0 (Use None), 1 (Use Outlines), 2 (Use Thumbs), 3 (Full Screen), 4 (Use OC), 5 (Use Attachments).'
      raise ApiException.new(message), message
    end

    @parameters['viewer_page_mode'] = viewer_page_mode
  end

  # Set a flag specifying whether to position the document's window in the center of the screen. The default value is False.
  #
  # @param viewer_center_window Center window or not.
  def viewer_center_window=(viewer_center_window)
    @parameters['viewer_center_window'] = viewer_center_window
  end

  # Set a flag specifying whether the window's title bar should display the document title taken from document information. The default value is False.
  #
  # @param viewer_display_doc_title Display title or not.
  def viewer_display_doc_title=(viewer_display_doc_title)
    @parameters['viewer_display_doc_title'] = viewer_display_doc_title
  end

  # Set a flag specifying whether to resize the document's window to fit the size of the first displayed page. The default value is False.
  #
  # @param viewer_fit_window Fit window or not.
  def viewer_fit_window=(viewer_fit_window)
    @parameters['viewer_fit_window'] = viewer_fit_window
  end

  # Set a flag specifying whether to hide the pdf viewer application's menu bar when the document is active. The default value is False.
  #
  # @param viewer_hide_menu_bar Hide menu bar or not.
  def viewer_hide_menu_bar=(viewer_hide_menu_bar)
    @parameters['viewer_hide_menu_bar'] = viewer_hide_menu_bar
  end

  # Set a flag specifying whether to hide the pdf viewer application's tool bars when the document is active. The default value is False.
  #
  # @param viewer_hide_toolbar Hide tool bars or not.
  def viewer_hide_toolbar=(viewer_hide_toolbar)
    @parameters['viewer_hide_toolbar'] = viewer_hide_toolbar
  end

  # Set a flag specifying whether to hide user interface elements in the document's window (such as scroll bars and navigation controls), leaving only the document's contents displayed.
  #
  # @param viewer_hide_window_ui Hide window UI or not.
  def viewer_hide_window_ui=(viewer_hide_window_ui)
    @parameters['viewer_hide_window_ui'] = viewer_hide_window_ui
  end

  # Set PDF user password.
  #
  # @param user_password PDF user password.
  def user_password=(user_password)
    @parameters['user_password'] = user_password
  end

  # Set PDF owner password.
  #
  # @param owner_password PDF owner password.
  def owner_password=(owner_password)
    @parameters['owner_password'] = owner_password
  end

  # Set the maximum amount of time (in seconds) for this job.
  # The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
  #
  # @param timeout Timeout in seconds.
  def timeout=(timeout)
    @parameters['timeout'] = timeout
  end

  # Set a custom parameter. Do not use this method unless advised by SelectPdf.
  #
  # @param parameter_name Parameter name.
  # @param parameter_value Parameter value.
  def set_custom_parameter(parameter_name, parameter_value)
    @parameters[parameter_name] = parameter_value
  end

  private

  # Store the password of the input document at the current index, or clear the
  # password field of that index when no (or an empty) password is given.
  def store_input_password(user_password)
    password_key = "password_#{@file_idx}"

    if user_password.nil? || user_password.empty?
      @parameters.delete(password_key)
    else
      @parameters[password_key] = user_password
    end
  end
end
2067
+
2068
+ # Pdf To Text Conversion with SelectPdf Online API.
2069
+ #
2070
+ # Code Sample for PDF To Text
2071
+ #
2072
+ # require 'selectpdf'
2073
+ #
2074
+ # $stdout.sync = true
2075
+ #
2076
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
2077
+ #
2078
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
2079
+ # test_pdf = 'Input.pdf'
2080
+ # local_file = 'Result.txt'
2081
+ # api_key = 'Your API key here'
2082
+ #
2083
+ # begin
2084
+ # client = SelectPdf::PdfToTextClient.new(api_key)
2085
+ #
2086
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
2087
+ # client.start_page = 1 # start page (processing starts from here)
2088
+ # client.end_page = 0 # end page (set 0 to process file til the end)
2089
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
2090
+ #
2091
+ # print "Starting pdf to text ...\n"
2092
+ #
2093
+ # # convert local pdf to local text file
2094
+ # client.text_from_file_to_file(test_pdf, local_file)
2095
+ #
2096
+ # # extract text from local pdf to memory
2097
+ # # text = client.text_from_file(test_pdf)
2098
+ # # print text
2099
+ #
2100
+ # # convert pdf from public url to local text file
2101
+ # # client.text_from_url_to_file(test_url, local_file)
2102
+ #
2103
+ # # extract text from pdf from public url to memory
2104
+ # # text = client.text_from_url(test_url)
2105
+ # # print text
2106
+ #
2107
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
2108
+ #
2109
+ # # get API usage
2110
+ # usage_client = SelectPdf::UsageClient.new(api_key)
2111
+ # usage = usage_client.get_usage(false)
2112
+ # print("Usage: #{usage}\n")
2113
+ # print('Conversions remained this month: ', usage['available'], "\n")
2114
+ # rescue SelectPdf::ApiException => e
2115
+ # print("An error occurred: #{e}")
2116
+ # end
2117
+ #
2118
+ # Code Sample for Search Pdf
2119
+ #
2120
+ # require 'selectpdf'
2121
+ #
2122
+ # $stdout.sync = true
2123
+ #
2124
+ # print "This is SelectPdf-#{SelectPdf::CLIENT_VERSION}\n"
2125
+ #
2126
+ # test_url = 'https://selectpdf.com/demo/files/selectpdf.pdf'
2127
+ # test_pdf = 'Input.pdf'
2128
+ # api_key = 'Your API key here'
2129
+ #
2130
+ # begin
2131
+ # client = SelectPdf::PdfToTextClient.new(api_key)
2132
+ #
2133
+ # # set parameters - see full list at https://selectpdf.com/pdf-to-text-api/
2134
+ # client.start_page = 1 # start page (processing starts from here)
2135
+ # client.end_page = 0 # end page (set 0 to process file til the end)
2136
+ # client.output_format = SelectPdf::OutputFormat::TEXT # set output format (Text or HTML)
2137
+ #
2138
+ # print "Starting search pdf ...\n"
2139
+ #
2140
+ # # search local pdf
2141
+ # results = client.search_file(test_pdf, 'pdf')
2142
+ #
2143
+ # # search pdf from public url
2144
+ # # results = client.search_url(test_url, 'pdf')
2145
+ #
2146
+ # print "Search results: #{results}.\nSearch results count: #{results.length}\n"
2147
+ #
2148
+ # print "Finished! Number of pages processed: #{client.number_of_pages}.\n"
2149
+ #
2150
+ # # get API usage
2151
+ # usage_client = SelectPdf::UsageClient.new(api_key)
2152
+ # usage = usage_client.get_usage(false)
2153
+ # print("Usage: #{usage}\n")
2154
+ # print('Conversions remained this month: ', usage['available'], "\n")
2155
+ # rescue SelectPdf::ApiException => e
2156
+ # print("An error occurred: #{e}")
2157
+ # end
2158
+ #
2159
+ class PdfToTextClient < ApiClient
2160
# Build a client that talks to the PDF-to-text endpoint of the SelectPdf Online API.
#
# @param api_key API Key.
def initialize(api_key)
  super()

  @file_idx = 0
  @api_endpoint = 'https://selectpdf.com/api2/pdftotext/'
  @parameters.store('key', api_key)
end
2170
+
2171
# Extract the text of a local PDF document using a synchronous call.
#
# @param input_pdf Path to a local PDF file.
# @return Extracted text.
def text_from_file(input_pdf)
  @parameters.merge!('async' => 'False', 'action' => 'Convert')
  # drop a url left over from a previous url-based call
  @parameters.delete('url')

  @files = { 'inputPdf' => input_pdf }

  perform_post_as_multipart_formdata
end
2185
+
2186
# Extract the text of a local PDF document and store it in a text file.
# When the extraction raises an ApiException, any file present at
# output_file_path is deleted and the exception is raised again.
#
# @param input_pdf Path to a local PDF file.
# @param output_file_path The output file where the resulted text will be written.
def text_from_file_to_file(input_pdf, output_file_path)
  File.binwrite(output_file_path, text_from_file(input_pdf))
rescue ApiException
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2199
+
2200
# Extract the text of a local PDF document and write it to a stream.
#
# @param input_pdf Path to a local PDF file.
# @param stream The output stream where the extracted text will be written.
def text_from_file_to_stream(input_pdf, stream)
  stream.write(text_from_file(input_pdf))
end
2208
+
2209
# Get the text from the specified pdf with an asynchronous call.
#
# The job is started, then polled up to @async_calls_max_pings times, sleeping
# @async_calls_ping_interval seconds before each poll.
#
# @param input_pdf Path to a local PDF file.
# @return Extracted text.
# @raise [ApiException] if the job cannot be started or does not finish within the allowed pings.
def text_from_file_async(input_pdf)
  @parameters['action'] = 'Convert'
  # drop a url left over from a previous url-based call
  @parameters.delete('url')

  @files = { 'inputPdf' => input_pdf }

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # poll with the job id that was validated above
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil means there is no result yet; ping again
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
2248
+
2249
# Extract the text of a local PDF document with an asynchronous call and store it in a text file.
# When the extraction raises an ApiException, any file present at
# output_file_path is deleted and the exception is raised again.
#
# @param input_pdf Path to a local PDF file.
# @param output_file_path The output file where the resulted text will be written.
def text_from_file_to_file_async(input_pdf, output_file_path)
  File.binwrite(output_file_path, text_from_file_async(input_pdf))
rescue ApiException
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2262
+
2263
# Extract the text of a local PDF document with an asynchronous call and write it to a stream.
#
# @param input_pdf Path to a local PDF file.
# @param stream The output stream where the extracted text will be written.
def text_from_file_to_stream_async(input_pdf, stream)
  stream.write(text_from_file_async(input_pdf))
end
2271
+
2272
# Get the text from the specified pdf.
#
# @param url Address of the PDF file. It must start with http:// or https://.
# @return Extracted text.
# @raise [ApiException] if the url uses another protocol or starts with http://localhost.
def text_from_url(url)
  lowercase_url = url.downcase

  unless lowercase_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if lowercase_url.start_with?('http://localhost')
    # the message names the Ruby method to call instead
    message = 'Cannot convert local urls via this method. Use text_from_file instead.'
    raise ApiException.new(message), message
  end

  @parameters['async'] = 'False'
  @parameters['action'] = 'Convert'

  # a url based request carries no local file
  @files = {}
  @parameters['url'] = url

  perform_post_as_multipart_formdata
end
2295
+
2296
# Extract the text of a PDF document available online and store it in a text file.
# When the extraction raises an ApiException, any file present at
# output_file_path is deleted and the exception is raised again.
#
# @param url Address of the PDF file.
# @param output_file_path The output file where the resulted text will be written.
def text_from_url_to_file(url, output_file_path)
  File.binwrite(output_file_path, text_from_url(url))
rescue ApiException
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2309
+
2310
# Extract the text of a PDF document available online and write it to a stream.
#
# @param url Address of the PDF file.
# @param stream The output stream where the extracted text will be written.
def text_from_url_to_stream(url, stream)
  stream.write(text_from_url(url))
end
2318
+
2319
# Get the text from the specified pdf with an asynchronous call.
#
# The job is started, then polled up to @async_calls_max_pings times, sleeping
# @async_calls_ping_interval seconds before each poll.
#
# @param url Address of the PDF file. It must start with http:// or https://.
# @return Extracted text.
# @raise [ApiException] if the url uses another protocol or starts with http://localhost,
#   if the job cannot be started, or if it does not finish within the allowed pings.
def text_from_url_async(url)
  lowercase_url = url.downcase

  unless lowercase_url.start_with?('http://', 'https://')
    message = 'The supported protocols for the PDFs available online are http:// and https://.'
    raise ApiException.new(message), message
  end

  if lowercase_url.start_with?('http://localhost')
    # the message names the Ruby method to call instead
    message = 'Cannot convert local urls via this method. Use text_from_file_async instead.'
    raise ApiException.new(message), message
  end

  @parameters['action'] = 'Convert'

  # a url based request carries no local file
  @files = {}
  @parameters['url'] = url

  job_id = start_async_job_multipart_form_data

  if job_id.nil? || job_id.empty?
    message = 'An error occurred launching the asynchronous call.'
    raise ApiException.new(message), message
  end

  no_pings = 0

  while no_pings < @async_calls_max_pings
    no_pings += 1

    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # poll with the job id that was validated above
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # nil means there is no result yet; ping again
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages

    return result
  end

  message = 'Asynchronous call did not finish in expected timeframe.'
  raise ApiException.new(message), message
end
2368
+
2369
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified text file.
2370
+ #
2371
+ # @param url Address of the PDF file.
2372
+ # @param output_file_path The output file where the resulted text will be written.
2373
def text_from_url_to_file_async(url, output_file_path)
  # The asynchronous extraction completes before the output file is opened.
  extracted_text = text_from_url_async(url)
  File.open(output_file_path, 'wb') { |output| output.write(extracted_text) }
rescue ApiException
  # On an API failure, remove the output file (if one exists) and re-raise.
  FileUtils.rm(output_file_path) if File.exist?(output_file_path)
  raise
end
2382
+
2383
+ # Get the text from the specified pdf with an asynchronous call and write it to the specified stream.
2384
+ #
2385
+ # @param url Address of the PDF file.
2386
+ # @param stream The output stream where the extracted text will be written.
2387
def text_from_url_to_stream_async(url, stream)
  # Hand the asynchronously extracted text directly to the caller-supplied stream.
  stream.write(text_from_url_async(url))
end
2391
+
2392
+ # Search for a specific text in a PDF document.
2393
+ # Pages that participate to this operation are specified by start_page and end_page methods.
2394
+ #
2395
+ # @param input_pdf Path to a local PDF file.
2396
+ # @param text_to_search Text to search.
2397
+ # @param case_sensitive If the search is case sensitive or not.
2398
+ # @param whole_words_only If the search works on whole words or not.
2399
+ # @return List with text positions in the current PDF document.
2400
# NOTE: the defaults use the `false` literal; the FALSE constant was removed in Ruby 3.2.
def search_file(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  raise ApiException, 'Search text cannot be empty.' if text_to_search.nil? || text_to_search.empty?

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  # The PDF is sent as a local file, so any 'url' parameter is removed.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  # An absent or empty response body is reported as "no matches".
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
2422
+
2423
+ # Search for a specific text in a PDF document with an asynchronous call.
2424
+ # Pages that participate to this operation are specified by start_page and end_page methods.
2425
+ #
2426
+ # @param input_pdf Path to a local PDF file.
2427
+ # @param text_to_search Text to search.
2428
+ # @param case_sensitive If the search is case sensitive or not.
2429
+ # @param whole_words_only If the search works on whole words or not.
2430
+ # @return List with text positions in the current PDF document.
2431
# NOTE: the defaults use the `false` literal; the FALSE constant was removed in Ruby 3.2.
def search_file_async(input_pdf, text_to_search, case_sensitive = false, whole_words_only = false)
  raise ApiException, 'Search text cannot be empty.' if text_to_search.nil? || text_to_search.empty?

  @parameters['action'] = 'Search'
  # The PDF is sent as a local file, so any 'url' parameter is removed.
  @parameters.delete('url')
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  @files = { 'inputPdf' => input_pdf }

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data
  raise ApiException, 'An error occurred launching the asynchronous call.' if job_id.nil? || job_id.empty?

  # Poll for the result at most @async_calls_max_pings times.
  @async_calls_max_pings.times do
    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the job id that was just validated above.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has not produced output yet; ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    # An empty body is reported as "no matches"; otherwise the JSON is parsed.
    return result.empty? ? [] : JSON.parse(result)
  end

  raise ApiException, 'Asynchronous call did not finish in expected timeframe.'
end
2478
+
2479
+ # Search for a specific text in a PDF document.
2480
+ # Pages that participate to this operation are specified by start_page and end_page methods.
2481
+ #
2482
+ # @param url Address of the PDF file.
2483
+ # @param text_to_search Text to search.
2484
+ # @param case_sensitive If the search is case sensitive or not.
2485
+ # @param whole_words_only If the search works on whole words or not.
2486
+ # @return List with text positions in the current PDF document.
2487
# NOTE: the defaults use the `false` literal; the FALSE constant was removed in Ruby 3.2.
def search_url(url, text_to_search, case_sensitive = false, whole_words_only = false)
  raise ApiException, 'Search text cannot be empty.' if text_to_search.nil? || text_to_search.empty?

  @parameters['async'] = 'False'
  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The PDF is referenced by url, so no local file is attached to the request.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  result = perform_post_as_multipart_formdata
  # An absent or empty response body is reported as "no matches".
  return [] if result.nil? || result.empty?

  JSON.parse(result)
end
2508
+
2509
+ # Search for a specific text in a PDF document with an asynchronous call.
2510
+ # Pages that participate to this operation are specified by start_page and end_page methods.
2511
+ #
2512
+ # @param url Address of the PDF file.
2513
+ # @param text_to_search Text to search.
2514
+ # @param case_sensitive If the search is case sensitive or not.
2515
+ # @param whole_words_only If the search works on whole words or not.
2516
+ # @return List with text positions in the current PDF document.
2517
# NOTE: the defaults use the `false` literal; the FALSE constant was removed in Ruby 3.2.
def search_url_async(url, text_to_search, case_sensitive = false, whole_words_only = false)
  raise ApiException, 'Search text cannot be empty.' if text_to_search.nil? || text_to_search.empty?

  @parameters['action'] = 'Search'
  @parameters['search_text'] = text_to_search
  @parameters['case_sensitive'] = case_sensitive
  @parameters['whole_words_only'] = whole_words_only

  # The PDF is referenced by url, so no local file is attached to the request.
  @files = {}
  @parameters['url'] = url

  @headers['Accept'] = 'text/json'

  job_id = start_async_job_multipart_form_data
  raise ApiException, 'An error occurred launching the asynchronous call.' if job_id.nil? || job_id.empty?

  # Poll for the result at most @async_calls_max_pings times.
  @async_calls_max_pings.times do
    # sleep for a few seconds before next ping
    sleep(@async_calls_ping_interval)

    # Poll with the job id that was just validated above.
    async_job_client = AsyncJobClient.new(@parameters['key'], job_id)
    async_job_client.api_endpoint = @api_async_endpoint

    result = async_job_client.result

    # A nil result means the job has not produced output yet; ping again.
    next if result.nil?

    @number_of_pages = async_job_client.number_of_pages
    # An empty body is reported as "no matches"; otherwise the JSON is parsed.
    return result.empty? ? [] : JSON.parse(result)
  end

  raise ApiException, 'Asynchronous call did not finish in expected timeframe.'
end
2563
+
2564
+ # Set Start Page number. Default value is 1 (first page of the document).
2565
+ #
2566
+ # @param start_page Start page number (1-based).
2567
def start_page=(start_page)
  # Recorded under the 'start_page' request parameter.
  @parameters.store('start_page', start_page)
end
2570
+
2571
+ # Set End Page number. Default value is 0 (process till the last page of the document).
2572
+ #
2573
+ # @param end_page End page number (1-based).
2574
def end_page=(end_page)
  # Recorded under the 'end_page' request parameter.
  @parameters.store('end_page', end_page)
end
2577
+
2578
+ # Set PDF user password.
2579
+ #
2580
+ # @param user_password PDF user password.
2581
def user_password=(user_password)
  # Recorded under the 'user_password' request parameter.
  @parameters.store('user_password', user_password)
end
2584
+
2585
+ # Set the text layout. The default value is SelectPdf::TextLayout::ORIGINAL.
2586
+ #
2587
+ # @param text_layout The text layout. Possible values: Original, Reading. Use constants from SelectPdf::TextLayout class.
2588
def text_layout=(text_layout)
  if [0, 1].include?(text_layout)
    @parameters['text_layout'] = text_layout
  else
    # Anything other than 0 or 1 is rejected and the stored parameter is left untouched.
    message = 'Allowed values for Text Layout: 0 (Original), 1 (Reading).'
    raise ApiException.new(message), message
  end
end
2595
+
2596
+ # Set the output format. The default value is SelectPdf::OutputFormat::TEXT.
2597
+ #
2598
+ # @param output_format The output format. Possible values: Text, Html. Use constants from SelectPdf::OutputFormat class.
2599
def output_format=(output_format)
  if [0, 1].include?(output_format)
    @parameters['output_format'] = output_format
  else
    # Anything other than 0 or 1 is rejected and the stored parameter is left untouched.
    message = 'Allowed values for Output Format: 0 (Text), 1 (Html).'
    raise ApiException.new(message), message
  end
end
2606
+
2607
+ # Set the maximum amount of time (in seconds) for this job.
2608
+ # The default value is 30 seconds. Use a larger value (up to 120 seconds allowed) for large documents.
2609
+ #
2610
+ # @param timeout Timeout in seconds.
2611
def timeout=(timeout)
  # Recorded under the 'timeout' request parameter; the value is stored as given.
  @parameters.store('timeout', timeout)
end
2614
+
2615
+ # Set a custom parameter. Do not use this method unless advised by SelectPdf.
2616
+ #
2617
+ # @param parameter_name Parameter name.
2618
+ # @param parameter_value Parameter value.
2619
def set_custom_parameter(parameter_name, parameter_value)
  # Stored verbatim in the request parameters; an existing entry with the same name is overwritten.
  @parameters.store(parameter_name, parameter_value)
end
2622
+ end
1528
2623
  end