pdfium 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
4
- data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
3
+ metadata.gz: 90ada68ee9bb62299a1efee382547acc23f87284
4
+ data.tar.gz: cfde2cce4a21b04710aac0609cca271424ab25ce
5
5
  SHA512:
6
- metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
7
- data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
6
+ metadata.gz: fbb57c9b0da747032ab1969b33c0c3dbe3d1848d960cc5021bb49abb7544c953c323e47cac359788196679ce0cbbf65bdcb6d8bfec1f01ed8f4bd50cf4129366
7
+ data.tar.gz: 0f13a3d72c9f5577de9d6d479f0d48e019fe1daf640528dd09259ad8ae2b156b43d97f2f72d320853d6367b22b3f7ee66a6e3d0105aee2acd48d25b23f149e17
data/.gitignore CHANGED
@@ -1,4 +1,7 @@
1
1
  .bundle
2
+ .yardoc
3
+ doc
4
+ pkg
2
5
  Gemfile.lock
3
6
  *.bundle
4
7
  *.so
@@ -0,0 +1,3 @@
1
+ lib/pdfium*
2
+ ext/pdfium_ext/*.cc
3
+ --no-private
data/README.md CHANGED
@@ -6,6 +6,33 @@ It currently has only very rudimantary PDF editing capabilities.
6
6
 
7
7
  RDoc documentation is also available and the test directory has examples of usage.
8
8
 
9
+ ## In memory render and extraction
10
+
11
+ ```ruby
12
+ # Assuming AWS::S3 is already authorized elsewhere
13
+ bucket = AWS::S3.new.buckets['my-pdfs']
14
+
15
+ pdf = PDFium::Document.from_memory bucket.objects['secrets.pdf'].read
16
+ pdf.pages.each do | page |
17
+
18
+ # render the complete page as a PNG with the height locked to 1000 pixels
19
+ # The width will be calculated to maintain the proper aspect ratio
20
+ path = "secrets/page-#{page.number}.png"
21
+ bucket.objects[path].write page.as_image(height: 1000).data(:png)
22
+
23
+ # extract and save each embedded image as a PNG
24
+ page.images.each do | image |
25
+ path = "secrets/page-#{page.number}-image-#{image.index}.png"
26
+ bucket.objects[path].write image.data(:png)
27
+ end
28
+
29
+ # Extract text from page. Will be encoded as UTF-16LE by default
30
+ path = "secrets/page-#{page.number}-text.txt"
31
+ bucket.objects[path].write page.text
32
+
33
+ end
34
+ ```
35
+
9
36
  ## Open and saving
10
37
 
11
38
  ```ruby
@@ -28,7 +55,6 @@ pdf.metadata
28
55
  Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
29
56
 
30
57
 
31
-
32
58
  ## Bookmarks
33
59
 
34
60
  ```ruby
@@ -200,15 +200,12 @@ bookmark_destination(VALUE self)
200
200
  return hash;
201
201
  }
202
202
 
203
- VALUE
203
+ void
204
204
  define_bookmark_class(){
205
205
 
206
- #if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
207
- VALUE RB_PDFium = rb_define_module("PDFium");
208
- #endif
209
- VALUE RB_PDFium = RB::PDFium();
206
+ VALUE PDFium = RB::PDFium();
210
207
 
211
- VALUE RB_Bookmark = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
208
+ VALUE RB_Bookmark = rb_define_class_under(PDFium, "Bookmark", rb_cObject);
212
209
  rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
213
210
 
214
211
  rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
@@ -217,5 +214,4 @@ define_bookmark_class(){
217
214
  rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
218
215
  rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
219
216
 
220
- return RB_Bookmark;
221
217
  }
@@ -10,14 +10,8 @@
10
10
  #include "buffer_file_write.hpp"
11
11
 
12
12
  /////////////////////////////////////////////////////////////////////////
13
- // The Document class //
13
+ // The Document class //
14
14
  /////////////////////////////////////////////////////////////////////////
15
- /*
16
- * Document-class: PDFium::Document
17
- *
18
- * A Document represents a PDF file.
19
- *
20
- */
21
15
 
22
16
 
23
17
  // While you might think this would free the Document object it does not
@@ -74,7 +68,7 @@ document_initialize(int argc, VALUE *argv, VALUE self)
74
68
  *
75
69
  * Initializes a document from a binary string.
76
70
  *
77
- * See Image#data for an example of reading a PDF directly from Amazon S3
71
+ * See {PDFium::Image#data} for an example of reading a PDF directly from Amazon S3
78
72
  * and writing its images completely in memory.
79
73
  */
80
74
  static VALUE
@@ -86,8 +80,8 @@ document_from_memory(VALUE klass, VALUE data){
86
80
  }
87
81
 
88
82
  /*
89
- * call-seq:
90
- * page_count -> Fixnum
83
+ * @overload page_count
84
+ * page_count -> Fixnum
91
85
  *
92
86
  * Returns the number of pages on a Document
93
87
  */
@@ -98,7 +92,9 @@ document_page_count(VALUE self)
98
92
  }
99
93
 
100
94
  // Not documented in favor of the Document#pages[] access
101
- /* :nodoc: */
95
+ /*
96
+ @private
97
+ */
102
98
  static VALUE
103
99
  document_page_at(VALUE self, VALUE rb_page_index)
104
100
  {
@@ -107,7 +103,8 @@ document_page_at(VALUE self, VALUE rb_page_index)
107
103
 
108
104
  /*
109
105
  * call-seq:
110
- * pages -> PDFium::PageList
106
+ * pages
107
+ * @return {PDFium::PageList}
111
108
  *
112
109
  * Returns a collection of all the pages on the document as a PDFium::PageList. Pages
113
110
  * are lazily loaded.
@@ -123,7 +120,7 @@ document_pages(VALUE self)
123
120
 
124
121
  // creates and yields a page. Not documented since all access
125
122
  // should go through the PageList interface via the Document#pages method
126
- /* :nodoc: */
123
+ /* @private */
127
124
  static VALUE
128
125
  document_each_page(VALUE self)
129
126
  {
@@ -143,7 +140,9 @@ document_each_page(VALUE self)
143
140
 
144
141
  /*
145
142
  * call-seq:
146
- * bookmarks -> Bookmarks
143
+ * bookmarks
144
+ *
145
+ * @return {PDFium::BookmarkList}
147
146
  *
148
147
  * Retrieves the first Bookmark for a document
149
148
  */
@@ -160,13 +159,14 @@ document_bookmarks(VALUE self)
160
159
 
161
160
 
162
161
 
163
-
164
162
  /*
165
163
  * call-seq:
166
- * save -> Document
164
+ * save(file)
165
+ *
166
+ * @param file [String, Pathname] path to save the file to
167
+ * @return [Boolean] indicating success or failure
167
168
  *
168
- * Saves document to a PDF file. This method isn't terribly useful since there aren't
169
- * (yet) methods to add content to pages.
169
+ * Saves the document to a PDF file at the given path
170
170
  */
171
171
  static VALUE
172
172
  document_save(VALUE self, VALUE _path)
@@ -244,13 +244,13 @@ document_metadata(int argc, VALUE *argv, VALUE self)
244
244
  return metadata;
245
245
  }
246
246
 
247
- VALUE
247
+ void
248
248
  define_document_class()
249
249
  {
250
- VALUE RB_PDFium = RB::PDFium();
250
+ VALUE PDFium = RB::PDFium();
251
251
 
252
252
  // The Document class definition and methods
253
- VALUE RB_Document = rb_define_class_under(RB_PDFium, "Document", rb_cObject);
253
+ VALUE RB_Document = rb_define_class_under(PDFium, "Document", rb_cObject);
254
254
 
255
255
  rb_define_alloc_func(RB_Document, document_allocate);
256
256
 
@@ -264,5 +264,4 @@ define_document_class()
264
264
  rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
265
265
  rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
266
266
  rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
267
- return RB_Document;
268
267
  }
@@ -5,53 +5,36 @@ def existing(dirs)
5
5
  dirs.select{|dir| Dir.exist?(dir) }
6
6
  end
7
7
 
8
- LIB_DIRS=[]
9
- # if ENV['PDFIUM']
10
- # LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
11
- # HEADER_DIRS = [
12
- # "#{ENV['PDFIUM']}/fpdfsdk/include",
13
- # "#{ENV['PDFIUM']}/core/include",
14
- # "#{ENV['PDFIUM']}"
15
- # ]
16
8
 
17
- # else
18
- # LIB_DIRS = [
19
- # "/usr/local/lib/pdfium",
20
- # "/usr/lib/pdfium"
21
- # ]
9
+ if ENV['PDFIUM']
10
+ LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
11
+ HEADER_DIRS = [
12
+ "#{ENV['PDFIUM']}/public",
13
+ "#{ENV['PDFIUM']}/core/include",
14
+ "#{ENV['PDFIUM']}"
15
+ ]
22
16
 
23
- # HEADER_DIRS = [
24
- # "/usr/include/pdfium",
25
- # "/usr/local/include/pdfium",
26
- # "/usr/local/include/pdfium/fpdfsdk/include",
27
- # "/usr/local/include/pdfium/core/include"
28
- # ]
29
- # end
30
-
31
- HEADER_DIRS=[
32
- "/home/nas/pdfium/deb-package/pdfium/fpdfsdk/include",
33
- "/home/nas/pdfium/deb-package/pdfium/core/include",
34
- "/home/nas/pdfium/deb-package/pdfium"
35
- ]
17
+ else
18
+ LIB_DIRS = [
19
+ "/usr/local/lib/pdfium",
20
+ "/usr/lib/pdfium"
21
+ ]
22
+
23
+ HEADER_DIRS = [
24
+ "/usr/include/pdfium",
25
+ "/usr/include/pdfium/core/include",
26
+ "/usr/local/include/pdfium",
27
+ "/usr/local/include/pdfium/core/include"
28
+ ]
29
+ end
36
30
 
37
31
  have_library('pthread')
38
32
 
39
- DEBUG = ENV['DEBUG'] == '1'
33
+ DEBUG = ENV['DEBUG']
40
34
 
41
35
  $CPPFLAGS += " -Wall "
42
- $CPPFLAGS += " -g" #if DEBUG
36
+ $CPPFLAGS += " -g" if DEBUG
43
37
 
44
- # The order that the libs are listed matters for Linux!
45
- # to debug missing symbols you can run:
46
- # for l in `ls /usr/lib/pdfium/*.a`; do echo $l; nm $l | grep '<missing symbol>'; done
47
- # The listing with a "T" contains the symbol, the ones with a "U"
48
- # depend on it. The "U" libs must come after the "T"
49
- # LIBS=%w{
50
- # javascript bigint freetype fpdfdoc fpdftext formfiller icudata icuuc
51
- # icui18n v8_libbase v8_base v8_snapshot v8_libplatform jsapi pdfwindow fxedit
52
- # fxcrt fxcodec fpdfdoc fdrm fxge fpdfapi freetype pdfium
53
- # pthread freeimage
54
- # }
55
38
  LIBS=%w{pdfium freeimage}
56
39
 
57
40
  dir_config("libs", existing(HEADER_DIRS), existing(LIB_DIRS))
@@ -9,9 +9,9 @@
9
9
  * Document-class: PDFium::Image
10
10
  *
11
11
  * A Image can represent either a Page that
12
- * has been rendered to a Image via Page#as_image
12
+ * has been rendered to a Image via {PDFium::Page#as_image}
13
13
  *
14
- * Or an embedded image on a Page, obtained via Page#images
14
+ * Or an embedded image on a {PDFium::Page}, obtained via {PDFium::Page#images}
15
15
  */
16
16
 
17
17
 
@@ -215,10 +215,14 @@ image_as_science(VALUE self){
215
215
  }
216
216
 
217
217
  /*
218
- * call-seq:
219
- * save( file ) -> Boolean
220
- *
221
- * Save image to a file
218
+ call-seq:
219
+ save( file )
220
+
221
+ Save image to a file
222
+
223
+ @param file [String, Pathname] path to file to save image to
224
+ @return [Boolean] indicating success or failure
225
+
222
226
  */
223
227
  VALUE
224
228
  image_save(VALUE self, VALUE rb_file){
@@ -242,9 +246,10 @@ image_save(VALUE self, VALUE rb_file){
242
246
 
243
247
  /*
244
248
  call-seq:
245
- data(:format) -> Binary String
249
+ data(:format)
246
250
 
247
- Returns the binary data for the image in the specified format.
251
+ @param format [Symbol] any file extension recognized by FreeImage.
252
+ @return String containing binary data for the image in the specified format.
248
253
 
249
254
  Used in conjunction with Document.from_memory this can be used to
250
255
  render a PDF's pages completely in memory.
@@ -301,16 +306,15 @@ image_data(VALUE self, VALUE rb_format)
301
306
  return ret;
302
307
  }
303
308
 
304
- VALUE
309
+ void
305
310
  define_image_class(){
306
- VALUE RB_PDFium = RB::PDFium();
311
+ VALUE PDFium = RB::PDFium();
312
+ VALUE RB_Image = rb_define_class_under(PDFium, "Image", rb_cObject);
307
313
 
308
-
309
- VALUE RB_Image = rb_define_class_under(RB_PDFium, "Image", rb_cObject);
310
314
  rb_define_alloc_func(RB_Image, image_allocate);
311
315
  rb_define_private_method (RB_Image, "initialize", RUBY_METHOD_FUNC(image_initialize), -1);
312
316
 
313
- /* Returns the bouding box of the image as a PDFium::BoundingBox */
317
+ /* Returns the bouding box of the image as a {PDFium::BoundingBox} */
314
318
  rb_define_attr( RB_Image, "bounds", 1, 0 );
315
319
 
316
320
  /* Returns the index of the image on the page.
@@ -328,5 +332,5 @@ define_image_class(){
328
332
  rb_define_method( RB_Image, "save", RUBY_METHOD_FUNC(image_save), 1);
329
333
  rb_define_method( RB_Image, "data", RUBY_METHOD_FUNC(image_data), 1);
330
334
  rb_define_method( RB_Image, "as_science", RUBY_METHOD_FUNC(image_as_science),0);
331
- return RB_Image;
335
+
332
336
  }
@@ -22,8 +22,7 @@ static VALUE rb_sym_width;
22
22
 
23
23
  /*
24
24
  * Document-class: PDFium::Page
25
- *
26
- * A Page on a PDF Document
25
+ * A Page on a PDFium::Document
27
26
  */
28
27
  static void
29
28
  page_gc_free(PageWrapper* page)
@@ -338,9 +337,10 @@ page_each_image(VALUE self)
338
337
  PageWrapper *pw;
339
338
  Data_Get_Struct(self, PageWrapper, pw);
340
339
 
341
- auto count = pw->page()->CountObjects();
340
+ unsigned int count = pw->page()->CountObjects();
342
341
  int image_index=0;
343
- for (int index=0; index < count; index++){
342
+
343
+ for (unsigned int index=0; index < count; index++){
344
344
  CPDF_PageObject *object = pw->page()->GetObjectByIndex(index);
345
345
  if ( PDFPAGE_IMAGE == object->m_Type ){
346
346
  VALUE args[2];
@@ -360,18 +360,18 @@ page_each_image(VALUE self)
360
360
  }
361
361
 
362
362
 
363
- VALUE
363
+
364
+ void
364
365
  define_page_class()
365
366
  {
366
367
  rb_sym_width = ID2SYM(rb_intern("width"));
367
368
  rb_sym_height = ID2SYM(rb_intern("height"));
368
369
 
369
- VALUE RB_PDFium = RB::PDFium();
370
-
371
- // The Page class definition and methods
372
- VALUE RB_Page = rb_define_class_under(RB_PDFium, "Page", rb_cObject);
373
- //rb_define_alloc_func (RB_Page, page_allocate);
374
- //rb_define_private_method (RB_Page, "initialize", RUBY_METHOD_FUNC(page_initialize), -1);
370
+ VALUE PDFium = RB::PDFium();
371
+ /*
372
+ The Page class definition and methods
373
+ */
374
+ VALUE RB_Page = rb_define_class_under( PDFium, "Page", rb_cObject);
375
375
 
376
376
  rb_define_singleton_method(RB_Page, "new", RUBY_METHOD_FUNC(page_new), 0);
377
377
  rb_define_singleton_method(RB_Page, "open", RUBY_METHOD_FUNC(page_open), 2);
@@ -388,5 +388,5 @@ define_page_class()
388
388
 
389
389
  rb_define_method (RB_Page, "each_image", RUBY_METHOD_FUNC(page_each_image), 0);
390
390
 
391
- return RB_Page;
391
+
392
392
  }
@@ -3,20 +3,17 @@
3
3
 
4
4
  #include <stdlib.h>
5
5
  #include <inttypes.h>
6
- #include <fpdf_dataavail.h>
7
- #include <fpdf_ext.h>
8
- #include <fpdfformfill.h>
9
- #include <fpdftext.h>
10
- #include <fpdfview.h>
11
- #include <fpdfedit.h>
12
- #include <fpdfsave.h>
13
- #include <fpdfdoc.h>
14
6
  #include <iostream>
15
- #include <fpdfdoc/fpdf_doc.h>
16
7
 
17
- #include <fpdfapi/fpdf_render.h>
8
+ #include <fpdf_doc.h>
9
+ #include <fpdf_save.h>
10
+ #include <fpdf_edit.h>
11
+ #include <fpdf_text.h>
12
+ #include <fpdfdoc/fpdf_doc.h>
13
+ #include <fpdfapi/fpdf_page.h>
18
14
  #include <fpdfapi/fpdf_pageobj.h>
19
- #include <fpdfsdk/include/fsdk_rendercontext.h>
15
+ #include <fpdftext/fpdf_text.h>
16
+
20
17
  #include <FreeImage.h>
21
18
 
22
19
  #include "page_wrapper.h"
@@ -35,10 +32,10 @@ extern "C" {
35
32
  #define DEBUG_MSG(str) do { } while ( false )
36
33
  #endif
37
34
 
38
- VALUE define_bookmark_class();
39
- VALUE define_document_class();
40
- VALUE define_page_class();
41
- VALUE define_image_class();
35
+ void define_bookmark_class();
36
+ void define_document_class();
37
+ void define_page_class();
38
+ void define_image_class();
42
39
 
43
40
  // a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
44
41
 
@@ -1,4 +1,4 @@
1
1
  module PDFium
2
2
  # Gem version
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Stitt
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-05-31 00:00:00.000000000 Z
12
+ date: 2015-06-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -119,6 +119,7 @@ extra_rdoc_files: []
119
119
  files:
120
120
  - ".gitignore"
121
121
  - ".ruby-version"
122
+ - ".yardopts"
122
123
  - Gemfile
123
124
  - Guardfile
124
125
  - LICENSE.txt