pdfium 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
4
- data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
3
+ metadata.gz: 90ada68ee9bb62299a1efee382547acc23f87284
4
+ data.tar.gz: cfde2cce4a21b04710aac0609cca271424ab25ce
5
5
  SHA512:
6
- metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
7
- data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
6
+ metadata.gz: fbb57c9b0da747032ab1969b33c0c3dbe3d1848d960cc5021bb49abb7544c953c323e47cac359788196679ce0cbbf65bdcb6d8bfec1f01ed8f4bd50cf4129366
7
+ data.tar.gz: 0f13a3d72c9f5577de9d6d479f0d48e019fe1daf640528dd09259ad8ae2b156b43d97f2f72d320853d6367b22b3f7ee66a6e3d0105aee2acd48d25b23f149e17
data/.gitignore CHANGED
@@ -1,4 +1,7 @@
1
1
  .bundle
2
+ .yardoc
3
+ doc
4
+ pkg
2
5
  Gemfile.lock
3
6
  *.bundle
4
7
  *.so
@@ -0,0 +1,3 @@
1
+ lib/pdfium*
2
+ ext/pdfium_ext/*.cc
3
+ --no-private
data/README.md CHANGED
@@ -6,6 +6,33 @@ It currently has only very rudimentary PDF editing capabilities.
6
6
 
7
7
  RDoc documentation is also available and the test directory has examples of usage.
8
8
 
9
+ ## In memory render and extraction
10
+
11
+ ```ruby
12
+ # Assuming AWS::S3 is already authorized elsewhere
13
+ bucket = AWS::S3.new.buckets['my-pdfs']
14
+
15
+ pdf = PDFium::Document.from_memory bucket.objects['secrets.pdf'].read
16
+ pdf.pages.each do | page |
17
+
18
+ # render the complete page as a PNG with the height locked to 1000 pixels
19
+ # The width will be calculated to maintain the proper aspect ratio
20
+ path = "secrets/page-#{page.number}.png"
21
+ bucket.objects[path].write page.as_image(height: 1000).data(:png)
22
+
23
+ # extract and save each embedded image as a PNG
24
+ page.images.each do | image |
25
+ path = "secrets/page-#{page.number}-image-#{image.index}.png"
26
+ bucket.objects[path].write image.data(:png)
27
+ end
28
+
29
+ # Extract text from page. Will be encoded as UTF-16LE by default
30
+ path = "secrets/page-#{page.number}-text.txt"
31
+ bucket.objects[path].write page.text
32
+
33
+ end
34
+ ```
35
+
9
36
  ## Opening and saving
10
37
 
11
38
  ```ruby
@@ -28,7 +55,6 @@ pdf.metadata
28
55
  Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
29
56
 
30
57
 
31
-
32
58
  ## Bookmarks
33
59
 
34
60
  ```ruby
@@ -200,15 +200,12 @@ bookmark_destination(VALUE self)
200
200
  return hash;
201
201
  }
202
202
 
203
- VALUE
203
+ void
204
204
  define_bookmark_class(){
205
205
 
206
- #if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
207
- VALUE RB_PDFium = rb_define_module("PDFium");
208
- #endif
209
- VALUE RB_PDFium = RB::PDFium();
206
+ VALUE PDFium = RB::PDFium();
210
207
 
211
- VALUE RB_Bookmark = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
208
+ VALUE RB_Bookmark = rb_define_class_under(PDFium, "Bookmark", rb_cObject);
212
209
  rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
213
210
 
214
211
  rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
@@ -217,5 +214,4 @@ define_bookmark_class(){
217
214
  rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
218
215
  rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
219
216
 
220
- return RB_Bookmark;
221
217
  }
@@ -10,14 +10,8 @@
10
10
  #include "buffer_file_write.hpp"
11
11
 
12
12
  /////////////////////////////////////////////////////////////////////////
13
- // The Document class //
13
+ // The Document class //
14
14
  /////////////////////////////////////////////////////////////////////////
15
- /*
16
- * Document-class: PDFium::Document
17
- *
18
- * A Document represents a PDF file.
19
- *
20
- */
21
15
 
22
16
 
23
17
  // While you might think this would free the Document object it does not
@@ -74,7 +68,7 @@ document_initialize(int argc, VALUE *argv, VALUE self)
74
68
  *
75
69
  * Initializes a document from a binary string.
76
70
  *
77
- * See Image#data for an example of reading a PDF directly from Amazon S3
71
+ * See {PDFium::Image#data} for an example of reading a PDF directly from Amazon S3
78
72
  * and writing its images completely in memory.
79
73
  */
80
74
  static VALUE
@@ -86,8 +80,8 @@ document_from_memory(VALUE klass, VALUE data){
86
80
  }
87
81
 
88
82
  /*
89
- * call-seq:
90
- * page_count -> Fixnum
83
+ * @overload page_count
84
+ * page_count -> Fixnum
91
85
  *
92
86
  * Returns the number of pages on a Document
93
87
  */
@@ -98,7 +92,9 @@ document_page_count(VALUE self)
98
92
  }
99
93
 
100
94
  // Not documented in favor of the Document#pages[] access
101
- /* :nodoc: */
95
+ /*
96
+ @private
97
+ */
102
98
  static VALUE
103
99
  document_page_at(VALUE self, VALUE rb_page_index)
104
100
  {
@@ -107,7 +103,8 @@ document_page_at(VALUE self, VALUE rb_page_index)
107
103
 
108
104
  /*
109
105
  * call-seq:
110
- * pages -> PDFium::PageList
106
+ * pages
107
+ * @return {PDFium::PageList}
111
108
  *
112
109
  * Returns a collection of all the pages on the document as a PDFium::PageList. Pages
113
110
  * are lazily loaded.
@@ -123,7 +120,7 @@ document_pages(VALUE self)
123
120
 
124
121
  // creates and yields a page. Not documented since all access
125
122
  // should go through the PageList interface via the Document#pages method
126
- /* :nodoc: */
123
+ /* @private */
127
124
  static VALUE
128
125
  document_each_page(VALUE self)
129
126
  {
@@ -143,7 +140,9 @@ document_each_page(VALUE self)
143
140
 
144
141
  /*
145
142
  * call-seq:
146
- * bookmarks -> Bookmarks
143
+ * bookmarks
144
+ *
145
+ * @return {PDFium::BookmarkList}
147
146
  *
148
147
  * Retrieves the first Bookmark for a document
149
148
  */
@@ -160,13 +159,14 @@ document_bookmarks(VALUE self)
160
159
 
161
160
 
162
161
 
163
-
164
162
  /*
165
163
  * call-seq:
166
- * save -> Document
164
+ * save(file)
165
+ *
166
+ * @param file [String, Pathname] path to save the file to
167
+ * @return [Boolean] indicating success or failure
167
168
  *
168
- * Saves document to a PDF file. This method isn't terribly useful since there aren't
169
- * (yet) methods to add content to pages.
169
+ * Saves the document to a PDF file.
170
170
  */
171
171
  static VALUE
172
172
  document_save(VALUE self, VALUE _path)
@@ -244,13 +244,13 @@ document_metadata(int argc, VALUE *argv, VALUE self)
244
244
  return metadata;
245
245
  }
246
246
 
247
- VALUE
247
+ void
248
248
  define_document_class()
249
249
  {
250
- VALUE RB_PDFium = RB::PDFium();
250
+ VALUE PDFium = RB::PDFium();
251
251
 
252
252
  // The Document class definition and methods
253
- VALUE RB_Document = rb_define_class_under(RB_PDFium, "Document", rb_cObject);
253
+ VALUE RB_Document = rb_define_class_under(PDFium, "Document", rb_cObject);
254
254
 
255
255
  rb_define_alloc_func(RB_Document, document_allocate);
256
256
 
@@ -264,5 +264,4 @@ define_document_class()
264
264
  rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
265
265
  rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
266
266
  rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
267
- return RB_Document;
268
267
  }
@@ -5,53 +5,36 @@ def existing(dirs)
5
5
  dirs.select{|dir| Dir.exist?(dir) }
6
6
  end
7
7
 
8
- LIB_DIRS=[]
9
- # if ENV['PDFIUM']
10
- # LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
11
- # HEADER_DIRS = [
12
- # "#{ENV['PDFIUM']}/fpdfsdk/include",
13
- # "#{ENV['PDFIUM']}/core/include",
14
- # "#{ENV['PDFIUM']}"
15
- # ]
16
8
 
17
- # else
18
- # LIB_DIRS = [
19
- # "/usr/local/lib/pdfium",
20
- # "/usr/lib/pdfium"
21
- # ]
9
+ if ENV['PDFIUM']
10
+ LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
11
+ HEADER_DIRS = [
12
+ "#{ENV['PDFIUM']}/public",
13
+ "#{ENV['PDFIUM']}/core/include",
14
+ "#{ENV['PDFIUM']}"
15
+ ]
22
16
 
23
- # HEADER_DIRS = [
24
- # "/usr/include/pdfium",
25
- # "/usr/local/include/pdfium",
26
- # "/usr/local/include/pdfium/fpdfsdk/include",
27
- # "/usr/local/include/pdfium/core/include"
28
- # ]
29
- # end
30
-
31
- HEADER_DIRS=[
32
- "/home/nas/pdfium/deb-package/pdfium/fpdfsdk/include",
33
- "/home/nas/pdfium/deb-package/pdfium/core/include",
34
- "/home/nas/pdfium/deb-package/pdfium"
35
- ]
17
+ else
18
+ LIB_DIRS = [
19
+ "/usr/local/lib/pdfium",
20
+ "/usr/lib/pdfium"
21
+ ]
22
+
23
+ HEADER_DIRS = [
24
+ "/usr/include/pdfium",
25
+ "/usr/include/pdfium/core/include",
26
+ "/usr/local/include/pdfium",
27
+ "/usr/local/include/pdfium/core/include"
28
+ ]
29
+ end
36
30
 
37
31
  have_library('pthread')
38
32
 
39
- DEBUG = ENV['DEBUG'] == '1'
33
+ DEBUG = ENV['DEBUG']
40
34
 
41
35
  $CPPFLAGS += " -Wall "
42
- $CPPFLAGS += " -g" #if DEBUG
36
+ $CPPFLAGS += " -g" if DEBUG
43
37
 
44
- # The order that the libs are listed matters for Linux!
45
- # to debug missing symbols you can run:
46
- # for l in `ls /usr/lib/pdfium/*.a`; do echo $l; nm $l | grep '<missing symbol>'; done
47
- # The listing with a "T" contains the symbol, the ones with a "U"
48
- # depend on it. The "U" libs must come after the "T"
49
- # LIBS=%w{
50
- # javascript bigint freetype fpdfdoc fpdftext formfiller icudata icuuc
51
- # icui18n v8_libbase v8_base v8_snapshot v8_libplatform jsapi pdfwindow fxedit
52
- # fxcrt fxcodec fpdfdoc fdrm fxge fpdfapi freetype pdfium
53
- # pthread freeimage
54
- # }
55
38
  LIBS=%w{pdfium freeimage}
56
39
 
57
40
  dir_config("libs", existing(HEADER_DIRS), existing(LIB_DIRS))
@@ -9,9 +9,9 @@
9
9
  * Document-class: PDFium::Image
10
10
  *
11
11
  * An Image can represent either a Page that
12
- * has been rendered to a Image via Page#as_image
12
+ * has been rendered to an Image via {PDFium::Page#as_image}
13
13
  *
14
- * Or an embedded image on a Page, obtained via Page#images
14
+ * Or an embedded image on a {PDFium::Page}, obtained via {PDFium::Page#images}
15
15
  */
16
16
 
17
17
 
@@ -215,10 +215,14 @@ image_as_science(VALUE self){
215
215
  }
216
216
 
217
217
  /*
218
- * call-seq:
219
- * save( file ) -> Boolean
220
- *
221
- * Save image to a file
218
+ call-seq:
219
+ save( file )
220
+
221
+ Save image to a file
222
+
223
+ @param file [String, Pathname] path to file to save image to
224
+ @return [Boolean] indicating success or failure
225
+
222
226
  */
223
227
  VALUE
224
228
  image_save(VALUE self, VALUE rb_file){
@@ -242,9 +246,10 @@ image_save(VALUE self, VALUE rb_file){
242
246
 
243
247
  /*
244
248
  call-seq:
245
- data(:format) -> Binary String
249
+ data(:format)
246
250
 
247
- Returns the binary data for the image in the specified format.
251
+ @param format [Symbol] any file extension recognized by FreeImage.
252
+ @return String containing binary data for the image in the specified format.
248
253
 
249
254
  Used in conjunction with Document.from_memory this can be used to
250
255
  render a PDF's pages completely in memory.
@@ -301,16 +306,15 @@ image_data(VALUE self, VALUE rb_format)
301
306
  return ret;
302
307
  }
303
308
 
304
- VALUE
309
+ void
305
310
  define_image_class(){
306
- VALUE RB_PDFium = RB::PDFium();
311
+ VALUE PDFium = RB::PDFium();
312
+ VALUE RB_Image = rb_define_class_under(PDFium, "Image", rb_cObject);
307
313
 
308
-
309
- VALUE RB_Image = rb_define_class_under(RB_PDFium, "Image", rb_cObject);
310
314
  rb_define_alloc_func(RB_Image, image_allocate);
311
315
  rb_define_private_method (RB_Image, "initialize", RUBY_METHOD_FUNC(image_initialize), -1);
312
316
 
313
- /* Returns the bouding box of the image as a PDFium::BoundingBox */
317
+ /* Returns the bounding box of the image as a {PDFium::BoundingBox} */
314
318
  rb_define_attr( RB_Image, "bounds", 1, 0 );
315
319
 
316
320
  /* Returns the index of the image on the page.
@@ -328,5 +332,5 @@ define_image_class(){
328
332
  rb_define_method( RB_Image, "save", RUBY_METHOD_FUNC(image_save), 1);
329
333
  rb_define_method( RB_Image, "data", RUBY_METHOD_FUNC(image_data), 1);
330
334
  rb_define_method( RB_Image, "as_science", RUBY_METHOD_FUNC(image_as_science),0);
331
- return RB_Image;
335
+
332
336
  }
@@ -22,8 +22,7 @@ static VALUE rb_sym_width;
22
22
 
23
23
  /*
24
24
  * Document-class: PDFium::Page
25
- *
26
- * A Page on a PDF Document
25
+ * A Page on a PDFium::Document
27
26
  */
28
27
  static void
29
28
  page_gc_free(PageWrapper* page)
@@ -338,9 +337,10 @@ page_each_image(VALUE self)
338
337
  PageWrapper *pw;
339
338
  Data_Get_Struct(self, PageWrapper, pw);
340
339
 
341
- auto count = pw->page()->CountObjects();
340
+ unsigned int count = pw->page()->CountObjects();
342
341
  int image_index=0;
343
- for (int index=0; index < count; index++){
342
+
343
+ for (unsigned int index=0; index < count; index++){
344
344
  CPDF_PageObject *object = pw->page()->GetObjectByIndex(index);
345
345
  if ( PDFPAGE_IMAGE == object->m_Type ){
346
346
  VALUE args[2];
@@ -360,18 +360,18 @@ page_each_image(VALUE self)
360
360
  }
361
361
 
362
362
 
363
- VALUE
363
+
364
+ void
364
365
  define_page_class()
365
366
  {
366
367
  rb_sym_width = ID2SYM(rb_intern("width"));
367
368
  rb_sym_height = ID2SYM(rb_intern("height"));
368
369
 
369
- VALUE RB_PDFium = RB::PDFium();
370
-
371
- // The Page class definition and methods
372
- VALUE RB_Page = rb_define_class_under(RB_PDFium, "Page", rb_cObject);
373
- //rb_define_alloc_func (RB_Page, page_allocate);
374
- //rb_define_private_method (RB_Page, "initialize", RUBY_METHOD_FUNC(page_initialize), -1);
370
+ VALUE PDFium = RB::PDFium();
371
+ /*
372
+ The Page class definition and methods
373
+ */
374
+ VALUE RB_Page = rb_define_class_under( PDFium, "Page", rb_cObject);
375
375
 
376
376
  rb_define_singleton_method(RB_Page, "new", RUBY_METHOD_FUNC(page_new), 0);
377
377
  rb_define_singleton_method(RB_Page, "open", RUBY_METHOD_FUNC(page_open), 2);
@@ -388,5 +388,5 @@ define_page_class()
388
388
 
389
389
  rb_define_method (RB_Page, "each_image", RUBY_METHOD_FUNC(page_each_image), 0);
390
390
 
391
- return RB_Page;
391
+
392
392
  }
@@ -3,20 +3,17 @@
3
3
 
4
4
  #include <stdlib.h>
5
5
  #include <inttypes.h>
6
- #include <fpdf_dataavail.h>
7
- #include <fpdf_ext.h>
8
- #include <fpdfformfill.h>
9
- #include <fpdftext.h>
10
- #include <fpdfview.h>
11
- #include <fpdfedit.h>
12
- #include <fpdfsave.h>
13
- #include <fpdfdoc.h>
14
6
  #include <iostream>
15
- #include <fpdfdoc/fpdf_doc.h>
16
7
 
17
- #include <fpdfapi/fpdf_render.h>
8
+ #include <fpdf_doc.h>
9
+ #include <fpdf_save.h>
10
+ #include <fpdf_edit.h>
11
+ #include <fpdf_text.h>
12
+ #include <fpdfdoc/fpdf_doc.h>
13
+ #include <fpdfapi/fpdf_page.h>
18
14
  #include <fpdfapi/fpdf_pageobj.h>
19
- #include <fpdfsdk/include/fsdk_rendercontext.h>
15
+ #include <fpdftext/fpdf_text.h>
16
+
20
17
  #include <FreeImage.h>
21
18
 
22
19
  #include "page_wrapper.h"
@@ -35,10 +32,10 @@ extern "C" {
35
32
  #define DEBUG_MSG(str) do { } while ( false )
36
33
  #endif
37
34
 
38
- VALUE define_bookmark_class();
39
- VALUE define_document_class();
40
- VALUE define_page_class();
41
- VALUE define_image_class();
35
+ void define_bookmark_class();
36
+ void define_document_class();
37
+ void define_page_class();
38
+ void define_image_class();
42
39
 
43
40
  // a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
44
41
 
@@ -1,4 +1,4 @@
1
1
  module PDFium
2
2
  # Gem version
3
- VERSION = "0.0.1"
3
+ VERSION = "0.0.2"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Stitt
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-05-31 00:00:00.000000000 Z
12
+ date: 2015-06-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -119,6 +119,7 @@ extra_rdoc_files: []
119
119
  files:
120
120
  - ".gitignore"
121
121
  - ".ruby-version"
122
+ - ".yardopts"
122
123
  - Gemfile
123
124
  - Guardfile
124
125
  - LICENSE.txt