pdfium 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.yardopts +3 -0
- data/README.md +27 -1
- data/ext/pdfium_ext/bookmark.cc +3 -7
- data/ext/pdfium_ext/document.cc +21 -22
- data/ext/pdfium_ext/extconf.rb +22 -39
- data/ext/pdfium_ext/image.cc +18 -14
- data/ext/pdfium_ext/page.cc +12 -12
- data/ext/pdfium_ext/pdfium.h +12 -15
- data/lib/pdfium/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90ada68ee9bb62299a1efee382547acc23f87284
|
4
|
+
data.tar.gz: cfde2cce4a21b04710aac0609cca271424ab25ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbb57c9b0da747032ab1969b33c0c3dbe3d1848d960cc5021bb49abb7544c953c323e47cac359788196679ce0cbbf65bdcb6d8bfec1f01ed8f4bd50cf4129366
|
7
|
+
data.tar.gz: 0f13a3d72c9f5577de9d6d479f0d48e019fe1daf640528dd09259ad8ae2b156b43d97f2f72d320853d6367b22b3f7ee66a6e3d0105aee2acd48d25b23f149e17
|
data/.gitignore
CHANGED
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -6,6 +6,33 @@ It currently has only very rudimentary PDF editing capabilities.
|
|
6
6
|
|
7
7
|
RDoc documentation is also available and the test directory has examples of usage.
|
8
8
|
|
9
|
+
## In memory render and extraction
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
# Assuming AWS::S3 is already authorized elsewhere
|
13
|
+
bucket = AWS::S3.new.buckets['my-pdfs']
|
14
|
+
|
15
|
+
pdf = PDFium::Document.from_memory bucket.objects['secrets.pdf'].read
|
16
|
+
pdf.pages.each do | page |
|
17
|
+
|
18
|
+
# render the complete page as a PNG with the height locked to 1000 pixels
|
19
|
+
# The width will be calculated to maintain the proper aspect ratio
|
20
|
+
path = "secrets/page-#{page.number}.png"
|
21
|
+
bucket.objects[path].write page.as_image(height: 1000).data(:png)
|
22
|
+
|
23
|
+
# extract and save each embedded image as a PNG
|
24
|
+
page.images.each do | image |
|
25
|
+
path = "secrets/page-#{page.number}-image-#{image.index}.png"
|
26
|
+
bucket.objects[path].write image.data(:png)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Extract text from page. Will be encoded as UTF-16LE by default
|
30
|
+
path = "secrets/page-#{page.number}-text.txt"
|
31
|
+
bucket.objects[path].write page.text
|
32
|
+
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
9
36
|
## Opening and saving
|
10
37
|
|
11
38
|
```ruby
|
@@ -28,7 +55,6 @@ pdf.metadata
|
|
28
55
|
Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
29
56
|
|
30
57
|
|
31
|
-
|
32
58
|
## Bookmarks
|
33
59
|
|
34
60
|
```ruby
|
data/ext/pdfium_ext/bookmark.cc
CHANGED
@@ -200,15 +200,12 @@ bookmark_destination(VALUE self)
|
|
200
200
|
return hash;
|
201
201
|
}
|
202
202
|
|
203
|
-
|
203
|
+
void
|
204
204
|
define_bookmark_class(){
|
205
205
|
|
206
|
-
|
207
|
-
VALUE RB_PDFium = rb_define_module("PDFium");
|
208
|
-
#endif
|
209
|
-
VALUE RB_PDFium = RB::PDFium();
|
206
|
+
VALUE PDFium = RB::PDFium();
|
210
207
|
|
211
|
-
VALUE RB_Bookmark = rb_define_class_under(
|
208
|
+
VALUE RB_Bookmark = rb_define_class_under(PDFium, "Bookmark", rb_cObject);
|
212
209
|
rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
|
213
210
|
|
214
211
|
rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
|
@@ -217,5 +214,4 @@ define_bookmark_class(){
|
|
217
214
|
rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
|
218
215
|
rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
|
219
216
|
|
220
|
-
return RB_Bookmark;
|
221
217
|
}
|
data/ext/pdfium_ext/document.cc
CHANGED
@@ -10,14 +10,8 @@
|
|
10
10
|
#include "buffer_file_write.hpp"
|
11
11
|
|
12
12
|
/////////////////////////////////////////////////////////////////////////
|
13
|
-
// The Document class
|
13
|
+
// The Document class //
|
14
14
|
/////////////////////////////////////////////////////////////////////////
|
15
|
-
/*
|
16
|
-
* Document-class: PDFium::Document
|
17
|
-
*
|
18
|
-
* A Document represents a PDF file.
|
19
|
-
*
|
20
|
-
*/
|
21
15
|
|
22
16
|
|
23
17
|
// While you might think this would free the Document object it does not
|
@@ -74,7 +68,7 @@ document_initialize(int argc, VALUE *argv, VALUE self)
|
|
74
68
|
*
|
75
69
|
* Initializes a document from a binary string.
|
76
70
|
*
|
77
|
-
* See Image#data for an example of reading a PDF directly from Amazon S3
|
71
|
+
* See {PDFium::Image#data} for an example of reading a PDF directly from Amazon S3
|
78
72
|
* and writing its images completely in memory.
|
79
73
|
*/
|
80
74
|
static VALUE
|
@@ -86,8 +80,8 @@ document_from_memory(VALUE klass, VALUE data){
|
|
86
80
|
}
|
87
81
|
|
88
82
|
/*
|
89
|
-
*
|
90
|
-
*
|
83
|
+
* @overload page_count
|
84
|
+
* page_count -> Fixnum
|
91
85
|
*
|
92
86
|
* Returns the number of pages on a Document
|
93
87
|
*/
|
@@ -98,7 +92,9 @@ document_page_count(VALUE self)
|
|
98
92
|
}
|
99
93
|
|
100
94
|
// Not documented in favor of the Document#pages[] access
|
101
|
-
/*
|
95
|
+
/*
|
96
|
+
@private
|
97
|
+
*/
|
102
98
|
static VALUE
|
103
99
|
document_page_at(VALUE self, VALUE rb_page_index)
|
104
100
|
{
|
@@ -107,7 +103,8 @@ document_page_at(VALUE self, VALUE rb_page_index)
|
|
107
103
|
|
108
104
|
/*
|
109
105
|
* call-seq:
|
110
|
-
* pages
|
106
|
+
* pages
|
107
|
+
* @return {PDFium::PageList}
|
111
108
|
*
|
112
109
|
* Returns a collection of all the pages on the document as a PDFium::PageList. Pages
|
113
110
|
* are lazily loaded.
|
@@ -123,7 +120,7 @@ document_pages(VALUE self)
|
|
123
120
|
|
124
121
|
// creates and yields a page. Not documented since all access
|
125
122
|
// should go through the PageList interface via the Document#pages method
|
126
|
-
/*
|
123
|
+
/* @private */
|
127
124
|
static VALUE
|
128
125
|
document_each_page(VALUE self)
|
129
126
|
{
|
@@ -143,7 +140,9 @@ document_each_page(VALUE self)
|
|
143
140
|
|
144
141
|
/*
|
145
142
|
* call-seq:
|
146
|
-
* bookmarks
|
143
|
+
* bookmarks
|
144
|
+
*
|
145
|
+
* @return {PDFium::BookmarkList}
|
147
146
|
*
|
148
147
|
* Retrieves the first Bookmark for a document
|
149
148
|
*/
|
@@ -160,13 +159,14 @@ document_bookmarks(VALUE self)
|
|
160
159
|
|
161
160
|
|
162
161
|
|
163
|
-
|
164
162
|
/*
|
165
163
|
* call-seq:
|
166
|
-
* save
|
164
|
+
* save(file)
|
165
|
+
*
|
166
|
+
* @param file [String, Pathname] path to save the file to
|
167
|
+
* @return [Boolean] indicating success or failure
|
167
168
|
*
|
168
|
-
*
|
169
|
-
* (yet) methods to add content to pages.
|
169
|
+
* Saves the document to the given file path
|
170
170
|
*/
|
171
171
|
static VALUE
|
172
172
|
document_save(VALUE self, VALUE _path)
|
@@ -244,13 +244,13 @@ document_metadata(int argc, VALUE *argv, VALUE self)
|
|
244
244
|
return metadata;
|
245
245
|
}
|
246
246
|
|
247
|
-
|
247
|
+
void
|
248
248
|
define_document_class()
|
249
249
|
{
|
250
|
-
VALUE
|
250
|
+
VALUE PDFium = RB::PDFium();
|
251
251
|
|
252
252
|
// The Document class definition and methods
|
253
|
-
VALUE RB_Document = rb_define_class_under(
|
253
|
+
VALUE RB_Document = rb_define_class_under(PDFium, "Document", rb_cObject);
|
254
254
|
|
255
255
|
rb_define_alloc_func(RB_Document, document_allocate);
|
256
256
|
|
@@ -264,5 +264,4 @@ define_document_class()
|
|
264
264
|
rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
|
265
265
|
rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
|
266
266
|
rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
|
267
|
-
return RB_Document;
|
268
267
|
}
|
data/ext/pdfium_ext/extconf.rb
CHANGED
@@ -5,53 +5,36 @@ def existing(dirs)
|
|
5
5
|
dirs.select{|dir| Dir.exist?(dir) }
|
6
6
|
end
|
7
7
|
|
8
|
-
LIB_DIRS=[]
|
9
|
-
# if ENV['PDFIUM']
|
10
|
-
# LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
|
11
|
-
# HEADER_DIRS = [
|
12
|
-
# "#{ENV['PDFIUM']}/fpdfsdk/include",
|
13
|
-
# "#{ENV['PDFIUM']}/core/include",
|
14
|
-
# "#{ENV['PDFIUM']}"
|
15
|
-
# ]
|
16
8
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
#
|
21
|
-
#
|
9
|
+
if ENV['PDFIUM']
|
10
|
+
LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
|
11
|
+
HEADER_DIRS = [
|
12
|
+
"#{ENV['PDFIUM']}/public",
|
13
|
+
"#{ENV['PDFIUM']}/core/include",
|
14
|
+
"#{ENV['PDFIUM']}"
|
15
|
+
]
|
22
16
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
17
|
+
else
|
18
|
+
LIB_DIRS = [
|
19
|
+
"/usr/local/lib/pdfium",
|
20
|
+
"/usr/lib/pdfium"
|
21
|
+
]
|
22
|
+
|
23
|
+
HEADER_DIRS = [
|
24
|
+
"/usr/include/pdfium",
|
25
|
+
"/usr/include/pdfium/core/include",
|
26
|
+
"/usr/local/include/pdfium",
|
27
|
+
"/usr/local/include/pdfium/core/include"
|
28
|
+
]
|
29
|
+
end
|
36
30
|
|
37
31
|
have_library('pthread')
|
38
32
|
|
39
|
-
DEBUG = ENV['DEBUG']
|
33
|
+
DEBUG = ENV['DEBUG']
|
40
34
|
|
41
35
|
$CPPFLAGS += " -Wall "
|
42
|
-
$CPPFLAGS += " -g"
|
36
|
+
$CPPFLAGS += " -g" if DEBUG
|
43
37
|
|
44
|
-
# The order that the libs are listed matters for Linux!
|
45
|
-
# to debug missing symbols you can run:
|
46
|
-
# for l in `ls /usr/lib/pdfium/*.a`; do echo $l; nm $l | grep '<missing symbol>'; done
|
47
|
-
# The listing with a "T" contains the symbol, the ones with a "U"
|
48
|
-
# depend on it. The "U" libs must come after the "T"
|
49
|
-
# LIBS=%w{
|
50
|
-
# javascript bigint freetype fpdfdoc fpdftext formfiller icudata icuuc
|
51
|
-
# icui18n v8_libbase v8_base v8_snapshot v8_libplatform jsapi pdfwindow fxedit
|
52
|
-
# fxcrt fxcodec fpdfdoc fdrm fxge fpdfapi freetype pdfium
|
53
|
-
# pthread freeimage
|
54
|
-
# }
|
55
38
|
LIBS=%w{pdfium freeimage}
|
56
39
|
|
57
40
|
dir_config("libs", existing(HEADER_DIRS), existing(LIB_DIRS))
|
data/ext/pdfium_ext/image.cc
CHANGED
@@ -9,9 +9,9 @@
|
|
9
9
|
* Document-class: PDFium::Image
|
10
10
|
*
|
11
11
|
* A Image can represent either a Page that
|
12
|
-
* has been rendered to a Image via Page#as_image
|
12
|
+
* has been rendered to a Image via {PDFium::Page#as_image}
|
13
13
|
*
|
14
|
-
* Or an embedded image on a Page, obtained via Page#images
|
14
|
+
* Or an embedded image on a {PDFium::Page}, obtained via {PDFium::Page#images}
|
15
15
|
*/
|
16
16
|
|
17
17
|
|
@@ -215,10 +215,14 @@ image_as_science(VALUE self){
|
|
215
215
|
}
|
216
216
|
|
217
217
|
/*
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
218
|
+
call-seq:
|
219
|
+
save( file )
|
220
|
+
|
221
|
+
Save image to a file
|
222
|
+
|
223
|
+
@param file [String, Pathname] path to file to save image to
|
224
|
+
@return [Boolean] indicating success or failure
|
225
|
+
|
222
226
|
*/
|
223
227
|
VALUE
|
224
228
|
image_save(VALUE self, VALUE rb_file){
|
@@ -242,9 +246,10 @@ image_save(VALUE self, VALUE rb_file){
|
|
242
246
|
|
243
247
|
/*
|
244
248
|
call-seq:
|
245
|
-
data(:format)
|
249
|
+
data(:format)
|
246
250
|
|
247
|
-
|
251
|
+
@param format [Symbol] any file extension recognized by FreeImage.
|
252
|
+
@return String containing binary data for the image in the specified format.
|
248
253
|
|
249
254
|
Used in conjunction with Document.from_memory this can be used to
|
250
255
|
render a PDF's pages completely in memory.
|
@@ -301,16 +306,15 @@ image_data(VALUE self, VALUE rb_format)
|
|
301
306
|
return ret;
|
302
307
|
}
|
303
308
|
|
304
|
-
|
309
|
+
void
|
305
310
|
define_image_class(){
|
306
|
-
VALUE
|
311
|
+
VALUE PDFium = RB::PDFium();
|
312
|
+
VALUE RB_Image = rb_define_class_under(PDFium, "Image", rb_cObject);
|
307
313
|
|
308
|
-
|
309
|
-
VALUE RB_Image = rb_define_class_under(RB_PDFium, "Image", rb_cObject);
|
310
314
|
rb_define_alloc_func(RB_Image, image_allocate);
|
311
315
|
rb_define_private_method (RB_Image, "initialize", RUBY_METHOD_FUNC(image_initialize), -1);
|
312
316
|
|
313
|
-
/* Returns the bouding box of the image as a PDFium::BoundingBox */
|
317
|
+
/* Returns the bounding box of the image as a {PDFium::BoundingBox} */
|
314
318
|
rb_define_attr( RB_Image, "bounds", 1, 0 );
|
315
319
|
|
316
320
|
/* Returns the index of the image on the page.
|
@@ -328,5 +332,5 @@ define_image_class(){
|
|
328
332
|
rb_define_method( RB_Image, "save", RUBY_METHOD_FUNC(image_save), 1);
|
329
333
|
rb_define_method( RB_Image, "data", RUBY_METHOD_FUNC(image_data), 1);
|
330
334
|
rb_define_method( RB_Image, "as_science", RUBY_METHOD_FUNC(image_as_science),0);
|
331
|
-
|
335
|
+
|
332
336
|
}
|
data/ext/pdfium_ext/page.cc
CHANGED
@@ -22,8 +22,7 @@ static VALUE rb_sym_width;
|
|
22
22
|
|
23
23
|
/*
|
24
24
|
* Document-class: PDFium::Page
|
25
|
-
*
|
26
|
-
* A Page on a PDF Document
|
25
|
+
* A Page on a PDFium::Document
|
27
26
|
*/
|
28
27
|
static void
|
29
28
|
page_gc_free(PageWrapper* page)
|
@@ -338,9 +337,10 @@ page_each_image(VALUE self)
|
|
338
337
|
PageWrapper *pw;
|
339
338
|
Data_Get_Struct(self, PageWrapper, pw);
|
340
339
|
|
341
|
-
|
340
|
+
unsigned int count = pw->page()->CountObjects();
|
342
341
|
int image_index=0;
|
343
|
-
|
342
|
+
|
343
|
+
for (unsigned int index=0; index < count; index++){
|
344
344
|
CPDF_PageObject *object = pw->page()->GetObjectByIndex(index);
|
345
345
|
if ( PDFPAGE_IMAGE == object->m_Type ){
|
346
346
|
VALUE args[2];
|
@@ -360,18 +360,18 @@ page_each_image(VALUE self)
|
|
360
360
|
}
|
361
361
|
|
362
362
|
|
363
|
-
|
363
|
+
|
364
|
+
void
|
364
365
|
define_page_class()
|
365
366
|
{
|
366
367
|
rb_sym_width = ID2SYM(rb_intern("width"));
|
367
368
|
rb_sym_height = ID2SYM(rb_intern("height"));
|
368
369
|
|
369
|
-
VALUE
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
//rb_define_private_method (RB_Page, "initialize", RUBY_METHOD_FUNC(page_initialize), -1);
|
370
|
+
VALUE PDFium = RB::PDFium();
|
371
|
+
/*
|
372
|
+
The Page class definition and methods
|
373
|
+
*/
|
374
|
+
VALUE RB_Page = rb_define_class_under( PDFium, "Page", rb_cObject);
|
375
375
|
|
376
376
|
rb_define_singleton_method(RB_Page, "new", RUBY_METHOD_FUNC(page_new), 0);
|
377
377
|
rb_define_singleton_method(RB_Page, "open", RUBY_METHOD_FUNC(page_open), 2);
|
@@ -388,5 +388,5 @@ define_page_class()
|
|
388
388
|
|
389
389
|
rb_define_method (RB_Page, "each_image", RUBY_METHOD_FUNC(page_each_image), 0);
|
390
390
|
|
391
|
-
|
391
|
+
|
392
392
|
}
|
data/ext/pdfium_ext/pdfium.h
CHANGED
@@ -3,20 +3,17 @@
|
|
3
3
|
|
4
4
|
#include <stdlib.h>
|
5
5
|
#include <inttypes.h>
|
6
|
-
#include <fpdf_dataavail.h>
|
7
|
-
#include <fpdf_ext.h>
|
8
|
-
#include <fpdfformfill.h>
|
9
|
-
#include <fpdftext.h>
|
10
|
-
#include <fpdfview.h>
|
11
|
-
#include <fpdfedit.h>
|
12
|
-
#include <fpdfsave.h>
|
13
|
-
#include <fpdfdoc.h>
|
14
6
|
#include <iostream>
|
15
|
-
#include <fpdfdoc/fpdf_doc.h>
|
16
7
|
|
17
|
-
#include <
|
8
|
+
#include <fpdf_doc.h>
|
9
|
+
#include <fpdf_save.h>
|
10
|
+
#include <fpdf_edit.h>
|
11
|
+
#include <fpdf_text.h>
|
12
|
+
#include <fpdfdoc/fpdf_doc.h>
|
13
|
+
#include <fpdfapi/fpdf_page.h>
|
18
14
|
#include <fpdfapi/fpdf_pageobj.h>
|
19
|
-
#include <
|
15
|
+
#include <fpdftext/fpdf_text.h>
|
16
|
+
|
20
17
|
#include <FreeImage.h>
|
21
18
|
|
22
19
|
#include "page_wrapper.h"
|
@@ -35,10 +32,10 @@ extern "C" {
|
|
35
32
|
#define DEBUG_MSG(str) do { } while ( false )
|
36
33
|
#endif
|
37
34
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
void define_bookmark_class();
|
36
|
+
void define_document_class();
|
37
|
+
void define_page_class();
|
38
|
+
void define_image_class();
|
42
39
|
|
43
40
|
// a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
|
44
41
|
|
data/lib/pdfium/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Stitt
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-06-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -119,6 +119,7 @@ extra_rdoc_files: []
|
|
119
119
|
files:
|
120
120
|
- ".gitignore"
|
121
121
|
- ".ruby-version"
|
122
|
+
- ".yardopts"
|
122
123
|
- Gemfile
|
123
124
|
- Guardfile
|
124
125
|
- LICENSE.txt
|