pdfium 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.yardopts +3 -0
- data/README.md +27 -1
- data/ext/pdfium_ext/bookmark.cc +3 -7
- data/ext/pdfium_ext/document.cc +21 -22
- data/ext/pdfium_ext/extconf.rb +22 -39
- data/ext/pdfium_ext/image.cc +18 -14
- data/ext/pdfium_ext/page.cc +12 -12
- data/ext/pdfium_ext/pdfium.h +12 -15
- data/lib/pdfium/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90ada68ee9bb62299a1efee382547acc23f87284
|
4
|
+
data.tar.gz: cfde2cce4a21b04710aac0609cca271424ab25ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fbb57c9b0da747032ab1969b33c0c3dbe3d1848d960cc5021bb49abb7544c953c323e47cac359788196679ce0cbbf65bdcb6d8bfec1f01ed8f4bd50cf4129366
|
7
|
+
data.tar.gz: 0f13a3d72c9f5577de9d6d479f0d48e019fe1daf640528dd09259ad8ae2b156b43d97f2f72d320853d6367b22b3f7ee66a6e3d0105aee2acd48d25b23f149e17
|
data/.gitignore
CHANGED
data/.yardopts
ADDED
data/README.md
CHANGED
@@ -6,6 +6,33 @@ It currently has only very rudimantary PDF editing capabilities.
|
|
6
6
|
|
7
7
|
RDoc documentation is also available and the test directory has examples of usage.
|
8
8
|
|
9
|
+
## In memory render and extraction
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
# Assuming AWS::S3 is already authorized elsewhere
|
13
|
+
bucket = AWS::S3.new.buckets['my-pdfs']
|
14
|
+
|
15
|
+
pdf = PDFium::Document.from_memory bucket.objects['secrets.pdf'].read
|
16
|
+
pdf.pages.each do | page |
|
17
|
+
|
18
|
+
# render the complete page as a PNG with the height locked to 1000 pixels
|
19
|
+
# The width will be calculated to maintain the proper aspect ratio
|
20
|
+
path = "secrets/page-#{page.number}.png"
|
21
|
+
bucket.objects[path].write page.as_image(height: 1000).data(:png)
|
22
|
+
|
23
|
+
# extract and save each embedded image as a PNG
|
24
|
+
page.images.each do | image |
|
25
|
+
path = "secrets/page-#{page.number}-image-#{image.index}.png"
|
26
|
+
bucket.objects[path].write image.data(:png)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Extract text from page. Will be encoded as UTF-16LE by default
|
30
|
+
path = "secrets/page-#{page.number}-text.txt"
|
31
|
+
bucket.objects[path].write page.text
|
32
|
+
|
33
|
+
end
|
34
|
+
```
|
35
|
+
|
9
36
|
## Opening and saving
|
10
37
|
|
11
38
|
```ruby
|
@@ -28,7 +55,6 @@ pdf.metadata
|
|
28
55
|
Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
29
56
|
|
30
57
|
|
31
|
-
|
32
58
|
## Bookmarks
|
33
59
|
|
34
60
|
```ruby
|
data/ext/pdfium_ext/bookmark.cc
CHANGED
@@ -200,15 +200,12 @@ bookmark_destination(VALUE self)
|
|
200
200
|
return hash;
|
201
201
|
}
|
202
202
|
|
203
|
-
|
203
|
+
void
|
204
204
|
define_bookmark_class(){
|
205
205
|
|
206
|
-
|
207
|
-
VALUE RB_PDFium = rb_define_module("PDFium");
|
208
|
-
#endif
|
209
|
-
VALUE RB_PDFium = RB::PDFium();
|
206
|
+
VALUE PDFium = RB::PDFium();
|
210
207
|
|
211
|
-
VALUE RB_Bookmark = rb_define_class_under(
|
208
|
+
VALUE RB_Bookmark = rb_define_class_under(PDFium, "Bookmark", rb_cObject);
|
212
209
|
rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
|
213
210
|
|
214
211
|
rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
|
@@ -217,5 +214,4 @@ define_bookmark_class(){
|
|
217
214
|
rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
|
218
215
|
rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
|
219
216
|
|
220
|
-
return RB_Bookmark;
|
221
217
|
}
|
data/ext/pdfium_ext/document.cc
CHANGED
@@ -10,14 +10,8 @@
|
|
10
10
|
#include "buffer_file_write.hpp"
|
11
11
|
|
12
12
|
/////////////////////////////////////////////////////////////////////////
|
13
|
-
// The Document class
|
13
|
+
// The Document class //
|
14
14
|
/////////////////////////////////////////////////////////////////////////
|
15
|
-
/*
|
16
|
-
* Document-class: PDFium::Document
|
17
|
-
*
|
18
|
-
* A Document represents a PDF file.
|
19
|
-
*
|
20
|
-
*/
|
21
15
|
|
22
16
|
|
23
17
|
// While you might think this would free the Document object it does not
|
@@ -74,7 +68,7 @@ document_initialize(int argc, VALUE *argv, VALUE self)
|
|
74
68
|
*
|
75
69
|
* Initializes a document from a binary string.
|
76
70
|
*
|
77
|
-
* See Image#data for an example of reading a PDF directly from Amazon S3
|
71
|
+
* See {PDFium::Image#data} for an example of reading a PDF directly from Amazon S3
|
78
72
|
* and writing its images completely in memory.
|
79
73
|
*/
|
80
74
|
static VALUE
|
@@ -86,8 +80,8 @@ document_from_memory(VALUE klass, VALUE data){
|
|
86
80
|
}
|
87
81
|
|
88
82
|
/*
|
89
|
-
*
|
90
|
-
*
|
83
|
+
* @overload page_count
|
84
|
+
* page_count -> Fixnum
|
91
85
|
*
|
92
86
|
* Returns the number of pages on a Document
|
93
87
|
*/
|
@@ -98,7 +92,9 @@ document_page_count(VALUE self)
|
|
98
92
|
}
|
99
93
|
|
100
94
|
// Not documented in favor of the Document#pages[] access
|
101
|
-
/*
|
95
|
+
/*
|
96
|
+
@private
|
97
|
+
*/
|
102
98
|
static VALUE
|
103
99
|
document_page_at(VALUE self, VALUE rb_page_index)
|
104
100
|
{
|
@@ -107,7 +103,8 @@ document_page_at(VALUE self, VALUE rb_page_index)
|
|
107
103
|
|
108
104
|
/*
|
109
105
|
* call-seq:
|
110
|
-
* pages
|
106
|
+
* pages
|
107
|
+
* @return {PDFium::PageList}
|
111
108
|
*
|
112
109
|
* Returns a collection of all the pages on the document as a PDFium::PageList. Pages
|
113
110
|
* are lazily loaded.
|
@@ -123,7 +120,7 @@ document_pages(VALUE self)
|
|
123
120
|
|
124
121
|
// creates and yields a page. Not documented since all access
|
125
122
|
// should go through the PageList interface via the Document#pages method
|
126
|
-
/*
|
123
|
+
/* @private */
|
127
124
|
static VALUE
|
128
125
|
document_each_page(VALUE self)
|
129
126
|
{
|
@@ -143,7 +140,9 @@ document_each_page(VALUE self)
|
|
143
140
|
|
144
141
|
/*
|
145
142
|
* call-seq:
|
146
|
-
* bookmarks
|
143
|
+
* bookmarks
|
144
|
+
*
|
145
|
+
* @return {PDFium::BookmarkList}
|
147
146
|
*
|
148
147
|
* Retrieves the first Bookmark for a document
|
149
148
|
*/
|
@@ -160,13 +159,14 @@ document_bookmarks(VALUE self)
|
|
160
159
|
|
161
160
|
|
162
161
|
|
163
|
-
|
164
162
|
/*
|
165
163
|
* call-seq:
|
166
|
-
* save
|
164
|
+
* save(file)
|
165
|
+
*
|
166
|
+
* @param file [String, Pathname] path to save the file to
|
167
|
+
* @return [Boolean] indicating success or failure
|
167
168
|
*
|
168
|
-
*
|
169
|
-
* (yet) methods to add content to pages.
|
169
|
+
* Saves the document to the given file path
|
170
170
|
*/
|
171
171
|
static VALUE
|
172
172
|
document_save(VALUE self, VALUE _path)
|
@@ -244,13 +244,13 @@ document_metadata(int argc, VALUE *argv, VALUE self)
|
|
244
244
|
return metadata;
|
245
245
|
}
|
246
246
|
|
247
|
-
|
247
|
+
void
|
248
248
|
define_document_class()
|
249
249
|
{
|
250
|
-
VALUE
|
250
|
+
VALUE PDFium = RB::PDFium();
|
251
251
|
|
252
252
|
// The Document class definition and methods
|
253
|
-
VALUE RB_Document = rb_define_class_under(
|
253
|
+
VALUE RB_Document = rb_define_class_under(PDFium, "Document", rb_cObject);
|
254
254
|
|
255
255
|
rb_define_alloc_func(RB_Document, document_allocate);
|
256
256
|
|
@@ -264,5 +264,4 @@ define_document_class()
|
|
264
264
|
rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
|
265
265
|
rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
|
266
266
|
rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
|
267
|
-
return RB_Document;
|
268
267
|
}
|
data/ext/pdfium_ext/extconf.rb
CHANGED
@@ -5,53 +5,36 @@ def existing(dirs)
|
|
5
5
|
dirs.select{|dir| Dir.exist?(dir) }
|
6
6
|
end
|
7
7
|
|
8
|
-
LIB_DIRS=[]
|
9
|
-
# if ENV['PDFIUM']
|
10
|
-
# LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
|
11
|
-
# HEADER_DIRS = [
|
12
|
-
# "#{ENV['PDFIUM']}/fpdfsdk/include",
|
13
|
-
# "#{ENV['PDFIUM']}/core/include",
|
14
|
-
# "#{ENV['PDFIUM']}"
|
15
|
-
# ]
|
16
8
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
#
|
21
|
-
#
|
9
|
+
if ENV['PDFIUM']
|
10
|
+
LIB_DIRS = [ "#{ENV['PDFIUM']}/out/Debug/lib.target" ]
|
11
|
+
HEADER_DIRS = [
|
12
|
+
"#{ENV['PDFIUM']}/public",
|
13
|
+
"#{ENV['PDFIUM']}/core/include",
|
14
|
+
"#{ENV['PDFIUM']}"
|
15
|
+
]
|
22
16
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
17
|
+
else
|
18
|
+
LIB_DIRS = [
|
19
|
+
"/usr/local/lib/pdfium",
|
20
|
+
"/usr/lib/pdfium"
|
21
|
+
]
|
22
|
+
|
23
|
+
HEADER_DIRS = [
|
24
|
+
"/usr/include/pdfium",
|
25
|
+
"/usr/include/pdfium/core/include",
|
26
|
+
"/usr/local/include/pdfium",
|
27
|
+
"/usr/local/include/pdfium/core/include"
|
28
|
+
]
|
29
|
+
end
|
36
30
|
|
37
31
|
have_library('pthread')
|
38
32
|
|
39
|
-
DEBUG = ENV['DEBUG']
|
33
|
+
DEBUG = ENV['DEBUG']
|
40
34
|
|
41
35
|
$CPPFLAGS += " -Wall "
|
42
|
-
$CPPFLAGS += " -g"
|
36
|
+
$CPPFLAGS += " -g" if DEBUG
|
43
37
|
|
44
|
-
# The order that the libs are listed matters for Linux!
|
45
|
-
# to debug missing symbols you can run:
|
46
|
-
# for l in `ls /usr/lib/pdfium/*.a`; do echo $l; nm $l | grep '<missing symbol>'; done
|
47
|
-
# The listing with a "T" contains the symbol, the ones with a "U"
|
48
|
-
# depend on it. The "U" libs must come after the "T"
|
49
|
-
# LIBS=%w{
|
50
|
-
# javascript bigint freetype fpdfdoc fpdftext formfiller icudata icuuc
|
51
|
-
# icui18n v8_libbase v8_base v8_snapshot v8_libplatform jsapi pdfwindow fxedit
|
52
|
-
# fxcrt fxcodec fpdfdoc fdrm fxge fpdfapi freetype pdfium
|
53
|
-
# pthread freeimage
|
54
|
-
# }
|
55
38
|
LIBS=%w{pdfium freeimage}
|
56
39
|
|
57
40
|
dir_config("libs", existing(HEADER_DIRS), existing(LIB_DIRS))
|
data/ext/pdfium_ext/image.cc
CHANGED
@@ -9,9 +9,9 @@
|
|
9
9
|
* Document-class: PDFium::Image
|
10
10
|
*
|
11
11
|
* An Image can represent either a Page that
|
12
|
-
* has been rendered to a Image via Page#as_image
|
12
|
+
* has been rendered to an Image via {PDFium::Page#as_image}
|
13
13
|
*
|
14
|
-
* Or an embedded image on a Page, obtained via Page#images
|
14
|
+
* Or an embedded image on a {PDFium::Page}, obtained via {PDFium::Page#images}
|
15
15
|
*/
|
16
16
|
|
17
17
|
|
@@ -215,10 +215,14 @@ image_as_science(VALUE self){
|
|
215
215
|
}
|
216
216
|
|
217
217
|
/*
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
218
|
+
call-seq:
|
219
|
+
save( file )
|
220
|
+
|
221
|
+
Save image to a file
|
222
|
+
|
223
|
+
@param file [String, Pathname] path to file to save image to
|
224
|
+
@return [Boolean] indicating success or failure
|
225
|
+
|
222
226
|
*/
|
223
227
|
VALUE
|
224
228
|
image_save(VALUE self, VALUE rb_file){
|
@@ -242,9 +246,10 @@ image_save(VALUE self, VALUE rb_file){
|
|
242
246
|
|
243
247
|
/*
|
244
248
|
call-seq:
|
245
|
-
data(:format)
|
249
|
+
data(:format)
|
246
250
|
|
247
|
-
|
251
|
+
@param format [Symbol] any file extension recognized by FreeImage.
|
252
|
+
@return String containing binary data for the image in the specified format.
|
248
253
|
|
249
254
|
Used in conjunction with Document.from_memory this can be used to
|
250
255
|
render a PDF's pages completely in memory.
|
@@ -301,16 +306,15 @@ image_data(VALUE self, VALUE rb_format)
|
|
301
306
|
return ret;
|
302
307
|
}
|
303
308
|
|
304
|
-
|
309
|
+
void
|
305
310
|
define_image_class(){
|
306
|
-
VALUE
|
311
|
+
VALUE PDFium = RB::PDFium();
|
312
|
+
VALUE RB_Image = rb_define_class_under(PDFium, "Image", rb_cObject);
|
307
313
|
|
308
|
-
|
309
|
-
VALUE RB_Image = rb_define_class_under(RB_PDFium, "Image", rb_cObject);
|
310
314
|
rb_define_alloc_func(RB_Image, image_allocate);
|
311
315
|
rb_define_private_method (RB_Image, "initialize", RUBY_METHOD_FUNC(image_initialize), -1);
|
312
316
|
|
313
|
-
/* Returns the bouding box of the image as a PDFium::BoundingBox */
|
317
|
+
/* Returns the bounding box of the image as a {PDFium::BoundingBox} */
|
314
318
|
rb_define_attr( RB_Image, "bounds", 1, 0 );
|
315
319
|
|
316
320
|
/* Returns the index of the image on the page.
|
@@ -328,5 +332,5 @@ define_image_class(){
|
|
328
332
|
rb_define_method( RB_Image, "save", RUBY_METHOD_FUNC(image_save), 1);
|
329
333
|
rb_define_method( RB_Image, "data", RUBY_METHOD_FUNC(image_data), 1);
|
330
334
|
rb_define_method( RB_Image, "as_science", RUBY_METHOD_FUNC(image_as_science),0);
|
331
|
-
|
335
|
+
|
332
336
|
}
|
data/ext/pdfium_ext/page.cc
CHANGED
@@ -22,8 +22,7 @@ static VALUE rb_sym_width;
|
|
22
22
|
|
23
23
|
/*
|
24
24
|
* Document-class: PDFium::Page
|
25
|
-
*
|
26
|
-
* A Page on a PDF Document
|
25
|
+
* A Page on a PDFium::Document
|
27
26
|
*/
|
28
27
|
static void
|
29
28
|
page_gc_free(PageWrapper* page)
|
@@ -338,9 +337,10 @@ page_each_image(VALUE self)
|
|
338
337
|
PageWrapper *pw;
|
339
338
|
Data_Get_Struct(self, PageWrapper, pw);
|
340
339
|
|
341
|
-
|
340
|
+
unsigned int count = pw->page()->CountObjects();
|
342
341
|
int image_index=0;
|
343
|
-
|
342
|
+
|
343
|
+
for (unsigned int index=0; index < count; index++){
|
344
344
|
CPDF_PageObject *object = pw->page()->GetObjectByIndex(index);
|
345
345
|
if ( PDFPAGE_IMAGE == object->m_Type ){
|
346
346
|
VALUE args[2];
|
@@ -360,18 +360,18 @@ page_each_image(VALUE self)
|
|
360
360
|
}
|
361
361
|
|
362
362
|
|
363
|
-
|
363
|
+
|
364
|
+
void
|
364
365
|
define_page_class()
|
365
366
|
{
|
366
367
|
rb_sym_width = ID2SYM(rb_intern("width"));
|
367
368
|
rb_sym_height = ID2SYM(rb_intern("height"));
|
368
369
|
|
369
|
-
VALUE
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
//rb_define_private_method (RB_Page, "initialize", RUBY_METHOD_FUNC(page_initialize), -1);
|
370
|
+
VALUE PDFium = RB::PDFium();
|
371
|
+
/*
|
372
|
+
The Page class definition and methods
|
373
|
+
*/
|
374
|
+
VALUE RB_Page = rb_define_class_under( PDFium, "Page", rb_cObject);
|
375
375
|
|
376
376
|
rb_define_singleton_method(RB_Page, "new", RUBY_METHOD_FUNC(page_new), 0);
|
377
377
|
rb_define_singleton_method(RB_Page, "open", RUBY_METHOD_FUNC(page_open), 2);
|
@@ -388,5 +388,5 @@ define_page_class()
|
|
388
388
|
|
389
389
|
rb_define_method (RB_Page, "each_image", RUBY_METHOD_FUNC(page_each_image), 0);
|
390
390
|
|
391
|
-
|
391
|
+
|
392
392
|
}
|
data/ext/pdfium_ext/pdfium.h
CHANGED
@@ -3,20 +3,17 @@
|
|
3
3
|
|
4
4
|
#include <stdlib.h>
|
5
5
|
#include <inttypes.h>
|
6
|
-
#include <fpdf_dataavail.h>
|
7
|
-
#include <fpdf_ext.h>
|
8
|
-
#include <fpdfformfill.h>
|
9
|
-
#include <fpdftext.h>
|
10
|
-
#include <fpdfview.h>
|
11
|
-
#include <fpdfedit.h>
|
12
|
-
#include <fpdfsave.h>
|
13
|
-
#include <fpdfdoc.h>
|
14
6
|
#include <iostream>
|
15
|
-
#include <fpdfdoc/fpdf_doc.h>
|
16
7
|
|
17
|
-
#include <
|
8
|
+
#include <fpdf_doc.h>
|
9
|
+
#include <fpdf_save.h>
|
10
|
+
#include <fpdf_edit.h>
|
11
|
+
#include <fpdf_text.h>
|
12
|
+
#include <fpdfdoc/fpdf_doc.h>
|
13
|
+
#include <fpdfapi/fpdf_page.h>
|
18
14
|
#include <fpdfapi/fpdf_pageobj.h>
|
19
|
-
#include <
|
15
|
+
#include <fpdftext/fpdf_text.h>
|
16
|
+
|
20
17
|
#include <FreeImage.h>
|
21
18
|
|
22
19
|
#include "page_wrapper.h"
|
@@ -35,10 +32,10 @@ extern "C" {
|
|
35
32
|
#define DEBUG_MSG(str) do { } while ( false )
|
36
33
|
#endif
|
37
34
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
void define_bookmark_class();
|
36
|
+
void define_document_class();
|
37
|
+
void define_page_class();
|
38
|
+
void define_image_class();
|
42
39
|
|
43
40
|
// a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
|
44
41
|
|
data/lib/pdfium/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nathan Stitt
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-06-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -119,6 +119,7 @@ extra_rdoc_files: []
|
|
119
119
|
files:
|
120
120
|
- ".gitignore"
|
121
121
|
- ".ruby-version"
|
122
|
+
- ".yardopts"
|
122
123
|
- Gemfile
|
123
124
|
- Guardfile
|
124
125
|
- LICENSE.txt
|