pdfium 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +9 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +68 -0
- data/Rakefile +62 -0
- data/ext/pdfium_ext/bookmark.cc +221 -0
- data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
- data/ext/pdfium_ext/document.cc +268 -0
- data/ext/pdfium_ext/document.h +66 -0
- data/ext/pdfium_ext/document_wrapper.cc +63 -0
- data/ext/pdfium_ext/document_wrapper.h +56 -0
- data/ext/pdfium_ext/extconf.h +3 -0
- data/ext/pdfium_ext/extconf.rb +76 -0
- data/ext/pdfium_ext/image.cc +332 -0
- data/ext/pdfium_ext/page.cc +392 -0
- data/ext/pdfium_ext/page.h +5 -0
- data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
- data/ext/pdfium_ext/page_object_wrapper.h +27 -0
- data/ext/pdfium_ext/page_wrapper.cc +86 -0
- data/ext/pdfium_ext/page_wrapper.h +37 -0
- data/ext/pdfium_ext/pdfium.cc +115 -0
- data/ext/pdfium_ext/pdfium.h +69 -0
- data/lib/pdfium.rb +15 -0
- data/lib/pdfium/bookmark_list.rb +28 -0
- data/lib/pdfium/bounding_box.rb +16 -0
- data/lib/pdfium/image_list.rb +21 -0
- data/lib/pdfium/page_list.rb +36 -0
- data/lib/pdfium/page_sizes.rb +7 -0
- data/lib/pdfium/version.rb +4 -0
- data/pdfium.gemspec +29 -0
- data/test/benchmark-docsplit.rb +41 -0
- data/test/bookmarks_list_spec.rb +26 -0
- data/test/bookmarks_spec.rb +34 -0
- data/test/debug.rb +24 -0
- data/test/document_spec.rb +49 -0
- data/test/image_list_spec.rb +18 -0
- data/test/image_spec.rb +53 -0
- data/test/page_list_spec.rb +24 -0
- data/test/page_spec.rb +91 -0
- data/test/pdfium_spec.rb +15 -0
- data/test/profile.rb +29 -0
- data/test/spec_helper.rb +31 -0
- metadata +202 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
#ifndef __DOCUMENT_H__
|
2
|
+
#define __DOCUMENT_H__
|
3
|
+
|
4
|
+
#include "pdfium.h"
|
5
|
+
|
6
|
+
/* // https://redmine.ruby-lang.org/issues/6292 */
|
7
|
+
|
8
|
+
// Ruby will call dispose of the Page and DOCUMENT objects in whatever order it
|
9
|
+
// wishes to. This is problematic for PDFium. If a Document object is closed while
|
10
|
+
// pages are still open, and then the Pages are closed later, it will segfault.
|
11
|
+
//
|
12
|
+
// To work around this, the Document keeps a reference to all open Pages. When a Page
|
13
|
+
// is deleted, its destructor calls releasePage on the Document object.
|
14
|
+
//
|
15
|
+
// It does this so that it can keep track of all the Page objects that are in use
|
16
|
+
// and only release its memory and close its FPDF_DOCUMENT once all the pages
|
17
|
+
// are no longer used.
|
18
|
+
//
|
19
|
+
// It's reasonably safe to do so, since we only use the Document/Page classes
|
20
|
+
// from Ruby and control how they're called.
|
21
|
+
//
|
22
|
+
// A future improvement would be to write a custom smart pointer supervisor class
|
23
|
+
// to manage the interplay between the Document and Page objects
|
24
|
+
//
|
25
|
+
// Beware! As a side effect of the above, this class calls "delete this" on itself.
|
26
|
+
// Therefore it must be allocated on the heap (i.e. "new Document"),
|
27
|
+
// and not as part of an array (not new[]).
|
28
|
+
//
|
29
|
+
|
30
|
+
/* class Document { */
|
31
|
+
|
32
|
+
/* public: */
|
33
|
+
/* static void Initialize(); */
|
34
|
+
|
35
|
+
/* // an empty constructor. Ruby's allocate object doesn't have any arguments */
|
36
|
+
/* // so the Document allocation needs to function in the same manner */
|
37
|
+
/* Document(); */
|
38
|
+
|
39
|
+
/* bool initialize(const char* file); */
|
40
|
+
|
41
|
+
/* bool isValid(); */
|
42
|
+
|
43
|
+
/* int pageCount(); */
|
44
|
+
|
45
|
+
/* // Page* getPage(int page_index); */
|
46
|
+
/* void retain(Page* page); */
|
47
|
+
/* void release(Page* page); */
|
48
|
+
|
49
|
+
/* void markUnused(); */
|
50
|
+
|
51
|
+
/* FPDF_DOCUMENT pdfiumDocument(); */
|
52
|
+
|
53
|
+
/* ~Document(); */
|
54
|
+
|
55
|
+
/* private: */
|
56
|
+
|
57
|
+
|
58
|
+
/* std::unordered_set<Page*> _pages; */
|
59
|
+
/* bool _in_use; */
|
60
|
+
/* FPDF_DOCUMENT _document; */
|
61
|
+
/* void maybeKillSelf(); */
|
62
|
+
|
63
|
+
/* }; */
|
64
|
+
|
65
|
+
|
66
|
+
#endif // __DOCUMENT_H__
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#include "document_wrapper.h"
|
2
|
+
|
3
|
+
|
4
|
+
// Creates a wrapper that holds no PDF document yet and is flagged as in use,
// so it will not destroy itself until markUnused() has been called.
DocumentWrapper::DocumentWrapper()
    : document(nullptr),
      _in_use(true)
{
}
|
7
|
+
|
8
|
+
// Mark the Document object as no longer in use. At this
|
9
|
+
// point it may be freed once all Pages are also not
|
10
|
+
// in use
|
11
|
+
void
|
12
|
+
DocumentWrapper::markUnused(){
|
13
|
+
_in_use = false;
|
14
|
+
this->maybeKillSelf();
|
15
|
+
}
|
16
|
+
|
17
|
+
|
18
|
+
// a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
|
19
|
+
CPDF_Document*
|
20
|
+
RB2DOC(VALUE self) {
|
21
|
+
DocumentWrapper* doc;
|
22
|
+
Data_Get_Struct(self, DocumentWrapper, doc);
|
23
|
+
return doc->document;
|
24
|
+
}
|
25
|
+
|
26
|
+
|
27
|
+
// Retains a copy of the page, which will prevent
|
28
|
+
// the Document from being destroyed until the release()
|
29
|
+
// is called for the page
|
30
|
+
void
|
31
|
+
DocumentWrapper::retain(void *child){
|
32
|
+
_children.insert(child);
|
33
|
+
}
|
34
|
+
|
35
|
+
// Marks a page as no longer in use.
|
36
|
+
// Removes the page from the _pages set,
|
37
|
+
// If the page was the last one in the set and it's now empty,
|
38
|
+
// and the Document object is also no longer in use, then destroys the Document object
|
39
|
+
void
|
40
|
+
DocumentWrapper::release(void *child){
|
41
|
+
DEBUG_MSG("Release Doc Child: " << child);
|
42
|
+
_children.erase(child);
|
43
|
+
this->maybeKillSelf();
|
44
|
+
}
|
45
|
+
|
46
|
+
|
47
|
+
// Test if the Document is not in use and there are no pages
|
48
|
+
// that are still retained
|
49
|
+
void
|
50
|
+
DocumentWrapper::maybeKillSelf(){
|
51
|
+
DEBUG_MSG("Testing if killing Document: " << this);
|
52
|
+
if (_children.empty() && !_in_use){
|
53
|
+
DEBUG_MSG("Killing..");
|
54
|
+
delete this;
|
55
|
+
}
|
56
|
+
}
|
57
|
+
|
58
|
+
|
59
|
+
// Closes the wrapped PDF document, if there is one.
DocumentWrapper::~DocumentWrapper(){
    // the pdf might not have opened successfully, in which case there is
    // nothing to close
    if (!document){
        return;
    }
    FPDF_CloseDocument(document);
}
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#ifndef __DOCUMENT_WRAPPER_H__
|
2
|
+
#define __DOCUMENT_WRAPPER_H__
|
3
|
+
extern "C" {
|
4
|
+
#include "ruby.h"
|
5
|
+
}
|
6
|
+
|
7
|
+
#include "pdfium.h"
|
8
|
+
#include "fpdf_ext.h"
|
9
|
+
#include <unordered_set>
|
10
|
+
/*
|
11
|
+
+---------------------------------------------------------------------------------------------+
|
12
|
+
| |
|
13
|
+
| This is a lightweight wrapper that mediates between |
|
14
|
+
| CPDF_Document and all the other types that depend on it |
|
15
|
+
| |
|
16
|
+
| Ruby will dispose of the Document and its child objects in whatever order it |
|
17
|
+
| wishes to. This is problematic for PDFium. For instance, if a Document object |
|
18
|
+
| is closed while pages are still open, and then the Pages are closed later, it will segfault. |
|
19
|
+
| |
|
20
|
+
| To work around this, when a dependent object is created, it calls retain on |
|
21
|
+
| the DocumentWrapper. Then when Ruby garbage collects a dependent object, |
|
22
|
+
| its destructor calls release on the Document object. |
|
23
|
+
| |
|
24
|
+
| When Ruby GC's the DocumentWrapper itself, it checks to see if any objects are still |
|
25
|
+
| retained. If there are, it does not delete itself until they are all removed. |
|
26
|
+
| |
|
27
|
+
| Beware! As a side effect of the above, this class calls "delete this" on itself. |
|
28
|
+
| Therefore it must be allocated on the heap. (i.e. "new DocumentWrapper"), |
|
29
|
+
| and not as part of an array (not new[]). |
|
30
|
+
+---------------------------------------------------------------------------------------------+
|
31
|
+
*/
|
32
|
+
|
33
|
+
class DocumentWrapper {
|
34
|
+
|
35
|
+
public:
|
36
|
+
DocumentWrapper();
|
37
|
+
|
38
|
+
void retain(void *child);
|
39
|
+
void release(void *child);
|
40
|
+
|
41
|
+
void markUnused();
|
42
|
+
|
43
|
+
~DocumentWrapper();
|
44
|
+
|
45
|
+
CPDF_Document *document;
|
46
|
+
|
47
|
+
private:
|
48
|
+
|
49
|
+
bool _in_use;
|
50
|
+
void maybeKillSelf();
|
51
|
+
|
52
|
+
std::unordered_set<void*> _children;
|
53
|
+
|
54
|
+
};
|
55
|
+
|
56
|
+
#endif // __DOCUMENT_WRAPPER_H__
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require "mkmf"
|
2
|
+
require 'rbconfig'
|
3
|
+
|
4
|
+
# Returns the entries of +dirs+ that exist as directories on this machine,
# in their original order.
def existing(dirs)
  dirs.select { |path| File.directory?(path) }
end
|
7
|
+
|
8
|
+
# Candidate locations of the PDFium libraries and headers, in priority order:
#
#   1. a PDFium source checkout named by the PDFIUM environment variable,
#   2. the usual system-wide install locations,
#   3. the build-machine checkout that used to be hard-coded here (kept last
#      so existing setups keep working).
#
# Candidates that do not exist are filtered out with existing() before the
# lists are handed to dir_config, so listing extra locations is harmless.
pdfium_root = ENV['PDFIUM'].to_s.strip
pdfium_root = nil if pdfium_root.empty?

LIB_DIRS = [
  (File.join(pdfium_root, "out", "Debug", "lib.target") if pdfium_root),
  "/usr/local/lib/pdfium",
  "/usr/lib/pdfium"
].compact

HEADER_DIRS = [
  (File.join(pdfium_root, "fpdfsdk", "include") if pdfium_root),
  (File.join(pdfium_root, "core", "include") if pdfium_root),
  pdfium_root,
  "/usr/include/pdfium",
  "/usr/local/include/pdfium",
  "/usr/local/include/pdfium/fpdfsdk/include",
  "/usr/local/include/pdfium/core/include",
  "/home/nas/pdfium/deb-package/pdfium/fpdfsdk/include",
  "/home/nas/pdfium/deb-package/pdfium/core/include",
  "/home/nas/pdfium/deb-package/pdfium"
].compact
|
36
|
+
|
37
|
+
have_library('pthread')

# Set DEBUG=1 in the environment to compile the extension with -DDEBUG=1
DEBUG = ENV['DEBUG'] == '1'

$CPPFLAGS += " -Wall "
# Debug symbols are currently emitted for every build; the "if DEBUG"
# guard is deliberately left disabled.
$CPPFLAGS += " -g" #if DEBUG

# The order that the libs are listed matters for Linux!
# To debug missing symbols you can run:
#   for l in `ls /usr/lib/pdfium/*.a`; do echo $l; nm $l | grep '<missing symbol>'; done
# The listing with a "T" contains the symbol, the ones with a "U"
# depend on it. The "U" libs must come after the "T".
#
# Full list for linking against the individual static libraries:
#   javascript bigint freetype fpdfdoc fpdftext formfiller icudata icuuc
#   icui18n v8_libbase v8_base v8_snapshot v8_libplatform jsapi pdfwindow fxedit
#   fxcrt fxcodec fpdfdoc fdrm fxge fpdfapi freetype pdfium
#   pthread freeimage
LIBS = %w{pdfium freeimage}

dir_config("libs", existing(HEADER_DIRS), existing(LIB_DIRS))

# Every listed library is mandatory; give up on the first one that is missing.
LIBS.each do |name|
  abort "Didn't find library lib#{name}" unless have_library(name)
end

if RUBY_PLATFORM =~ /darwin/
  have_library('objc')
  FRAMEWORKS = %w{AppKit CoreFoundation}
  FRAMEWORKS.each { |framework| $LDFLAGS << " -framework #{framework}" }
else
  $CPPFLAGS += " -fPIC"
end

$CPPFLAGS += " -std=c++11"
$defs.push "-DDEBUG=1" if DEBUG

create_header

create_makefile "pdfium_ext"
|
@@ -0,0 +1,332 @@
|
|
1
|
+
#include "pdfium.h"
|
2
|
+
#include <cstdint>
|
3
|
+
#include <cstring>
|
4
|
+
|
5
|
+
/////////////////////////////////////////////////////////////////////////
|
6
|
+
// The Image class
|
7
|
+
/////////////////////////////////////////////////////////////////////////
|
8
|
+
/*
|
9
|
+
* Document-class: PDFium::Image
|
10
|
+
*
|
11
|
+
* A Image can represent either a Page that
|
12
|
+
* has been rendered to a Image via Page#as_image
|
13
|
+
*
|
14
|
+
* Or an embedded image on a Page, obtained via Page#images
|
15
|
+
*/
|
16
|
+
|
17
|
+
|
18
|
+
static void
|
19
|
+
image_gc_free(ImageWrapper *img) {
|
20
|
+
delete img;
|
21
|
+
}
|
22
|
+
|
23
|
+
// Allocator for the Image class: creates a fresh ImageWrapper on the heap and
// wraps it in a Ruby object of class +klass+. No mark function is registered;
// image_gc_free() deletes the wrapper when the Ruby object is freed.
static VALUE
image_allocate(VALUE klass) {
    ImageWrapper *wrapper = new ImageWrapper;
    VALUE rb_image = Data_Wrap_Struct(klass, NULL, image_gc_free, wrapper);
    return rb_image;
}
|
28
|
+
|
29
|
+
/*
|
30
|
+
* call-seq:
|
31
|
+
* Image.new -> Image
|
32
|
+
*
|
33
|
+
* Initializes an image
|
34
|
+
*/
|
35
|
+
VALUE
|
36
|
+
image_initialize(int argc, VALUE *argv, VALUE self){
|
37
|
+
|
38
|
+
VALUE rb_page, rb_options;
|
39
|
+
rb_scan_args(argc,argv,"1:", &rb_page, &rb_options);
|
40
|
+
|
41
|
+
ImageWrapper *img;
|
42
|
+
Data_Get_Struct(self, ImageWrapper, img);
|
43
|
+
|
44
|
+
PageWrapper *pg;
|
45
|
+
Data_Get_Struct(rb_page, PageWrapper, pg);
|
46
|
+
img->wrap(pg);
|
47
|
+
|
48
|
+
if (NIL_P(rb_options)){
|
49
|
+
rb_options=rb_hash_new();
|
50
|
+
}
|
51
|
+
VALUE rb_width = RB::get_option(rb_options, "width");
|
52
|
+
if (!NIL_P(rb_width)){
|
53
|
+
rb_iv_set(self, "@width", rb_width);
|
54
|
+
}
|
55
|
+
VALUE rb_height = RB::get_option(rb_options, "height");
|
56
|
+
if (!NIL_P(rb_height)){
|
57
|
+
rb_iv_set(self, "@height", rb_height);
|
58
|
+
}
|
59
|
+
|
60
|
+
VALUE rb_bounds = RB::get_option(rb_options, "bounds");
|
61
|
+
VALUE rb_page_index = RB::get_option(rb_options, "index");
|
62
|
+
if (!NIL_P(rb_page_index)){
|
63
|
+
rb_iv_set(self, "@index", rb_page_index);
|
64
|
+
}
|
65
|
+
VALUE pg_object_index = RB::get_option(rb_options, "object_index");
|
66
|
+
if (NIL_P(rb_bounds) && NIL_P(pg_object_index)){
|
67
|
+
rb_raise(rb_eArgError, ":bounds or :object_index must be given");
|
68
|
+
}
|
69
|
+
if (!NIL_P(pg_object_index)){
|
70
|
+
img->page_object_index = FIX2INT(pg_object_index);
|
71
|
+
}
|
72
|
+
if (NIL_P(rb_bounds)){
|
73
|
+
CPDF_ImageObject *image = (CPDF_ImageObject*)pg->
|
74
|
+
page()->GetObjectByIndex(img->page_object_index);
|
75
|
+
|
76
|
+
|
77
|
+
VALUE bounds_args[4];
|
78
|
+
bounds_args[0] = rb_float_new( 0 );
|
79
|
+
bounds_args[1] = rb_float_new( image->m_pImage->GetPixelWidth() );
|
80
|
+
bounds_args[2] = rb_float_new( 0 );
|
81
|
+
bounds_args[3] = rb_float_new( image->m_pImage->GetPixelHeight() );
|
82
|
+
rb_bounds = rb_class_new_instance( 4, bounds_args, RB::BoundingBox() );
|
83
|
+
if (NIL_P(rb_height)){
|
84
|
+
rb_iv_set(self, "@height", INT2FIX(image->m_pImage->GetPixelHeight()) );
|
85
|
+
}
|
86
|
+
if (NIL_P(rb_width)){
|
87
|
+
rb_iv_set(self, "@width", INT2FIX(image->m_pImage->GetPixelWidth()) );
|
88
|
+
}
|
89
|
+
|
90
|
+
}
|
91
|
+
rb_iv_set(self, "@bounds", rb_bounds);
|
92
|
+
|
93
|
+
return Qnil;
|
94
|
+
}
|
95
|
+
|
96
|
+
// Renders +page+ into a new width x height FreeImage bitmap on a white
// background.
//
// When +format+ is FIF_GIF the result is color-quantized, since GIF needs a
// palettized image. Returns NULL when the dimensions are not positive, the
// pixel buffer would be too large, or any allocation fails. The caller owns
// the returned bitmap and must release it with FreeImage_Unload.
FIBITMAP*
render_page(CPDF_Page *page, FREE_IMAGE_FORMAT format, int width, int height){
    // Validate before allocating or rendering anything. Rejecting
    // non-positive sizes also keeps the divisions below well defined.
    if (!page || width <= 0 || height <= 0){
        return NULL;
    }
    if (width > INT_MAX / height){
        return NULL;
    }

    // Create the bitmap: BGR format, PDFium allocates the buffer itself
    FPDF_BITMAP bitmap = FPDFBitmap_CreateEx(width, height, FPDFBitmap_BGR, NULL, 0);
    if (!bitmap){
        return NULL;
    }

    // fill all pixels with white for the background color
    FPDFBitmap_FillRect(bitmap, 0, 0, width, height, 0xFFFFFFFF);

    // args are: bitmap, page, start_x, start_y, size_x, size_y, rotation, and flags
    // flags are:
    //      0 for normal display, or combination of flags defined below
    //   0x01 Set if annotations are to be rendered
    //   0x02 Set if using text rendering optimized for LCD display
    //   0x04 Set if you don't want to use GDI+
    FPDF_RenderPageBitmap(bitmap, page, 0, 0, width, height, 0, 0);

    // The stride holds the width of one row in bytes. It may be larger than
    // the pixel data of a row because rows can be padded.
    const int stride = FPDFBitmap_GetStride(bitmap);

    // Same limit as "stride * height > INT_MAX / 3", but written as a
    // division so the check itself cannot overflow.
    if (stride < 0 || stride > (INT_MAX / 3) / height){
        FPDFBitmap_Destroy(bitmap);
        return NULL;
    }

    // FreeImage_ConvertFromRawBits copies the pixels into a bitmap of its
    // own, so the PDFium bitmap is released right afterwards instead of
    // being leaked.
    FIBITMAP *image = FreeImage_ConvertFromRawBits((BYTE*)FPDFBitmap_GetBuffer(bitmap),
                                                   width, height, stride, 24,
                                                   0xFF0000, 0x00FF00, 0x0000FF, true);
    FPDFBitmap_Destroy(bitmap);
    if (!image){
        return NULL;
    }

    if (FIF_GIF != format){
        return image;
    }

    FIBITMAP *gif = FreeImage_ColorQuantize(image, FIQ_WUQUANT);
    FreeImage_Unload(image);
    return gif;
}
|
144
|
+
|
145
|
+
|
146
|
+
// Copies the pixels of the embedded image at page object +index+ into a new
// FreeImage bitmap with the same width, height and bit depth.
//
// +format+, +width+ and +height+ are accepted for symmetry with render_page
// but are not used: the image is always returned at its native size.
// Returns NULL when the page object or its pixel data cannot be loaded, or
// when the bitmap cannot be allocated. The caller owns the returned bitmap.
FIBITMAP*
render_image(ImageWrapper *image_wrapper, int index, FREE_IMAGE_FORMAT format, int width, int height){
    CPDF_Page *page = image_wrapper->page_wrapper->page();
    CPDF_ImageObject *image_obj = static_cast<CPDF_ImageObject*>(page->GetObjectByIndex(index));
    if (!image_obj || !image_obj->m_pImage){
        return NULL;
    }

    // NOTE(review): LoadDIBSource is assumed to hand over a newly allocated
    // object that the caller must delete -- confirm against the PDFium
    // revision the extension is built with.
    CFX_DIBSource *dib = image_obj->m_pImage->LoadDIBSource();
    if (!dib){
        return NULL;
    }

    const int cols = dib->GetWidth();
    const int rows = dib->GetHeight();
    const int bpp  = dib->GetBPP();

    FIBITMAP *bmp = FreeImage_Allocate(cols, rows, bpp);
    if (bmp){
        // Round up to whole bytes so that 1 and 4 bit images are copied too
        const size_t byte_width = (static_cast<size_t>(cols) * bpp + 7) / 8;
        for (int row = 0; row < rows; row++){
            const void *src_row = dib->GetScanline(row);
            if (!src_row){
                continue;
            }
            // FreeImage stores scanlines bottom-up, hence the flipped index
            std::memcpy(FreeImage_GetScanLine(bmp, rows - row - 1), src_row, byte_width);
        }
    }

    delete dib;
    return bmp;
}
|
159
|
+
|
160
|
+
// Renders the Image +self+ to a FreeImage bitmap.
//
// @width / @height are read from the Ruby object; a value that is not a
// Fixnum (including nil) counts as 0. A RuntimeError is raised only when
// both end up as 0. An Image without a page object index (-1) is a whole
// page rendering, anything else is an embedded image.
FIBITMAP*
render_to_bitmap(VALUE self, FREE_IMAGE_FORMAT format){
    const VALUE rb_width = rb_iv_get(self, "@width");
    const int width = (T_FIXNUM == TYPE(rb_width)) ? FIX2INT(rb_width) : 0;

    const VALUE rb_height = rb_iv_get(self, "@height");
    const int height = (T_FIXNUM == TYPE(rb_height)) ? FIX2INT(rb_height) : 0;

    // we must have at least one of width or height
    if (0 == width && 0 == height){
        rb_raise(rb_eRuntimeError, "Both height and width must be set to a number");
    }

    ImageWrapper *img;
    Data_Get_Struct(self, ImageWrapper, img);

    const bool whole_page = (-1 == img->page_object_index);
    if (!whole_page){
        return render_image(img, img->page_object_index, format, width, height);
    }
    return render_page(img->page_wrapper->page(), format, width, height);
}
|
186
|
+
|
187
|
+
|
188
|
+
/*
|
189
|
+
* call-seq:
|
190
|
+
* as_science -> ImageScience instance
|
191
|
+
*
|
192
|
+
* Converts to an ImageScience bitmap and returns it.
|
193
|
+
* The ImageScience (https://github.com/seattlerb/image_science) library
|
194
|
+
* must be installed and required before calling this method or
|
195
|
+
* a NameError: uninitialized constant ImageScience exception will be raised.
|
196
|
+
|
197
|
+
=== Example
|
198
|
+
pdf = PDFium::Document.new( "test.pdf" )
|
199
|
+
page = pdf.pages.first
|
200
|
+
page.images.each do | image |
|
201
|
+
image.as_science.cropped_thumbnail 100 do |thumb|
|
202
|
+
thumb.save "image-#{image.index}-cropped.png"
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
*/
|
207
|
+
VALUE
|
208
|
+
image_as_science(VALUE self){
|
209
|
+
VALUE RBImageScience = rb_const_get(rb_cObject, rb_intern("ImageScience"));
|
210
|
+
FIBITMAP *image = render_to_bitmap(self, FIF_BMP);
|
211
|
+
|
212
|
+
VALUE instance = Data_Wrap_Struct(RBImageScience, NULL, NULL, image);
|
213
|
+
rb_iv_set(instance, "@file_type", INT2FIX(FIF_BMP));
|
214
|
+
return instance;
|
215
|
+
}
|
216
|
+
|
217
|
+
/*
|
218
|
+
* call-seq:
|
219
|
+
* save( file ) -> Boolean
|
220
|
+
*
|
221
|
+
* Save image to a file
|
222
|
+
*/
|
223
|
+
VALUE
|
224
|
+
image_save(VALUE self, VALUE rb_file){
|
225
|
+
// figure out the desired format from the file extension
|
226
|
+
const char* file = StringValuePtr(rb_file);
|
227
|
+
|
228
|
+
FREE_IMAGE_FORMAT format = FreeImage_GetFIFFromFilename(file);
|
229
|
+
if((format == FIF_UNKNOWN) || !FreeImage_FIFSupportsWriting(format)) {
|
230
|
+
rb_raise(rb_eArgError, "Unable to write to a image of that type");
|
231
|
+
}
|
232
|
+
|
233
|
+
FIBITMAP *image = render_to_bitmap(self, format);
|
234
|
+
|
235
|
+
bool success = FreeImage_Save(format, image, file, 0);
|
236
|
+
|
237
|
+
// unload the image
|
238
|
+
FreeImage_Unload(image);
|
239
|
+
|
240
|
+
return success ? Qtrue : Qfalse;
|
241
|
+
}
|
242
|
+
|
243
|
+
/*
|
244
|
+
call-seq:
|
245
|
+
data(:format) -> Binary String
|
246
|
+
|
247
|
+
Returns the binary data for the image in the specified format.
|
248
|
+
|
249
|
+
Used in conjuction with Document.from_memory this can render be used to
|
250
|
+
render a PDF's pages completely in memory.
|
251
|
+
|
252
|
+
=== Example rendering a PDF to AWS without hitting disk
|
253
|
+
# Assuming AWS::S3 is already authorized elsewhere
|
254
|
+
bucket = AWS::S3.new.buckets['my-pdfs']
|
255
|
+
pdf = PDFium::Document.from_memory bucket.objects['secrets.pdf'].read
|
256
|
+
pdf.pages.each do | page |
|
257
|
+
path = "secrets/page-#{page.number}.jpg"
|
258
|
+
bucket.objects[path].write page.as_image(height: 1000).data(:jpg)
|
259
|
+
page.images.each do | image |
|
260
|
+
path = "secrets/page-#{page.number}-image-#{image.index}.png"
|
261
|
+
bucket.objects[path].write image.data(:png)
|
262
|
+
end
|
263
|
+
end
|
264
|
+
|
265
|
+
*/
|
266
|
+
static VALUE
|
267
|
+
image_data(VALUE self, VALUE rb_format)
|
268
|
+
{
|
269
|
+
VALUE path = RB::to_s(rb_format);
|
270
|
+
const char* type = StringValuePtr(path);
|
271
|
+
|
272
|
+
FREE_IMAGE_FORMAT format = FreeImage_GetFIFFromFilename(type);
|
273
|
+
if((format == FIF_UNKNOWN) || !FreeImage_FIFSupportsWriting(format)) {
|
274
|
+
rb_raise(rb_eArgError, "Unable to write to a image of that type");
|
275
|
+
}
|
276
|
+
|
277
|
+
FIBITMAP *image = render_to_bitmap(self, format);
|
278
|
+
|
279
|
+
FIMEMORY *mem = FreeImage_OpenMemory(); //mem_buffer, buf.st_size);
|
280
|
+
|
281
|
+
bool success = FreeImage_SaveToMemory(format, image, mem, 0);
|
282
|
+
if (!success){
|
283
|
+
FreeImage_Unload(image);
|
284
|
+
rb_raise(rb_eArgError, "Unable to save image to memory buffer");
|
285
|
+
}
|
286
|
+
|
287
|
+
long size = FreeImage_TellMemory(mem);
|
288
|
+
|
289
|
+
char *buffer = ALLOC_N(char, size);
|
290
|
+
|
291
|
+
FreeImage_SeekMemory(mem, 0L, SEEK_SET);
|
292
|
+
|
293
|
+
FreeImage_ReadMemory(buffer, size, 1, mem);
|
294
|
+
|
295
|
+
FreeImage_Unload(image);
|
296
|
+
|
297
|
+
FreeImage_CloseMemory(mem);
|
298
|
+
VALUE ret = rb_str_new(buffer, size);
|
299
|
+
|
300
|
+
xfree(buffer);
|
301
|
+
return ret;
|
302
|
+
}
|
303
|
+
|
304
|
+
// Defines the PDFium::Image class with its allocator, attributes and
// methods, and returns the new class object.
VALUE
define_image_class(){
    VALUE pdfium_module = RB::PDFium();
    VALUE image_class   = rb_define_class_under(pdfium_module, "Image", rb_cObject);

    rb_define_alloc_func(image_class, image_allocate);
    rb_define_private_method(image_class, "initialize", RUBY_METHOD_FUNC(image_initialize), -1);

    /* Returns the bounding box of the image as a PDFium::BoundingBox */
    rb_define_attr(image_class, "bounds", 1, 0);

    /* Returns the index of the image on the page.
     * Note: The index method is only provided as a convenience method.
     * It has no relation to the position of images on the page.
     * Do not depend on the top-left image being index 0, even if it often is. */
    rb_define_attr(image_class, "index", 1, 0);

    /* Height of the image in pixels (Fixnum) */
    rb_define_attr(image_class, "height", 1, 1);

    /* Width of the image in pixels (Fixnum) */
    rb_define_attr(image_class, "width", 1, 1);

    rb_define_method(image_class, "save",       RUBY_METHOD_FUNC(image_save),       1);
    rb_define_method(image_class, "data",       RUBY_METHOD_FUNC(image_data),       1);
    rb_define_method(image_class, "as_science", RUBY_METHOD_FUNC(image_as_science), 0);

    return image_class;
}
|