pdfium 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +9 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +68 -0
- data/Rakefile +62 -0
- data/ext/pdfium_ext/bookmark.cc +221 -0
- data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
- data/ext/pdfium_ext/document.cc +268 -0
- data/ext/pdfium_ext/document.h +66 -0
- data/ext/pdfium_ext/document_wrapper.cc +63 -0
- data/ext/pdfium_ext/document_wrapper.h +56 -0
- data/ext/pdfium_ext/extconf.h +3 -0
- data/ext/pdfium_ext/extconf.rb +76 -0
- data/ext/pdfium_ext/image.cc +332 -0
- data/ext/pdfium_ext/page.cc +392 -0
- data/ext/pdfium_ext/page.h +5 -0
- data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
- data/ext/pdfium_ext/page_object_wrapper.h +27 -0
- data/ext/pdfium_ext/page_wrapper.cc +86 -0
- data/ext/pdfium_ext/page_wrapper.h +37 -0
- data/ext/pdfium_ext/pdfium.cc +115 -0
- data/ext/pdfium_ext/pdfium.h +69 -0
- data/lib/pdfium.rb +15 -0
- data/lib/pdfium/bookmark_list.rb +28 -0
- data/lib/pdfium/bounding_box.rb +16 -0
- data/lib/pdfium/image_list.rb +21 -0
- data/lib/pdfium/page_list.rb +36 -0
- data/lib/pdfium/page_sizes.rb +7 -0
- data/lib/pdfium/version.rb +4 -0
- data/pdfium.gemspec +29 -0
- data/test/benchmark-docsplit.rb +41 -0
- data/test/bookmarks_list_spec.rb +26 -0
- data/test/bookmarks_spec.rb +34 -0
- data/test/debug.rb +24 -0
- data/test/document_spec.rb +49 -0
- data/test/image_list_spec.rb +18 -0
- data/test/image_spec.rb +53 -0
- data/test/page_list_spec.rb +24 -0
- data/test/page_spec.rb +91 -0
- data/test/pdfium_spec.rb +15 -0
- data/test/profile.rb +29 -0
- data/test/spec_helper.rb +31 -0
- metadata +202 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "page_wrapper.h"
|
2
|
+
#include "pdfium.h"
|
3
|
+
|
4
|
+
// Extracts the PageWrapper stored inside the Ruby object `self` and
// returns the CPDF_Page it hands out through PageWrapper::page().
CPDF_Page*
RB2PG(VALUE self){
    PageWrapper *wrapper;
    Data_Get_Struct(self, PageWrapper, wrapper);
    CPDF_Page *pdf_page = wrapper->page();
    return pdf_page;
}
|
10
|
+
|
11
|
+
// Creates a wrapper for page number `page` of the document held by `doc`.
// The wrapper starts out in use with no page loaded, and registers
// itself with the document wrapper through retain().
PageWrapper::PageWrapper(DocumentWrapper* doc, int page)
    : document_wrapper(doc),
      _page_number(page),
      _in_use(true),
      _page(NULL)
{
    document_wrapper->retain(this);
}
|
16
|
+
|
17
|
+
void
|
18
|
+
PageWrapper::unload(){
|
19
|
+
FPDF_ClosePage(_page);
|
20
|
+
_page = NULL;
|
21
|
+
}
|
22
|
+
|
23
|
+
|
24
|
+
// Mark the page object as no longer in use. At this
|
25
|
+
// point it may be freed once all children are also not
|
26
|
+
// in use
|
27
|
+
void
|
28
|
+
PageWrapper::markUnused(){
|
29
|
+
_in_use = false;
|
30
|
+
this->unload();
|
31
|
+
this->maybeKillSelf();
|
32
|
+
}
|
33
|
+
|
34
|
+
void
|
35
|
+
PageWrapper::setPage(CPDF_Page *page){
|
36
|
+
if (_page){
|
37
|
+
this->unload();
|
38
|
+
}
|
39
|
+
// unload won't work if the page has children
|
40
|
+
if (!_page){
|
41
|
+
this->_page=page;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
void
|
46
|
+
PageWrapper::retain(void *obj){
|
47
|
+
_children.insert(obj);
|
48
|
+
}
|
49
|
+
|
50
|
+
// Returns the wrapped page, loading it from the document with
// FPDF_LoadPage the first time it is requested (or after an unload).
// Whatever FPDF_LoadPage returns is cached and handed back as-is.
CPDF_Page *
PageWrapper::page(){
    if (_page){
        return _page;
    }
    _page = static_cast<CPDF_Page *>(FPDF_LoadPage(document_wrapper->document, _page_number));
    return _page;
}
|
57
|
+
|
58
|
+
void
|
59
|
+
PageWrapper::release(void *obj){
|
60
|
+
_children.erase(obj);
|
61
|
+
this->maybeKillSelf();
|
62
|
+
|
63
|
+
}
|
64
|
+
|
65
|
+
// Test if the Document is not in use and there are no pages
|
66
|
+
// that are still retained
|
67
|
+
void
|
68
|
+
PageWrapper::maybeKillSelf(){
|
69
|
+
bool killable = _children.empty() && !_in_use;
|
70
|
+
DEBUG_MSG("Testing if killing Page: " << this << " " << killable );
|
71
|
+
if (killable){
|
72
|
+
delete this;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
// void
|
77
|
+
// PageWrapper::wrap(CPDF_Page *pg, DocumentWrapper *doc_wrapper){
|
78
|
+
// this->page = pg;
|
79
|
+
// this->doc = doc_wrapper;
|
80
|
+
// this->doc->retain(this);
|
81
|
+
// }
|
82
|
+
|
83
|
+
|
84
|
+
// Closes the underlying page (through unload) when the wrapper dies.
PageWrapper::~PageWrapper(){
    unload();
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#ifndef __PAGE_WRAPPER_H__
|
2
|
+
#define __PAGE_WRAPPER_H__
|
3
|
+
|
4
|
+
#include "pdfium.h"
|
5
|
+
#include <unordered_set>
|
6
|
+
|
7
|
+
class DocumentWrapper;
|
8
|
+
|
9
|
+
class PageWrapper {
|
10
|
+
|
11
|
+
public:
|
12
|
+
|
13
|
+
PageWrapper(DocumentWrapper* doc, int page);
|
14
|
+
void wrap(CPDF_Page *page, DocumentWrapper *doc_wrapper);
|
15
|
+
~PageWrapper();
|
16
|
+
|
17
|
+
void markUnused();
|
18
|
+
|
19
|
+
void retain(void *obj);
|
20
|
+
void release(void *obj);
|
21
|
+
|
22
|
+
CPDF_Page *page();
|
23
|
+
void unload();
|
24
|
+
void setPage(CPDF_Page *pg);
|
25
|
+
|
26
|
+
DocumentWrapper *document_wrapper;
|
27
|
+
int _page_number;
|
28
|
+
private:
|
29
|
+
|
30
|
+
bool _in_use;
|
31
|
+
CPDF_Page *_page;
|
32
|
+
std::unordered_set<void*> _children;
|
33
|
+
void maybeKillSelf();
|
34
|
+
};
|
35
|
+
|
36
|
+
|
37
|
+
#endif // __PAGE_WRAPPER_H__
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <string>
|
3
|
+
|
4
|
+
#include "pdfium.h"
|
5
|
+
extern "C" {
|
6
|
+
#include "ruby/encoding.h"
|
7
|
+
}
|
8
|
+
|
9
|
+
#include <assert.h>
|
10
|
+
#include <list>
|
11
|
+
#include <string>
|
12
|
+
#include <utility>
|
13
|
+
#include <vector>
|
14
|
+
|
15
|
+
#include "document.h"
|
16
|
+
#include "page.h"
|
17
|
+
|
18
|
+
// // file local variables that are set in Init_pdfium_ext function
|
19
|
+
// // and are referenced elsewhere in file
|
20
|
+
// static VALUE rb_page; // Ruby definition of the Page class
|
21
|
+
|
22
|
+
const char*
|
23
|
+
PDFiumLastErrorString() {
|
24
|
+
switch(FPDF_GetLastError()){
|
25
|
+
case 0:
|
26
|
+
return "No Error";
|
27
|
+
case 1:
|
28
|
+
return "unknown error";
|
29
|
+
case 2:
|
30
|
+
return "file not found or could not be opened";
|
31
|
+
case 3:
|
32
|
+
return "file not in PDF format or corrupted";
|
33
|
+
case 4:
|
34
|
+
return "password required or incorrect password";
|
35
|
+
case 5:
|
36
|
+
return "unsupported security scheme";
|
37
|
+
case 6:
|
38
|
+
return "page not found or content error";
|
39
|
+
default:
|
40
|
+
return "error code unknown";
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
// Fetches the constant called `name` from the Ruby PDFium module.
VALUE _get(const char *name){
    const VALUE pdfium_module = RB::PDFium();
    const ID constant_id = rb_intern(name);
    return rb_const_get(pdfium_module, constant_id);
}
|
48
|
+
// Returns the top-level Ruby constant PDFium. It is looked up on Object
// during the first call and the result is reused afterwards.
VALUE RB::PDFium(){
    static const VALUE pdfium_module = rb_const_get(rb_cObject, rb_intern("PDFium"));
    return pdfium_module;
}
|
52
|
+
// Returns PDFium::Page, resolved on first use and cached.
VALUE RB::Page(){
    static const VALUE klass = _get("Page");
    return klass;
}
|
56
|
+
// Returns PDFium::Image, resolved on first use and cached.
VALUE RB::Image(){
    static const VALUE klass = _get("Image");
    return klass;
}
|
60
|
+
// Returns PDFium::BoundingBox, resolved on first use and cached.
VALUE RB::BoundingBox(){
    static const VALUE klass = _get("BoundingBox");
    return klass;
}
|
64
|
+
// Returns PDFium::Bookmark, resolved on first use and cached.
VALUE RB::Bookmark(){
    static const VALUE klass = _get("Bookmark");
    return klass;
}
|
68
|
+
// Returns PDFium::Document, resolved on first use and cached.
VALUE RB::Document(){
    static const VALUE klass = _get("Document");
    return klass;
}
|
72
|
+
// Returns PDFium::BookmarkList, resolved on first use and cached.
VALUE RB::BookmarkList(){
    static const VALUE klass = _get("BookmarkList");
    return klass;
}
|
76
|
+
// Returns PDFium::PageList, resolved on first use and cached.
VALUE RB::PageList(){
    static const VALUE klass = _get("PageList");
    return klass;
}
|
80
|
+
// Returns PDFium::ImageList, resolved on first use and cached.
VALUE RB::ImageList(){
    static const VALUE klass = _get("ImageList");
    return klass;
}
|
84
|
+
|
85
|
+
// Builds a Ruby String, tagged with the UTF-16LE encoding, from the
// UTF-16LE encoded bytes of the PDFium wide string `wstr`.
VALUE
RB::to_string(const CFX_WideString &wstr){
    static rb_encoding *utf16le = rb_enc_find("UTF-16LE");
    const CFX_ByteString encoded = wstr.UTF16LE_Encode();
    // Two bytes are copied for every character counted by GetLength().
    const long byte_length = wstr.GetLength() * 2;
    return rb_enc_str_new(encoded.c_str(), byte_length, utf16le);
}
|
90
|
+
|
91
|
+
// Invokes `to_s` on `obj` and returns whatever the call produced.
// NOTE(review): the return type is ID although rb_funcall yields a
// VALUE; it is kept to match the declaration in pdfium.h. Confirm
// whether VALUE was intended.
ID RB::to_s(VALUE obj){
    static ID to_s_id = rb_intern("to_s");
    const VALUE result = rb_funcall(obj, to_s_id, 0);
    return result;
}
|
95
|
+
|
96
|
+
// Invokes the Ruby method `class` on `obj` and returns its result.
VALUE RB::type(VALUE obj){
    static ID class_id = rb_intern("class");
    const VALUE klass = rb_funcall(obj, class_id, 0);
    return klass;
}
|
100
|
+
|
101
|
+
// Looks up `key`, converted to a Ruby Symbol, in the Hash `options`
// and returns the value rb_hash_aref reports for it.
VALUE RB::get_option(VALUE options, const std::string &key){
    const ID key_id = rb_intern(key.c_str());
    const VALUE symbol = ID2SYM(key_id);
    return rb_hash_aref(options, symbol);
}
|
104
|
+
|
105
|
+
extern "C" void
|
106
|
+
Init_pdfium_ext()
|
107
|
+
{
|
108
|
+
// Initialize the PDFium library
|
109
|
+
FPDF_InitLibrary();
|
110
|
+
|
111
|
+
define_document_class();
|
112
|
+
define_page_class();
|
113
|
+
define_bookmark_class();
|
114
|
+
define_image_class();
|
115
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#ifndef __PDFIUM_H__
|
2
|
+
#define __PDFIUM_H__
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <inttypes.h>
|
6
|
+
#include <fpdf_dataavail.h>
|
7
|
+
#include <fpdf_ext.h>
|
8
|
+
#include <fpdfformfill.h>
|
9
|
+
#include <fpdftext.h>
|
10
|
+
#include <fpdfview.h>
|
11
|
+
#include <fpdfedit.h>
|
12
|
+
#include <fpdfsave.h>
|
13
|
+
#include <fpdfdoc.h>
|
14
|
+
#include <iostream>
|
15
|
+
#include <fpdfdoc/fpdf_doc.h>
|
16
|
+
|
17
|
+
#include <fpdfapi/fpdf_render.h>
|
18
|
+
#include <fpdfapi/fpdf_pageobj.h>
|
19
|
+
#include <fpdfsdk/include/fsdk_rendercontext.h>
|
20
|
+
#include <FreeImage.h>
|
21
|
+
|
22
|
+
#include "page_wrapper.h"
|
23
|
+
#include "page_object_wrapper.h"
|
24
|
+
#include "document_wrapper.h"
|
25
|
+
|
26
|
+
// auto generated by mkmf
|
27
|
+
#include "extconf.h"
|
28
|
+
extern "C" {
|
29
|
+
#include "ruby.h"
|
30
|
+
}
|
31
|
+
|
32
|
+
// DEBUG_MSG(expr) writes `expr` followed by std::endl to std::cout when
// DEBUG is defined, and expands to an empty statement otherwise.
// `expr` is deliberately spliced in without parentheses so that callers
// can pass a stream chain such as "text" << value << "more".
#if defined(DEBUG)
  #define DEBUG_MSG(str) do { std::cout << str << std::endl; } while( false )
#else
  #define DEBUG_MSG(str) do { } while ( false )
#endif
|
37
|
+
|
38
|
+
VALUE define_bookmark_class();
|
39
|
+
VALUE define_document_class();
|
40
|
+
VALUE define_page_class();
|
41
|
+
VALUE define_image_class();
|
42
|
+
|
43
|
+
// Utility functions that extract the wrapped PDFium object (page, document or image) from its Ruby/C++ wrapper
|
44
|
+
|
45
|
+
CPDF_Page* RB2PG(VALUE RB_Page);
|
46
|
+
CPDF_Document* RB2DOC(VALUE RB_DocumentWrapper);
|
47
|
+
CPDF_ImageObject* RB2IMG(VALUE RB_Image);
|
48
|
+
|
49
|
+
const char* PDFiumLastErrorString();
|
50
|
+
|
51
|
+
class RB {
|
52
|
+
public:
|
53
|
+
static VALUE PDFium();
|
54
|
+
static VALUE Bookmark();
|
55
|
+
static VALUE Document();
|
56
|
+
static VALUE Page();
|
57
|
+
static VALUE BookmarkList();
|
58
|
+
static VALUE BoundingBox();
|
59
|
+
static VALUE StringIO();
|
60
|
+
static VALUE Image();
|
61
|
+
static VALUE PageList();
|
62
|
+
static VALUE ImageList();
|
63
|
+
static VALUE type(VALUE object);
|
64
|
+
static VALUE to_string(const CFX_WideString &widestring);
|
65
|
+
static ID to_s(VALUE object);
|
66
|
+
static VALUE get_option(VALUE hash, const std::string &key);
|
67
|
+
};
|
68
|
+
|
69
|
+
#endif // __PDFIUM_H__
|
data/lib/pdfium.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require_relative "pdfium/version"
|
3
|
+
require_relative "pdfium/bookmark_list"
|
4
|
+
require_relative "pdfium/page_list"
|
5
|
+
require_relative "pdfium/page_sizes"
|
6
|
+
require_relative "pdfium/bounding_box"
|
7
|
+
require_relative "pdfium/image_list"
|
8
|
+
|
9
|
+
|
10
|
+
# PDFium is a Ruby interface to the Google PDFium PDF renderer.
|
11
|
+
module PDFium
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
require_relative "pdfium_ext"
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module PDFium

  # A list of bookmarks for a Document or a Bookmark
  class BookmarkList

    include Enumerable

    # Not used directly, but called from Document#bookmarks
    #
    # initial:: the first bookmark at this level, or nil when there is none
    def initialize(initial)
      @first = initial
    end

    # Calls block once for each bookmark that exists at the current level,
    # starting with the first one and following next_sibling until it
    # returns nil.
    # Since bookmarks form a tree, each bookmark may have one or more children
    #
    # Returns an Enumerator when called without a block, instead of
    # raising LocalJumpError.
    def each
      return enum_for(:each) unless block_given?

      bookmark = @first
      while bookmark
        yield bookmark
        bookmark = bookmark.next_sibling
      end
    end

    # True if no bookmarks exist, false if at least one is present
    def empty?
      @first.nil?
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module PDFium

  # Describes the extent of an object. Used with both Page and Image
  class BoundingBox

    # Edges of the box. Fixnum given in terms of points
    attr_reader :left
    attr_reader :right
    attr_reader :top
    attr_reader :bottom

    # Arguments are taken in the order left, right, top, bottom
    def initialize(l, r, t, b)
      @left   = l
      @right  = r
      @top    = t
      @bottom = b
    end

  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module PDFium

  # Enumerable view over the Image instances of a Page, as returned by
  # Page#images. Nothing is read from the page until iteration starts.
  class ImageList

    include Enumerable

    # Remembers the page to iterate over. Normally reached through
    # Page#images rather than called directly.
    def initialize(page)
      @page = page
    end

    # Forwards the given block to Page#each_image, which yields each
    # image found on the page.
    def each(&visitor)
      @page.each_image(&visitor)
    end

  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module PDFium

  # A list of Page objects associated with a Document.
  class PageList

    include Enumerable

    # Create a new listing from the given document.
    # Not normally called directly, is called internally by Document#pages
    def initialize(document)
      @document = document
    end

    # Calls block once for each page on the document, yielding the current page
    # After the page is yielded, Page#unload will be automatically called.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def each(&block)
      @document.each_page(&block)
    end

    # Returns the number of pages on the document.
    #
    # When an item or a block is given, behaves like Enumerable#count and
    # returns the number of matching pages instead. Without arguments the
    # page count is taken directly from the document.
    def count(*args, &block)
      return @document.page_count if args.empty? && block.nil?

      super
    end

    # Returns a Page instance for the given number by delegating to
    # Document#page_at.
    # If the given page_number is not valid, an ArgumentError will be raised.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def [](index)
      @document.page_at(index)
    end
  end

end
|