pdfium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +9 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +68 -0
- data/Rakefile +62 -0
- data/ext/pdfium_ext/bookmark.cc +221 -0
- data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
- data/ext/pdfium_ext/document.cc +268 -0
- data/ext/pdfium_ext/document.h +66 -0
- data/ext/pdfium_ext/document_wrapper.cc +63 -0
- data/ext/pdfium_ext/document_wrapper.h +56 -0
- data/ext/pdfium_ext/extconf.h +3 -0
- data/ext/pdfium_ext/extconf.rb +76 -0
- data/ext/pdfium_ext/image.cc +332 -0
- data/ext/pdfium_ext/page.cc +392 -0
- data/ext/pdfium_ext/page.h +5 -0
- data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
- data/ext/pdfium_ext/page_object_wrapper.h +27 -0
- data/ext/pdfium_ext/page_wrapper.cc +86 -0
- data/ext/pdfium_ext/page_wrapper.h +37 -0
- data/ext/pdfium_ext/pdfium.cc +115 -0
- data/ext/pdfium_ext/pdfium.h +69 -0
- data/lib/pdfium.rb +15 -0
- data/lib/pdfium/bookmark_list.rb +28 -0
- data/lib/pdfium/bounding_box.rb +16 -0
- data/lib/pdfium/image_list.rb +21 -0
- data/lib/pdfium/page_list.rb +36 -0
- data/lib/pdfium/page_sizes.rb +7 -0
- data/lib/pdfium/version.rb +4 -0
- data/pdfium.gemspec +29 -0
- data/test/benchmark-docsplit.rb +41 -0
- data/test/bookmarks_list_spec.rb +26 -0
- data/test/bookmarks_spec.rb +34 -0
- data/test/debug.rb +24 -0
- data/test/document_spec.rb +49 -0
- data/test/image_list_spec.rb +18 -0
- data/test/image_spec.rb +53 -0
- data/test/page_list_spec.rb +24 -0
- data/test/page_spec.rb +91 -0
- data/test/pdfium_spec.rb +15 -0
- data/test/profile.rb +29 -0
- data/test/spec_helper.rb +31 -0
- metadata +202 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
#include "page_wrapper.h"
|
2
|
+
#include "pdfium.h"
|
3
|
+
|
4
|
+
// Extracts the PageWrapper stored inside a Ruby object and returns the
// CPDF_Page it manages (PageWrapper::page() loads the page on demand).
CPDF_Page*
RB2PG(VALUE self) {
    PageWrapper *wrapper;
    Data_Get_Struct(self, PageWrapper, wrapper);
    return wrapper->page();
}
|
10
|
+
|
11
|
+
// Creates a wrapper for page number `page` of the document held by `doc`.
// The page itself is not loaded here (_page starts out null); the new
// wrapper registers itself with the document wrapper via retain().
PageWrapper::PageWrapper(DocumentWrapper* doc, int page)
    : document_wrapper(doc),
      _page_number(page),
      _in_use(true),
      _page(NULL)
{
    document_wrapper->retain(this);
}
|
16
|
+
|
17
|
+
void
|
18
|
+
PageWrapper::unload(){
|
19
|
+
FPDF_ClosePage(_page);
|
20
|
+
_page = NULL;
|
21
|
+
}
|
22
|
+
|
23
|
+
|
24
|
+
// Mark the page object as no longer in use. At this
|
25
|
+
// point it may be freed once all children are also not
|
26
|
+
// in use
|
27
|
+
void
|
28
|
+
PageWrapper::markUnused(){
|
29
|
+
_in_use = false;
|
30
|
+
this->unload();
|
31
|
+
this->maybeKillSelf();
|
32
|
+
}
|
33
|
+
|
34
|
+
void
|
35
|
+
PageWrapper::setPage(CPDF_Page *page){
|
36
|
+
if (_page){
|
37
|
+
this->unload();
|
38
|
+
}
|
39
|
+
// unload won't work if the page has children
|
40
|
+
if (!_page){
|
41
|
+
this->_page=page;
|
42
|
+
}
|
43
|
+
}
|
44
|
+
|
45
|
+
void
|
46
|
+
PageWrapper::retain(void *obj){
|
47
|
+
_children.insert(obj);
|
48
|
+
}
|
49
|
+
|
50
|
+
// Returns the underlying CPDF_Page, loading it from the document on
// first use (or after unload()). The result is whatever FPDF_LoadPage
// returned, so it may be null if loading failed.
CPDF_Page *
PageWrapper::page() {
    if (_page) {
        return _page;
    }
    _page = static_cast<CPDF_Page *>(FPDF_LoadPage(document_wrapper->document, _page_number));
    return _page;
}
|
57
|
+
|
58
|
+
void
|
59
|
+
PageWrapper::release(void *obj){
|
60
|
+
_children.erase(obj);
|
61
|
+
this->maybeKillSelf();
|
62
|
+
|
63
|
+
}
|
64
|
+
|
65
|
+
// Test if the Document is not in use and there are no pages
|
66
|
+
// that are still retained
|
67
|
+
void
|
68
|
+
PageWrapper::maybeKillSelf(){
|
69
|
+
bool killable = _children.empty() && !_in_use;
|
70
|
+
DEBUG_MSG("Testing if killing Page: " << this << " " << killable );
|
71
|
+
if (killable){
|
72
|
+
delete this;
|
73
|
+
}
|
74
|
+
}
|
75
|
+
|
76
|
+
// void
|
77
|
+
// PageWrapper::wrap(CPDF_Page *pg, DocumentWrapper *doc_wrapper){
|
78
|
+
// this->page = pg;
|
79
|
+
// this->doc = doc_wrapper;
|
80
|
+
// this->doc->retain(this);
|
81
|
+
// }
|
82
|
+
|
83
|
+
|
84
|
+
// Closes the page (if one is still loaded) when the wrapper is destroyed.
PageWrapper::~PageWrapper() {
    unload();
}
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#ifndef __PAGE_WRAPPER_H__
|
2
|
+
#define __PAGE_WRAPPER_H__
|
3
|
+
|
4
|
+
#include "pdfium.h"
|
5
|
+
#include <unordered_set>
|
6
|
+
|
7
|
+
class DocumentWrapper;
|
8
|
+
|
9
|
+
class PageWrapper {
|
10
|
+
|
11
|
+
public:
|
12
|
+
|
13
|
+
PageWrapper(DocumentWrapper* doc, int page);
|
14
|
+
void wrap(CPDF_Page *page, DocumentWrapper *doc_wrapper);
|
15
|
+
~PageWrapper();
|
16
|
+
|
17
|
+
void markUnused();
|
18
|
+
|
19
|
+
void retain(void *obj);
|
20
|
+
void release(void *obj);
|
21
|
+
|
22
|
+
CPDF_Page *page();
|
23
|
+
void unload();
|
24
|
+
void setPage(CPDF_Page *pg);
|
25
|
+
|
26
|
+
DocumentWrapper *document_wrapper;
|
27
|
+
int _page_number;
|
28
|
+
private:
|
29
|
+
|
30
|
+
bool _in_use;
|
31
|
+
CPDF_Page *_page;
|
32
|
+
std::unordered_set<void*> _children;
|
33
|
+
void maybeKillSelf();
|
34
|
+
};
|
35
|
+
|
36
|
+
|
37
|
+
#endif // __PAGE_WRAPPER_H__
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#include <iostream>
|
2
|
+
#include <string>
|
3
|
+
|
4
|
+
#include "pdfium.h"
|
5
|
+
extern "C" {
|
6
|
+
#include "ruby/encoding.h"
|
7
|
+
}
|
8
|
+
|
9
|
+
#include <assert.h>
|
10
|
+
#include <list>
|
11
|
+
#include <string>
|
12
|
+
#include <utility>
|
13
|
+
#include <vector>
|
14
|
+
|
15
|
+
#include "document.h"
|
16
|
+
#include "page.h"
|
17
|
+
|
18
|
+
// // file local variables that are set in Init_pdfium_ext function
|
19
|
+
// // and are referenced elsewhere in file
|
20
|
+
// static VALUE rb_page; // Ruby definition of the Page class
|
21
|
+
|
22
|
+
const char*
|
23
|
+
PDFiumLastErrorString() {
|
24
|
+
switch(FPDF_GetLastError()){
|
25
|
+
case 0:
|
26
|
+
return "No Error";
|
27
|
+
case 1:
|
28
|
+
return "unknown error";
|
29
|
+
case 2:
|
30
|
+
return "file not found or could not be opened";
|
31
|
+
case 3:
|
32
|
+
return "file not in PDF format or corrupted";
|
33
|
+
case 4:
|
34
|
+
return "password required or incorrect password";
|
35
|
+
case 5:
|
36
|
+
return "unsupported security scheme";
|
37
|
+
case 6:
|
38
|
+
return "page not found or content error";
|
39
|
+
default:
|
40
|
+
return "error code unknown";
|
41
|
+
}
|
42
|
+
}
|
43
|
+
|
44
|
+
|
45
|
+
// Looks up the constant `name` inside the Ruby PDFium module.
VALUE _get(const char *name) {
    const ID constant_id = rb_intern(name);
    return rb_const_get(RB::PDFium(), constant_id);
}
|
48
|
+
// Returns the top-level Ruby `PDFium` constant. The lookup happens on
// the first call only; the result is cached in a function-local static.
VALUE RB::PDFium() {
    static VALUE pdfium_module = rb_const_get(rb_cObject, rb_intern("PDFium"));
    return pdfium_module;
}
|
52
|
+
// Returns the Ruby constant PDFium::Page (looked up once, then cached).
VALUE RB::Page() {
    static VALUE klass = _get("Page");
    return klass;
}
|
56
|
+
// Returns the Ruby constant PDFium::Image (looked up once, then cached).
VALUE RB::Image() {
    static VALUE klass = _get("Image");
    return klass;
}
|
60
|
+
// Returns the Ruby constant PDFium::BoundingBox (looked up once, then cached).
VALUE RB::BoundingBox() {
    static VALUE klass = _get("BoundingBox");
    return klass;
}
|
64
|
+
// Returns the Ruby constant PDFium::Bookmark (looked up once, then cached).
VALUE RB::Bookmark() {
    static VALUE klass = _get("Bookmark");
    return klass;
}
|
68
|
+
// Returns the Ruby constant PDFium::Document (looked up once, then cached).
VALUE RB::Document() {
    static VALUE klass = _get("Document");
    return klass;
}
|
72
|
+
// Returns the Ruby constant PDFium::BookmarkList (looked up once, then cached).
VALUE RB::BookmarkList() {
    static VALUE klass = _get("BookmarkList");
    return klass;
}
|
76
|
+
// Returns the Ruby constant PDFium::PageList (looked up once, then cached).
VALUE RB::PageList() {
    static VALUE klass = _get("PageList");
    return klass;
}
|
80
|
+
// Returns the Ruby constant PDFium::ImageList (looked up once, then cached).
VALUE RB::ImageList() {
    static VALUE klass = _get("ImageList");
    return klass;
}
|
84
|
+
|
85
|
+
// Builds a Ruby String tagged with the UTF-16LE encoding from a PDFium
// wide string. The byte length handed to Ruby is two bytes per
// character reported by GetLength().
VALUE
RB::to_string(const CFX_WideString &wstr) {
    static rb_encoding *utf16le = rb_enc_find("UTF-16LE");
    // The encoded temporary stays alive until the end of the return
    // statement, by which point rb_enc_str_new has been called with it.
    return rb_enc_str_new(wstr.UTF16LE_Encode().c_str(),
                          (wstr.GetLength() * 2),
                          utf16le);
}
|
90
|
+
|
91
|
+
ID RB::to_s(VALUE obj){
|
92
|
+
static ID id = rb_intern("to_s");
|
93
|
+
return rb_funcall(obj, id, 0);
|
94
|
+
}
|
95
|
+
|
96
|
+
// Calls the Ruby method `class` on `obj` and returns the result.
VALUE RB::type(VALUE obj) {
    static ID class_id = rb_intern("class");
    return rb_funcall(obj, class_id, 0);
}
|
100
|
+
|
101
|
+
// Fetches options[:key] from a Ruby Hash, where `key` is converted to a
// Symbol. The value comes straight from rb_hash_aref.
VALUE RB::get_option(VALUE options, const std::string &key) {
    const ID key_id = rb_intern(key.c_str());
    return rb_hash_aref(options, ID2SYM(key_id));
}
|
104
|
+
|
105
|
+
extern "C" void
|
106
|
+
Init_pdfium_ext()
|
107
|
+
{
|
108
|
+
// Initialize the PDFium library
|
109
|
+
FPDF_InitLibrary();
|
110
|
+
|
111
|
+
define_document_class();
|
112
|
+
define_page_class();
|
113
|
+
define_bookmark_class();
|
114
|
+
define_image_class();
|
115
|
+
}
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#ifndef __PDFIUM_H__
|
2
|
+
#define __PDFIUM_H__
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <inttypes.h>
|
6
|
+
#include <fpdf_dataavail.h>
|
7
|
+
#include <fpdf_ext.h>
|
8
|
+
#include <fpdfformfill.h>
|
9
|
+
#include <fpdftext.h>
|
10
|
+
#include <fpdfview.h>
|
11
|
+
#include <fpdfedit.h>
|
12
|
+
#include <fpdfsave.h>
|
13
|
+
#include <fpdfdoc.h>
|
14
|
+
#include <iostream>
|
15
|
+
#include <fpdfdoc/fpdf_doc.h>
|
16
|
+
|
17
|
+
#include <fpdfapi/fpdf_render.h>
|
18
|
+
#include <fpdfapi/fpdf_pageobj.h>
|
19
|
+
#include <fpdfsdk/include/fsdk_rendercontext.h>
|
20
|
+
#include <FreeImage.h>
|
21
|
+
|
22
|
+
#include "page_wrapper.h"
|
23
|
+
#include "page_object_wrapper.h"
|
24
|
+
#include "document_wrapper.h"
|
25
|
+
|
26
|
+
// auto generated by mkmf
|
27
|
+
#include "extconf.h"
|
28
|
+
extern "C" {
|
29
|
+
#include "ruby.h"
|
30
|
+
}
|
31
|
+
|
32
|
+
// DEBUG_MSG(expr) streams `expr` to std::cout when DEBUG is defined and
// expands to an empty statement otherwise. The do/while(false) wrapper
// makes the macro behave as a single statement after `if`/`else`.
#if defined(DEBUG)
#  define DEBUG_MSG(str) do { std::cout << str << std::endl; } while( false )
#else
#  define DEBUG_MSG(str) do { } while ( false )
#endif
|
37
|
+
|
38
|
+
VALUE define_bookmark_class();
|
39
|
+
VALUE define_document_class();
|
40
|
+
VALUE define_page_class();
|
41
|
+
VALUE define_image_class();
|
42
|
+
|
43
|
+
// Utility functions that extract the wrapped native object (page, document or image) from its Ruby wrapper
|
44
|
+
|
45
|
+
CPDF_Page* RB2PG(VALUE RB_Page);
|
46
|
+
CPDF_Document* RB2DOC(VALUE RB_DocumentWrapper);
|
47
|
+
CPDF_ImageObject* RB2IMG(VALUE RB_Image);
|
48
|
+
|
49
|
+
const char* PDFiumLastErrorString();
|
50
|
+
|
51
|
+
class RB {
|
52
|
+
public:
|
53
|
+
static VALUE PDFium();
|
54
|
+
static VALUE Bookmark();
|
55
|
+
static VALUE Document();
|
56
|
+
static VALUE Page();
|
57
|
+
static VALUE BookmarkList();
|
58
|
+
static VALUE BoundingBox();
|
59
|
+
static VALUE StringIO();
|
60
|
+
static VALUE Image();
|
61
|
+
static VALUE PageList();
|
62
|
+
static VALUE ImageList();
|
63
|
+
static VALUE type(VALUE object);
|
64
|
+
static VALUE to_string(const CFX_WideString &widestring);
|
65
|
+
static ID to_s(VALUE object);
|
66
|
+
static VALUE get_option(VALUE hash, const std::string &key);
|
67
|
+
};
|
68
|
+
|
69
|
+
#endif // __PDFIUM_H__
|
data/lib/pdfium.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require_relative "pdfium/version"
|
3
|
+
require_relative "pdfium/bookmark_list"
|
4
|
+
require_relative "pdfium/page_list"
|
5
|
+
require_relative "pdfium/page_sizes"
|
6
|
+
require_relative "pdfium/bounding_box"
|
7
|
+
require_relative "pdfium/image_list"
|
8
|
+
|
9
|
+
|
10
|
+
# PDFium is a Ruby interface to the Google PDFium PDF renderer.
module PDFium
end
|
14
|
+
|
15
|
+
require_relative "pdfium_ext"
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module PDFium

  # A list of bookmarks for a Document or a Bookmark
  class BookmarkList

    include Enumerable

    # Not used directly, but called from Document#bookmarks
    #
    # initial - the first bookmark at this level, or nil when there are none.
    def initialize(initial)
      @first = initial
    end

    # Calls block once for each bookmark that exists at the current level,
    # following Bookmark#next_sibling until it returns nil.
    # Since bookmarks form a tree, each bookmark may have one or more children
    #
    # Returns an Enumerator when called without a block.
    def each
      return enum_for(:each) unless block_given?

      bookmark = @first
      while bookmark
        yield bookmark
        bookmark = bookmark.next_sibling
      end
    end

    # True if no bookmarks exist, false if at least one is present
    def empty?
      @first.nil?
    end
  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module PDFium

  # The size of an object. Used with both Page and Image
  class BoundingBox

    # Edges of the BoundingBox, exactly as passed to the constructor.
    # Fixnum given in terms of points
    attr_reader :left, :right, :top, :bottom

    # Arguments are given in the order Left, Right, Top, Bottom
    def initialize(l, r, t, b)
      @left   = l
      @right  = r
      @top    = t
      @bottom = b
    end

  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module PDFium

  # A list of Image instances for a given Page. Is returned by Page#images
  # Images are lazily loaded upon request.
  class ImageList

    include Enumerable

    # Load list for a given page. Not normally called directly, but from Page#images
    def initialize(page)
      @page = page
    end

    # Calls block once for each image on the page by delegating to
    # Page#each_image.
    def each(&blk)
      @page.each_image(&blk)
    end

  end

end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module PDFium

  # A list of Page objects associated with a Document.
  class PageList

    include Enumerable

    # Create a new listing from the given document.
    # Not normally called directly, is called internally by Document#pages
    def initialize(document)
      @document = document
    end

    # Calls block once for each page on the document, yielding the current page
    # After the page is yielded, Page#unload will be automatically called.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def each(&block)
      @document.each_page(&block)
    end

    # Returns the number of pages on the document.
    #
    # When given an argument or a block this behaves like Enumerable#count
    # (counting matching pages) instead of raising or ignoring the block.
    def count(*args, &block)
      return super unless args.empty? && block.nil?

      @document.page_count
    end

    # Returns a Page instance for the given number.
    # If the given page_number is not valid, an ArgumentError will be raised.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def [](index)
      @document.page_at(index)
    end
  end

end
|