pdfium 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +9 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +68 -0
- data/Rakefile +62 -0
- data/ext/pdfium_ext/bookmark.cc +221 -0
- data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
- data/ext/pdfium_ext/document.cc +268 -0
- data/ext/pdfium_ext/document.h +66 -0
- data/ext/pdfium_ext/document_wrapper.cc +63 -0
- data/ext/pdfium_ext/document_wrapper.h +56 -0
- data/ext/pdfium_ext/extconf.h +3 -0
- data/ext/pdfium_ext/extconf.rb +76 -0
- data/ext/pdfium_ext/image.cc +332 -0
- data/ext/pdfium_ext/page.cc +392 -0
- data/ext/pdfium_ext/page.h +5 -0
- data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
- data/ext/pdfium_ext/page_object_wrapper.h +27 -0
- data/ext/pdfium_ext/page_wrapper.cc +86 -0
- data/ext/pdfium_ext/page_wrapper.h +37 -0
- data/ext/pdfium_ext/pdfium.cc +115 -0
- data/ext/pdfium_ext/pdfium.h +69 -0
- data/lib/pdfium.rb +15 -0
- data/lib/pdfium/bookmark_list.rb +28 -0
- data/lib/pdfium/bounding_box.rb +16 -0
- data/lib/pdfium/image_list.rb +21 -0
- data/lib/pdfium/page_list.rb +36 -0
- data/lib/pdfium/page_sizes.rb +7 -0
- data/lib/pdfium/version.rb +4 -0
- data/pdfium.gemspec +29 -0
- data/test/benchmark-docsplit.rb +41 -0
- data/test/bookmarks_list_spec.rb +26 -0
- data/test/bookmarks_spec.rb +34 -0
- data/test/debug.rb +24 -0
- data/test/document_spec.rb +49 -0
- data/test/image_list_spec.rb +18 -0
- data/test/image_spec.rb +53 -0
- data/test/page_list_spec.rb +24 -0
- data/test/page_spec.rb +91 -0
- data/test/pdfium_spec.rb +15 -0
- data/test/profile.rb +29 -0
- data/test/spec_helper.rb +31 -0
- metadata +202 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
|
4
|
+
data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
|
7
|
+
data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.2.0
|
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Nathan Stitt
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Ruby bindings for Google's PDFium project
|
2
|
+
|
3
|
+
This allows Ruby to efficiently extract information from PDF files.
|
4
|
+
|
5
|
+
It currently has only very rudimentary PDF editing capabilities.
|
6
|
+
|
7
|
+
RDoc documentation is also available and the test directory has examples of usage.
|
8
|
+
|
9
|
+
## Opening and saving
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
pdf = PDFium::Document.new("test.pdf")
|
13
|
+
pdf.save("copy.pdf")
|
14
|
+
```
|
15
|
+
|
16
|
+
## Document information
|
17
|
+
|
18
|
+
Page count:
|
19
|
+
```ruby
|
20
|
+
pdf.page_count
|
21
|
+
```
|
22
|
+
|
23
|
+
PDF Metadata:
|
24
|
+
```ruby
|
25
|
+
pdf.metadata
|
26
|
+
```
|
27
|
+
|
28
|
+
Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
## Bookmarks
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
def print_bookmarks(list, indent=0)
|
36
|
+
list.bookmarks.each do | bm |
|
37
|
+
print ' ' * indent
|
38
|
+
puts bm.title
|
39
|
+
print_bookmarks( bm.children, indent + 1 )
|
40
|
+
end
|
41
|
+
end
|
42
|
+
print_bookmarks( pdf.bookmarks )
|
43
|
+
```
|
44
|
+
|
45
|
+
## Render page as an image
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
pdf.each_page do | page |
|
49
|
+
page.as_image(width: 800).save("test-#{page.number}.png")
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
## Extract embedded images from page
|
54
|
+
```ruby
|
55
|
+
doc = PDFium::Document.new("test.pdf")
|
56
|
+
page = doc.page_at(0)
|
57
|
+
page.images do |image|
|
58
|
+
image.save("page-0-image-#{image.index}.png")
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
## Text access
|
63
|
+
|
64
|
+
Text is returned as a UTF-16LE encoded string. Future versions may return position information as well.
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
pdf.page_at(0).text.encode!("ASCII-8BIT")
|
68
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'rake/extensiontask'
require 'rdoc/task'

# `rake test` runs every spec file in the test directory.
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.pattern = 'test/*_spec.rb'
end

# Files fed to RDoc: the README, the Ruby sources and the C++ extension sources.
RDOC_FILES = FileList[
  'README.md',
  'lib/pdfium.rb',
  'lib/pdfium/*.rb',
  'ext/pdfium_ext/*.cc'
]

# `rake rdoc` builds the API documentation with README.md as the main page.
RDoc::Task.new do |rd|
  rd.main = 'README.md'
  rd.options << '--verbose'
  rd.rdoc_files.include(RDOC_FILES)
end

# `rake compile` builds the native extension from the *.cc sources.
Rake::ExtensionTask.new('pdfium_ext') do |ext|
  ext.source_pattern = '*.cc'
end

# Compile the extension, then run the test suite against the fresh build.
task buildtest: :compile do
  Rake::Task['test'].invoke
end

# Open an IRB session with the gem loaded.
task :console do
  # Required lazily so that ordinary rake runs do not load IRB or the extension.
  require 'irb'
  require 'irb/completion'
  require 'pdfium'
  ARGV.clear # keep IRB from treating rake's arguments as its own
  IRB.start
end
|
40
|
+
|
41
|
+
# valgrind and Ruby
# http://blog.flavorjon.es/2009/06/easily-valgrind-gdb-your-ruby-c.html
# http://blog.evanweaver.com/2008/02/05/valgrind-and-ruby/
namespace :test do
  # partial-loads-ok and undef-value-errors are necessary to ignore
  # spurious (and eminently ignorable) warnings from the ruby
  # interpreter.
  #
  # The options are joined into ONE space-separated line: the string is
  # interpolated into a shell command, so an embedded newline would end the
  # valgrind invocation early and run the remaining options as a separate
  # command.
  VALGRIND_BASIC_OPTS = %w[
    --tool=memcheck
    --dsymutil=yes
    --num-callers=50
    --error-limit=no
    --leak-check=full
    --partial-loads-ok=yes
    --undef-value-errors=no
  ].join(' ').freeze

  SUPRESS = '' # "--suppressions=./valgrind.supp"

  desc 'run test suite under valgrind with basic ruby options'
  task :valgrind => :compile do
    # -S makes ruby look rake up on PATH instead of expecting ./rake
    cmdline = "valgrind #{SUPRESS} #{VALGRIND_BASIC_OPTS} ruby -S rake test"
    puts cmdline
    system cmdline
  end
end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
#include "pdfium.h"
|
2
|
+
|
3
|
+
/////////////////////////////////////////////////////////////////////////
|
4
|
+
// The Bookmark class
|
5
|
+
/////////////////////////////////////////////////////////////////////////
|
6
|
+
/*
|
7
|
+
* Document-class: PDFium::Bookmark
|
8
|
+
*
|
9
|
+
* Bookmarks on a Document form a tree structure.
|
10
|
+
* Each can have siblings and children
|
11
|
+
*
|
12
|
+
*/
|
13
|
+
|
14
|
+
|
15
|
+
class Bookmark {
|
16
|
+
public:
|
17
|
+
Bookmark():
|
18
|
+
doc_wrapper(0),
|
19
|
+
bookmark(0){}
|
20
|
+
~Bookmark(){
|
21
|
+
if (doc_wrapper)
|
22
|
+
doc_wrapper->release(this);
|
23
|
+
if (bookmark)
|
24
|
+
delete bookmark;
|
25
|
+
}
|
26
|
+
DocumentWrapper *doc_wrapper;
|
27
|
+
CPDF_Bookmark *bookmark;
|
28
|
+
};
|
29
|
+
|
30
|
+
// a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
|
31
|
+
CPDF_Bookmark *
|
32
|
+
RB2BM(VALUE self) {
|
33
|
+
Bookmark *bm;
|
34
|
+
Data_Get_Struct(self, Bookmark, bm);
|
35
|
+
return bm->bookmark;
|
36
|
+
}
|
37
|
+
|
38
|
+
static void
|
39
|
+
bookmark_gc_free(Bookmark *bm) {
|
40
|
+
delete bm;
|
41
|
+
}
|
42
|
+
|
43
|
+
// Allocator for PDFium::Bookmark: wraps a new, empty Bookmark in a Ruby
// object of the given class, with bookmark_gc_free as its free function.
static VALUE
bookmark_allocate(VALUE klass) {
    Bookmark *fresh = new Bookmark();
    return Data_Wrap_Struct(klass, NULL, bookmark_gc_free, fresh);
}
|
48
|
+
|
49
|
+
|
50
|
+
/*
|
51
|
+
* call-seq:
|
52
|
+
* Bookmark.new
|
53
|
+
*
|
54
|
+
* Initializes a bookmark. Not intended for direct use, but called
|
55
|
+
* internally from Document#bookmarks
|
56
|
+
*/
|
57
|
+
VALUE
|
58
|
+
bookmark_initialize(VALUE self, VALUE options){
|
59
|
+
Bookmark *bm;
|
60
|
+
Data_Get_Struct(self, Bookmark, bm);
|
61
|
+
DocumentWrapper *doc;
|
62
|
+
CPDF_Bookmark bookmark;
|
63
|
+
|
64
|
+
if (TYPE(options) != T_HASH){
|
65
|
+
rb_raise(rb_eArgError, "no options given");
|
66
|
+
return Qnil;
|
67
|
+
}
|
68
|
+
|
69
|
+
VALUE reference;
|
70
|
+
if ( !NIL_P(reference = RB::get_option(options,"document")) ){
|
71
|
+
// we're the first bookmark on a document
|
72
|
+
Data_Get_Struct(reference, DocumentWrapper, doc);
|
73
|
+
bookmark = CPDF_Bookmark(NULL);
|
74
|
+
CPDF_BookmarkTree tree(doc->document);
|
75
|
+
bm->bookmark = new CPDF_Bookmark( tree.GetFirstChild(bookmark).GetDict() );
|
76
|
+
} else if ( !NIL_P(reference = RB::get_option(options,"parent")) ){
|
77
|
+
// we're the first sibling on a parent bookmark
|
78
|
+
Bookmark *reference_bm;
|
79
|
+
Data_Get_Struct(reference, Bookmark, reference_bm);
|
80
|
+
doc = reference_bm->doc_wrapper;
|
81
|
+
bm->bookmark = new CPDF_Bookmark(reference_bm->bookmark->GetDict());
|
82
|
+
} else if ( !NIL_P(reference = RB::get_option(options,"sibling")) ){
|
83
|
+
// we're the next bookmark after a sibling bookmark
|
84
|
+
Bookmark *reference_bm;
|
85
|
+
Data_Get_Struct(reference, Bookmark, reference_bm);
|
86
|
+
doc = reference_bm->doc_wrapper;
|
87
|
+
CPDF_BookmarkTree tree(doc->document);
|
88
|
+
bm->bookmark = new CPDF_Bookmark( tree.GetNextSibling(*reference_bm->bookmark) );
|
89
|
+
} else {
|
90
|
+
rb_raise(rb_eArgError, "options must contain either :document, :parent or :sibling");
|
91
|
+
return Qnil;
|
92
|
+
}
|
93
|
+
|
94
|
+
bm->doc_wrapper = doc;
|
95
|
+
doc->retain(bm);
|
96
|
+
|
97
|
+
return Qnil;
|
98
|
+
}
|
99
|
+
|
100
|
+
/*
|
101
|
+
* call-seq:
|
102
|
+
* children -> BookmarkList
|
103
|
+
*
|
104
|
+
* All Bookmarks that are children. If the Bookmark has no children, an empty list is returned
|
105
|
+
*/
|
106
|
+
static VALUE
|
107
|
+
bookmark_children(VALUE self)
|
108
|
+
{
|
109
|
+
Bookmark *bm;
|
110
|
+
Data_Get_Struct(self, Bookmark, bm);
|
111
|
+
|
112
|
+
CPDF_BookmarkTree tree(bm->doc_wrapper->document);
|
113
|
+
CPDF_Bookmark child( tree.GetFirstChild(*bm->bookmark) );
|
114
|
+
|
115
|
+
VALUE args[1];
|
116
|
+
|
117
|
+
if (child.GetDict()){
|
118
|
+
args[0] = rb_hash_new();
|
119
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("parent")), self);
|
120
|
+
args[0] = rb_class_new_instance( 1, args, RB::Bookmark() );
|
121
|
+
} else {
|
122
|
+
args[0] = Qnil; //rb_class_new_instance( 1, args, T_NIL );
|
123
|
+
}
|
124
|
+
return rb_class_new_instance( 1, args, RB::BookmarkList() );
|
125
|
+
|
126
|
+
}
|
127
|
+
|
128
|
+
/*
|
129
|
+
* call-seq:
|
130
|
+
* next_sibling -> Bookmark
|
131
|
+
*
|
132
|
+
* Returns the Bookmark that comes after this one
|
133
|
+
*/
|
134
|
+
static VALUE
|
135
|
+
bookmark_next_sibling(VALUE self)
|
136
|
+
{
|
137
|
+
Bookmark *bm;
|
138
|
+
Data_Get_Struct(self, Bookmark, bm);
|
139
|
+
CPDF_BookmarkTree tree(bm->doc_wrapper->document);
|
140
|
+
CPDF_Bookmark next = tree.GetNextSibling(*bm->bookmark);
|
141
|
+
|
142
|
+
if (next.GetDict()){
|
143
|
+
VALUE args[1];
|
144
|
+
args[0] = rb_hash_new();
|
145
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("sibling")), self);
|
146
|
+
return rb_class_new_instance( 1, args, RB::Bookmark() );
|
147
|
+
} else {
|
148
|
+
return Qnil;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
|
153
|
+
/*
|
154
|
+
* call-seq:
|
155
|
+
* title -> String encoded as UTF-16LE
|
156
|
+
*
|
157
|
+
* Returns the title of the bookmark in UTF-16LE format.
|
158
|
+
* This means that the string cannot be directly compared to a ASCII string, and must be converted.
|
159
|
+
*
|
160
|
+
* bookmark.title.encode!("ASCII-8BIT")
|
161
|
+
*
|
162
|
+
*/
|
163
|
+
static VALUE
|
164
|
+
bookmark_title(VALUE self)
|
165
|
+
{
|
166
|
+
return RB::to_string( RB2BM(self)->GetTitle() );
|
167
|
+
}
|
168
|
+
|
169
|
+
/*
|
170
|
+
* call-seq:
|
171
|
+
* destination -> Hash
|
172
|
+
*
|
173
|
+
* Returns the destination data of the bookmark.
|
174
|
+
* Only the destination type is tested.
|
175
|
+
* Bug reports and confirmation on the action type is appreciated.
|
176
|
+
*/
|
177
|
+
static VALUE
|
178
|
+
bookmark_destination(VALUE self)
|
179
|
+
{
|
180
|
+
Bookmark *bm;
|
181
|
+
Data_Get_Struct(self, Bookmark, bm);
|
182
|
+
auto doc = bm->doc_wrapper->document;
|
183
|
+
VALUE hash=rb_hash_new();
|
184
|
+
CPDF_Dest dest = bm->bookmark->GetDest( doc );
|
185
|
+
if (dest){
|
186
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("destination")));
|
187
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("page_number") ), INT2NUM(dest.GetPageIndex(doc)));
|
188
|
+
} else {
|
189
|
+
CPDF_Action action = bm->bookmark->GetAction();
|
190
|
+
if (action){
|
191
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("action")));
|
192
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("action") ),
|
193
|
+
rb_str_new2( action.GetTypeName().c_str() ) );
|
194
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("uri") ),
|
195
|
+
rb_str_new2( action.GetURI(doc).c_str() ) );
|
196
|
+
} else {
|
197
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("unknown")));
|
198
|
+
}
|
199
|
+
}
|
200
|
+
return hash;
|
201
|
+
}
|
202
|
+
|
203
|
+
// Defines PDFium::Bookmark, registers its allocator and methods, and
// returns the new class.
VALUE
define_bookmark_class(){

#if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
    VALUE RB_PDFium = rb_define_module("PDFium");
#endif
    VALUE RB_PDFium = RB::PDFium();

    VALUE klass = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
    rb_define_alloc_func(klass, bookmark_allocate);

    // initialize is private and takes a single options Hash
    rb_define_private_method(klass, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);

    // public, argument-less readers
    rb_define_method(klass, "title",        RUBY_METHOD_FUNC(bookmark_title),        0);
    rb_define_method(klass, "next_sibling", RUBY_METHOD_FUNC(bookmark_next_sibling), 0);
    rb_define_method(klass, "children",     RUBY_METHOD_FUNC(bookmark_children),     0);
    rb_define_method(klass, "destination",  RUBY_METHOD_FUNC(bookmark_destination),  0);

    return klass;
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
// Implementation of FPDF_FILEWRITE into a file.
|
2
|
+
class BufferFileWrite : public FPDF_FILEWRITE {
|
3
|
+
public:
|
4
|
+
BufferFileWrite( const std::string &file ) :
|
5
|
+
_file( file, std::ios::out | std::ios::binary )
|
6
|
+
{
|
7
|
+
version = 1;
|
8
|
+
WriteBlock = &WriteBlockImpl;
|
9
|
+
}
|
10
|
+
~BufferFileWrite() {
|
11
|
+
_file.close();
|
12
|
+
}
|
13
|
+
|
14
|
+
private:
|
15
|
+
int DoWriteBlock(const void* data, unsigned long size){
|
16
|
+
_file.write(static_cast<const char*>(data), size);
|
17
|
+
return 1;
|
18
|
+
}
|
19
|
+
static int WriteBlockImpl(FPDF_FILEWRITE* this_file_write, const void* data,
|
20
|
+
unsigned long size){
|
21
|
+
BufferFileWrite* mem_buffer_file_write =
|
22
|
+
static_cast<BufferFileWrite*>(this_file_write);
|
23
|
+
return mem_buffer_file_write->DoWriteBlock(data, size);
|
24
|
+
}
|
25
|
+
|
26
|
+
std::ofstream _file;
|
27
|
+
};
|
@@ -0,0 +1,268 @@
|
|
1
|
+
#include "document.h"
|
2
|
+
#include "pdfium.h"
|
3
|
+
#include <cstring>
|
4
|
+
#include <iostream>
|
5
|
+
#include <stdlib.h>
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <fstream>
|
9
|
+
#include <map>
|
10
|
+
#include "buffer_file_write.hpp"
|
11
|
+
|
12
|
+
/////////////////////////////////////////////////////////////////////////
|
13
|
+
// The Document class //
|
14
|
+
/////////////////////////////////////////////////////////////////////////
|
15
|
+
/*
|
16
|
+
* Document-class: PDFium::Document
|
17
|
+
*
|
18
|
+
* A Document represents a PDF file.
|
19
|
+
*
|
20
|
+
*/
|
21
|
+
|
22
|
+
|
23
|
+
// While you might think this would free the Document object it does not
|
24
|
+
// Instead it simply marks the Document as no longer in use, and then it
|
25
|
+
// will release itself when there are no Pages in use.
|
26
|
+
// https://redmine.ruby-lang.org/issues/6292
|
27
|
+
static void
|
28
|
+
document_gc_free(DocumentWrapper* doc)
|
29
|
+
{
|
30
|
+
DEBUG_MSG("GC Free Doc: " << doc);
|
31
|
+
// Note: we do not actually destroy the object yet.
|
32
|
+
// instead we mark it as unused and it will remove itself
|
33
|
+
// once all pages are finished
|
34
|
+
doc->markUnused();
|
35
|
+
}
|
36
|
+
|
37
|
+
// Allocator for PDFium::Document: wraps a new DocumentWrapper in a Ruby
// object of the given class, with document_gc_free as its free function.
static VALUE
document_allocate(VALUE klass)
{
    DocumentWrapper *wrapper = new DocumentWrapper();
    DEBUG_MSG("Alloc PDF: " << wrapper);
    return Data_Wrap_Struct(klass, NULL, document_gc_free, wrapper);
}
|
44
|
+
|
45
|
+
|
46
|
+
/*
|
47
|
+
* call-seq:
|
48
|
+
* Document.new( path_to_pdf_file ) -> Document
|
49
|
+
* Document.new() -> An empty PDF Document with no pages
|
50
|
+
*
|
51
|
+
* Initializes a document either from a PDF file or creates a blank document
|
52
|
+
*/
|
53
|
+
VALUE
|
54
|
+
document_initialize(int argc, VALUE *argv, VALUE self)
|
55
|
+
{
|
56
|
+
DocumentWrapper* d;
|
57
|
+
Data_Get_Struct(self, DocumentWrapper, d);
|
58
|
+
if (argc){
|
59
|
+
VALUE path = RB::to_s(argv[0]); // call to_s in case it's a Pathname
|
60
|
+
d->document = (CPDF_Document*)FPDF_LoadDocument(StringValuePtr(path), NULL);
|
61
|
+
} else {
|
62
|
+
d->document = (CPDF_Document*)FPDF_CreateNewDocument();
|
63
|
+
}
|
64
|
+
if (! d->document ){
|
65
|
+
rb_raise(rb_eArgError, "Unable to create document: %s", PDFiumLastErrorString());
|
66
|
+
}
|
67
|
+
return Qnil;
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
/*
|
72
|
+
* call-seq:
|
73
|
+
* Document.from_memory( pdf_data ) -> Document
|
74
|
+
*
|
75
|
+
* Initializes a document from a binary string.
|
76
|
+
*
|
77
|
+
* See Image#data for an example of reading a PDF directly from Amazon S3
|
78
|
+
* and writing it's images completely in memory.
|
79
|
+
*/
|
80
|
+
static VALUE
|
81
|
+
document_from_memory(VALUE klass, VALUE data){
|
82
|
+
DocumentWrapper *doc = new DocumentWrapper();
|
83
|
+
VALUE instance = Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
|
84
|
+
doc->document = (CPDF_Document*)FPDF_LoadMemDocument(RSTRING_PTR(data), RSTRING_LEN(data),NULL);
|
85
|
+
return instance;
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
|
89
|
+
* call-seq:
|
90
|
+
* page_count -> Fixnum
|
91
|
+
*
|
92
|
+
* Returns the number of pages on a Document
|
93
|
+
*/
|
94
|
+
static VALUE
|
95
|
+
document_page_count(VALUE self)
|
96
|
+
{
|
97
|
+
return INT2NUM( RB2DOC(self)->GetPageCount() );
|
98
|
+
}
|
99
|
+
|
100
|
+
// Not documented in favor of the Document#pages[] access
|
101
|
+
/* :nodoc: */
|
102
|
+
static VALUE
|
103
|
+
document_page_at(VALUE self, VALUE rb_page_index)
|
104
|
+
{
|
105
|
+
return rb_funcall(RB::Page(), rb_intern("open"), 2, self, rb_page_index);
|
106
|
+
}
|
107
|
+
|
108
|
+
/*
|
109
|
+
* call-seq:
|
110
|
+
* pages -> PDFium::PageList
|
111
|
+
*
|
112
|
+
* Returns a collection of all the pages on the document as a PDFium::PageList. Pages
|
113
|
+
* are lazily loaded.
|
114
|
+
*
|
115
|
+
*/
|
116
|
+
static VALUE
|
117
|
+
document_pages(VALUE self)
|
118
|
+
{
|
119
|
+
VALUE args[1];
|
120
|
+
args[0] = self;
|
121
|
+
return rb_class_new_instance( 1, args, RB::PageList() );
|
122
|
+
}
|
123
|
+
|
124
|
+
// creates and yields a page. Not documented since all access
|
125
|
+
// should got through the Pageist interface via the Document#pages method
|
126
|
+
/* :nodoc: */
|
127
|
+
static VALUE
|
128
|
+
document_each_page(VALUE self)
|
129
|
+
{
|
130
|
+
auto doc = RB2DOC(self);
|
131
|
+
auto count = doc->GetPageCount();
|
132
|
+
for (int pg=0; pg < count; pg++){
|
133
|
+
VALUE page = document_page_at(self, INT2FIX(pg));
|
134
|
+
rb_yield(page);
|
135
|
+
PageWrapper *pw;
|
136
|
+
Data_Get_Struct(page, PageWrapper, pw);
|
137
|
+
pw->unload();
|
138
|
+
}
|
139
|
+
return self;
|
140
|
+
}
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
/*
|
145
|
+
* call-seq:
|
146
|
+
* bookmarks -> Bookmarks
|
147
|
+
*
|
148
|
+
* Retrieves the first Bookmark for a document
|
149
|
+
*/
|
150
|
+
static VALUE
|
151
|
+
document_bookmarks(VALUE self)
|
152
|
+
{
|
153
|
+
VALUE args[1];
|
154
|
+
args[0] = rb_hash_new();
|
155
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("document")), self);
|
156
|
+
VALUE bm = rb_class_new_instance( 1, args, RB::Bookmark() );
|
157
|
+
args[0] = bm;
|
158
|
+
return rb_class_new_instance( 1, args, RB::BookmarkList() );
|
159
|
+
}
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
/*
|
165
|
+
* call-seq:
|
166
|
+
* save -> Document
|
167
|
+
*
|
168
|
+
* Saves document to a PDF file. This method isn't terribly useful since there aren't
|
169
|
+
* (yet) methods to add content to pages.
|
170
|
+
*/
|
171
|
+
static VALUE
|
172
|
+
document_save(VALUE self, VALUE _path)
|
173
|
+
{
|
174
|
+
auto doc = RB2DOC(self);
|
175
|
+
VALUE path = RB::to_s(_path); // call to_s in case it's a Pathname
|
176
|
+
BufferFileWrite output_file_write(StringValuePtr(path));
|
177
|
+
FPDF_SaveAsCopy(doc, &output_file_write, FPDF_REMOVE_SECURITY);
|
178
|
+
return self;
|
179
|
+
}
|
180
|
+
|
181
|
+
|
182
|
+
/*
|
183
|
+
call-seq:
|
184
|
+
metadata -> Hash
|
185
|
+
|
186
|
+
Retrieves and optionally sets the metadata on a document. Returns a hash with the following keys:
|
187
|
+
|
188
|
+
:title, :author :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
189
|
+
|
190
|
+
An empty Hash will be returned if the metadata cannot be read
|
191
|
+
|
192
|
+
All values in the hash are encoded as UTF-16LE strings.
|
193
|
+
|
194
|
+
If caled with a block, the values will be passed to it and updates written back to the Document
|
195
|
+
|
196
|
+
=== Example
|
197
|
+
pdf = PDFium::Document.new( "test.pdf" )
|
198
|
+
pdf.metadata do | md |
|
199
|
+
md[:title] = "My Awesome PDF"
|
200
|
+
md[:author] = "Nathan Stitt"
|
201
|
+
end
|
202
|
+
pdf.metadata[:author] # => "Nathan Stitt"
|
203
|
+
|
204
|
+
*/
|
205
|
+
VALUE
|
206
|
+
document_metadata(int argc, VALUE *argv, VALUE self)
|
207
|
+
{
|
208
|
+
auto doc = RB2DOC(self);
|
209
|
+
VALUE metadata = rb_hash_new();
|
210
|
+
CPDF_Dictionary* info = doc->GetInfo();
|
211
|
+
if (!info)
|
212
|
+
return metadata;
|
213
|
+
|
214
|
+
VALUE block;
|
215
|
+
rb_scan_args(argc, argv, "0&", &block);
|
216
|
+
|
217
|
+
std::map<std::string, std::string> keys = {
|
218
|
+
{ "Title", "title" },
|
219
|
+
{ "Author", "author" },
|
220
|
+
{ "Subject", "subject" },
|
221
|
+
{ "Keywords", "keywords"},
|
222
|
+
{ "Creator", "creator" },
|
223
|
+
{ "Producer", "producer"},
|
224
|
+
{ "CreationDate", "creation_date" },
|
225
|
+
{ "ModDate", "mod_date" }
|
226
|
+
};
|
227
|
+
|
228
|
+
for (auto& kv : keys) {
|
229
|
+
rb_hash_aset(metadata,
|
230
|
+
ID2SYM( rb_intern( kv.second.c_str() ) ),
|
231
|
+
RB::to_string( info->GetUnicodeText( kv.first.c_str() ) )
|
232
|
+
);
|
233
|
+
}
|
234
|
+
|
235
|
+
if (RTEST(block)){
|
236
|
+
rb_yield( metadata );
|
237
|
+
for (auto& kv : keys) {
|
238
|
+
VALUE value = RB::get_option(metadata, kv.second);
|
239
|
+
auto bs = CFX_ByteString( RSTRING_PTR(value), RSTRING_LEN(value) );
|
240
|
+
info->SetAtString(kv.first.c_str(), bs);
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
return metadata;
|
245
|
+
}
|
246
|
+
|
247
|
+
// Defines PDFium::Document, registers its allocator, class method and
// instance methods, and returns the new class.
VALUE
define_document_class()
{
    VALUE pdfium_module = RB::PDFium();
    VALUE klass = rb_define_class_under(pdfium_module, "Document", rb_cObject);

    rb_define_alloc_func(klass, document_allocate);

    // class-level constructor
    rb_define_singleton_method(klass, "from_memory", RUBY_METHOD_FUNC(document_from_memory), 1);

    // variable-arity methods
    rb_define_private_method(klass, "initialize", RUBY_METHOD_FUNC(document_initialize), -1);
    rb_define_method(klass, "metadata",   RUBY_METHOD_FUNC(document_metadata),   -1);

    // fixed-arity methods
    rb_define_method(klass, "page_count", RUBY_METHOD_FUNC(document_page_count), 0);
    rb_define_method(klass, "page_at",    RUBY_METHOD_FUNC(document_page_at),    1);
    rb_define_method(klass, "each_page",  RUBY_METHOD_FUNC(document_each_page),  0);
    rb_define_method(klass, "pages",      RUBY_METHOD_FUNC(document_pages),      0);
    rb_define_method(klass, "bookmarks",  RUBY_METHOD_FUNC(document_bookmarks),  0);
    rb_define_method(klass, "save",       RUBY_METHOD_FUNC(document_save),       1);

    return klass;
}
|