pdfium 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.ruby-version +1 -0
- data/Gemfile +9 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +68 -0
- data/Rakefile +62 -0
- data/ext/pdfium_ext/bookmark.cc +221 -0
- data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
- data/ext/pdfium_ext/document.cc +268 -0
- data/ext/pdfium_ext/document.h +66 -0
- data/ext/pdfium_ext/document_wrapper.cc +63 -0
- data/ext/pdfium_ext/document_wrapper.h +56 -0
- data/ext/pdfium_ext/extconf.h +3 -0
- data/ext/pdfium_ext/extconf.rb +76 -0
- data/ext/pdfium_ext/image.cc +332 -0
- data/ext/pdfium_ext/page.cc +392 -0
- data/ext/pdfium_ext/page.h +5 -0
- data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
- data/ext/pdfium_ext/page_object_wrapper.h +27 -0
- data/ext/pdfium_ext/page_wrapper.cc +86 -0
- data/ext/pdfium_ext/page_wrapper.h +37 -0
- data/ext/pdfium_ext/pdfium.cc +115 -0
- data/ext/pdfium_ext/pdfium.h +69 -0
- data/lib/pdfium.rb +15 -0
- data/lib/pdfium/bookmark_list.rb +28 -0
- data/lib/pdfium/bounding_box.rb +16 -0
- data/lib/pdfium/image_list.rb +21 -0
- data/lib/pdfium/page_list.rb +36 -0
- data/lib/pdfium/page_sizes.rb +7 -0
- data/lib/pdfium/version.rb +4 -0
- data/pdfium.gemspec +29 -0
- data/test/benchmark-docsplit.rb +41 -0
- data/test/bookmarks_list_spec.rb +26 -0
- data/test/bookmarks_spec.rb +34 -0
- data/test/debug.rb +24 -0
- data/test/document_spec.rb +49 -0
- data/test/image_list_spec.rb +18 -0
- data/test/image_spec.rb +53 -0
- data/test/page_list_spec.rb +24 -0
- data/test/page_spec.rb +91 -0
- data/test/pdfium_spec.rb +15 -0
- data/test/profile.rb +29 -0
- data/test/spec_helper.rb +31 -0
- metadata +202 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
|
4
|
+
data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
|
7
|
+
data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.2.0
|
data/Gemfile
ADDED
data/Guardfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Nathan Stitt
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Ruby bindings for Google's PDFium project
|
2
|
+
|
3
|
+
This allows Ruby to efficiently extract information from PDF files.
|
4
|
+
|
5
|
+
It currently has only very rudimentary PDF editing capabilities.
|
6
|
+
|
7
|
+
RDoc documentation is also available and the test directory has examples of usage.
|
8
|
+
|
9
|
+
## Opening and saving
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
pdf = PDFium::Document.new("test.pdf")
|
13
|
+
pdf.save("copy.pdf")
|
14
|
+
```
|
15
|
+
|
16
|
+
## Document information
|
17
|
+
|
18
|
+
Page count:
|
19
|
+
```ruby
|
20
|
+
pdf.page_count
|
21
|
+
```
|
22
|
+
|
23
|
+
PDF Metadata:
|
24
|
+
```ruby
|
25
|
+
pdf.metadata
|
26
|
+
```
|
27
|
+
|
28
|
+
Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
29
|
+
|
30
|
+
|
31
|
+
|
32
|
+
## Bookmarks
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
def print_bookmarks(list, indent=0)
|
36
|
+
list.bookmarks.each do | bm |
|
37
|
+
print ' ' * indent
|
38
|
+
puts bm.title
|
39
|
+
print_bookmarks( bm.children, indent + 1 )
|
40
|
+
end
|
41
|
+
end
|
42
|
+
print_bookmarks( pdf.bookmarks )
|
43
|
+
```
|
44
|
+
|
45
|
+
## Render page as an image
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
pdf.each_page do | page |
|
49
|
+
page.as_image(width: 800).save("test-#{page.number}.png")
|
50
|
+
end
|
51
|
+
```
|
52
|
+
|
53
|
+
## Extract embedded images from page
|
54
|
+
```ruby
|
55
|
+
doc = PDFium::Document.new("test.pdf")
|
56
|
+
page = doc.page_at(0)
|
57
|
+
page.images do |image|
|
58
|
+
image.save("page-0-image-#{image.index}.png")
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
## Text access
|
63
|
+
|
64
|
+
Text is returned as a UTF-16LE encoded string. Future versions may return position information as well.
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
pdf.page_at(0).text.encode!("ASCII-8BIT")
|
68
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rdoc/task'
|
4
|
+
|
5
|
+
Rake::TestTask.new do |t|
|
6
|
+
t.libs << 'test'
|
7
|
+
t.pattern = "test/*_spec.rb"
|
8
|
+
end
|
9
|
+
|
10
|
+
RDOC_FILES = FileList["README.md",
|
11
|
+
"lib/pdfium.rb",
|
12
|
+
"lib/pdfium/*.rb",
|
13
|
+
"ext/pdfium_ext/*.cc"
|
14
|
+
]
|
15
|
+
Rake::RDocTask.new do |rd|
|
16
|
+
rd.main = "README.md"
|
17
|
+
rd.options << "--verbose"
|
18
|
+
rd.rdoc_files.include(RDOC_FILES)
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
require "bundler/gem_tasks"
|
23
|
+
|
24
|
+
require "rake/extensiontask"
|
25
|
+
Rake::ExtensionTask.new("pdfium_ext") do | ext |
|
26
|
+
ext.source_pattern = "*.cc"
|
27
|
+
end
|
28
|
+
|
29
|
+
task :buildtest => :compile do
|
30
|
+
Rake::Task["test"].invoke
|
31
|
+
end
|
32
|
+
|
33
|
+
task :console do
|
34
|
+
require 'irb'
|
35
|
+
require 'irb/completion'
|
36
|
+
require 'pdfium'
|
37
|
+
ARGV.clear
|
38
|
+
IRB.start
|
39
|
+
end
|
40
|
+
|
41
|
+
# valgrind and Ruby
|
42
|
+
# http://blog.flavorjon.es/2009/06/easily-valgrind-gdb-your-ruby-c.html
|
43
|
+
# http://blog.evanweaver.com/2008/02/05/valgrind-and-ruby/
|
44
|
+
namespace :test do
  # partial-loads-ok and undef-value-errors necessary to ignore
  # spurious (and eminently ignorable) warnings from the ruby
  # interpreter
  #
  # The options are joined with single spaces so the final command is one
  # shell line; an embedded newline would make the shell treat the remainder
  # as a separate command.
  VALGRIND_BASIC_OPTS = %w[
    --tool=memcheck
    --dsymutil=yes
    --num-callers=50
    --error-limit=no
    --leak-check=full
    --partial-loads-ok=yes
    --undef-value-errors=no
  ].join(" ")

  SUPRESS = "" # "--suppressions=./valgrind.supp"

  desc "run test suite under valgrind with basic ruby options"
  task :valgrind => :compile do
    # `ruby -S rake` looks rake up on PATH; a bare `ruby rake` would try to
    # run a script literally named "rake" in the current directory.
    cmdline = "valgrind #{SUPRESS} #{VALGRIND_BASIC_OPTS} ruby -S rake test"
    puts cmdline
    system cmdline
  end
end
|
@@ -0,0 +1,221 @@
|
|
1
|
+
#include "pdfium.h"
|
2
|
+
|
3
|
+
/////////////////////////////////////////////////////////////////////////
|
4
|
+
// The Bookmark class
|
5
|
+
/////////////////////////////////////////////////////////////////////////
|
6
|
+
/*
|
7
|
+
* Document-class: PDFium::Bookmark
|
8
|
+
*
|
9
|
+
* Bookmarks on a Document form a tree structure.
|
10
|
+
* Each can have siblings and children
|
11
|
+
*
|
12
|
+
*/
|
13
|
+
|
14
|
+
|
15
|
+
class Bookmark {
|
16
|
+
public:
|
17
|
+
Bookmark():
|
18
|
+
doc_wrapper(0),
|
19
|
+
bookmark(0){}
|
20
|
+
~Bookmark(){
|
21
|
+
if (doc_wrapper)
|
22
|
+
doc_wrapper->release(this);
|
23
|
+
if (bookmark)
|
24
|
+
delete bookmark;
|
25
|
+
}
|
26
|
+
DocumentWrapper *doc_wrapper;
|
27
|
+
CPDF_Bookmark *bookmark;
|
28
|
+
};
|
29
|
+
|
30
|
+
// a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
|
31
|
+
CPDF_Bookmark *
|
32
|
+
RB2BM(VALUE self) {
|
33
|
+
Bookmark *bm;
|
34
|
+
Data_Get_Struct(self, Bookmark, bm);
|
35
|
+
return bm->bookmark;
|
36
|
+
}
|
37
|
+
|
38
|
+
static void
|
39
|
+
bookmark_gc_free(Bookmark *bm) {
|
40
|
+
delete bm;
|
41
|
+
}
|
42
|
+
|
43
|
+
// Allocator for PDFium::Bookmark: creates an empty native Bookmark and wraps
// it in a Ruby object of class `klass`, with bookmark_gc_free as the free
// function and no mark function.
static VALUE
bookmark_allocate(VALUE klass) {
    Bookmark *fresh = new Bookmark();
    VALUE wrapped = Data_Wrap_Struct(klass, NULL, bookmark_gc_free, fresh);
    return wrapped;
}
|
48
|
+
|
49
|
+
|
50
|
+
/*
|
51
|
+
* call-seq:
|
52
|
+
* Bookmark.new
|
53
|
+
*
|
54
|
+
* Initializes a bookmark. Not intended for direct use, but called
|
55
|
+
* internally from Document#bookmarks
|
56
|
+
*/
|
57
|
+
VALUE
|
58
|
+
bookmark_initialize(VALUE self, VALUE options){
|
59
|
+
Bookmark *bm;
|
60
|
+
Data_Get_Struct(self, Bookmark, bm);
|
61
|
+
DocumentWrapper *doc;
|
62
|
+
CPDF_Bookmark bookmark;
|
63
|
+
|
64
|
+
if (TYPE(options) != T_HASH){
|
65
|
+
rb_raise(rb_eArgError, "no options given");
|
66
|
+
return Qnil;
|
67
|
+
}
|
68
|
+
|
69
|
+
VALUE reference;
|
70
|
+
if ( !NIL_P(reference = RB::get_option(options,"document")) ){
|
71
|
+
// we're the first bookmark on a document
|
72
|
+
Data_Get_Struct(reference, DocumentWrapper, doc);
|
73
|
+
bookmark = CPDF_Bookmark(NULL);
|
74
|
+
CPDF_BookmarkTree tree(doc->document);
|
75
|
+
bm->bookmark = new CPDF_Bookmark( tree.GetFirstChild(bookmark).GetDict() );
|
76
|
+
} else if ( !NIL_P(reference = RB::get_option(options,"parent")) ){
|
77
|
+
// we're the first sibling on a parent bookmark
|
78
|
+
Bookmark *reference_bm;
|
79
|
+
Data_Get_Struct(reference, Bookmark, reference_bm);
|
80
|
+
doc = reference_bm->doc_wrapper;
|
81
|
+
bm->bookmark = new CPDF_Bookmark(reference_bm->bookmark->GetDict());
|
82
|
+
} else if ( !NIL_P(reference = RB::get_option(options,"sibling")) ){
|
83
|
+
// we're the next bookmark after a sibling bookmark
|
84
|
+
Bookmark *reference_bm;
|
85
|
+
Data_Get_Struct(reference, Bookmark, reference_bm);
|
86
|
+
doc = reference_bm->doc_wrapper;
|
87
|
+
CPDF_BookmarkTree tree(doc->document);
|
88
|
+
bm->bookmark = new CPDF_Bookmark( tree.GetNextSibling(*reference_bm->bookmark) );
|
89
|
+
} else {
|
90
|
+
rb_raise(rb_eArgError, "options must contain either :document, :parent or :sibling");
|
91
|
+
return Qnil;
|
92
|
+
}
|
93
|
+
|
94
|
+
bm->doc_wrapper = doc;
|
95
|
+
doc->retain(bm);
|
96
|
+
|
97
|
+
return Qnil;
|
98
|
+
}
|
99
|
+
|
100
|
+
/*
|
101
|
+
* call-seq:
|
102
|
+
* children -> BookmarkList
|
103
|
+
*
|
104
|
+
* All Bookmarks that are children. If the Bookmark has no children, an empty list is returned
|
105
|
+
*/
|
106
|
+
static VALUE
|
107
|
+
bookmark_children(VALUE self)
|
108
|
+
{
|
109
|
+
Bookmark *bm;
|
110
|
+
Data_Get_Struct(self, Bookmark, bm);
|
111
|
+
|
112
|
+
CPDF_BookmarkTree tree(bm->doc_wrapper->document);
|
113
|
+
CPDF_Bookmark child( tree.GetFirstChild(*bm->bookmark) );
|
114
|
+
|
115
|
+
VALUE args[1];
|
116
|
+
|
117
|
+
if (child.GetDict()){
|
118
|
+
args[0] = rb_hash_new();
|
119
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("parent")), self);
|
120
|
+
args[0] = rb_class_new_instance( 1, args, RB::Bookmark() );
|
121
|
+
} else {
|
122
|
+
args[0] = Qnil; //rb_class_new_instance( 1, args, T_NIL );
|
123
|
+
}
|
124
|
+
return rb_class_new_instance( 1, args, RB::BookmarkList() );
|
125
|
+
|
126
|
+
}
|
127
|
+
|
128
|
+
/*
|
129
|
+
* call-seq:
|
130
|
+
* next_sibling -> Bookmark
|
131
|
+
*
|
132
|
+
* Returns the Bookmark that comes after this one
|
133
|
+
*/
|
134
|
+
static VALUE
|
135
|
+
bookmark_next_sibling(VALUE self)
|
136
|
+
{
|
137
|
+
Bookmark *bm;
|
138
|
+
Data_Get_Struct(self, Bookmark, bm);
|
139
|
+
CPDF_BookmarkTree tree(bm->doc_wrapper->document);
|
140
|
+
CPDF_Bookmark next = tree.GetNextSibling(*bm->bookmark);
|
141
|
+
|
142
|
+
if (next.GetDict()){
|
143
|
+
VALUE args[1];
|
144
|
+
args[0] = rb_hash_new();
|
145
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("sibling")), self);
|
146
|
+
return rb_class_new_instance( 1, args, RB::Bookmark() );
|
147
|
+
} else {
|
148
|
+
return Qnil;
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
|
153
|
+
/*
|
154
|
+
* call-seq:
|
155
|
+
* title -> String encoded as UTF-16LE
|
156
|
+
*
|
157
|
+
* Returns the title of the bookmark in UTF-16LE format.
|
158
|
+
* This means that the string cannot be directly compared to a ASCII string, and must be converted.
|
159
|
+
*
|
160
|
+
* bookmark.title.encode!("ASCII-8BIT")
|
161
|
+
*
|
162
|
+
*/
|
163
|
+
static VALUE
|
164
|
+
bookmark_title(VALUE self)
|
165
|
+
{
|
166
|
+
return RB::to_string( RB2BM(self)->GetTitle() );
|
167
|
+
}
|
168
|
+
|
169
|
+
/*
|
170
|
+
* call-seq:
|
171
|
+
* destination -> Hash
|
172
|
+
*
|
173
|
+
* Returns the destination data of the bookmark.
|
174
|
+
* Only the destination type is tested.
|
175
|
+
* Bug reports and confirmation on the action type is appreciated.
|
176
|
+
*/
|
177
|
+
static VALUE
|
178
|
+
bookmark_destination(VALUE self)
|
179
|
+
{
|
180
|
+
Bookmark *bm;
|
181
|
+
Data_Get_Struct(self, Bookmark, bm);
|
182
|
+
auto doc = bm->doc_wrapper->document;
|
183
|
+
VALUE hash=rb_hash_new();
|
184
|
+
CPDF_Dest dest = bm->bookmark->GetDest( doc );
|
185
|
+
if (dest){
|
186
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("destination")));
|
187
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("page_number") ), INT2NUM(dest.GetPageIndex(doc)));
|
188
|
+
} else {
|
189
|
+
CPDF_Action action = bm->bookmark->GetAction();
|
190
|
+
if (action){
|
191
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("action")));
|
192
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("action") ),
|
193
|
+
rb_str_new2( action.GetTypeName().c_str() ) );
|
194
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("uri") ),
|
195
|
+
rb_str_new2( action.GetURI(doc).c_str() ) );
|
196
|
+
} else {
|
197
|
+
rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("unknown")));
|
198
|
+
}
|
199
|
+
}
|
200
|
+
return hash;
|
201
|
+
}
|
202
|
+
|
203
|
+
// Defines PDFium::Bookmark, registers its allocator and methods, and
// returns the class object.
VALUE
define_bookmark_class(){

    // NOTE(review): this guarded declaration appears to exist only so that
    // RDoc's C parser can see which module RB_PDFium refers to; it is not
    // compiled unless the macro is defined to a non-zero value.
#if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
    VALUE RB_PDFium = rb_define_module("PDFium");
#endif
    VALUE RB_PDFium = RB::PDFium();

    VALUE RB_Bookmark = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
    rb_define_alloc_func(RB_Bookmark, bookmark_allocate);

    // initialize is private and takes the single options Hash.
    rb_define_private_method(RB_Bookmark, "initialize",
                             RUBY_METHOD_FUNC(bookmark_initialize), 1);

    // Public readers; none of them take arguments.
    rb_define_method(RB_Bookmark, "title",
                     RUBY_METHOD_FUNC(bookmark_title), 0);
    rb_define_method(RB_Bookmark, "next_sibling",
                     RUBY_METHOD_FUNC(bookmark_next_sibling), 0);
    rb_define_method(RB_Bookmark, "children",
                     RUBY_METHOD_FUNC(bookmark_children), 0);
    rb_define_method(RB_Bookmark, "destination",
                     RUBY_METHOD_FUNC(bookmark_destination), 0);

    return RB_Bookmark;
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
// Implementation of FPDF_FILEWRITE into a file.
|
2
|
+
class BufferFileWrite : public FPDF_FILEWRITE {
|
3
|
+
public:
|
4
|
+
BufferFileWrite( const std::string &file ) :
|
5
|
+
_file( file, std::ios::out | std::ios::binary )
|
6
|
+
{
|
7
|
+
version = 1;
|
8
|
+
WriteBlock = &WriteBlockImpl;
|
9
|
+
}
|
10
|
+
~BufferFileWrite() {
|
11
|
+
_file.close();
|
12
|
+
}
|
13
|
+
|
14
|
+
private:
|
15
|
+
int DoWriteBlock(const void* data, unsigned long size){
|
16
|
+
_file.write(static_cast<const char*>(data), size);
|
17
|
+
return 1;
|
18
|
+
}
|
19
|
+
static int WriteBlockImpl(FPDF_FILEWRITE* this_file_write, const void* data,
|
20
|
+
unsigned long size){
|
21
|
+
BufferFileWrite* mem_buffer_file_write =
|
22
|
+
static_cast<BufferFileWrite*>(this_file_write);
|
23
|
+
return mem_buffer_file_write->DoWriteBlock(data, size);
|
24
|
+
}
|
25
|
+
|
26
|
+
std::ofstream _file;
|
27
|
+
};
|
@@ -0,0 +1,268 @@
|
|
1
|
+
#include "document.h"
|
2
|
+
#include "pdfium.h"
|
3
|
+
#include <cstring>
|
4
|
+
#include <iostream>
|
5
|
+
#include <stdlib.h>
|
6
|
+
#include <assert.h>
|
7
|
+
#include <stdio.h>
|
8
|
+
#include <fstream>
|
9
|
+
#include <map>
|
10
|
+
#include "buffer_file_write.hpp"
|
11
|
+
|
12
|
+
/////////////////////////////////////////////////////////////////////////
|
13
|
+
// The Document class //
|
14
|
+
/////////////////////////////////////////////////////////////////////////
|
15
|
+
/*
|
16
|
+
* Document-class: PDFium::Document
|
17
|
+
*
|
18
|
+
* A Document represents a PDF file.
|
19
|
+
*
|
20
|
+
*/
|
21
|
+
|
22
|
+
|
23
|
+
// While you might think this would free the Document object it does not
|
24
|
+
// Instead it simply marks the Document as no longer in use, and then it
|
25
|
+
// will release itself when there are no Pages in use.
|
26
|
+
// https://redmine.ruby-lang.org/issues/6292
|
27
|
+
static void
|
28
|
+
document_gc_free(DocumentWrapper* doc)
|
29
|
+
{
|
30
|
+
DEBUG_MSG("GC Free Doc: " << doc);
|
31
|
+
// Note: we do not actually destroy the object yet.
|
32
|
+
// instead we mark it as unused and it will remove itself
|
33
|
+
// once all pages are finished
|
34
|
+
doc->markUnused();
|
35
|
+
}
|
36
|
+
|
37
|
+
// Allocator for PDFium::Document: creates an empty DocumentWrapper and wraps
// it in a Ruby object of class `klass`, with document_gc_free as the free
// function and no mark function.
static VALUE
document_allocate(VALUE klass)
{
    DocumentWrapper *wrapper = new DocumentWrapper();
    DEBUG_MSG("Alloc PDF: " << wrapper);
    VALUE instance = Data_Wrap_Struct(klass, NULL, document_gc_free, wrapper);
    return instance;
}
|
44
|
+
|
45
|
+
|
46
|
+
/*
|
47
|
+
* call-seq:
|
48
|
+
* Document.new( path_to_pdf_file ) -> Document
|
49
|
+
* Document.new() -> An empty PDF Document with no pages
|
50
|
+
*
|
51
|
+
* Initializes a document either from a PDF file or creates a blank document
|
52
|
+
*/
|
53
|
+
VALUE
|
54
|
+
document_initialize(int argc, VALUE *argv, VALUE self)
|
55
|
+
{
|
56
|
+
DocumentWrapper* d;
|
57
|
+
Data_Get_Struct(self, DocumentWrapper, d);
|
58
|
+
if (argc){
|
59
|
+
VALUE path = RB::to_s(argv[0]); // call to_s in case it's a Pathname
|
60
|
+
d->document = (CPDF_Document*)FPDF_LoadDocument(StringValuePtr(path), NULL);
|
61
|
+
} else {
|
62
|
+
d->document = (CPDF_Document*)FPDF_CreateNewDocument();
|
63
|
+
}
|
64
|
+
if (! d->document ){
|
65
|
+
rb_raise(rb_eArgError, "Unable to create document: %s", PDFiumLastErrorString());
|
66
|
+
}
|
67
|
+
return Qnil;
|
68
|
+
}
|
69
|
+
|
70
|
+
|
71
|
+
/*
|
72
|
+
* call-seq:
|
73
|
+
* Document.from_memory( pdf_data ) -> Document
|
74
|
+
*
|
75
|
+
* Initializes a document from a binary string.
|
76
|
+
*
|
77
|
+
* See Image#data for an example of reading a PDF directly from Amazon S3
|
78
|
+
* and writing it's images completely in memory.
|
79
|
+
*/
|
80
|
+
static VALUE
|
81
|
+
document_from_memory(VALUE klass, VALUE data){
|
82
|
+
DocumentWrapper *doc = new DocumentWrapper();
|
83
|
+
VALUE instance = Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
|
84
|
+
doc->document = (CPDF_Document*)FPDF_LoadMemDocument(RSTRING_PTR(data), RSTRING_LEN(data),NULL);
|
85
|
+
return instance;
|
86
|
+
}
|
87
|
+
|
88
|
+
/*
|
89
|
+
* call-seq:
|
90
|
+
* page_count -> Fixnum
|
91
|
+
*
|
92
|
+
* Returns the number of pages on a Document
|
93
|
+
*/
|
94
|
+
static VALUE
|
95
|
+
document_page_count(VALUE self)
|
96
|
+
{
|
97
|
+
return INT2NUM( RB2DOC(self)->GetPageCount() );
|
98
|
+
}
|
99
|
+
|
100
|
+
// Not documented in favor of the Document#pages[] access
|
101
|
+
/* :nodoc: */
|
102
|
+
static VALUE
|
103
|
+
document_page_at(VALUE self, VALUE rb_page_index)
|
104
|
+
{
|
105
|
+
return rb_funcall(RB::Page(), rb_intern("open"), 2, self, rb_page_index);
|
106
|
+
}
|
107
|
+
|
108
|
+
/*
|
109
|
+
* call-seq:
|
110
|
+
* pages -> PDFium::PageList
|
111
|
+
*
|
112
|
+
* Returns a collection of all the pages on the document as a PDFium::PageList. Pages
|
113
|
+
* are lazily loaded.
|
114
|
+
*
|
115
|
+
*/
|
116
|
+
static VALUE
|
117
|
+
document_pages(VALUE self)
|
118
|
+
{
|
119
|
+
VALUE args[1];
|
120
|
+
args[0] = self;
|
121
|
+
return rb_class_new_instance( 1, args, RB::PageList() );
|
122
|
+
}
|
123
|
+
|
124
|
+
// creates and yields a page. Not documented since all access
|
125
|
+
// should got through the Pageist interface via the Document#pages method
|
126
|
+
/* :nodoc: */
|
127
|
+
static VALUE
|
128
|
+
document_each_page(VALUE self)
|
129
|
+
{
|
130
|
+
auto doc = RB2DOC(self);
|
131
|
+
auto count = doc->GetPageCount();
|
132
|
+
for (int pg=0; pg < count; pg++){
|
133
|
+
VALUE page = document_page_at(self, INT2FIX(pg));
|
134
|
+
rb_yield(page);
|
135
|
+
PageWrapper *pw;
|
136
|
+
Data_Get_Struct(page, PageWrapper, pw);
|
137
|
+
pw->unload();
|
138
|
+
}
|
139
|
+
return self;
|
140
|
+
}
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
/*
|
145
|
+
* call-seq:
|
146
|
+
* bookmarks -> Bookmarks
|
147
|
+
*
|
148
|
+
* Retrieves the first Bookmark for a document
|
149
|
+
*/
|
150
|
+
static VALUE
|
151
|
+
document_bookmarks(VALUE self)
|
152
|
+
{
|
153
|
+
VALUE args[1];
|
154
|
+
args[0] = rb_hash_new();
|
155
|
+
rb_hash_aset(args[0], ID2SYM(rb_intern("document")), self);
|
156
|
+
VALUE bm = rb_class_new_instance( 1, args, RB::Bookmark() );
|
157
|
+
args[0] = bm;
|
158
|
+
return rb_class_new_instance( 1, args, RB::BookmarkList() );
|
159
|
+
}
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
/*
|
165
|
+
* call-seq:
|
166
|
+
* save -> Document
|
167
|
+
*
|
168
|
+
* Saves document to a PDF file. This method isn't terribly useful since there aren't
|
169
|
+
* (yet) methods to add content to pages.
|
170
|
+
*/
|
171
|
+
static VALUE
|
172
|
+
document_save(VALUE self, VALUE _path)
|
173
|
+
{
|
174
|
+
auto doc = RB2DOC(self);
|
175
|
+
VALUE path = RB::to_s(_path); // call to_s in case it's a Pathname
|
176
|
+
BufferFileWrite output_file_write(StringValuePtr(path));
|
177
|
+
FPDF_SaveAsCopy(doc, &output_file_write, FPDF_REMOVE_SECURITY);
|
178
|
+
return self;
|
179
|
+
}
|
180
|
+
|
181
|
+
|
182
|
+
/*
|
183
|
+
call-seq:
|
184
|
+
metadata -> Hash
|
185
|
+
|
186
|
+
Retrieves and optionally sets the metadata on a document. Returns a hash with the following keys:
|
187
|
+
|
188
|
+
:title, :author :subject, :keywords, :creator, :producer, :creation_date, :mod_date
|
189
|
+
|
190
|
+
An empty Hash will be returned if the metadata cannot be read
|
191
|
+
|
192
|
+
All values in the hash are encoded as UTF-16LE strings.
|
193
|
+
|
194
|
+
If caled with a block, the values will be passed to it and updates written back to the Document
|
195
|
+
|
196
|
+
=== Example
|
197
|
+
pdf = PDFium::Document.new( "test.pdf" )
|
198
|
+
pdf.metadata do | md |
|
199
|
+
md[:title] = "My Awesome PDF"
|
200
|
+
md[:author] = "Nathan Stitt"
|
201
|
+
end
|
202
|
+
pdf.metadata[:author] # => "Nathan Stitt"
|
203
|
+
|
204
|
+
*/
|
205
|
+
VALUE
|
206
|
+
document_metadata(int argc, VALUE *argv, VALUE self)
|
207
|
+
{
|
208
|
+
auto doc = RB2DOC(self);
|
209
|
+
VALUE metadata = rb_hash_new();
|
210
|
+
CPDF_Dictionary* info = doc->GetInfo();
|
211
|
+
if (!info)
|
212
|
+
return metadata;
|
213
|
+
|
214
|
+
VALUE block;
|
215
|
+
rb_scan_args(argc, argv, "0&", &block);
|
216
|
+
|
217
|
+
std::map<std::string, std::string> keys = {
|
218
|
+
{ "Title", "title" },
|
219
|
+
{ "Author", "author" },
|
220
|
+
{ "Subject", "subject" },
|
221
|
+
{ "Keywords", "keywords"},
|
222
|
+
{ "Creator", "creator" },
|
223
|
+
{ "Producer", "producer"},
|
224
|
+
{ "CreationDate", "creation_date" },
|
225
|
+
{ "ModDate", "mod_date" }
|
226
|
+
};
|
227
|
+
|
228
|
+
for (auto& kv : keys) {
|
229
|
+
rb_hash_aset(metadata,
|
230
|
+
ID2SYM( rb_intern( kv.second.c_str() ) ),
|
231
|
+
RB::to_string( info->GetUnicodeText( kv.first.c_str() ) )
|
232
|
+
);
|
233
|
+
}
|
234
|
+
|
235
|
+
if (RTEST(block)){
|
236
|
+
rb_yield( metadata );
|
237
|
+
for (auto& kv : keys) {
|
238
|
+
VALUE value = RB::get_option(metadata, kv.second);
|
239
|
+
auto bs = CFX_ByteString( RSTRING_PTR(value), RSTRING_LEN(value) );
|
240
|
+
info->SetAtString(kv.first.c_str(), bs);
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
return metadata;
|
245
|
+
}
|
246
|
+
|
247
|
+
// Defines PDFium::Document, registers its allocator, singleton method and
// instance methods, and returns the class object.
VALUE
define_document_class()
{
    VALUE RB_PDFium = RB::PDFium();

    // The Document class definition and methods
    VALUE RB_Document = rb_define_class_under(RB_PDFium, "Document", rb_cObject);

    rb_define_alloc_func(RB_Document, document_allocate);

    // Document.from_memory( data )
    rb_define_singleton_method(RB_Document, "from_memory",
                               RUBY_METHOD_FUNC(document_from_memory), 1);

    // Arity -1: initialize and metadata receive (argc, argv, self).
    rb_define_private_method(RB_Document, "initialize",
                             RUBY_METHOD_FUNC(document_initialize), -1);
    rb_define_method(RB_Document, "page_count",
                     RUBY_METHOD_FUNC(document_page_count), 0);
    rb_define_method(RB_Document, "page_at",
                     RUBY_METHOD_FUNC(document_page_at), 1);
    rb_define_method(RB_Document, "each_page",
                     RUBY_METHOD_FUNC(document_each_page), 0);
    rb_define_method(RB_Document, "pages",
                     RUBY_METHOD_FUNC(document_pages), 0);
    rb_define_method(RB_Document, "metadata",
                     RUBY_METHOD_FUNC(document_metadata), -1);
    rb_define_method(RB_Document, "bookmarks",
                     RUBY_METHOD_FUNC(document_bookmarks), 0);
    rb_define_method(RB_Document, "save",
                     RUBY_METHOD_FUNC(document_save), 1);
    return RB_Document;
}
|