pdfium 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +9 -0
  5. data/Guardfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +68 -0
  8. data/Rakefile +62 -0
  9. data/ext/pdfium_ext/bookmark.cc +221 -0
  10. data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
  11. data/ext/pdfium_ext/document.cc +268 -0
  12. data/ext/pdfium_ext/document.h +66 -0
  13. data/ext/pdfium_ext/document_wrapper.cc +63 -0
  14. data/ext/pdfium_ext/document_wrapper.h +56 -0
  15. data/ext/pdfium_ext/extconf.h +3 -0
  16. data/ext/pdfium_ext/extconf.rb +76 -0
  17. data/ext/pdfium_ext/image.cc +332 -0
  18. data/ext/pdfium_ext/page.cc +392 -0
  19. data/ext/pdfium_ext/page.h +5 -0
  20. data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
  21. data/ext/pdfium_ext/page_object_wrapper.h +27 -0
  22. data/ext/pdfium_ext/page_wrapper.cc +86 -0
  23. data/ext/pdfium_ext/page_wrapper.h +37 -0
  24. data/ext/pdfium_ext/pdfium.cc +115 -0
  25. data/ext/pdfium_ext/pdfium.h +69 -0
  26. data/lib/pdfium.rb +15 -0
  27. data/lib/pdfium/bookmark_list.rb +28 -0
  28. data/lib/pdfium/bounding_box.rb +16 -0
  29. data/lib/pdfium/image_list.rb +21 -0
  30. data/lib/pdfium/page_list.rb +36 -0
  31. data/lib/pdfium/page_sizes.rb +7 -0
  32. data/lib/pdfium/version.rb +4 -0
  33. data/pdfium.gemspec +29 -0
  34. data/test/benchmark-docsplit.rb +41 -0
  35. data/test/bookmarks_list_spec.rb +26 -0
  36. data/test/bookmarks_spec.rb +34 -0
  37. data/test/debug.rb +24 -0
  38. data/test/document_spec.rb +49 -0
  39. data/test/image_list_spec.rb +18 -0
  40. data/test/image_spec.rb +53 -0
  41. data/test/page_list_spec.rb +24 -0
  42. data/test/page_spec.rb +91 -0
  43. data/test/pdfium_spec.rb +15 -0
  44. data/test/profile.rb +29 -0
  45. data/test/spec_helper.rb +31 -0
  46. metadata +202 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
4
+ data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
5
+ SHA512:
6
+ metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
7
+ data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
@@ -0,0 +1,9 @@
1
+ .bundle
2
+ Gemfile.lock
3
+ *.bundle
4
+ *.so
5
+ *.o
6
+ *.a
7
+ mkmf.log
8
+ tmp
9
+ test/pdfs
@@ -0,0 +1 @@
1
+ ruby-2.2.0
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
source 'https://rubygems.org'

# Runtime and development dependencies are declared in pdfium.gemspec.
gemspec

# Debugger helper that is only needed while working on the gem itself.
gem 'pry-nav', group: :development
@@ -0,0 +1,7 @@
1
# Re-run the minitest guard whenever a spec file under test/ changes.
guard(:minitest) { watch(%r{^test/(.*)_spec\.rb$}) }

# Run the `buildtest` rake task whenever a native source or header changes.
guard('rake', task: 'buildtest') { watch(%r{ext/pdfium_ext/.*\.(cc|h)$}) }
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Nathan Stitt
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,68 @@
1
+ # Ruby bindings for Google's PDFium project
2
+
3
+ This allows Ruby to efficiently extract information from PDF files.
4
+
5
+ It currently has only very rudimentary PDF editing capabilities.
6
+
7
+ RDoc documentation is also available and the test directory has examples of usage.
8
+
9
+ ## Opening and saving
10
+
11
+ ```ruby
12
+ pdf = PDFium::Document.new("test.pdf")
13
+ pdf.save("copy.pdf")
14
+ ```
15
+
16
+ ## Document information
17
+
18
+ Page count:
19
+ ```ruby
20
+ pdf.page_count
21
+ ```
22
+
23
+ PDF Metadata:
24
+ ```ruby
25
+ pdf.metadata
26
+ ```
27
+
28
+ Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
29
+
30
+
31
+
32
+ ## Bookmarks
33
+
34
+ ```ruby
35
+ def print_bookmarks(list, indent=0)
36
+ list.bookmarks.each do | bm |
37
+ print ' ' * indent
38
+ puts bm.title
39
+ print_bookmarks( bm.children, indent + 1 )
40
+ end
41
+ end
42
+ print_bookmarks( pdf.bookmarks )
43
+ ```
44
+
45
+ ## Render page as an image
46
+
47
+ ```ruby
48
+ pdf.each_page do | page |
49
+ page.as_image(width: 800).save("test-#{page.number}.png")
50
+ end
51
+ ```
52
+
53
+ ## Extract embedded images from page
54
+ ```ruby
55
+ doc = PDFium::Document.new("test.pdf")
56
+ page = doc.page_at(0)
57
+ page.images do |image|
58
+ image.save("page-0-image-#{image.index}.png")
59
+ end
60
+ ```
61
+
62
+ ## Text access
63
+
64
+ Text is returned as a UTF-16LE encoded string. Future versions may return position information as well.
65
+
66
+ ```ruby
67
+ pdf.page_at(0).text.encode!("ASCII-8BIT")
68
+ ```
@@ -0,0 +1,62 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rake/testtask'
3
+ require 'rdoc/task'
4
+
5
# `rake test`: runs every *_spec.rb file that lives directly under test/.
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.pattern = "test/*_spec.rb"
end

# Files handed to RDoc: the README, the Ruby sources and the C++ extension
# sources (the latter carry the call-seq documentation comments).
RDOC_FILES = FileList[
  "README.md",
  "lib/pdfium.rb",
  "lib/pdfium/*.rb",
  "ext/pdfium_ext/*.cc"
]

Rake::RDocTask.new do |rd|
  rd.main = "README.md"
  rd.options << "--verbose"
  rd.rdoc_files.include(RDOC_FILES)
end

# NOTE: "bundler/gem_tasks" is already required at the top of this file, so
# the second, redundant require that used to sit here has been dropped.

require "rake/extensiontask"
Rake::ExtensionTask.new("pdfium_ext") do |ext|
  ext.source_pattern = "*.cc"
end

desc "compile the extension, then run the test suite"
task :buildtest => :compile do
  Rake::Task["test"].invoke
end

desc "start an IRB session with pdfium loaded"
task :console do
  require 'irb'
  require 'irb/completion'
  require 'pdfium'
  # IRB would otherwise try to interpret rake's own arguments.
  ARGV.clear
  IRB.start
end
40
+
41
+ # valgrind and Ruby
42
+ # http://blog.flavorjon.es/2009/06/easily-valgrind-gdb-your-ruby-c.html
43
+ # http://blog.evanweaver.com/2008/02/05/valgrind-and-ruby/
44
namespace :test do
  # partial-loads-ok and undef-value-errors are necessary to ignore
  # spurious (and eminently ignorable) warnings from the ruby interpreter.
  #
  # The options are joined with single spaces on purpose: the previous
  # heredoc left newlines in the string, and because the command is run
  # through the shell each newline terminated the command early.
  VALGRIND_BASIC_OPTS = %w[
    --tool=memcheck
    --dsymutil=yes
    --num-callers=50 --error-limit=no --leak-check=full
    --partial-loads-ok=yes --undef-value-errors=no
  ].join(' ')

  # Extra valgrind arguments; set to e.g. "--suppressions=./valgrind.supp"
  # to enable a suppressions file.
  SUPRESS = ""

  desc "run test suite under valgrind with basic ruby options"
  task :valgrind => :compile do
    # `ruby -S rake` looks rake up on PATH; plain `ruby rake` would try to
    # execute a file literally named "rake" in the current directory.
    cmdline = "valgrind #{SUPRESS} #{VALGRIND_BASIC_OPTS} ruby -S rake test"
    puts cmdline
    system cmdline
  end
end
@@ -0,0 +1,221 @@
1
+ #include "pdfium.h"
2
+
3
+ /////////////////////////////////////////////////////////////////////////
4
+ // The Bookmark class
5
+ /////////////////////////////////////////////////////////////////////////
6
+ /*
7
+ * Document-class: PDFium::Bookmark
8
+ *
9
+ * Bookmarks on a Document form a tree structure.
10
+ * Each can have siblings and children
11
+ *
12
+ */
13
+
14
+
15
+ class Bookmark {
16
+ public:
17
+ Bookmark():
18
+ doc_wrapper(0),
19
+ bookmark(0){}
20
+ ~Bookmark(){
21
+ if (doc_wrapper)
22
+ doc_wrapper->release(this);
23
+ if (bookmark)
24
+ delete bookmark;
25
+ }
26
+ DocumentWrapper *doc_wrapper;
27
+ CPDF_Bookmark *bookmark;
28
+ };
29
+
30
+ // a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
31
+ CPDF_Bookmark *
32
+ RB2BM(VALUE self) {
33
+ Bookmark *bm;
34
+ Data_Get_Struct(self, Bookmark, bm);
35
+ return bm->bookmark;
36
+ }
37
+
38
+ static void
39
+ bookmark_gc_free(Bookmark *bm) {
40
+ delete bm;
41
+ }
42
+
43
+ static VALUE
44
+ bookmark_allocate(VALUE klass) {
45
+ auto bm = new Bookmark;
46
+ return Data_Wrap_Struct(klass, NULL, bookmark_gc_free, bm );
47
+ }
48
+
49
+
50
+ /*
51
+ * call-seq:
52
+ * Bookmark.new
53
+ *
54
+ * Initializes a bookmark. Not intended for direct use, but called
55
+ * internally from Document#bookmarks
56
+ */
57
+ VALUE
58
+ bookmark_initialize(VALUE self, VALUE options){
59
+ Bookmark *bm;
60
+ Data_Get_Struct(self, Bookmark, bm);
61
+ DocumentWrapper *doc;
62
+ CPDF_Bookmark bookmark;
63
+
64
+ if (TYPE(options) != T_HASH){
65
+ rb_raise(rb_eArgError, "no options given");
66
+ return Qnil;
67
+ }
68
+
69
+ VALUE reference;
70
+ if ( !NIL_P(reference = RB::get_option(options,"document")) ){
71
+ // we're the first bookmark on a document
72
+ Data_Get_Struct(reference, DocumentWrapper, doc);
73
+ bookmark = CPDF_Bookmark(NULL);
74
+ CPDF_BookmarkTree tree(doc->document);
75
+ bm->bookmark = new CPDF_Bookmark( tree.GetFirstChild(bookmark).GetDict() );
76
+ } else if ( !NIL_P(reference = RB::get_option(options,"parent")) ){
77
+ // we're the first sibling on a parent bookmark
78
+ Bookmark *reference_bm;
79
+ Data_Get_Struct(reference, Bookmark, reference_bm);
80
+ doc = reference_bm->doc_wrapper;
81
+ bm->bookmark = new CPDF_Bookmark(reference_bm->bookmark->GetDict());
82
+ } else if ( !NIL_P(reference = RB::get_option(options,"sibling")) ){
83
+ // we're the next bookmark after a sibling bookmark
84
+ Bookmark *reference_bm;
85
+ Data_Get_Struct(reference, Bookmark, reference_bm);
86
+ doc = reference_bm->doc_wrapper;
87
+ CPDF_BookmarkTree tree(doc->document);
88
+ bm->bookmark = new CPDF_Bookmark( tree.GetNextSibling(*reference_bm->bookmark) );
89
+ } else {
90
+ rb_raise(rb_eArgError, "options must contain either :document, :parent or :sibling");
91
+ return Qnil;
92
+ }
93
+
94
+ bm->doc_wrapper = doc;
95
+ doc->retain(bm);
96
+
97
+ return Qnil;
98
+ }
99
+
100
+ /*
101
+ * call-seq:
102
+ * children -> BookmarkList
103
+ *
104
+ * All Bookmarks that are children. If the Bookmark has no children, an empty list is returned
105
+ */
106
+ static VALUE
107
+ bookmark_children(VALUE self)
108
+ {
109
+ Bookmark *bm;
110
+ Data_Get_Struct(self, Bookmark, bm);
111
+
112
+ CPDF_BookmarkTree tree(bm->doc_wrapper->document);
113
+ CPDF_Bookmark child( tree.GetFirstChild(*bm->bookmark) );
114
+
115
+ VALUE args[1];
116
+
117
+ if (child.GetDict()){
118
+ args[0] = rb_hash_new();
119
+ rb_hash_aset(args[0], ID2SYM(rb_intern("parent")), self);
120
+ args[0] = rb_class_new_instance( 1, args, RB::Bookmark() );
121
+ } else {
122
+ args[0] = Qnil; //rb_class_new_instance( 1, args, T_NIL );
123
+ }
124
+ return rb_class_new_instance( 1, args, RB::BookmarkList() );
125
+
126
+ }
127
+
128
+ /*
129
+ * call-seq:
130
+ * next_sibling -> Bookmark
131
+ *
132
+ * Returns the Bookmark that comes after this one
133
+ */
134
+ static VALUE
135
+ bookmark_next_sibling(VALUE self)
136
+ {
137
+ Bookmark *bm;
138
+ Data_Get_Struct(self, Bookmark, bm);
139
+ CPDF_BookmarkTree tree(bm->doc_wrapper->document);
140
+ CPDF_Bookmark next = tree.GetNextSibling(*bm->bookmark);
141
+
142
+ if (next.GetDict()){
143
+ VALUE args[1];
144
+ args[0] = rb_hash_new();
145
+ rb_hash_aset(args[0], ID2SYM(rb_intern("sibling")), self);
146
+ return rb_class_new_instance( 1, args, RB::Bookmark() );
147
+ } else {
148
+ return Qnil;
149
+ }
150
+ }
151
+
152
+
153
+ /*
154
+ * call-seq:
155
+ * title -> String encoded as UTF-16LE
156
+ *
157
+ * Returns the title of the bookmark in UTF-16LE format.
158
+ * This means that the string cannot be directly compared to an ASCII string, and must be converted.
159
+ *
160
+ * bookmark.title.encode!("ASCII-8BIT")
161
+ *
162
+ */
163
+ static VALUE
164
+ bookmark_title(VALUE self)
165
+ {
166
+ return RB::to_string( RB2BM(self)->GetTitle() );
167
+ }
168
+
169
+ /*
170
+ * call-seq:
171
+ * destination -> Hash
172
+ *
173
+ * Returns the destination data of the bookmark.
174
+ * Only the destination type is tested.
175
+ * Bug reports and confirmation on the action type are appreciated.
176
+ */
177
+ static VALUE
178
+ bookmark_destination(VALUE self)
179
+ {
180
+ Bookmark *bm;
181
+ Data_Get_Struct(self, Bookmark, bm);
182
+ auto doc = bm->doc_wrapper->document;
183
+ VALUE hash=rb_hash_new();
184
+ CPDF_Dest dest = bm->bookmark->GetDest( doc );
185
+ if (dest){
186
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("destination")));
187
+ rb_hash_aset(hash, ID2SYM( rb_intern("page_number") ), INT2NUM(dest.GetPageIndex(doc)));
188
+ } else {
189
+ CPDF_Action action = bm->bookmark->GetAction();
190
+ if (action){
191
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("action")));
192
+ rb_hash_aset(hash, ID2SYM( rb_intern("action") ),
193
+ rb_str_new2( action.GetTypeName().c_str() ) );
194
+ rb_hash_aset(hash, ID2SYM( rb_intern("uri") ),
195
+ rb_str_new2( action.GetURI(doc).c_str() ) );
196
+ } else {
197
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("unknown")));
198
+ }
199
+ }
200
+ return hash;
201
+ }
202
+
203
+ VALUE
204
+ define_bookmark_class(){
205
+
206
+ #if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
207
+ VALUE RB_PDFium = rb_define_module("PDFium");
208
+ #endif
209
+ VALUE RB_PDFium = RB::PDFium();
210
+
211
+ VALUE RB_Bookmark = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
212
+ rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
213
+
214
+ rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
215
+ rb_define_method (RB_Bookmark, "title", RUBY_METHOD_FUNC(bookmark_title), 0);
216
+ rb_define_method (RB_Bookmark, "next_sibling", RUBY_METHOD_FUNC(bookmark_next_sibling),0);
217
+ rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
218
+ rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
219
+
220
+ return RB_Bookmark;
221
+ }
@@ -0,0 +1,27 @@
1
+ // Implementation of FPDF_FILEWRITE into a file.
2
+ class BufferFileWrite : public FPDF_FILEWRITE {
3
+ public:
4
+ BufferFileWrite( const std::string &file ) :
5
+ _file( file, std::ios::out | std::ios::binary )
6
+ {
7
+ version = 1;
8
+ WriteBlock = &WriteBlockImpl;
9
+ }
10
+ ~BufferFileWrite() {
11
+ _file.close();
12
+ }
13
+
14
+ private:
15
+ int DoWriteBlock(const void* data, unsigned long size){
16
+ _file.write(static_cast<const char*>(data), size);
17
+ return 1;
18
+ }
19
+ static int WriteBlockImpl(FPDF_FILEWRITE* this_file_write, const void* data,
20
+ unsigned long size){
21
+ BufferFileWrite* mem_buffer_file_write =
22
+ static_cast<BufferFileWrite*>(this_file_write);
23
+ return mem_buffer_file_write->DoWriteBlock(data, size);
24
+ }
25
+
26
+ std::ofstream _file;
27
+ };
@@ -0,0 +1,268 @@
1
+ #include "document.h"
2
+ #include "pdfium.h"
3
+ #include <cstring>
4
+ #include <iostream>
5
+ #include <stdlib.h>
6
+ #include <assert.h>
7
+ #include <stdio.h>
8
+ #include <fstream>
9
+ #include <map>
10
+ #include "buffer_file_write.hpp"
11
+
12
+ /////////////////////////////////////////////////////////////////////////
13
+ // The Document class //
14
+ /////////////////////////////////////////////////////////////////////////
15
+ /*
16
+ * Document-class: PDFium::Document
17
+ *
18
+ * A Document represents a PDF file.
19
+ *
20
+ */
21
+
22
+
23
+ // While you might think this would free the Document object it does not
24
+ // Instead it simply marks the Document as no longer in use, and then it
25
+ // will release itself when there are no Pages in use.
26
+ // https://redmine.ruby-lang.org/issues/6292
27
+ static void
28
+ document_gc_free(DocumentWrapper* doc)
29
+ {
30
+ DEBUG_MSG("GC Free Doc: " << doc);
31
+ // Note: we do not actually destroy the object yet.
32
+ // instead we mark it as unused and it will remove itself
33
+ // once all pages are finished
34
+ doc->markUnused();
35
+ }
36
+
37
+ static VALUE
38
+ document_allocate(VALUE klass)
39
+ {
40
+ DocumentWrapper *doc = new DocumentWrapper();
41
+ DEBUG_MSG("Alloc PDF: " << doc);
42
+ return Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
43
+ }
44
+
45
+
46
+ /*
47
+ * call-seq:
48
+ * Document.new( path_to_pdf_file ) -> Document
49
+ * Document.new() -> An empty PDF Document with no pages
50
+ *
51
+ * Initializes a document either from a PDF file or creates a blank document
52
+ */
53
+ VALUE
54
+ document_initialize(int argc, VALUE *argv, VALUE self)
55
+ {
56
+ DocumentWrapper* d;
57
+ Data_Get_Struct(self, DocumentWrapper, d);
58
+ if (argc){
59
+ VALUE path = RB::to_s(argv[0]); // call to_s in case it's a Pathname
60
+ d->document = (CPDF_Document*)FPDF_LoadDocument(StringValuePtr(path), NULL);
61
+ } else {
62
+ d->document = (CPDF_Document*)FPDF_CreateNewDocument();
63
+ }
64
+ if (! d->document ){
65
+ rb_raise(rb_eArgError, "Unable to create document: %s", PDFiumLastErrorString());
66
+ }
67
+ return Qnil;
68
+ }
69
+
70
+
71
+ /*
72
+ * call-seq:
73
+ * Document.from_memory( pdf_data ) -> Document
74
+ *
75
+ * Initializes a document from a binary string.
76
+ *
77
+ * See Image#data for an example of reading a PDF directly from Amazon S3
78
+ * and writing its images completely in memory.
79
+ */
80
+ static VALUE
81
+ document_from_memory(VALUE klass, VALUE data){
82
+ DocumentWrapper *doc = new DocumentWrapper();
83
+ VALUE instance = Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
84
+ doc->document = (CPDF_Document*)FPDF_LoadMemDocument(RSTRING_PTR(data), RSTRING_LEN(data),NULL);
85
+ return instance;
86
+ }
87
+
88
+ /*
89
+ * call-seq:
90
+ * page_count -> Fixnum
91
+ *
92
+ * Returns the number of pages on a Document
93
+ */
94
+ static VALUE
95
+ document_page_count(VALUE self)
96
+ {
97
+ return INT2NUM( RB2DOC(self)->GetPageCount() );
98
+ }
99
+
100
+ // Not documented in favor of the Document#pages[] access
101
+ /* :nodoc: */
102
+ static VALUE
103
+ document_page_at(VALUE self, VALUE rb_page_index)
104
+ {
105
+ return rb_funcall(RB::Page(), rb_intern("open"), 2, self, rb_page_index);
106
+ }
107
+
108
+ /*
109
+ * call-seq:
110
+ * pages -> PDFium::PageList
111
+ *
112
+ * Returns a collection of all the pages on the document as a PDFium::PageList. Pages
113
+ * are lazily loaded.
114
+ *
115
+ */
116
+ static VALUE
117
+ document_pages(VALUE self)
118
+ {
119
+ VALUE args[1];
120
+ args[0] = self;
121
+ return rb_class_new_instance( 1, args, RB::PageList() );
122
+ }
123
+
124
+ // creates and yields a page. Not documented since all access
125
+ // should got through the Pageist interface via the Document#pages method
126
+ /* :nodoc: */
127
+ static VALUE
128
+ document_each_page(VALUE self)
129
+ {
130
+ auto doc = RB2DOC(self);
131
+ auto count = doc->GetPageCount();
132
+ for (int pg=0; pg < count; pg++){
133
+ VALUE page = document_page_at(self, INT2FIX(pg));
134
+ rb_yield(page);
135
+ PageWrapper *pw;
136
+ Data_Get_Struct(page, PageWrapper, pw);
137
+ pw->unload();
138
+ }
139
+ return self;
140
+ }
141
+
142
+
143
+
144
+ /*
145
+ * call-seq:
146
+ * bookmarks -> BookmarkList
147
+ *
148
+ * Returns a BookmarkList that starts at the first Bookmark of the document
149
+ */
150
+ static VALUE
151
+ document_bookmarks(VALUE self)
152
+ {
153
+ VALUE args[1];
154
+ args[0] = rb_hash_new();
155
+ rb_hash_aset(args[0], ID2SYM(rb_intern("document")), self);
156
+ VALUE bm = rb_class_new_instance( 1, args, RB::Bookmark() );
157
+ args[0] = bm;
158
+ return rb_class_new_instance( 1, args, RB::BookmarkList() );
159
+ }
160
+
161
+
162
+
163
+
164
+ /*
165
+ * call-seq:
166
+ * save( path ) -> Document
167
+ *
168
+ * Saves document to a PDF file. This method isn't terribly useful since there aren't
169
+ * (yet) methods to add content to pages.
170
+ */
171
+ static VALUE
172
+ document_save(VALUE self, VALUE _path)
173
+ {
174
+ auto doc = RB2DOC(self);
175
+ VALUE path = RB::to_s(_path); // call to_s in case it's a Pathname
176
+ BufferFileWrite output_file_write(StringValuePtr(path));
177
+ FPDF_SaveAsCopy(doc, &output_file_write, FPDF_REMOVE_SECURITY);
178
+ return self;
179
+ }
180
+
181
+
182
+ /*
183
+ call-seq:
184
+ metadata -> Hash
185
+
186
+ Retrieves and optionally sets the metadata on a document. Returns a hash with the following keys:
187
+
188
+ :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
189
+
190
+ An empty Hash will be returned if the metadata cannot be read
191
+
192
+ All values in the hash are encoded as UTF-16LE strings.
193
+
194
+ If called with a block, the values will be passed to it and updates written back to the Document
195
+
196
+ === Example
197
+ pdf = PDFium::Document.new( "test.pdf" )
198
+ pdf.metadata do | md |
199
+ md[:title] = "My Awesome PDF"
200
+ md[:author] = "Nathan Stitt"
201
+ end
202
+ pdf.metadata[:author] # => "Nathan Stitt"
203
+
204
+ */
205
+ VALUE
206
+ document_metadata(int argc, VALUE *argv, VALUE self)
207
+ {
208
+ auto doc = RB2DOC(self);
209
+ VALUE metadata = rb_hash_new();
210
+ CPDF_Dictionary* info = doc->GetInfo();
211
+ if (!info)
212
+ return metadata;
213
+
214
+ VALUE block;
215
+ rb_scan_args(argc, argv, "0&", &block);
216
+
217
+ std::map<std::string, std::string> keys = {
218
+ { "Title", "title" },
219
+ { "Author", "author" },
220
+ { "Subject", "subject" },
221
+ { "Keywords", "keywords"},
222
+ { "Creator", "creator" },
223
+ { "Producer", "producer"},
224
+ { "CreationDate", "creation_date" },
225
+ { "ModDate", "mod_date" }
226
+ };
227
+
228
+ for (auto& kv : keys) {
229
+ rb_hash_aset(metadata,
230
+ ID2SYM( rb_intern( kv.second.c_str() ) ),
231
+ RB::to_string( info->GetUnicodeText( kv.first.c_str() ) )
232
+ );
233
+ }
234
+
235
+ if (RTEST(block)){
236
+ rb_yield( metadata );
237
+ for (auto& kv : keys) {
238
+ VALUE value = RB::get_option(metadata, kv.second);
239
+ auto bs = CFX_ByteString( RSTRING_PTR(value), RSTRING_LEN(value) );
240
+ info->SetAtString(kv.first.c_str(), bs);
241
+ }
242
+ }
243
+
244
+ return metadata;
245
+ }
246
+
247
+ VALUE
248
+ define_document_class()
249
+ {
250
+ VALUE RB_PDFium = RB::PDFium();
251
+
252
+ // The Document class definition and methods
253
+ VALUE RB_Document = rb_define_class_under(RB_PDFium, "Document", rb_cObject);
254
+
255
+ rb_define_alloc_func(RB_Document, document_allocate);
256
+
257
+ rb_define_singleton_method(RB_Document, "from_memory", RUBY_METHOD_FUNC(document_from_memory), 1);
258
+
259
+ rb_define_private_method (RB_Document, "initialize", RUBY_METHOD_FUNC(document_initialize), -1);
260
+ rb_define_method (RB_Document, "page_count", RUBY_METHOD_FUNC(document_page_count), 0);
261
+ rb_define_method (RB_Document, "page_at", RUBY_METHOD_FUNC(document_page_at), 1);
262
+ rb_define_method (RB_Document, "each_page", RUBY_METHOD_FUNC(document_each_page), 0);
263
+ rb_define_method (RB_Document, "pages", RUBY_METHOD_FUNC(document_pages), 0);
264
+ rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
265
+ rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
266
+ rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
267
+ return RB_Document;
268
+ }