pdfium 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +9 -0
  5. data/Guardfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +68 -0
  8. data/Rakefile +62 -0
  9. data/ext/pdfium_ext/bookmark.cc +221 -0
  10. data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
  11. data/ext/pdfium_ext/document.cc +268 -0
  12. data/ext/pdfium_ext/document.h +66 -0
  13. data/ext/pdfium_ext/document_wrapper.cc +63 -0
  14. data/ext/pdfium_ext/document_wrapper.h +56 -0
  15. data/ext/pdfium_ext/extconf.h +3 -0
  16. data/ext/pdfium_ext/extconf.rb +76 -0
  17. data/ext/pdfium_ext/image.cc +332 -0
  18. data/ext/pdfium_ext/page.cc +392 -0
  19. data/ext/pdfium_ext/page.h +5 -0
  20. data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
  21. data/ext/pdfium_ext/page_object_wrapper.h +27 -0
  22. data/ext/pdfium_ext/page_wrapper.cc +86 -0
  23. data/ext/pdfium_ext/page_wrapper.h +37 -0
  24. data/ext/pdfium_ext/pdfium.cc +115 -0
  25. data/ext/pdfium_ext/pdfium.h +69 -0
  26. data/lib/pdfium.rb +15 -0
  27. data/lib/pdfium/bookmark_list.rb +28 -0
  28. data/lib/pdfium/bounding_box.rb +16 -0
  29. data/lib/pdfium/image_list.rb +21 -0
  30. data/lib/pdfium/page_list.rb +36 -0
  31. data/lib/pdfium/page_sizes.rb +7 -0
  32. data/lib/pdfium/version.rb +4 -0
  33. data/pdfium.gemspec +29 -0
  34. data/test/benchmark-docsplit.rb +41 -0
  35. data/test/bookmarks_list_spec.rb +26 -0
  36. data/test/bookmarks_spec.rb +34 -0
  37. data/test/debug.rb +24 -0
  38. data/test/document_spec.rb +49 -0
  39. data/test/image_list_spec.rb +18 -0
  40. data/test/image_spec.rb +53 -0
  41. data/test/page_list_spec.rb +24 -0
  42. data/test/page_spec.rb +91 -0
  43. data/test/pdfium_spec.rb +15 -0
  44. data/test/profile.rb +29 -0
  45. data/test/spec_helper.rb +31 -0
  46. metadata +202 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4f0a20b6dc310130f33f3fe36394b4cadc0bbf10
4
+ data.tar.gz: 5af66006fa995e366e59c4798e8585e20ca04914
5
+ SHA512:
6
+ metadata.gz: bedd33815a4a6c4761a47b4c2f3679093c641554789cfa236365a2c6681352bca6c0e43b0a2a7b12ffdd1c29839c78d71cd3280bbc5e17afec52749e6f208677
7
+ data.tar.gz: 966a05f3938c7433a4ba925a7c8a07fe0093a02639f7272b11e38876a78879a9b06abb48aa213a2e92e203152e635d104faafda464c11216ae804890ef1068db
@@ -0,0 +1,9 @@
1
+ .bundle
2
+ Gemfile.lock
3
+ *.bundle
4
+ *.so
5
+ *.o
6
+ *.a
7
+ mkmf.log
8
+ tmp
9
+ test/pdfs
@@ -0,0 +1 @@
1
+ ruby-2.2.0
data/Gemfile ADDED
@@ -0,0 +1,9 @@
source 'https://rubygems.org'

# The gem's own dependencies are declared in pdfium.gemspec
gemspec

# Debugging aid that is only installed for development
group(:development) { gem 'pry-nav' }
@@ -0,0 +1,7 @@
# Watch the spec files under test/ with the minitest guard
guard(:minitest) do
  watch(%r{^test/(.*)_spec\.rb$})
end

# Run the `buildtest` rake task (compile, then test) whenever a C++ source
# or header of the extension changes
guard('rake', :task => 'buildtest') do
  watch(%r{ext/pdfium_ext/.*\.(cc|h)$})
end
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Nathan Stitt
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,68 @@
1
+ # Ruby bindings for Google's PDFium project
2
+
3
+ This allows Ruby to efficiently extract information from PDF files.
4
+
5
+ It currently has only very rudimentary PDF editing capabilities.
6
+
7
+ RDoc documentation is also available and the test directory has examples of usage.
8
+
9
+ ## Opening and saving
10
+
11
+ ```ruby
12
+ pdf = PDFium::Document.new("test.pdf")
13
+ pdf.save("copy.pdf")
14
+ ```
15
+
16
+ ## Document information
17
+
18
+ Page count:
19
+ ```ruby
20
+ pdf.page_count
21
+ ```
22
+
23
+ PDF Metadata:
24
+ ```ruby
25
+ pdf.metadata
26
+ ```
27
+
28
+ Returns a hash with keys = :title, :author, :subject, :keywords, :creator, :producer, :creation_date, :mod_date
29
+
30
+
31
+
32
+ ## Bookmarks
33
+
34
+ ```ruby
35
+ def print_bookmarks(list, indent=0)
36
+ list.bookmarks.each do | bm |
37
+ print ' ' * indent
38
+ puts bm.title
39
+ print_bookmarks( bm.children, indent + 2 )
40
+ end
41
+ end
42
+ print_bookmarks( pdf.bookmarks )
43
+ ```
44
+
45
+ ## Render page as an image
46
+
47
+ ```ruby
48
+ pdf.each_page do | page |
49
+ page.as_image(width: 800).save("test-#{page.number}.png")
50
+ end
51
+ ```
52
+
53
+ ## Extract embedded images from page
54
+ ```ruby
55
+ doc = PDFium::Document.new("test.pdf")
56
+ page = doc.page_at(0)
57
+ page.images do |image|
58
+ image.save("page-0-image-#{image.index}.png")
59
+ end
60
+ ```
61
+
62
+ ## Text access
63
+
64
+ Text is returned as a UTF-16LE encoded string. Future versions may return position information as well.
65
+
66
+ ```ruby
67
+ pdf.page_at(0).text.encode!("ASCII-8BIT")
68
+ ```
@@ -0,0 +1,62 @@
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'rake/extensiontask'
require 'rdoc/task'

# `rake test` runs every *_spec.rb file in the test directory
Rake::TestTask.new do |t|
  t.libs << 'test'
  t.pattern = "test/*_spec.rb"
end

# Files that are fed to RDoc: the README, the Ruby sources and the C++
# extension sources (which carry call-seq comments)
RDOC_FILES = FileList["README.md",
                      "lib/pdfium.rb",
                      "lib/pdfium/*.rb",
                      "ext/pdfium_ext/*.cc"
                     ]

# RDoc::Task is the class provided by 'rdoc/task'
RDoc::Task.new do |rd|
  rd.main = "README.md"
  rd.options << "--verbose"
  rd.rdoc_files.include(RDOC_FILES)
end

# `rake compile` builds the pdfium_ext extension from its *.cc sources
Rake::ExtensionTask.new("pdfium_ext") do |ext|
  ext.source_pattern = "*.cc"
end

# Compile the extension first, then run the test task
task :buildtest => :compile do
  Rake::Task["test"].invoke
end

# Start an IRB session with the gem loaded
task :console do
  require 'irb'
  require 'irb/completion'
  require 'pdfium'
  # empty ARGV so IRB does not try to interpret rake's own arguments
  ARGV.clear
  IRB.start
end
40
+
# valgrind and Ruby
# http://blog.flavorjon.es/2009/06/easily-valgrind-gdb-your-ruby-c.html
# http://blog.evanweaver.com/2008/02/05/valgrind-and-ruby/
namespace :test do
  # partial-loads-ok and undef-value-errors necessary to ignore
  # spurious (and eminently ignorable) warnings from the ruby
  # interpreter
  #
  # The options are joined with single spaces so the resulting string never
  # contains a newline: a newline inside the command line would make the
  # shell run valgrind and the ruby invocation as separate commands.
  VALGRIND_BASIC_OPTS = %w[
    --tool=memcheck
    --dsymutil=yes
    --num-callers=50 --error-limit=no --leak-check=full
    --partial-loads-ok=yes --undef-value-errors=no
  ].join(' ')

  SUPRESS = ""# "--suppressions=./valgrind.supp"

  desc "run test suite under valgrind with basic ruby options"
  task :valgrind => :compile do
    # `ruby -S rake` looks rake up on the PATH; plain `ruby rake` would look
    # for a script called "rake" in the current directory
    cmdline = "valgrind #{SUPRESS} #{VALGRIND_BASIC_OPTS} ruby -S rake test"
    puts cmdline
    system cmdline
  end
end
@@ -0,0 +1,221 @@
1
+ #include "pdfium.h"
2
+
3
+ /////////////////////////////////////////////////////////////////////////
4
+ // The Bookmark class
5
+ /////////////////////////////////////////////////////////////////////////
6
+ /*
7
+ * Document-class: PDFium::Bookmark
8
+ *
9
+ * Bookmarks on a Document form a tree structure.
10
+ * Each can have siblings and children
11
+ *
12
+ */
13
+
14
+
15
+ class Bookmark {
16
+ public:
17
+ Bookmark():
18
+ doc_wrapper(0),
19
+ bookmark(0){}
20
+ ~Bookmark(){
21
+ if (doc_wrapper)
22
+ doc_wrapper->release(this);
23
+ if (bookmark)
24
+ delete bookmark;
25
+ }
26
+ DocumentWrapper *doc_wrapper;
27
+ CPDF_Bookmark *bookmark;
28
+ };
29
+
30
+ // a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
31
+ CPDF_Bookmark *
32
+ RB2BM(VALUE self) {
33
+ Bookmark *bm;
34
+ Data_Get_Struct(self, Bookmark, bm);
35
+ return bm->bookmark;
36
+ }
37
+
38
+ static void
39
+ bookmark_gc_free(Bookmark *bm) {
40
+ delete bm;
41
+ }
42
+
43
+ static VALUE
44
+ bookmark_allocate(VALUE klass) {
45
+ auto bm = new Bookmark;
46
+ return Data_Wrap_Struct(klass, NULL, bookmark_gc_free, bm );
47
+ }
48
+
49
+
50
+ /*
51
+ * call-seq:
52
+ * Bookmark.new
53
+ *
54
+ * Initializes a bookmark. Not intended for direct use, but called
55
+ * internally from Document#bookmarks
56
+ */
57
+ VALUE
58
+ bookmark_initialize(VALUE self, VALUE options){
59
+ Bookmark *bm;
60
+ Data_Get_Struct(self, Bookmark, bm);
61
+ DocumentWrapper *doc;
62
+ CPDF_Bookmark bookmark;
63
+
64
+ if (TYPE(options) != T_HASH){
65
+ rb_raise(rb_eArgError, "no options given");
66
+ return Qnil;
67
+ }
68
+
69
+ VALUE reference;
70
+ if ( !NIL_P(reference = RB::get_option(options,"document")) ){
71
+ // we're the first bookmark on a document
72
+ Data_Get_Struct(reference, DocumentWrapper, doc);
73
+ bookmark = CPDF_Bookmark(NULL);
74
+ CPDF_BookmarkTree tree(doc->document);
75
+ bm->bookmark = new CPDF_Bookmark( tree.GetFirstChild(bookmark).GetDict() );
76
+ } else if ( !NIL_P(reference = RB::get_option(options,"parent")) ){
77
+ // we're the first sibling on a parent bookmark
78
+ Bookmark *reference_bm;
79
+ Data_Get_Struct(reference, Bookmark, reference_bm);
80
+ doc = reference_bm->doc_wrapper;
81
+ bm->bookmark = new CPDF_Bookmark(reference_bm->bookmark->GetDict());
82
+ } else if ( !NIL_P(reference = RB::get_option(options,"sibling")) ){
83
+ // we're the next bookmark after a sibling bookmark
84
+ Bookmark *reference_bm;
85
+ Data_Get_Struct(reference, Bookmark, reference_bm);
86
+ doc = reference_bm->doc_wrapper;
87
+ CPDF_BookmarkTree tree(doc->document);
88
+ bm->bookmark = new CPDF_Bookmark( tree.GetNextSibling(*reference_bm->bookmark) );
89
+ } else {
90
+ rb_raise(rb_eArgError, "options must contain either :document, :parent or :sibling");
91
+ return Qnil;
92
+ }
93
+
94
+ bm->doc_wrapper = doc;
95
+ doc->retain(bm);
96
+
97
+ return Qnil;
98
+ }
99
+
100
+ /*
101
+ * call-seq:
102
+ * children -> BookmarkList
103
+ *
104
+ * All Bookmarks that are children. If the Bookmark has no children, an empty list is returned
105
+ */
106
+ static VALUE
107
+ bookmark_children(VALUE self)
108
+ {
109
+ Bookmark *bm;
110
+ Data_Get_Struct(self, Bookmark, bm);
111
+
112
+ CPDF_BookmarkTree tree(bm->doc_wrapper->document);
113
+ CPDF_Bookmark child( tree.GetFirstChild(*bm->bookmark) );
114
+
115
+ VALUE args[1];
116
+
117
+ if (child.GetDict()){
118
+ args[0] = rb_hash_new();
119
+ rb_hash_aset(args[0], ID2SYM(rb_intern("parent")), self);
120
+ args[0] = rb_class_new_instance( 1, args, RB::Bookmark() );
121
+ } else {
122
+ args[0] = Qnil; //rb_class_new_instance( 1, args, T_NIL );
123
+ }
124
+ return rb_class_new_instance( 1, args, RB::BookmarkList() );
125
+
126
+ }
127
+
128
+ /*
129
+ * call-seq:
130
+ * next_sibling -> Bookmark
131
+ *
132
+ * Returns the Bookmark that comes after this one
133
+ */
134
+ static VALUE
135
+ bookmark_next_sibling(VALUE self)
136
+ {
137
+ Bookmark *bm;
138
+ Data_Get_Struct(self, Bookmark, bm);
139
+ CPDF_BookmarkTree tree(bm->doc_wrapper->document);
140
+ CPDF_Bookmark next = tree.GetNextSibling(*bm->bookmark);
141
+
142
+ if (next.GetDict()){
143
+ VALUE args[1];
144
+ args[0] = rb_hash_new();
145
+ rb_hash_aset(args[0], ID2SYM(rb_intern("sibling")), self);
146
+ return rb_class_new_instance( 1, args, RB::Bookmark() );
147
+ } else {
148
+ return Qnil;
149
+ }
150
+ }
151
+
152
+
153
+ /*
154
+ * call-seq:
155
+ * title -> String encoded as UTF-16LE
156
+ *
157
+ * Returns the title of the bookmark in UTF-16LE format.
158
+ * This means that the string cannot be directly compared to a ASCII string, and must be converted.
159
+ *
160
+ * bookmark.title.encode!("ASCII-8BIT")
161
+ *
162
+ */
163
+ static VALUE
164
+ bookmark_title(VALUE self)
165
+ {
166
+ return RB::to_string( RB2BM(self)->GetTitle() );
167
+ }
168
+
169
+ /*
170
+ * call-seq:
171
+ * destination -> Hash
172
+ *
173
+ * Returns the destination data of the bookmark.
174
+ * Only the destination type is tested.
175
+ * Bug reports and confirmation on the action type is appreciated.
176
+ */
177
+ static VALUE
178
+ bookmark_destination(VALUE self)
179
+ {
180
+ Bookmark *bm;
181
+ Data_Get_Struct(self, Bookmark, bm);
182
+ auto doc = bm->doc_wrapper->document;
183
+ VALUE hash=rb_hash_new();
184
+ CPDF_Dest dest = bm->bookmark->GetDest( doc );
185
+ if (dest){
186
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("destination")));
187
+ rb_hash_aset(hash, ID2SYM( rb_intern("page_number") ), INT2NUM(dest.GetPageIndex(doc)));
188
+ } else {
189
+ CPDF_Action action = bm->bookmark->GetAction();
190
+ if (action){
191
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("action")));
192
+ rb_hash_aset(hash, ID2SYM( rb_intern("action") ),
193
+ rb_str_new2( action.GetTypeName().c_str() ) );
194
+ rb_hash_aset(hash, ID2SYM( rb_intern("uri") ),
195
+ rb_str_new2( action.GetURI(doc).c_str() ) );
196
+ } else {
197
+ rb_hash_aset(hash, ID2SYM( rb_intern("type") ), ID2SYM(rb_intern("unknown")));
198
+ }
199
+ }
200
+ return hash;
201
+ }
202
+
203
+ VALUE
204
+ define_bookmark_class(){
205
+
206
+ #if RDOC_IS_STUPID_AND_CANNOT_PARSE_DOCUMENTATION
207
+ VALUE RB_PDFium = rb_define_module("PDFium");
208
+ #endif
209
+ VALUE RB_PDFium = RB::PDFium();
210
+
211
+ VALUE RB_Bookmark = rb_define_class_under(RB_PDFium, "Bookmark", rb_cObject);
212
+ rb_define_alloc_func(RB_Bookmark, bookmark_allocate);
213
+
214
+ rb_define_private_method (RB_Bookmark, "initialize", RUBY_METHOD_FUNC(bookmark_initialize), 1);
215
+ rb_define_method (RB_Bookmark, "title", RUBY_METHOD_FUNC(bookmark_title), 0);
216
+ rb_define_method (RB_Bookmark, "next_sibling", RUBY_METHOD_FUNC(bookmark_next_sibling),0);
217
+ rb_define_method (RB_Bookmark, "children", RUBY_METHOD_FUNC(bookmark_children), 0);
218
+ rb_define_method (RB_Bookmark, "destination", RUBY_METHOD_FUNC(bookmark_destination), 0);
219
+
220
+ return RB_Bookmark;
221
+ }
@@ -0,0 +1,27 @@
1
+ // Implementation of FPDF_FILEWRITE into a file.
2
+ class BufferFileWrite : public FPDF_FILEWRITE {
3
+ public:
4
+ BufferFileWrite( const std::string &file ) :
5
+ _file( file, std::ios::out | std::ios::binary )
6
+ {
7
+ version = 1;
8
+ WriteBlock = &WriteBlockImpl;
9
+ }
10
+ ~BufferFileWrite() {
11
+ _file.close();
12
+ }
13
+
14
+ private:
15
+ int DoWriteBlock(const void* data, unsigned long size){
16
+ _file.write(static_cast<const char*>(data), size);
17
+ return 1;
18
+ }
19
+ static int WriteBlockImpl(FPDF_FILEWRITE* this_file_write, const void* data,
20
+ unsigned long size){
21
+ BufferFileWrite* mem_buffer_file_write =
22
+ static_cast<BufferFileWrite*>(this_file_write);
23
+ return mem_buffer_file_write->DoWriteBlock(data, size);
24
+ }
25
+
26
+ std::ofstream _file;
27
+ };
@@ -0,0 +1,268 @@
1
+ #include "document.h"
2
+ #include "pdfium.h"
3
+ #include <cstring>
4
+ #include <iostream>
5
+ #include <stdlib.h>
6
+ #include <assert.h>
7
+ #include <stdio.h>
8
+ #include <fstream>
9
+ #include <map>
10
+ #include "buffer_file_write.hpp"
11
+
12
+ /////////////////////////////////////////////////////////////////////////
13
+ // The Document class //
14
+ /////////////////////////////////////////////////////////////////////////
15
+ /*
16
+ * Document-class: PDFium::Document
17
+ *
18
+ * A Document represents a PDF file.
19
+ *
20
+ */
21
+
22
+
23
+ // While you might think this would free the Document object it does not
24
+ // Instead it simply marks the Document as no longer in use, and then it
25
+ // will release itself when there are no Pages in use.
26
+ // https://redmine.ruby-lang.org/issues/6292
27
+ static void
28
+ document_gc_free(DocumentWrapper* doc)
29
+ {
30
+ DEBUG_MSG("GC Free Doc: " << doc);
31
+ // Note: we do not actually destroy the object yet.
32
+ // instead we mark it as unused and it will remove itself
33
+ // once all pages are finished
34
+ doc->markUnused();
35
+ }
36
+
37
+ static VALUE
38
+ document_allocate(VALUE klass)
39
+ {
40
+ DocumentWrapper *doc = new DocumentWrapper();
41
+ DEBUG_MSG("Alloc PDF: " << doc);
42
+ return Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
43
+ }
44
+
45
+
46
+ /*
47
+ * call-seq:
48
+ * Document.new( path_to_pdf_file ) -> Document
49
+ * Document.new() -> An empty PDF Document with no pages
50
+ *
51
+ * Initializes a document either from a PDF file or creates a blank document
52
+ */
53
+ VALUE
54
+ document_initialize(int argc, VALUE *argv, VALUE self)
55
+ {
56
+ DocumentWrapper* d;
57
+ Data_Get_Struct(self, DocumentWrapper, d);
58
+ if (argc){
59
+ VALUE path = RB::to_s(argv[0]); // call to_s in case it's a Pathname
60
+ d->document = (CPDF_Document*)FPDF_LoadDocument(StringValuePtr(path), NULL);
61
+ } else {
62
+ d->document = (CPDF_Document*)FPDF_CreateNewDocument();
63
+ }
64
+ if (! d->document ){
65
+ rb_raise(rb_eArgError, "Unable to create document: %s", PDFiumLastErrorString());
66
+ }
67
+ return Qnil;
68
+ }
69
+
70
+
71
+ /*
72
+ * call-seq:
73
+ * Document.from_memory( pdf_data ) -> Document
74
+ *
75
+ * Initializes a document from a binary string.
76
+ *
77
+ * See Image#data for an example of reading a PDF directly from Amazon S3
78
+ * and writing it's images completely in memory.
79
+ */
80
+ static VALUE
81
+ document_from_memory(VALUE klass, VALUE data){
82
+ DocumentWrapper *doc = new DocumentWrapper();
83
+ VALUE instance = Data_Wrap_Struct(klass, NULL, document_gc_free, doc );
84
+ doc->document = (CPDF_Document*)FPDF_LoadMemDocument(RSTRING_PTR(data), RSTRING_LEN(data),NULL);
85
+ return instance;
86
+ }
87
+
88
+ /*
89
+ * call-seq:
90
+ * page_count -> Fixnum
91
+ *
92
+ * Returns the number of pages on a Document
93
+ */
94
+ static VALUE
95
+ document_page_count(VALUE self)
96
+ {
97
+ return INT2NUM( RB2DOC(self)->GetPageCount() );
98
+ }
99
+
100
+ // Not documented in favor of the Document#pages[] access
101
+ /* :nodoc: */
102
+ static VALUE
103
+ document_page_at(VALUE self, VALUE rb_page_index)
104
+ {
105
+ return rb_funcall(RB::Page(), rb_intern("open"), 2, self, rb_page_index);
106
+ }
107
+
108
+ /*
109
+ * call-seq:
110
+ * pages -> PDFium::PageList
111
+ *
112
+ * Returns a collection of all the pages on the document as a PDFium::PageList. Pages
113
+ * are lazily loaded.
114
+ *
115
+ */
116
+ static VALUE
117
+ document_pages(VALUE self)
118
+ {
119
+ VALUE args[1];
120
+ args[0] = self;
121
+ return rb_class_new_instance( 1, args, RB::PageList() );
122
+ }
123
+
124
+ // creates and yields a page. Not documented since all access
125
+ // should got through the Pageist interface via the Document#pages method
126
+ /* :nodoc: */
127
+ static VALUE
128
+ document_each_page(VALUE self)
129
+ {
130
+ auto doc = RB2DOC(self);
131
+ auto count = doc->GetPageCount();
132
+ for (int pg=0; pg < count; pg++){
133
+ VALUE page = document_page_at(self, INT2FIX(pg));
134
+ rb_yield(page);
135
+ PageWrapper *pw;
136
+ Data_Get_Struct(page, PageWrapper, pw);
137
+ pw->unload();
138
+ }
139
+ return self;
140
+ }
141
+
142
+
143
+
144
+ /*
145
+ * call-seq:
146
+ * bookmarks -> Bookmarks
147
+ *
148
+ * Retrieves the first Bookmark for a document
149
+ */
150
+ static VALUE
151
+ document_bookmarks(VALUE self)
152
+ {
153
+ VALUE args[1];
154
+ args[0] = rb_hash_new();
155
+ rb_hash_aset(args[0], ID2SYM(rb_intern("document")), self);
156
+ VALUE bm = rb_class_new_instance( 1, args, RB::Bookmark() );
157
+ args[0] = bm;
158
+ return rb_class_new_instance( 1, args, RB::BookmarkList() );
159
+ }
160
+
161
+
162
+
163
+
164
+ /*
165
+ * call-seq:
166
+ * save -> Document
167
+ *
168
+ * Saves document to a PDF file. This method isn't terribly useful since there aren't
169
+ * (yet) methods to add content to pages.
170
+ */
171
+ static VALUE
172
+ document_save(VALUE self, VALUE _path)
173
+ {
174
+ auto doc = RB2DOC(self);
175
+ VALUE path = RB::to_s(_path); // call to_s in case it's a Pathname
176
+ BufferFileWrite output_file_write(StringValuePtr(path));
177
+ FPDF_SaveAsCopy(doc, &output_file_write, FPDF_REMOVE_SECURITY);
178
+ return self;
179
+ }
180
+
181
+
182
+ /*
183
+ call-seq:
184
+ metadata -> Hash
185
+
186
+ Retrieves and optionally sets the metadata on a document. Returns a hash with the following keys:
187
+
188
+ :title, :author :subject, :keywords, :creator, :producer, :creation_date, :mod_date
189
+
190
+ An empty Hash will be returned if the metadata cannot be read
191
+
192
+ All values in the hash are encoded as UTF-16LE strings.
193
+
194
+ If caled with a block, the values will be passed to it and updates written back to the Document
195
+
196
+ === Example
197
+ pdf = PDFium::Document.new( "test.pdf" )
198
+ pdf.metadata do | md |
199
+ md[:title] = "My Awesome PDF"
200
+ md[:author] = "Nathan Stitt"
201
+ end
202
+ pdf.metadata[:author] # => "Nathan Stitt"
203
+
204
+ */
205
+ VALUE
206
+ document_metadata(int argc, VALUE *argv, VALUE self)
207
+ {
208
+ auto doc = RB2DOC(self);
209
+ VALUE metadata = rb_hash_new();
210
+ CPDF_Dictionary* info = doc->GetInfo();
211
+ if (!info)
212
+ return metadata;
213
+
214
+ VALUE block;
215
+ rb_scan_args(argc, argv, "0&", &block);
216
+
217
+ std::map<std::string, std::string> keys = {
218
+ { "Title", "title" },
219
+ { "Author", "author" },
220
+ { "Subject", "subject" },
221
+ { "Keywords", "keywords"},
222
+ { "Creator", "creator" },
223
+ { "Producer", "producer"},
224
+ { "CreationDate", "creation_date" },
225
+ { "ModDate", "mod_date" }
226
+ };
227
+
228
+ for (auto& kv : keys) {
229
+ rb_hash_aset(metadata,
230
+ ID2SYM( rb_intern( kv.second.c_str() ) ),
231
+ RB::to_string( info->GetUnicodeText( kv.first.c_str() ) )
232
+ );
233
+ }
234
+
235
+ if (RTEST(block)){
236
+ rb_yield( metadata );
237
+ for (auto& kv : keys) {
238
+ VALUE value = RB::get_option(metadata, kv.second);
239
+ auto bs = CFX_ByteString( RSTRING_PTR(value), RSTRING_LEN(value) );
240
+ info->SetAtString(kv.first.c_str(), bs);
241
+ }
242
+ }
243
+
244
+ return metadata;
245
+ }
246
+
247
+ VALUE
248
+ define_document_class()
249
+ {
250
+ VALUE RB_PDFium = RB::PDFium();
251
+
252
+ // The Document class definition and methods
253
+ VALUE RB_Document = rb_define_class_under(RB_PDFium, "Document", rb_cObject);
254
+
255
+ rb_define_alloc_func(RB_Document, document_allocate);
256
+
257
+ rb_define_singleton_method(RB_Document, "from_memory", RUBY_METHOD_FUNC(document_from_memory), 1);
258
+
259
+ rb_define_private_method (RB_Document, "initialize", RUBY_METHOD_FUNC(document_initialize), -1);
260
+ rb_define_method (RB_Document, "page_count", RUBY_METHOD_FUNC(document_page_count), 0);
261
+ rb_define_method (RB_Document, "page_at", RUBY_METHOD_FUNC(document_page_at), 1);
262
+ rb_define_method (RB_Document, "each_page", RUBY_METHOD_FUNC(document_each_page), 0);
263
+ rb_define_method (RB_Document, "pages", RUBY_METHOD_FUNC(document_pages), 0);
264
+ rb_define_method (RB_Document, "metadata", RUBY_METHOD_FUNC(document_metadata), -1);
265
+ rb_define_method (RB_Document, "bookmarks", RUBY_METHOD_FUNC(document_bookmarks), 0);
266
+ rb_define_method (RB_Document, "save", RUBY_METHOD_FUNC(document_save), 1);
267
+ return RB_Document;
268
+ }