pdfium 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +9 -0
  5. data/Guardfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +68 -0
  8. data/Rakefile +62 -0
  9. data/ext/pdfium_ext/bookmark.cc +221 -0
  10. data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
  11. data/ext/pdfium_ext/document.cc +268 -0
  12. data/ext/pdfium_ext/document.h +66 -0
  13. data/ext/pdfium_ext/document_wrapper.cc +63 -0
  14. data/ext/pdfium_ext/document_wrapper.h +56 -0
  15. data/ext/pdfium_ext/extconf.h +3 -0
  16. data/ext/pdfium_ext/extconf.rb +76 -0
  17. data/ext/pdfium_ext/image.cc +332 -0
  18. data/ext/pdfium_ext/page.cc +392 -0
  19. data/ext/pdfium_ext/page.h +5 -0
  20. data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
  21. data/ext/pdfium_ext/page_object_wrapper.h +27 -0
  22. data/ext/pdfium_ext/page_wrapper.cc +86 -0
  23. data/ext/pdfium_ext/page_wrapper.h +37 -0
  24. data/ext/pdfium_ext/pdfium.cc +115 -0
  25. data/ext/pdfium_ext/pdfium.h +69 -0
  26. data/lib/pdfium.rb +15 -0
  27. data/lib/pdfium/bookmark_list.rb +28 -0
  28. data/lib/pdfium/bounding_box.rb +16 -0
  29. data/lib/pdfium/image_list.rb +21 -0
  30. data/lib/pdfium/page_list.rb +36 -0
  31. data/lib/pdfium/page_sizes.rb +7 -0
  32. data/lib/pdfium/version.rb +4 -0
  33. data/pdfium.gemspec +29 -0
  34. data/test/benchmark-docsplit.rb +41 -0
  35. data/test/bookmarks_list_spec.rb +26 -0
  36. data/test/bookmarks_spec.rb +34 -0
  37. data/test/debug.rb +24 -0
  38. data/test/document_spec.rb +49 -0
  39. data/test/image_list_spec.rb +18 -0
  40. data/test/image_spec.rb +53 -0
  41. data/test/page_list_spec.rb +24 -0
  42. data/test/page_spec.rb +91 -0
  43. data/test/pdfium_spec.rb +15 -0
  44. data/test/profile.rb +29 -0
  45. data/test/spec_helper.rb +31 -0
  46. metadata +202 -0
@@ -0,0 +1,86 @@
1
+ #include "page_wrapper.h"
2
+ #include "pdfium.h"
3
+
4
+ CPDF_Page*
5
+ RB2PG(VALUE self){
6
+ PageWrapper *page;
7
+ Data_Get_Struct(self, PageWrapper, page);
8
+ return page->page();
9
+ }
10
+
11
+ PageWrapper::PageWrapper(DocumentWrapper* doc, int page) :
12
+ document_wrapper(doc), _page_number(page), _in_use(true), _page(NULL)
13
+ {
14
+ this->document_wrapper->retain(this);
15
+ }
16
+
17
+ void
18
+ PageWrapper::unload(){
19
+ FPDF_ClosePage(_page);
20
+ _page = NULL;
21
+ }
22
+
23
+
24
+ // Mark the page object as no longer in use. At this
25
+ // point it may be freed once all children are also not
26
+ // in use
27
+ void
28
+ PageWrapper::markUnused(){
29
+ _in_use = false;
30
+ this->unload();
31
+ this->maybeKillSelf();
32
+ }
33
+
34
+ void
35
+ PageWrapper::setPage(CPDF_Page *page){
36
+ if (_page){
37
+ this->unload();
38
+ }
39
+ // unload won't work if the page has children
40
+ if (!_page){
41
+ this->_page=page;
42
+ }
43
+ }
44
+
45
+ void
46
+ PageWrapper::retain(void *obj){
47
+ _children.insert(obj);
48
+ }
49
+
50
+ CPDF_Page *
51
+ PageWrapper::page(){
52
+ if (!_page){
53
+ _page = static_cast<CPDF_Page *>(FPDF_LoadPage(document_wrapper->document, _page_number));
54
+ }
55
+ return _page;
56
+ }
57
+
58
+ void
59
+ PageWrapper::release(void *obj){
60
+ _children.erase(obj);
61
+ this->maybeKillSelf();
62
+
63
+ }
64
+
65
+ // Test if the Document is not in use and there are no pages
66
+ // that are still retained
67
+ void
68
+ PageWrapper::maybeKillSelf(){
69
+ bool killable = _children.empty() && !_in_use;
70
+ DEBUG_MSG("Testing if killing Page: " << this << " " << killable );
71
+ if (killable){
72
+ delete this;
73
+ }
74
+ }
75
+
76
+ // void
77
+ // PageWrapper::wrap(CPDF_Page *pg, DocumentWrapper *doc_wrapper){
78
+ // this->page = pg;
79
+ // this->doc = doc_wrapper;
80
+ // this->doc->retain(this);
81
+ // }
82
+
83
+
84
+ PageWrapper::~PageWrapper(){
85
+ this->unload();
86
+ }
@@ -0,0 +1,37 @@
1
+ #ifndef __PAGE_WRAPPER_H__
2
+ #define __PAGE_WRAPPER_H__
3
+
4
+ #include "pdfium.h"
5
+ #include <unordered_set>
6
+
7
+ class DocumentWrapper;
8
+
9
+ class PageWrapper {
10
+
11
+ public:
12
+
13
+ PageWrapper(DocumentWrapper* doc, int page);
14
+ void wrap(CPDF_Page *page, DocumentWrapper *doc_wrapper);
15
+ ~PageWrapper();
16
+
17
+ void markUnused();
18
+
19
+ void retain(void *obj);
20
+ void release(void *obj);
21
+
22
+ CPDF_Page *page();
23
+ void unload();
24
+ void setPage(CPDF_Page *pg);
25
+
26
+ DocumentWrapper *document_wrapper;
27
+ int _page_number;
28
+ private:
29
+
30
+ bool _in_use;
31
+ CPDF_Page *_page;
32
+ std::unordered_set<void*> _children;
33
+ void maybeKillSelf();
34
+ };
35
+
36
+
37
+ #endif // __PAGE_WRAPPER_H__
@@ -0,0 +1,115 @@
1
+ #include <iostream>
2
+ #include <string>
3
+
4
+ #include "pdfium.h"
5
+ extern "C" {
6
+ #include "ruby/encoding.h"
7
+ }
8
+
9
+ #include <assert.h>
10
+ #include <list>
11
+ #include <string>
12
+ #include <utility>
13
+ #include <vector>
14
+
15
+ #include "document.h"
16
+ #include "page.h"
17
+
18
+ // // file local variables that are set in Init_pdfium_ext function
19
+ // // and are referenced elsewhere in file
20
+ // static VALUE rb_page; // Ruby definition of the Page class
21
+
22
+ const char*
23
+ PDFiumLastErrorString() {
24
+ switch(FPDF_GetLastError()){
25
+ case 0:
26
+ return "No Error";
27
+ case 1:
28
+ return "unknown error";
29
+ case 2:
30
+ return "file not found or could not be opened";
31
+ case 3:
32
+ return "file not in PDF format or corrupted";
33
+ case 4:
34
+ return "password required or incorrect password";
35
+ case 5:
36
+ return "unsupported security scheme";
37
+ case 6:
38
+ return "page not found or content error";
39
+ default:
40
+ return "error code unknown";
41
+ }
42
+ }
43
+
44
+
45
+ VALUE _get(const char *name){
46
+ return rb_const_get(RB::PDFium(), rb_intern(name));
47
+ }
48
+ VALUE RB::PDFium(){
49
+ static VALUE val = rb_const_get(rb_cObject, rb_intern("PDFium"));
50
+ return val;
51
+ }
52
+ VALUE RB::Page(){
53
+ static VALUE val = _get("Page");
54
+ return val;
55
+ }
56
+ VALUE RB::Image(){
57
+ static VALUE val = _get("Image");
58
+ return val;
59
+ }
60
+ VALUE RB::BoundingBox(){
61
+ static VALUE val = _get("BoundingBox");
62
+ return val;
63
+ }
64
+ VALUE RB::Bookmark(){
65
+ static VALUE val = _get("Bookmark");
66
+ return val;
67
+ }
68
+ VALUE RB::Document(){
69
+ static VALUE val = _get("Document");
70
+ return val;
71
+ }
72
+ VALUE RB::BookmarkList(){
73
+ static VALUE val = _get("BookmarkList");
74
+ return val;
75
+ }
76
+ VALUE RB::PageList(){
77
+ static VALUE val = _get("PageList");
78
+ return val;
79
+ }
80
+ VALUE RB::ImageList(){
81
+ static VALUE val = _get("ImageList");
82
+ return val;
83
+ }
84
+
85
+ VALUE
86
+ RB::to_string(const CFX_WideString &wstr){
87
+ static rb_encoding *enc = rb_enc_find("UTF-16LE");
88
+ return rb_enc_str_new(wstr.UTF16LE_Encode().c_str(),(wstr.GetLength()*2), enc);
89
+ }
90
+
91
+ ID RB::to_s(VALUE obj){
92
+ static ID id = rb_intern("to_s");
93
+ return rb_funcall(obj, id, 0);
94
+ }
95
+
96
+ VALUE RB::type(VALUE obj){
97
+ static ID id = rb_intern("class");
98
+ return rb_funcall(obj, id, 0);
99
+ }
100
+
101
+ VALUE RB::get_option(VALUE options, const std::string &key){
102
+ return rb_hash_aref(options, ID2SYM(rb_intern(key.c_str())));
103
+ }
104
+
105
+ extern "C" void
106
+ Init_pdfium_ext()
107
+ {
108
+ // Initialize the PDFium library
109
+ FPDF_InitLibrary();
110
+
111
+ define_document_class();
112
+ define_page_class();
113
+ define_bookmark_class();
114
+ define_image_class();
115
+ }
@@ -0,0 +1,69 @@
1
+ #ifndef __PDFIUM_H__
2
+ #define __PDFIUM_H__
3
+
4
+ #include <stdlib.h>
5
+ #include <inttypes.h>
6
+ #include <fpdf_dataavail.h>
7
+ #include <fpdf_ext.h>
8
+ #include <fpdfformfill.h>
9
+ #include <fpdftext.h>
10
+ #include <fpdfview.h>
11
+ #include <fpdfedit.h>
12
+ #include <fpdfsave.h>
13
+ #include <fpdfdoc.h>
14
+ #include <iostream>
15
+ #include <fpdfdoc/fpdf_doc.h>
16
+
17
+ #include <fpdfapi/fpdf_render.h>
18
+ #include <fpdfapi/fpdf_pageobj.h>
19
+ #include <fpdfsdk/include/fsdk_rendercontext.h>
20
+ #include <FreeImage.h>
21
+
22
+ #include "page_wrapper.h"
23
+ #include "page_object_wrapper.h"
24
+ #include "document_wrapper.h"
25
+
26
+ // auto generated by mkmf
27
+ #include "extconf.h"
28
+ extern "C" {
29
+ #include "ruby.h"
30
+ }
31
+
32
+ #ifdef DEBUG
33
+ #define DEBUG_MSG(str) do { std::cout << str << std::endl; } while( false )
34
+ #else
35
+ #define DEBUG_MSG(str) do { } while ( false )
36
+ #endif
37
+
38
+ VALUE define_bookmark_class();
39
+ VALUE define_document_class();
40
+ VALUE define_page_class();
41
+ VALUE define_image_class();
42
+
43
+ // Utility functions that extract the underlying PDFium object (page, document or image) from its Ruby wrapper
44
+
45
+ CPDF_Page* RB2PG(VALUE RB_Page);
46
+ CPDF_Document* RB2DOC(VALUE RB_DocumentWrapper);
47
+ CPDF_ImageObject* RB2IMG(VALUE RB_Image);
48
+
49
+ const char* PDFiumLastErrorString();
50
+
51
+ class RB {
52
+ public:
53
+ static VALUE PDFium();
54
+ static VALUE Bookmark();
55
+ static VALUE Document();
56
+ static VALUE Page();
57
+ static VALUE BookmarkList();
58
+ static VALUE BoundingBox();
59
+ static VALUE StringIO();
60
+ static VALUE Image();
61
+ static VALUE PageList();
62
+ static VALUE ImageList();
63
+ static VALUE type(VALUE object);
64
+ static VALUE to_string(const CFX_WideString &widestring);
65
+ static ID to_s(VALUE object);
66
+ static VALUE get_option(VALUE hash, const std::string &key);
67
+ };
68
+
69
+ #endif // __PDFIUM_H__
@@ -0,0 +1,15 @@
1
+ require 'stringio'
2
+ require_relative "pdfium/version"
3
+ require_relative "pdfium/bookmark_list"
4
+ require_relative "pdfium/page_list"
5
+ require_relative "pdfium/page_sizes"
6
+ require_relative "pdfium/bounding_box"
7
+ require_relative "pdfium/image_list"
8
+
9
+
10
+ # PDFium is a Ruby interface to the Google PDFium PDF renderer.
11
+ module PDFium
12
+
13
+ end
14
+
15
+ require_relative "pdfium_ext"
@@ -0,0 +1,28 @@
1
module PDFium

  # A list of bookmarks for a Document or a Bookmark.
  #
  # The list is walked by following Bookmark#next_sibling from the first
  # bookmark until it returns nil.
  class BookmarkList

    include Enumerable

    # Not used directly, but called from Document#bookmarks.
    #
    # initial:: the first bookmark at this level, or nil when there are none
    def initialize(initial)
      @first = initial
    end

    # Calls block once for each bookmark that exists at the current level.
    # Since bookmarks form a tree, each bookmark may have one or more children.
    #
    # Returns an Enumerator when called without a block, so that external
    # iteration and chaining (e.g. +each.with_index+) work instead of raising
    # LocalJumpError.
    def each
      return enum_for(:each) unless block_given?

      bookmark = @first
      while bookmark
        yield bookmark
        bookmark = bookmark.next_sibling
      end
    end

    # True if no bookmarks exist, false if at least one is present
    def empty?
      @first.nil?
    end
  end

end
@@ -0,0 +1,16 @@
1
module PDFium

  # The size of an object. Used with both Page and Image
  class BoundingBox

    # The four edges of the box, exposed read-only.
    attr_reader :left, :right, :top, :bottom

    # Arguments are given in the order left, right, top, bottom.
    def initialize(l,r,t,b)
      @left, @right, @top, @bottom = l, r, t, b
    end

  end

end
@@ -0,0 +1,21 @@
1
module PDFium

  # A list of Image instances for a given Page. Is returned by Page#images.
  # Iteration is delegated to the page each time it is requested.
  class ImageList

    include Enumerable

    # Load list for a given page. Not normally called directly, but from Page#images
    def initialize(page)
      @page = page
    end

    # Calls block once for each image on the page by forwarding the block to
    # the page's +each_image+ method.
    def each(&block)
      @page.each_image(&block)
    end

  end

end
@@ -0,0 +1,36 @@
1
module PDFium

  # A list of Page objects associated with a Document.
  class PageList

    include Enumerable

    # Create a new listing from the given document.
    # Not normally called directly, is called internally by Document#pages
    def initialize(document)
      @document = document
    end

    # Calls block once for each page on the document, yielding the current page.
    # Iteration is delegated to Document#each_page.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def each(&block)
      @document.each_page(&block)
    end

    # Returns the number of pages on the document.
    #
    # When called with an argument or a block, behaves like Enumerable#count
    # (counting matching pages) instead of raising ArgumentError or silently
    # ignoring the block.
    def count(*args, &block)
      return super unless args.empty? && block.nil?

      @document.page_count
    end

    # Returns a Page instance for the given number by delegating to
    # Document#page_at.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def [](index)
      @document.page_at(index)
    end
  end

end
@@ -0,0 +1,7 @@
1
module PDFium
  # Standard A4 paper size.
  # Frozen so that the shared constant cannot be mutated by a caller.
  A4 = [595, 842].freeze
  # Standard US Letter size.
  # Frozen so that the shared constant cannot be mutated by a caller.
  LETTER = [612, 792].freeze

end
@@ -0,0 +1,4 @@
1
module PDFium
  # Version string of the gem.
  VERSION = '0.0.1'
end