pdfium 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +9 -0
  5. data/Guardfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +68 -0
  8. data/Rakefile +62 -0
  9. data/ext/pdfium_ext/bookmark.cc +221 -0
  10. data/ext/pdfium_ext/buffer_file_write.hpp +27 -0
  11. data/ext/pdfium_ext/document.cc +268 -0
  12. data/ext/pdfium_ext/document.h +66 -0
  13. data/ext/pdfium_ext/document_wrapper.cc +63 -0
  14. data/ext/pdfium_ext/document_wrapper.h +56 -0
  15. data/ext/pdfium_ext/extconf.h +3 -0
  16. data/ext/pdfium_ext/extconf.rb +76 -0
  17. data/ext/pdfium_ext/image.cc +332 -0
  18. data/ext/pdfium_ext/page.cc +392 -0
  19. data/ext/pdfium_ext/page.h +5 -0
  20. data/ext/pdfium_ext/page_object_wrapper.cc +38 -0
  21. data/ext/pdfium_ext/page_object_wrapper.h +27 -0
  22. data/ext/pdfium_ext/page_wrapper.cc +86 -0
  23. data/ext/pdfium_ext/page_wrapper.h +37 -0
  24. data/ext/pdfium_ext/pdfium.cc +115 -0
  25. data/ext/pdfium_ext/pdfium.h +69 -0
  26. data/lib/pdfium.rb +15 -0
  27. data/lib/pdfium/bookmark_list.rb +28 -0
  28. data/lib/pdfium/bounding_box.rb +16 -0
  29. data/lib/pdfium/image_list.rb +21 -0
  30. data/lib/pdfium/page_list.rb +36 -0
  31. data/lib/pdfium/page_sizes.rb +7 -0
  32. data/lib/pdfium/version.rb +4 -0
  33. data/pdfium.gemspec +29 -0
  34. data/test/benchmark-docsplit.rb +41 -0
  35. data/test/bookmarks_list_spec.rb +26 -0
  36. data/test/bookmarks_spec.rb +34 -0
  37. data/test/debug.rb +24 -0
  38. data/test/document_spec.rb +49 -0
  39. data/test/image_list_spec.rb +18 -0
  40. data/test/image_spec.rb +53 -0
  41. data/test/page_list_spec.rb +24 -0
  42. data/test/page_spec.rb +91 -0
  43. data/test/pdfium_spec.rb +15 -0
  44. data/test/profile.rb +29 -0
  45. data/test/spec_helper.rb +31 -0
  46. metadata +202 -0
@@ -0,0 +1,86 @@
1
+ #include "page_wrapper.h"
2
+ #include "pdfium.h"
3
+
4
+ CPDF_Page*
5
+ RB2PG(VALUE self){
6
+ PageWrapper *page;
7
+ Data_Get_Struct(self, PageWrapper, page);
8
+ return page->page();
9
+ }
10
+
11
+ PageWrapper::PageWrapper(DocumentWrapper* doc, int page) :
12
+ document_wrapper(doc), _page_number(page), _in_use(true), _page(NULL)
13
+ {
14
+ this->document_wrapper->retain(this);
15
+ }
16
+
17
+ void
18
+ PageWrapper::unload(){
19
+ FPDF_ClosePage(_page);
20
+ _page = NULL;
21
+ }
22
+
23
+
24
+ // Mark the page object as no longer in use. At this
25
+ // point it may be freed once all children are also not
26
+ // in use
27
+ void
28
+ PageWrapper::markUnused(){
29
+ _in_use = false;
30
+ this->unload();
31
+ this->maybeKillSelf();
32
+ }
33
+
34
+ void
35
+ PageWrapper::setPage(CPDF_Page *page){
36
+ if (_page){
37
+ this->unload();
38
+ }
39
+ // unload won't work if the page has children
40
+ if (!_page){
41
+ this->_page=page;
42
+ }
43
+ }
44
+
45
+ void
46
+ PageWrapper::retain(void *obj){
47
+ _children.insert(obj);
48
+ }
49
+
50
+ CPDF_Page *
51
+ PageWrapper::page(){
52
+ if (!_page){
53
+ _page = static_cast<CPDF_Page *>(FPDF_LoadPage(document_wrapper->document, _page_number));
54
+ }
55
+ return _page;
56
+ }
57
+
58
+ void
59
+ PageWrapper::release(void *obj){
60
+ _children.erase(obj);
61
+ this->maybeKillSelf();
62
+
63
+ }
64
+
65
+ // Test if the Document is not in use and there are no pages
66
+ // that are still retained
67
+ void
68
+ PageWrapper::maybeKillSelf(){
69
+ bool killable = _children.empty() && !_in_use;
70
+ DEBUG_MSG("Testing if killing Page: " << this << " " << killable );
71
+ if (killable){
72
+ delete this;
73
+ }
74
+ }
75
+
76
+ // void
77
+ // PageWrapper::wrap(CPDF_Page *pg, DocumentWrapper *doc_wrapper){
78
+ // this->page = pg;
79
+ // this->doc = doc_wrapper;
80
+ // this->doc->retain(this);
81
+ // }
82
+
83
+
84
+ PageWrapper::~PageWrapper(){
85
+ this->unload();
86
+ }
@@ -0,0 +1,37 @@
1
+ #ifndef __PAGE_WRAPPER_H__
2
+ #define __PAGE_WRAPPER_H__
3
+
4
+ #include "pdfium.h"
5
+ #include <unordered_set>
6
+
7
+ class DocumentWrapper;
8
+
9
+ class PageWrapper {
10
+
11
+ public:
12
+
13
+ PageWrapper(DocumentWrapper* doc, int page);
14
+ void wrap(CPDF_Page *page, DocumentWrapper *doc_wrapper);
15
+ ~PageWrapper();
16
+
17
+ void markUnused();
18
+
19
+ void retain(void *obj);
20
+ void release(void *obj);
21
+
22
+ CPDF_Page *page();
23
+ void unload();
24
+ void setPage(CPDF_Page *pg);
25
+
26
+ DocumentWrapper *document_wrapper;
27
+ int _page_number;
28
+ private:
29
+
30
+ bool _in_use;
31
+ CPDF_Page *_page;
32
+ std::unordered_set<void*> _children;
33
+ void maybeKillSelf();
34
+ };
35
+
36
+
37
+ #endif // __PAGE_WRAPPER_H__
@@ -0,0 +1,115 @@
1
+ #include <iostream>
2
+ #include <string>
3
+
4
+ #include "pdfium.h"
5
+ extern "C" {
6
+ #include "ruby/encoding.h"
7
+ }
8
+
9
+ #include <assert.h>
10
+ #include <list>
11
+ #include <string>
12
+ #include <utility>
13
+ #include <vector>
14
+
15
+ #include "document.h"
16
+ #include "page.h"
17
+
18
+ // // file local variables that are set in Init_pdfium_ext function
19
+ // // and are referenced elsewhere in file
20
+ // static VALUE rb_page; // Ruby definition of the Page class
21
+
22
+ const char*
23
+ PDFiumLastErrorString() {
24
+ switch(FPDF_GetLastError()){
25
+ case 0:
26
+ return "No Error";
27
+ case 1:
28
+ return "unknown error";
29
+ case 2:
30
+ return "file not found or could not be opened";
31
+ case 3:
32
+ return "file not in PDF format or corrupted";
33
+ case 4:
34
+ return "password required or incorrect password";
35
+ case 5:
36
+ return "unsupported security scheme";
37
+ case 6:
38
+ return "page not found or content error";
39
+ default:
40
+ return "error code unknown";
41
+ }
42
+ }
43
+
44
+
45
+ VALUE _get(const char *name){
46
+ return rb_const_get(RB::PDFium(), rb_intern(name));
47
+ }
48
+ VALUE RB::PDFium(){
49
+ static VALUE val = rb_const_get(rb_cObject, rb_intern("PDFium"));
50
+ return val;
51
+ }
52
+ VALUE RB::Page(){
53
+ static VALUE val = _get("Page");
54
+ return val;
55
+ }
56
+ VALUE RB::Image(){
57
+ static VALUE val = _get("Image");
58
+ return val;
59
+ }
60
+ VALUE RB::BoundingBox(){
61
+ static VALUE val = _get("BoundingBox");
62
+ return val;
63
+ }
64
+ VALUE RB::Bookmark(){
65
+ static VALUE val = _get("Bookmark");
66
+ return val;
67
+ }
68
+ VALUE RB::Document(){
69
+ static VALUE val = _get("Document");
70
+ return val;
71
+ }
72
+ VALUE RB::BookmarkList(){
73
+ static VALUE val = _get("BookmarkList");
74
+ return val;
75
+ }
76
+ VALUE RB::PageList(){
77
+ static VALUE val = _get("PageList");
78
+ return val;
79
+ }
80
+ VALUE RB::ImageList(){
81
+ static VALUE val = _get("ImageList");
82
+ return val;
83
+ }
84
+
85
+ VALUE
86
+ RB::to_string(const CFX_WideString &wstr){
87
+ static rb_encoding *enc = rb_enc_find("UTF-16LE");
88
+ return rb_enc_str_new(wstr.UTF16LE_Encode().c_str(),(wstr.GetLength()*2), enc);
89
+ }
90
+
91
+ ID RB::to_s(VALUE obj){
92
+ static ID id = rb_intern("to_s");
93
+ return rb_funcall(obj, id, 0);
94
+ }
95
+
96
+ VALUE RB::type(VALUE obj){
97
+ static ID id = rb_intern("class");
98
+ return rb_funcall(obj, id, 0);
99
+ }
100
+
101
+ VALUE RB::get_option(VALUE options, const std::string &key){
102
+ return rb_hash_aref(options, ID2SYM(rb_intern(key.c_str())));
103
+ }
104
+
105
+ extern "C" void
106
+ Init_pdfium_ext()
107
+ {
108
+ // Initialize the PDFium library
109
+ FPDF_InitLibrary();
110
+
111
+ define_document_class();
112
+ define_page_class();
113
+ define_bookmark_class();
114
+ define_image_class();
115
+ }
@@ -0,0 +1,69 @@
1
+ #ifndef __PDFIUM_H__
2
+ #define __PDFIUM_H__
3
+
4
+ #include <stdlib.h>
5
+ #include <inttypes.h>
6
+ #include <fpdf_dataavail.h>
7
+ #include <fpdf_ext.h>
8
+ #include <fpdfformfill.h>
9
+ #include <fpdftext.h>
10
+ #include <fpdfview.h>
11
+ #include <fpdfedit.h>
12
+ #include <fpdfsave.h>
13
+ #include <fpdfdoc.h>
14
+ #include <iostream>
15
+ #include <fpdfdoc/fpdf_doc.h>
16
+
17
+ #include <fpdfapi/fpdf_render.h>
18
+ #include <fpdfapi/fpdf_pageobj.h>
19
+ #include <fpdfsdk/include/fsdk_rendercontext.h>
20
+ #include <FreeImage.h>
21
+
22
+ #include "page_wrapper.h"
23
+ #include "page_object_wrapper.h"
24
+ #include "document_wrapper.h"
25
+
26
+ // auto generated by mkmf
27
+ #include "extconf.h"
28
+ extern "C" {
29
+ #include "ruby.h"
30
+ }
31
+
32
+ #ifdef DEBUG
33
+ #define DEBUG_MSG(str) do { std::cout << str << std::endl; } while( false )
34
+ #else
35
+ #define DEBUG_MSG(str) do { } while ( false )
36
+ #endif
37
+
38
+ VALUE define_bookmark_class();
39
+ VALUE define_document_class();
40
+ VALUE define_page_class();
41
+ VALUE define_image_class();
42
+
43
+ // a utility method to extract the reference to the FPDF_DOCUMENT from the Ruby/C++ wrapping
44
+
45
+ CPDF_Page* RB2PG(VALUE RB_Page);
46
+ CPDF_Document* RB2DOC(VALUE RB_DocumentWrapper);
47
+ CPDF_ImageObject* RB2IMG(VALUE RB_Image);
48
+
49
+ const char* PDFiumLastErrorString();
50
+
51
+ class RB {
52
+ public:
53
+ static VALUE PDFium();
54
+ static VALUE Bookmark();
55
+ static VALUE Document();
56
+ static VALUE Page();
57
+ static VALUE BookmarkList();
58
+ static VALUE BoundingBox();
59
+ static VALUE StringIO();
60
+ static VALUE Image();
61
+ static VALUE PageList();
62
+ static VALUE ImageList();
63
+ static VALUE type(VALUE object);
64
+ static VALUE to_string(const CFX_WideString &widestring);
65
+ static ID to_s(VALUE object);
66
+ static VALUE get_option(VALUE hash, const std::string &key);
67
+ };
68
+
69
+ #endif // __PDFIUM_H__
@@ -0,0 +1,15 @@
1
+ require 'stringio'
2
+ require_relative "pdfium/version"
3
+ require_relative "pdfium/bookmark_list"
4
+ require_relative "pdfium/page_list"
5
+ require_relative "pdfium/page_sizes"
6
+ require_relative "pdfium/bounding_box"
7
+ require_relative "pdfium/image_list"
8
+
9
+
10
+ # PDFium is a Ruby interface to the Google PDFium PDF renderer.
11
+ module PDFium
12
+
13
+ end
14
+
15
+ require_relative "pdfium_ext"
@@ -0,0 +1,28 @@
1
module PDFium

  # A list of bookmarks for a Document or a Bookmark.
  #
  # The list is a chain: it starts at the bookmark given to the constructor
  # and follows +next_sibling+ until that returns a falsy value.
  class BookmarkList

    include Enumerable

    # Not used directly, but called from Document#bookmarks.
    # +initial+ is the first bookmark of the level, or nil when there is none.
    def initialize(initial)
      @first = initial
    end

    # Calls block once for each bookmark that exists at the current level.
    # Since bookmarks form a tree, each bookmark may have one or more children.
    #
    # Returns an Enumerator when called without a block, instead of raising
    # LocalJumpError on the first yield.
    def each
      return to_enum(:each) unless block_given?

      bookmark = @first
      while bookmark
        yield bookmark
        bookmark = bookmark.next_sibling
      end
    end

    # True if no bookmarks exist, false if at least one is present
    def empty?
      @first.nil?
    end
  end

end
@@ -0,0 +1,16 @@
1
module PDFium

  # The size of an object. Used with both Page and Image.
  class BoundingBox

    # Edges of the box, exactly as passed to the constructor
    # (documented upstream as Fixnum values in points).
    attr_reader :left, :right, :top, :bottom

    # Arguments are given in the order Left, Right, Top, Bottom.
    def initialize(l, r, t, b)
      @left, @right, @top, @bottom = l, r, t, b
    end

  end

end
@@ -0,0 +1,21 @@
1
module PDFium

  # A list of Image instances for a given Page. Is returned by Page#images.
  # Iteration is delegated to the page's +each_image+ method.
  class ImageList

    include Enumerable

    # Load list for a given page. Not normally called directly, but from Page#images
    def initialize(page)
      @page = page
    end

    # Calls block once for each image yielded by the page's +each_image+.
    #
    # Returns an Enumerator when called without a block, matching the usual
    # Enumerable convention.
    def each(&block)
      return to_enum(:each) unless block_given?

      @page.each_image(&block)
    end

  end

end
@@ -0,0 +1,36 @@
1
module PDFium

  # A list of Page objects associated with a Document.
  class PageList

    include Enumerable

    # Create a new listing from the given document.
    # Not normally called directly, is called internally by Document#pages
    def initialize(document)
      @document = document
    end

    # Calls block once for each page on the document, yielding the current page
    # After the page is yielded, Page#unload will be automatically called.
    #
    # Returns an Enumerator when called without a block.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def each(&block)
      return to_enum(:each) unless block_given?

      @document.each_page(&block)
    end

    # Returns the number of pages on the document.
    #
    # With no argument and no block the document's +page_count+ is returned
    # directly, without iterating. When an item or a block is given, the
    # call is forwarded to Enumerable#count so those forms keep working.
    def count(*args, &block)
      return @document.page_count if args.empty? && block.nil?

      super
    end

    # Returns a Page instance for the given number.
    # If the given page_number is not valid, an ArgumentError will be raised.
    #
    # _note_ Subsequent calls to this function will return different Page instances.
    def [](index)
      @document.page_at(index)
    end
  end

end
@@ -0,0 +1,7 @@
1
module PDFium

  # Standard A4 paper size (presumably [width, height] in points — confirm
  # against callers).
  A4 = [595, 842]

  # Standard US Letter size, in the same form as A4.
  LETTER = [612, 792]

end
@@ -0,0 +1,4 @@
1
module PDFium

  # Version string of the gem.
  VERSION = "0.0.1"

end