act_as_page_extractor 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,125 +0,0 @@
1
- require 'act_as_page_extractor/version'
2
-
3
- require 'active_record'
4
-
5
- require 'awesome_print'
6
- require 'filesize'
7
- require 'total_compressor'
8
- require 'docsplit'
9
- require 'pdf_utils'
10
- require 'prawn'
11
- require 'pdf-reader'
12
-
13
- require 'act_as_page_extractor/modules/tools.rb'
14
- require 'act_as_page_extractor/modules/validating.rb'
15
- require 'act_as_page_extractor/modules/unzipping.rb'
16
- require 'act_as_page_extractor/modules/extracting.rb'
17
- require 'act_as_page_extractor/modules/saving.rb'
18
-
19
- require 'act_as_page_extractor/modules/interface'
20
-
21
# Mixin that adds page-extraction behaviour to an ActiveRecord model.
#
# Including the module registers a +before_create+ and a +before_destroy+
# callback on the host model and exposes the +act_as_page_extractor+ class
# macro used to configure it.
#
# Several steps called below (+cleanup_pages+, +unzip_document+,
# +valid_document+, +extract_pages+, +save_to_db+, +update_state+, +save_pdf+,
# +remove_files+) are not defined in this file; presumably they come from the
# files required under act_as_page_extractor/modules -- TODO confirm.
module ActAsPageExtractor
  extend ActiveSupport::Concern

  # State names. Only +:new+ is assigned in this file (before create).
  EXTRACTING_STATES = {
    new: 'new',
    extracting: 'extracting',
    extracted: 'extracted',
    'error.extraction': 'error.extraction'
  }.freeze

  # NOTE: all three paths are derived from Dir.pwd at the moment this file is
  # loaded, not at the moment an extraction runs.
  TMP_EXTRACTION_FILE_STORAGE = "#{Dir.pwd}/tmp/page_extraction".freeze
  FILE_STORAGE = "#{Dir.pwd}/public".freeze
  PDF_STORAGE = "#{FILE_STORAGE}/uploads/extracted/pdf".freeze

  included do
    before_create { self.page_extraction_state = EXTRACTING_STATES[:new] }
    before_destroy :remove_files
  end

  module ClassMethods
    # Configures page extraction for the including model.
    #
    # @param options [Hash] the keys read here are +:save_as_pdf+,
    #   +:filename+ (name of the instance method that returns the uploaded
    #   file), +:document_class+ (a class name string, resolved with
    #   +constantize+), +:document_id+ and +:additional_fields+.
    #
    # NOTE(review): +extracted_filename+ and +document_class+ are also defined
    # as singleton methods on ActAsPageExtractor itself, so every call of this
    # macro redefines them; with several configured models the last call wins.
    def act_as_page_extractor(options: {})
      define_method(:save_as_pdf) { |*_args| options[:save_as_pdf] }
      define_method(:extracted_filename) { |*_args| send(options[:filename].to_sym) }
      define_method(:extracted_document_id) { |*_args| options[:document_id] }
      define_method(:additional_fields) { |*_args| options[:additional_fields] }

      ActAsPageExtractor.define_singleton_method(:extracted_filename) { |*_args| options[:filename] }
      ActAsPageExtractor.define_singleton_method(:document_class) do |*_args|
        options[:document_class].constantize
      end
    end
  end

  # Preparation step run at the start of #page_extract!; currently it only
  # makes sure the PDF output directory exists.
  #
  # TODO: register all needed callbacks here (e.g. remove the PDF on destroy).
  # TODO: add to the README:
  #   rails g act_as_page_extractor:migration Document category_id user_id
  #   and, in the [Document] model:
  #   has_many :extracted_pages, dependent: :destroy
  def initialized
    create_pdf_dir
  end

  # Runs the whole extraction pipeline for this record.
  #
  # The state update and PDF saving always run, even when a step raises, and
  # the temporary directory is removed even if those two steps fail as well.
  def page_extract!
    initialized
    cleanup_pages
    create_tmp_dir
    begin
      copy_document
      unzip_document
      if valid_document
        extract_pages
        save_to_db
      end
    ensure
      begin
        update_state
        save_pdf
      ensure
        # Nested ensure: a failure in update_state/save_pdf must not leak the
        # temporary directory.
        finish
      end
    end
  end

  # Creates PDF_STORAGE when the model is configured with +save_as_pdf+.
  def create_pdf_dir
    # mkdir_p is a no-op for an existing directory, so no existence check
    # is needed.
    FileUtils.mkdir_p(PDF_STORAGE) if save_as_pdf
  end

  # Creates a per-run working directory with a random name under
  # TMP_EXTRACTION_FILE_STORAGE and remembers it in @tmp_dir.
  def create_tmp_dir
    @tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
    FileUtils.mkdir_p(@tmp_dir)
  end

  # Copies the uploaded document into @tmp_dir and records the origin path,
  # the path of the copy and the bare file name in instance variables.
  #
  # The origin path is FILE_STORAGE followed by the +url+ of the object
  # returned by +extracted_filename+.
  def copy_document
    @origin_document_path = "#{FILE_STORAGE}#{extracted_filename.url}"
    FileUtils.cp(@origin_document_path, @tmp_dir)
    filename = @origin_document_path.split('/').last
    @copy_document_path = "#{@tmp_dir}/#{filename}"
    @document_filename = filename
  end

  # Final step of #page_extract!: discards the working directory.
  def finish
    remove_tmp_dir
  end

  # Deletes @tmp_dir, but only when its path contains "/tmp/" -- a guard
  # against recursively removing an unexpected (or unset) location.
  def remove_tmp_dir
    FileUtils.rm_rf(@tmp_dir) if @tmp_dir.to_s.include?('/tmp/')
  end
end
111
-
112
- # rails g model ExtractedPage page:text document_id:integer category_id:integer page_number:integer
113
-
114
- # Rails 4 way
115
- # 9.2.7.1 Multiple Callback Methods in One Class
116
- # 258 page
117
-
118
- # class ActiveRecord::Base
119
- # def self.acts_as_page_extractor(document_field=:filename)
120
- # auditor = Auditor.new(audit_log)
121
- # after_create auditor
122
- # after_update auditor
123
- # after_destroy auditor
124
- # end
125
- # end
Binary file