act_as_page_extractor 0.6.2 → 0.6.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +7 -4
- data/act_as_page_extractor.gemspec +1 -1
- data/lib/act_as_page_extractor/modules/extracting.rb +8 -1
- data/lib/act_as_page_extractor/modules/interface.rb +1 -1
- data/lib/act_as_page_extractor/modules/saving.rb +1 -1
- data/lib/act_as_page_extractor/modules/tools.rb +1 -1
- data/lib/act_as_page_extractor/version.rb +1 -1
- data/lib/act_as_page_extractor.rb +2 -2
- data/spec/act_as_page_extractor_spec.rb +13 -13
- data/spec/support/models.rb +1 -1
- data/test/Oscar_Wilde_The_Happy_Prince_en.doc +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.7z +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.rar +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.zip +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.html +395 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.odt +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.pdf +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.rtf +257 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.txt +79 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.wrong +0 -0
- metadata +26 -26
- data/test/test-doc-3-pages.doc +0 -0
- data/test/test-doc-3-pages.docx +0 -0
- data/test/test-doc-3-pages.docx.7z +0 -0
- data/test/test-doc-3-pages.docx.rar +0 -0
- data/test/test-doc-3-pages.docx.zip +0 -0
- data/test/test-doc-3-pages.html +0 -279
- data/test/test-doc-3-pages.odt +0 -0
- data/test/test-doc-3-pages.pdf +0 -0
- data/test/test-doc-3-pages.rtf +0 -339
- data/test/test-doc-3-pages.txt +0 -125
- data/test/test-doc-3-pages.wrong +0 -0
data/test/test-doc-3-pages.txt
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
require 'act_as_page_extractor/version'
|
2
|
-
|
3
|
-
require 'active_record'
|
4
|
-
|
5
|
-
require 'awesome_print'
|
6
|
-
require 'filesize'
|
7
|
-
require 'total_compressor'
|
8
|
-
require 'docsplit'
|
9
|
-
require 'pdf_utils'
|
10
|
-
require 'prawn'
|
11
|
-
require 'pdf-reader'
|
12
|
-
|
13
|
-
require 'act_as_page_extractor/modules/tools.rb'
|
14
|
-
require 'act_as_page_extractor/modules/validating.rb'
|
15
|
-
require 'act_as_page_extractor/modules/unzipping.rb'
|
16
|
-
require 'act_as_page_extractor/modules/extracting.rb'
|
17
|
-
require 'act_as_page_extractor/modules/saving.rb'
|
18
|
-
|
19
|
-
require 'act_as_page_extractor/modules/interface'
|
20
|
-
|
21
|
-
# Concern that adds page extraction to a model: the attached document is
# copied into a temporary working directory, unpacked, validated, split
# into pages and stored. Steps not defined in this file (cleanup_pages,
# unzip_document, valid_document, extract_pages, save_to_db, update_state,
# save_pdf, remove_files) are expected to come from the
# act_as_page_extractor/modules/* files required alongside it.
module ActAsPageExtractor
  extend ActiveSupport::Concern

  # Values written to the model's page_extraction_state attribute.
  EXTRACTING_STATES = {
    new: 'new',
    extracting: 'extracting',
    extracted: 'extracted',
    'error.extraction': 'error.extraction'
  }.freeze

  # All paths are resolved against the working directory at load time.
  TMP_EXTRACTION_FILE_STORAGE = "#{Dir.pwd}/tmp/page_extraction".freeze
  FILE_STORAGE = "#{Dir.pwd}/public".freeze
  PDF_STORAGE = "#{FILE_STORAGE}/uploads/extracted/pdf".freeze

  included do
    before_create { self.page_extraction_state = EXTRACTING_STATES[:new] }
    before_destroy :remove_files
  end

  # attr_reader :options

  module ClassMethods
    # Macro that configures extraction for the including model.
    #
    # @param options [Hash] configuration; the keys read here are
    #   :save_as_pdf, :filename, :document_class, :document_id and
    #   :additional_fields.
    #
    # Defines instance readers on the model and two singleton readers on
    # ActAsPageExtractor itself. The singleton readers are redefined on
    # every call, so the most recent caller's options win.
    def act_as_page_extractor(options: {})
      # The generated methods accept and ignore any arguments, as before.
      define_method(:save_as_pdf) { |*_args| options[:save_as_pdf] }
      define_method(:extracted_filename) { |*_args| send(options[:filename].to_sym) }
      define_method(:extracted_document_id) { |*_args| options[:document_id] }
      define_method(:additional_fields) { |*_args| options[:additional_fields] }

      ActAsPageExtractor.define_singleton_method(:extracted_filename) { |*_args| options[:filename] }
      ActAsPageExtractor.define_singleton_method(:document_class) do |*_args|
        options[:document_class].constantize
      end
    end
  end

  # One-time preparation performed at the start of every extraction.
  #
  # TODO: add all needed callbacks (e.g. remove the PDF on destroy).
  # TODO: add to the README:
  #   rails g act_as_page_extractor:migration Document category_id user_id
  #   and, in the [Document] model:
  #   has_many :extracted_pages, dependent: :destroy
  def initialized
    create_pdf_dir
  end

  # Runs the whole extraction pipeline for this record.
  #
  # State update and PDF saving always run, even when a step raised, and
  # the temporary directory is removed even if those two steps fail.
  def page_extract!
    initialized
    cleanup_pages
    create_tmp_dir
    begin
      copy_document
      # debug_info
      unzip_document
      if valid_document
        extract_pages
        save_to_db
      end
    ensure
      begin
        update_state
        save_pdf
      ensure
        # Nested ensure: a failure above must not leak the temp directory.
        finish
      end
    end
  end

  # Creates the PDF output directory when the model is configured to keep
  # a PDF copy.
  def create_pdf_dir
    return unless save_as_pdf

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    FileUtils.mkdir_p(PDF_STORAGE) unless File.exist?(PDF_STORAGE)
  end

  # Creates a uniquely named working directory and remembers it in @tmp_dir.
  def create_tmp_dir
    @tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
    FileUtils.mkdir_p(@tmp_dir) unless File.exist?(@tmp_dir)
  end

  # Copies the source document into the working directory and records
  # @origin_document_path, @copy_document_path and @document_filename.
  # The latter two are only set once the copy has succeeded.
  def copy_document
    @origin_document_path = "#{FILE_STORAGE}#{extracted_filename.url}"
    FileUtils.cp(@origin_document_path, @tmp_dir)

    filename = File.basename(@origin_document_path)
    @copy_document_path = "#{@tmp_dir}/#{filename}"
    @document_filename = filename
  end

  # Final cleanup step of the pipeline.
  def finish
    remove_tmp_dir
  end

  # Deletes the working directory. The "/tmp/" check is a safety guard so
  # that rm_rf is never run on a path outside a tmp directory (or on nil).
  def remove_tmp_dir
    FileUtils.rm_rf(@tmp_dir) if @tmp_dir =~ %r{/tmp/}
  end
end
|
111
|
-
|
112
|
-
# rails g model ExtractedPage page:text document_id:integer category_id:integer page_number:integer
|
113
|
-
|
114
|
-
# Rails 4 way
|
115
|
-
# 9.2.7.1 Multiple Callback Methods in One Class
|
116
|
-
# 258 page
|
117
|
-
|
118
|
-
# class ActiveRecord::Base
|
119
|
-
# def self.acts_as_page_extractor(document_field=:filename)
|
120
|
-
# auditor = Auditor.new(audit_log)
|
121
|
-
# after_create auditor
|
122
|
-
# after_update auditor
|
123
|
-
# after_destroy auditor
|
124
|
-
# end
|
125
|
-
# end
|
data/test/test-doc-3-pages.wrong
DELETED
Binary file
|