act_as_page_extractor 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +7 -4
- data/act_as_page_extractor.gemspec +1 -1
- data/lib/act_as_page_extractor/modules/extracting.rb +8 -1
- data/lib/act_as_page_extractor/modules/interface.rb +1 -1
- data/lib/act_as_page_extractor/modules/saving.rb +1 -1
- data/lib/act_as_page_extractor/modules/tools.rb +1 -1
- data/lib/act_as_page_extractor/version.rb +1 -1
- data/lib/act_as_page_extractor.rb +2 -2
- data/spec/act_as_page_extractor_spec.rb +13 -13
- data/spec/support/models.rb +1 -1
- data/test/Oscar_Wilde_The_Happy_Prince_en.doc +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.7z +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.rar +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.docx.zip +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.html +395 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.odt +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.pdf +0 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.rtf +257 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.txt +79 -0
- data/test/Oscar_Wilde_The_Happy_Prince_en.wrong +0 -0
- metadata +26 -26
- data/test/test-doc-3-pages.doc +0 -0
- data/test/test-doc-3-pages.docx +0 -0
- data/test/test-doc-3-pages.docx.7z +0 -0
- data/test/test-doc-3-pages.docx.rar +0 -0
- data/test/test-doc-3-pages.docx.zip +0 -0
- data/test/test-doc-3-pages.html +0 -279
- data/test/test-doc-3-pages.odt +0 -0
- data/test/test-doc-3-pages.pdf +0 -0
- data/test/test-doc-3-pages.rtf +0 -339
- data/test/test-doc-3-pages.txt +0 -125
- data/test/test-doc-3-pages.wrong +0 -0
data/test/test-doc-3-pages.txt
DELETED
@@ -1,125 +0,0 @@
|
|
1
|
-
require 'act_as_page_extractor/version'
|
2
|
-
|
3
|
-
require 'active_record'
|
4
|
-
|
5
|
-
require 'awesome_print'
|
6
|
-
require 'filesize'
|
7
|
-
require 'total_compressor'
|
8
|
-
require 'docsplit'
|
9
|
-
require 'pdf_utils'
|
10
|
-
require 'prawn'
|
11
|
-
require 'pdf-reader'
|
12
|
-
|
13
|
-
require 'act_as_page_extractor/modules/tools.rb'
|
14
|
-
require 'act_as_page_extractor/modules/validating.rb'
|
15
|
-
require 'act_as_page_extractor/modules/unzipping.rb'
|
16
|
-
require 'act_as_page_extractor/modules/extracting.rb'
|
17
|
-
require 'act_as_page_extractor/modules/saving.rb'
|
18
|
-
|
19
|
-
require 'act_as_page_extractor/modules/interface'
|
20
|
-
|
21
|
-
module ActAsPageExtractor
|
22
|
-
|
23
|
-
extend ActiveSupport::Concern
|
24
|
-
|
25
|
-
included do
|
26
|
-
before_create { self.page_extraction_state = EXTRACTING_STATES[:new] }
|
27
|
-
before_destroy :remove_files
|
28
|
-
end
|
29
|
-
|
30
|
-
# attr_reader :options
|
31
|
-
|
32
|
-
module ClassMethods
|
33
|
-
def act_as_page_extractor(options: {})
|
34
|
-
define_method(:save_as_pdf){|*args| options[:save_as_pdf] }
|
35
|
-
define_method(:extracted_filename){|*args| self.send(options[:filename].to_sym) }
|
36
|
-
ActAsPageExtractor.define_singleton_method(:extracted_filename) {|*args| options[:filename] }
|
37
|
-
ActAsPageExtractor.define_singleton_method(:document_class) {|*args| options[:document_class].constantize }
|
38
|
-
define_method(:extracted_document_id){|*args| options[:document_id] }
|
39
|
-
define_method(:additional_fields){|*args| options[:additional_fields] }
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
EXTRACTING_STATES = {
|
44
|
-
new: 'new',
|
45
|
-
extracting: 'extracting',
|
46
|
-
extracted: 'extracted',
|
47
|
-
'error.extraction': 'error.extraction'
|
48
|
-
}.freeze
|
49
|
-
|
50
|
-
TMP_EXTRACTION_FILE_STORAGE = "#{Dir.pwd}/tmp/page_extraction".freeze
|
51
|
-
FILE_STORAGE = "#{Dir.pwd}/public".freeze
|
52
|
-
PDF_STORAGE = "#{FILE_STORAGE}/uploads/extracted/pdf".freeze
|
53
|
-
|
54
|
-
def initialized
|
55
|
-
# add all need callbacks
|
56
|
-
#on destroy remove pdf
|
57
|
-
|
58
|
-
#Add to Readme!!
|
59
|
-
#rails g act_as_page_extractor:migration Document category_id user_id
|
60
|
-
# add to [Document] model:
|
61
|
-
# has_many :extracted_pages, dependent: :destroy
|
62
|
-
create_pdf_dir
|
63
|
-
end
|
64
|
-
|
65
|
-
def page_extract!
|
66
|
-
initialized
|
67
|
-
cleanup_pages
|
68
|
-
create_tmp_dir
|
69
|
-
begin
|
70
|
-
copy_document
|
71
|
-
# debug_info
|
72
|
-
unzip_document
|
73
|
-
if valid_document
|
74
|
-
extract_pages
|
75
|
-
save_to_db
|
76
|
-
end
|
77
|
-
ensure
|
78
|
-
update_state
|
79
|
-
save_pdf
|
80
|
-
finish
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
def create_pdf_dir
|
85
|
-
if save_as_pdf
|
86
|
-
FileUtils::mkdir_p(PDF_STORAGE) unless File.exists?(PDF_STORAGE)
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
def create_tmp_dir
|
91
|
-
@tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
|
92
|
-
FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)
|
93
|
-
end
|
94
|
-
|
95
|
-
def copy_document
|
96
|
-
@origin_document_path = "#{FILE_STORAGE}#{self.send(:extracted_filename).url.to_s}"
|
97
|
-
ap @origin_document_path
|
98
|
-
FileUtils.cp(@origin_document_path, @tmp_dir)
|
99
|
-
@copy_document_path = "#{@tmp_dir}/#{@origin_document_path.split("/").last}"
|
100
|
-
@document_filename = @origin_document_path.split("/").last
|
101
|
-
end
|
102
|
-
|
103
|
-
def finish
|
104
|
-
remove_tmp_dir
|
105
|
-
end
|
106
|
-
|
107
|
-
def remove_tmp_dir
|
108
|
-
FileUtils.rm_rf(@tmp_dir) if @tmp_dir =~ /\/tmp\//
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
|
-
# rails g model ExtractedPage page:text document_id:integer category_id:integer page_number:integer
|
113
|
-
|
114
|
-
# Rails 4 way
|
115
|
-
# 9.2.7.1 Multiple Callback Methods in One Class
|
116
|
-
# 258 page
|
117
|
-
|
118
|
-
# class ActiveRecord::Base
|
119
|
-
# def self.acts_as_page_extractor(document_field=:filename)
|
120
|
-
# auditor = Auditor.new(audit_log)
|
121
|
-
# after_create auditor
|
122
|
-
# after_update auditor
|
123
|
-
# after_destroy auditor
|
124
|
-
# end
|
125
|
-
# end
|
data/test/test-doc-3-pages.wrong
DELETED
Binary file
|