act_as_page_extractor 0.6.2 → 0.6.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/Gemfile.lock +7 -4
  4. data/act_as_page_extractor.gemspec +1 -1
  5. data/lib/act_as_page_extractor/modules/extracting.rb +8 -1
  6. data/lib/act_as_page_extractor/modules/interface.rb +1 -1
  7. data/lib/act_as_page_extractor/modules/saving.rb +1 -1
  8. data/lib/act_as_page_extractor/modules/tools.rb +1 -1
  9. data/lib/act_as_page_extractor/version.rb +1 -1
  10. data/lib/act_as_page_extractor.rb +2 -2
  11. data/spec/act_as_page_extractor_spec.rb +13 -13
  12. data/spec/support/models.rb +1 -1
  13. data/test/Oscar_Wilde_The_Happy_Prince_en.doc +0 -0
  14. data/test/Oscar_Wilde_The_Happy_Prince_en.docx +0 -0
  15. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.7z +0 -0
  16. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.rar +0 -0
  17. data/test/Oscar_Wilde_The_Happy_Prince_en.docx.zip +0 -0
  18. data/test/Oscar_Wilde_The_Happy_Prince_en.html +395 -0
  19. data/test/Oscar_Wilde_The_Happy_Prince_en.odt +0 -0
  20. data/test/Oscar_Wilde_The_Happy_Prince_en.pdf +0 -0
  21. data/test/Oscar_Wilde_The_Happy_Prince_en.rtf +257 -0
  22. data/test/Oscar_Wilde_The_Happy_Prince_en.txt +79 -0
  23. data/test/Oscar_Wilde_The_Happy_Prince_en.wrong +0 -0
  24. metadata +26 -26
  25. data/test/test-doc-3-pages.doc +0 -0
  26. data/test/test-doc-3-pages.docx +0 -0
  27. data/test/test-doc-3-pages.docx.7z +0 -0
  28. data/test/test-doc-3-pages.docx.rar +0 -0
  29. data/test/test-doc-3-pages.docx.zip +0 -0
  30. data/test/test-doc-3-pages.html +0 -279
  31. data/test/test-doc-3-pages.odt +0 -0
  32. data/test/test-doc-3-pages.pdf +0 -0
  33. data/test/test-doc-3-pages.rtf +0 -339
  34. data/test/test-doc-3-pages.txt +0 -125
  35. data/test/test-doc-3-pages.wrong +0 -0
@@ -1,125 +0,0 @@
1
- require 'act_as_page_extractor/version'
2
-
3
- require 'active_record'
4
-
5
- require 'awesome_print'
6
- require 'filesize'
7
- require 'total_compressor'
8
- require 'docsplit'
9
- require 'pdf_utils'
10
- require 'prawn'
11
- require 'pdf-reader'
12
-
13
- require 'act_as_page_extractor/modules/tools.rb'
14
- require 'act_as_page_extractor/modules/validating.rb'
15
- require 'act_as_page_extractor/modules/unzipping.rb'
16
- require 'act_as_page_extractor/modules/extracting.rb'
17
- require 'act_as_page_extractor/modules/saving.rb'
18
-
19
- require 'act_as_page_extractor/modules/interface'
20
-
21
- module ActAsPageExtractor
22
-
23
- extend ActiveSupport::Concern
24
-
25
- included do
26
- before_create { self.page_extraction_state = EXTRACTING_STATES[:new] }
27
- before_destroy :remove_files
28
- end
29
-
30
- # attr_reader :options
31
-
32
- module ClassMethods
33
- def act_as_page_extractor(options: {})
34
- define_method(:save_as_pdf){|*args| options[:save_as_pdf] }
35
- define_method(:extracted_filename){|*args| self.send(options[:filename].to_sym) }
36
- ActAsPageExtractor.define_singleton_method(:extracted_filename) {|*args| options[:filename] }
37
- ActAsPageExtractor.define_singleton_method(:document_class) {|*args| options[:document_class].constantize }
38
- define_method(:extracted_document_id){|*args| options[:document_id] }
39
- define_method(:additional_fields){|*args| options[:additional_fields] }
40
- end
41
- end
42
-
43
- EXTRACTING_STATES = {
44
- new: 'new',
45
- extracting: 'extracting',
46
- extracted: 'extracted',
47
- 'error.extraction': 'error.extraction'
48
- }.freeze
49
-
50
- TMP_EXTRACTION_FILE_STORAGE = "#{Dir.pwd}/tmp/page_extraction".freeze
51
- FILE_STORAGE = "#{Dir.pwd}/public".freeze
52
- PDF_STORAGE = "#{FILE_STORAGE}/uploads/extracted/pdf".freeze
53
-
54
- def initialized
55
- # add all need callbacks
56
- #on destroy remove pdf
57
-
58
- #Add to Readme!!
59
- #rails g act_as_page_extractor:migration Document category_id user_id
60
- # add to [Document] model:
61
- # has_many :extracted_pages, dependent: :destroy
62
- create_pdf_dir
63
- end
64
-
65
- def page_extract!
66
- initialized
67
- cleanup_pages
68
- create_tmp_dir
69
- begin
70
- copy_document
71
- # debug_info
72
- unzip_document
73
- if valid_document
74
- extract_pages
75
- save_to_db
76
- end
77
- ensure
78
- update_state
79
- save_pdf
80
- finish
81
- end
82
- end
83
-
84
- def create_pdf_dir
85
- if save_as_pdf
86
- FileUtils::mkdir_p(PDF_STORAGE) unless File.exists?(PDF_STORAGE)
87
- end
88
- end
89
-
90
- def create_tmp_dir
91
- @tmp_dir = "#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}"
92
- FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)
93
- end
94
-
95
- def copy_document
96
- @origin_document_path = "#{FILE_STORAGE}#{self.send(:extracted_filename).url.to_s}"
97
- ap @origin_document_path
98
- FileUtils.cp(@origin_document_path, @tmp_dir)
99
- @copy_document_path = "#{@tmp_dir}/#{@origin_document_path.split("/").last}"
100
- @document_filename = @origin_document_path.split("/").last
101
- end
102
-
103
- def finish
104
- remove_tmp_dir
105
- end
106
-
107
- def remove_tmp_dir
108
- FileUtils.rm_rf(@tmp_dir) if @tmp_dir =~ /\/tmp\//
109
- end
110
- end
111
-
112
- # rails g model ExtractedPage page:text document_id:integer category_id:integer page_number:integer
113
-
114
- # Rails 4 way
115
- # 9.2.7.1 Multiple Callback Methods in One Class
116
- # 258 page
117
-
118
- # class ActiveRecord::Base
119
- # def self.acts_as_page_extractor(document_field=:filename)
120
- # auditor = Auditor.new(audit_log)
121
- # after_create auditor
122
- # after_update auditor
123
- # after_destroy auditor
124
- # end
125
- # end
Binary file