act_as_page_extractor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +57 -0
  3. data/.rmvrc +1 -0
  4. data/.rspec +3 -0
  5. data/.ruby-gemset +1 -0
  6. data/.ruby-version +1 -0
  7. data/Gemfile +22 -0
  8. data/Gemfile.lock +107 -0
  9. data/LICENSE +21 -0
  10. data/README.md +119 -0
  11. data/Rakefile +6 -0
  12. data/act_as_page_extractor.gemspec +34 -0
  13. data/lib/act_as_page_extractor.rb +126 -0
  14. data/lib/act_as_page_extractor/modules/extracting.rb +35 -0
  15. data/lib/act_as_page_extractor/modules/interface.rb +30 -0
  16. data/lib/act_as_page_extractor/modules/saving.rb +47 -0
  17. data/lib/act_as_page_extractor/modules/tools.rb +54 -0
  18. data/lib/act_as_page_extractor/modules/unzipping.rb +15 -0
  19. data/lib/act_as_page_extractor/modules/validating.rb +22 -0
  20. data/lib/act_as_page_extractor/version.rb +5 -0
  21. data/lib/generators/act_as_page_extractor/migration_generator.rb +49 -0
  22. data/lib/generators/act_as_page_extractor/templates/act_as_page_extractor.rb.erb +14 -0
  23. data/lib/generators/act_as_page_extractor/templates/add_page_extractor_fields_to_documents.rb.erb +8 -0
  24. data/lib/generators/act_as_page_extractor/templates/create_extracted_pages_table.rb.erb +19 -0
  25. data/lib/generators/act_as_page_extractor/templates/extracted_page.rb.erb +3 -0
  26. data/spec/act_as_page_extractor_spec.rb +46 -0
  27. data/spec/spec_helper.rb +8 -0
  28. data/spec/support/models.rb +92 -0
  29. data/test/test-doc-3-pages.doc +0 -0
  30. data/test/test-doc-3-pages.docx +0 -0
  31. data/test/test-doc-3-pages.docx.7z +0 -0
  32. data/test/test-doc-3-pages.docx.rar +0 -0
  33. data/test/test-doc-3-pages.docx.zip +0 -0
  34. data/test/test-doc-3-pages.html +279 -0
  35. data/test/test-doc-3-pages.odt +0 -0
  36. data/test/test-doc-3-pages.pdf +0 -0
  37. data/test/test-doc-3-pages.rtf +339 -0
  38. data/test/test-doc-3-pages.txt +125 -0
  39. data/test/test-doc-3-pages.wrong +0 -0
  40. metadata +279 -0
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -0,0 +1,279 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
5
+ <title></title>
6
+ <meta name="generator" content="LibreOffice 4.2.8.2 (Linux)">
7
+ <meta name="created" content="20160916;170510000000000">
8
+ <meta name="changed" content="0;0">
9
+ <style type="text/css">
10
+ <!--
11
+ @page { size: 8.5in 11in; margin: 0.79in }
12
+ p { margin-bottom: 0.1in; direction: ltr; color: #00000a; line-height: 120%; text-align: left; widows: 0; orphans: 0 }
13
+ p.western { font-family: "Liberation Serif", serif; font-size: 12pt; so-language: en-US }
14
+ p.cjk { font-family: "Droid Sans Fallback"; font-size: 12pt; so-language: zh-CN }
15
+ p.ctl { font-family: "FreeSans"; font-size: 12pt; so-language: hi-IN }
16
+ -->
17
+ </style>
18
+ </head>
19
+ <body lang="en-US" text="#00000a" dir="ltr">
20
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
21
+ 'act_as_page_extractor/version'</p>
22
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
23
+ </p>
24
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
25
+ 'active_record'</p>
26
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
27
+ </p>
28
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
29
+ 'awesome_print'</p>
30
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
31
+ 'filesize'</p>
32
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
33
+ 'total_compressor'</p>
34
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
35
+ 'docsplit'</p>
36
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
37
+ 'pdf_utils'</p>
38
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
39
+ 'prawn'</p>
40
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
41
+ 'pdf-reader'</p>
42
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
43
+ </p>
44
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
45
+ 'act_as_page_extractor/modules/tools.rb'</p>
46
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
47
+ 'act_as_page_extractor/modules/validating.rb'</p>
48
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
49
+ 'act_as_page_extractor/modules/unzipping.rb'</p>
50
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
51
+ 'act_as_page_extractor/modules/extracting.rb'</p>
52
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
53
+ 'act_as_page_extractor/modules/saving.rb'</p>
54
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
55
+ </p>
56
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">require
57
+ 'act_as_page_extractor/modules/interface'</p>
58
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
59
+ </p>
60
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">module
61
+ ActAsPageExtractor</p>
62
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
63
+ </p>
64
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
65
+ extend ActiveSupport::Concern</p>
66
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
67
+ </p>
68
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
69
+ included do</p>
70
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
71
+ before_create { self.page_extraction_state = EXTRACTING_STATES[:new]
72
+ }</p>
73
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
74
+ before_destroy :remove_files</p>
75
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
76
+ end</p>
77
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
78
+ </p>
79
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"> #
80
+ attr_reader :options</p>
81
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
82
+ </p>
83
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
84
+ module ClassMethods</p>
85
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
86
+ def act_as_page_extractor(options: {})</p>
87
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
88
+ define_method(:save_as_pdf){|*args| options[:save_as_pdf] }</p>
89
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
90
+ define_method(:extracted_filename){|*args|
91
+ self.send(options[:filename].to_sym) }</p>
92
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
93
+ ActAsPageExtractor.define_singleton_method(:extracted_filename)
94
+ {|*args| options[:filename] }</p>
95
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
96
+ ActAsPageExtractor.define_singleton_method(:document_class) {|*args|
97
+ options[:document_class].constantize }</p>
98
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
99
+ define_method(:extracted_document_id){|*args| options[:document_id]
100
+ }</p>
101
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
102
+ define_method(:additional_fields){|*args|
103
+ options[:additional_fields] }</p>
104
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
105
+ end</p>
106
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
107
+ end</p>
108
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
109
+ </p>
110
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
111
+ EXTRACTING_STATES = {</p>
112
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
113
+ new: 'new',</p>
114
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
115
+ extracting: 'extracting',</p>
116
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
117
+ extracted: 'extracted',</p>
118
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
119
+ 'error.extraction': 'error.extraction'</p>
120
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
121
+ }.freeze</p>
122
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
123
+ </p>
124
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
125
+ TMP_EXTRACTION_FILE_STORAGE = &quot;#{Dir.pwd}/tmp/page_extraction&quot;.freeze</p>
126
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
127
+ FILE_STORAGE = &quot;#{Dir.pwd}/public&quot;.freeze</p>
128
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
129
+ PDF_STORAGE = &quot;#{FILE_STORAGE}/uploads/extracted/pdf&quot;.freeze</p>
130
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
131
+ </p>
132
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
133
+ def initialized</p>
134
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
135
+ # add all need callbacks</p>
136
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
137
+ #on destroy remove pdf</p>
138
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
139
+ </p>
140
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
141
+ #Add to Readme!!</p>
142
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
143
+ #rails g act_as_page_extractor:migration Document category_id user_id</p>
144
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
145
+ # add to [Document] model:</p>
146
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
147
+ # has_many :extracted_pages, dependent: :destroy</p>
148
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
149
+ create_pdf_dir</p>
150
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
151
+ end</p>
152
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
153
+ </p>
154
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
155
+ def page_extract!</p>
156
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
157
+ initialized</p>
158
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
159
+ cleanup_pages</p>
160
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
161
+ create_tmp_dir</p>
162
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
163
+ begin</p>
164
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
165
+ copy_document</p>
166
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
167
+ # debug_info</p>
168
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
169
+ unzip_document</p>
170
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
171
+ if valid_document</p>
172
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
173
+ extract_pages</p>
174
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
175
+ save_to_db</p>
176
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
177
+ end</p>
178
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
179
+ ensure</p>
180
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
181
+ update_state</p>
182
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
183
+ save_pdf</p>
184
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
185
+ finish</p>
186
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
187
+ end</p>
188
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
189
+ end</p>
190
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
191
+ </p>
192
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
193
+ def create_pdf_dir</p>
194
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
195
+ if save_as_pdf</p>
196
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
197
+ FileUtils::mkdir_p(PDF_STORAGE) unless File.exists?(PDF_STORAGE)</p>
198
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
199
+ end</p>
200
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
201
+ end</p>
202
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
203
+ </p>
204
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
205
+ def create_tmp_dir</p>
206
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
207
+ @tmp_dir = &quot;#{TMP_EXTRACTION_FILE_STORAGE}/#{SecureRandom.hex(6)}&quot;</p>
208
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
209
+ FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)</p>
210
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
211
+ end</p>
212
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
213
+ </p>
214
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
215
+ def copy_document</p>
216
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
217
+ @origin_document_path =
218
+ &quot;#{FILE_STORAGE}#{self.send(:extracted_filename).url.to_s}&quot;</p>
219
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
220
+ ap @origin_document_path</p>
221
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
222
+ FileUtils.cp(@origin_document_path, @tmp_dir)</p>
223
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
224
+ @copy_document_path =
225
+ &quot;#{@tmp_dir}/#{@origin_document_path.split(&quot;/&quot;).last}&quot;</p>
226
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
227
+ @document_filename = @origin_document_path.split(&quot;/&quot;).last</p>
228
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
229
+ end</p>
230
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
231
+ </p>
232
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
233
+ def finish</p>
234
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
235
+ remove_tmp_dir</p>
236
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
237
+ end</p>
238
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
239
+ </p>
240
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
241
+ def remove_tmp_dir</p>
242
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
243
+ FileUtils.rm_rf(@tmp_dir) if @tmp_dir =~ /\/tmp\//</p>
244
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">
245
+ end</p>
246
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">end</p>
247
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
248
+ </p>
249
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
250
+ rails g model ExtractedPage page:text document_id:integer
251
+ category_id:integer page_number:integer</p>
252
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
253
+ </p>
254
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
255
+ Rails 4 way</p>
256
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
257
+ 9.2.7.1 Multiple Callback Methods in One Class</p>
258
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
259
+ 258 page</p>
260
+ <p class="western" style="margin-bottom: 0in; line-height: 100%"><br>
261
+ </p>
262
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
263
+ class ActiveRecord::Base</p>
264
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
265
+ def self.acts_as_page_extractor(document_field=:filename)</p>
266
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
267
+ auditor = Auditor.new(audit_log)</p>
268
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
269
+ after_create auditor</p>
270
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
271
+ after_update auditor</p>
272
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
273
+ after_destroy auditor</p>
274
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
275
+ end</p>
276
+ <p class="western" style="margin-bottom: 0in; line-height: 100%">#
277
+ end</p>
278
+ </body>
279
+ </html>
Binary file
Binary file
@@ -0,0 +1,339 @@
1
+ {\rtf1\ansi\deff3\adeflang1025
2
+ {\fonttbl{\f0\froman\fprq2\fcharset0 Times New Roman;}{\f1\froman\fprq2\fcharset2 Symbol;}{\f2\fswiss\fprq2\fcharset0 Arial;}{\f3\froman\fprq2\fcharset0 Liberation Serif{\*\falt Times New Roman};}{\f4\fswiss\fprq2\fcharset0 Liberation Sans{\*\falt Arial};}{\f5\fnil\fprq2\fcharset0 Droid Sans Fallback;}{\f6\fnil\fprq2\fcharset0 FreeSans;}{\f7\fswiss\fprq0\fcharset128 FreeSans;}}
3
+ {\colortbl;\red0\green0\blue0;\red0\green0\blue10;\red128\green128\blue128;}
4
+ {\stylesheet{\s0\snext0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033 Normal;}
5
+ {\s15\sbasedon0\snext16\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\sb240\sa120\keepn\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs28\alang1081\loch\f4\fs28\lang1033 Heading;}
6
+ {\s16\sbasedon0\snext16\sl288\slmult1\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\sb0\sa140\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\loch\f3\fs24\lang1033 Text Body;}
7
+ {\s17\sbasedon16\snext17\sl288\slmult1\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\sb0\sa140\ltrpar\cf2\dbch\af5\langfe2052\dbch\af7\afs24\alang1081\loch\f3\fs24\lang1033 List;}
8
+ {\s18\sbasedon0\snext18\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\sb120\sa120\noline\ltrpar\cf2\i\dbch\af5\langfe2052\dbch\af7\afs24\alang1081\ai\loch\f3\fs24\lang1033 Caption;}
9
+ {\s19\sbasedon0\snext19\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\noline\ltrpar\cf2\dbch\af5\langfe2052\dbch\af7\afs24\alang1081\loch\f3\fs24\lang1033 Index;}
10
+ }{\info{\creatim\yr2016\mo9\dy16\hr17\min5}{\revtim\yr0\mo0\dy0\hr0\min0}{\printim\yr0\mo0\dy0\hr0\min0}{\comment LibreOffice}{\vern67241986}}\deftab709
11
+ \viewscale95
12
+ {\*\pgdsctbl
13
+ {\pgdsc0\pgdscuse451\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\pgdscnxt0 Default Style;}}
14
+ \formshade{\*\pgdscno0}\paperh15840\paperw12240\margl1134\margr1134\margt1134\margb1134\sectd\sbknone\sectunlocked1\pgndec\pgwsxn12240\pghsxn15840\marglsxn1134\margrsxn1134\margtsxn1134\margbsxn1134\ftnbj\ftnstart1\ftnrstcont\ftnnar\aenddoc\aftnrstcont\aftnstart1\aftnnrlc
15
+ \pgndec\pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
16
+ require 'act_as_page_extractor/version'}
17
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
18
+
19
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
20
+ require 'active_record'}
21
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
22
+
23
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
24
+ require 'awesome_print'}
25
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
26
+ require 'filesize'}
27
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
28
+ require 'total_compressor'}
29
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
30
+ require 'docsplit'}
31
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
32
+ require 'pdf_utils'}
33
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
34
+ require 'prawn'}
35
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
36
+ require 'pdf-reader'}
37
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
38
+
39
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
40
+ require 'act_as_page_extractor/modules/tools.rb'}
41
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
42
+ require 'act_as_page_extractor/modules/validating.rb'}
43
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
44
+ require 'act_as_page_extractor/modules/unzipping.rb'}
45
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
46
+ require 'act_as_page_extractor/modules/extracting.rb'}
47
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
48
+ require 'act_as_page_extractor/modules/saving.rb'}
49
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
50
+
51
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
52
+ require 'act_as_page_extractor/modules/interface'}
53
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
54
+
55
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
56
+ module ActAsPageExtractor}
57
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
58
+
59
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
60
+ }{\rtlch \ltrch\loch
61
+ extend ActiveSupport::Concern}
62
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
63
+
64
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
65
+ }{\rtlch \ltrch\loch
66
+ included do}
67
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
68
+ }{\rtlch \ltrch\loch
69
+ before_create \{ self.page_extraction_state = EXTRACTING_STATES[:new] \}}
70
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
71
+ }{\rtlch \ltrch\loch
72
+ before_destroy :remove_files}
73
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
74
+ }{\rtlch \ltrch\loch
75
+ end}
76
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
77
+
78
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
79
+ }{\rtlch \ltrch\loch
80
+ # attr_reader :options}
81
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
82
+
83
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
84
+ }{\rtlch \ltrch\loch
85
+ module ClassMethods}
86
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
87
+ }{\rtlch \ltrch\loch
88
+ def act_as_page_extractor(options: \{\})}
89
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
90
+ }{\rtlch \ltrch\loch
91
+ define_method(:save_as_pdf)\{|*args| options[:save_as_pdf] \}}
92
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
93
+ }{\rtlch \ltrch\loch
94
+ define_method(:extracted_filename)\{|*args| self.send(options[:filename].to_sym) \}}
95
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
96
+ }{\rtlch \ltrch\loch
97
+ ActAsPageExtractor.define_singleton_method(:extracted_filename) \{|*args| options[:filename] \}}
98
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
99
+ }{\rtlch \ltrch\loch
100
+ ActAsPageExtractor.define_singleton_method(:document_class) \{|*args| options[:document_class].constantize \}}
101
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
102
+ }{\rtlch \ltrch\loch
103
+ define_method(:extracted_document_id)\{|*args| options[:document_id] \}}
104
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
105
+ }{\rtlch \ltrch\loch
106
+ define_method(:additional_fields)\{|*args| options[:additional_fields] \}}
107
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
108
+ }{\rtlch \ltrch\loch
109
+ end}
110
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
111
+ }{\rtlch \ltrch\loch
112
+ end}
113
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
114
+
115
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
116
+ }{\rtlch \ltrch\loch
117
+ EXTRACTING_STATES = \{}
118
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
119
+ }{\rtlch \ltrch\loch
120
+ new: 'new',}
121
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
122
+ }{\rtlch \ltrch\loch
123
+ extracting: 'extracting',}
124
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
125
+ }{\rtlch \ltrch\loch
126
+ extracted: 'extracted',}
127
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
128
+ }{\rtlch \ltrch\loch
129
+ 'error.extraction': 'error.extraction'}
130
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
131
+ }{\rtlch \ltrch\loch
132
+ \}.freeze}
133
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
134
+
135
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
136
+ }{\rtlch \ltrch\loch
137
+ TMP_EXTRACTION_FILE_STORAGE = "#\{Dir.pwd\}/tmp/page_extraction".freeze}
138
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
139
+ }{\rtlch \ltrch\loch
140
+ FILE_STORAGE = "#\{Dir.pwd\}/public".freeze}
141
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
142
+ }{\rtlch \ltrch\loch
143
+ PDF_STORAGE = "#\{FILE_STORAGE\}/uploads/extracted/pdf".freeze}
144
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
145
+
146
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
147
+ }{\rtlch \ltrch\loch
148
+ def initialized}
149
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
150
+ }{\rtlch \ltrch\loch
151
+ # add all need callbacks}
152
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
153
+ }{\rtlch \ltrch\loch
154
+ #on destroy remove pdf}
155
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
156
+
157
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
158
+ }{\rtlch \ltrch\loch
159
+ #Add to Readme!!}
160
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
161
+ }{\rtlch \ltrch\loch
162
+ #rails g act_as_page_extractor:migration Document category_id user_id}
163
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
164
+ }{\rtlch \ltrch\loch
165
+ # add to [Document] model:}
166
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
167
+ }{\rtlch \ltrch\loch
168
+ # has_many :extracted_pages, dependent: :destroy}
169
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
170
+ }{\rtlch \ltrch\loch
171
+ create_pdf_dir}
172
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
173
+ }{\rtlch \ltrch\loch
174
+ end}
175
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
176
+
177
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
178
+ }{\rtlch \ltrch\loch
179
+ def page_extract!}
180
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
181
+ }{\rtlch \ltrch\loch
182
+ initialized}
183
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
184
+ }{\rtlch \ltrch\loch
185
+ cleanup_pages}
186
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
187
+ }{\rtlch \ltrch\loch
188
+ create_tmp_dir}
189
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
190
+ }{\rtlch \ltrch\loch
191
+ begin}
192
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
193
+ }{\rtlch \ltrch\loch
194
+ copy_document}
195
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
196
+ }{\rtlch \ltrch\loch
197
+ # debug_info}
198
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
199
+ }{\rtlch \ltrch\loch
200
+ unzip_document}
201
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
202
+ }{\rtlch \ltrch\loch
203
+ if valid_document}
204
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
205
+ }{\rtlch \ltrch\loch
206
+ extract_pages}
207
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
208
+ }{\rtlch \ltrch\loch
209
+ save_to_db}
210
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
211
+ }{\rtlch \ltrch\loch
212
+ end}
213
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
214
+ }{\rtlch \ltrch\loch
215
+ ensure}
216
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
217
+ }{\rtlch \ltrch\loch
218
+ update_state}
219
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
220
+ }{\rtlch \ltrch\loch
221
+ save_pdf}
222
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
223
+ }{\rtlch \ltrch\loch
224
+ finish}
225
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
226
+ }{\rtlch \ltrch\loch
227
+ end}
228
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
229
+ }{\rtlch \ltrch\loch
230
+ end}
231
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
232
+
233
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
234
+ }{\rtlch \ltrch\loch
235
+ def create_pdf_dir}
236
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
237
+ }{\rtlch \ltrch\loch
238
+ if save_as_pdf}
239
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
240
+ }{\rtlch \ltrch\loch
241
+ FileUtils::mkdir_p(PDF_STORAGE) unless File.exists?(PDF_STORAGE)}
242
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
243
+ }{\rtlch \ltrch\loch
244
+ end}
245
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
246
+ }{\rtlch \ltrch\loch
247
+ end}
248
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
249
+
250
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
251
+ }{\rtlch \ltrch\loch
252
+ def create_tmp_dir}
253
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
254
+ }{\rtlch \ltrch\loch
255
+ @tmp_dir = "#\{TMP_EXTRACTION_FILE_STORAGE\}/#\{SecureRandom.hex(6)\}"}
256
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
257
+ }{\rtlch \ltrch\loch
258
+ FileUtils::mkdir_p(@tmp_dir) unless File.exists?(@tmp_dir)}
259
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
260
+ }{\rtlch \ltrch\loch
261
+ end}
262
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
263
+
264
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
265
+ }{\rtlch \ltrch\loch
266
+ def copy_document}
267
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
268
+ }{\rtlch \ltrch\loch
269
+ @origin_document_path = "#\{FILE_STORAGE\}#\{self.send(:extracted_filename).url.to_s\}"}
270
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
271
+ }{\rtlch \ltrch\loch
272
+ ap @origin_document_path}
273
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
274
+ }{\rtlch \ltrch\loch
275
+ FileUtils.cp(@origin_document_path, @tmp_dir)}
276
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
277
+ }{\rtlch \ltrch\loch
278
+ @copy_document_path = "#\{@tmp_dir\}/#\{@origin_document_path.split("/").last\}"}
279
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
280
+ }{\rtlch \ltrch\loch
281
+ @document_filename = @origin_document_path.split("/").last}
282
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
283
+ }{\rtlch \ltrch\loch
284
+ end}
285
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
286
+
287
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
288
+ }{\rtlch \ltrch\loch
289
+ def finish}
290
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
291
+ }{\rtlch \ltrch\loch
292
+ remove_tmp_dir}
293
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
294
+ }{\rtlch \ltrch\loch
295
+ end}
296
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
297
+
298
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
299
+ }{\rtlch \ltrch\loch
300
+ def remove_tmp_dir}
301
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
302
+ }{\rtlch \ltrch\loch
303
+ FileUtils.rm_rf(@tmp_dir) if @tmp_dir =~ /\\/tmp\\//}
304
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch
305
+ }{\rtlch \ltrch\loch
306
+ end}
307
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
308
+ end}
309
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
310
+
311
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
312
+ # rails g model ExtractedPage page:text document_id:integer category_id:integer page_number:integer}
313
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
314
+
315
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
316
+ # Rails 4 way}
317
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
318
+ # 9.2.7.1 Multiple Callback Methods in One Class}
319
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
320
+ # 258 page}
321
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033\rtlch \ltrch\loch
322
+
323
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
324
+ # class ActiveRecord::Base}
325
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
326
+ # def self.acts_as_page_extractor(document_field=:filename)}
327
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
328
+ # auditor = Auditor.new(audit_log)}
329
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
330
+ # after_create auditor}
331
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
332
+ # after_update auditor}
333
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
334
+ # after_destroy auditor}
335
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
336
+ # end}
337
+ \par \pard\plain \s0\ql\nowidctlpar{\*\hyphen2\hyphlead2\hyphtrail2\hyphmax0}\ltrpar\cf2\dbch\af5\langfe2052\dbch\af6\afs24\alang1081\kerning1\loch\f3\fs24\lang1033{\rtlch \ltrch\loch
338
+ # end}
339
+ \par }