paperless 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/paperless CHANGED
@@ -131,6 +131,9 @@ desc 'Create a new note from a file'
131
131
  arg_name 'file_name'
132
132
  command :create do |c|
133
133
 
134
+ c.desc 'Dump the OCR text for the document to the terminal.'
135
+ c.switch :dump, :negatable => false, :default_value => false
136
+
134
137
  c.desc 'Open a prompt to rename the file before its processed through the rules.'
135
138
  c.switch :prompt, :negatable => false, :default_value => false
136
139
 
@@ -147,14 +150,15 @@ command :create do |c|
147
150
 
148
151
  args.each do |file|
149
152
 
150
- if File.exists?(File.expand_path file)
153
+ if File.exists?(File.expand_path file) && !File.directory?(File.expand_path file)
151
154
  file = File.expand_path file
152
155
  else
153
- raise "File does not exist (#{file})"
156
+ raise "File does not exist or is a directory (#{file})"
154
157
  end
155
158
 
156
- if options[:prompt] && !global_options[:simulate]
157
- # Cannot rename the file in simulate mode since we dont want to change the file name
159
+ old_filename = nil
160
+
161
+ if options[:prompt]
158
162
  file_ext = File.extname(file)
159
163
  filename = File.basename(file, file_ext)
160
164
  user_input = `#{COCOADIALOG} standard-inputbox --title "Paperless Prompt" --informative-text "Rename your file before its processed with rules..." --text "#{filename}" --no-newline --string-output`.split("\n")
@@ -162,6 +166,10 @@ command :create do |c|
162
166
  if user_input[0].match(/ok/i) && user_input[1] != File.basename(file, file_ext)
163
167
  new_filename = File.join(File.dirname(file), user_input[1] + file_ext)
164
168
  puts "Renaming file based on input to #{new_filename}"
169
+ if global_options[:simulate]
170
+ # save the file name to rename it back later
171
+ old_filename = file
172
+ end
165
173
  File.rename(file, new_filename)
166
174
  file = new_filename
167
175
  end
@@ -182,7 +190,7 @@ command :create do |c|
182
190
  file_ext = File.extname(file).gsub(/\./,'')
183
191
  if file_ext == Paperless::PDF_EXT && options[:ocr]
184
192
  puts "OCRing file..."
185
- engine.ocr
193
+ engine.ocr(options[:dump])
186
194
  end
187
195
 
188
196
  if options[:proc_rules]
@@ -193,6 +201,11 @@ command :create do |c|
193
201
  if global_options[:simulate]
194
202
  puts "Simulating changes..."
195
203
  engine.print
204
+
205
+ if global_options[:simulate] && options[:prompt] && !old_filename.nil?
206
+ puts "Renaming file back to #{old_filename}"
207
+ File.rename(file, old_filename)
208
+ end
196
209
  else
197
210
  puts "Saving #{file} to #{engine.service}"
198
211
  engine.create({:delete => options[:delete]})
@@ -2,6 +2,7 @@ require 'date'
2
2
 
3
3
  module DateSearch
4
4
 
5
+ SEP_NOSPACE = '\.\/\-\,'
5
6
  SEP = '\. \/\-\,'
6
7
  DAY = '(\d{1,2})'
7
8
  MONTH = '([a-zA-Z]{3,15})'
@@ -49,7 +50,25 @@ module DateSearch
49
50
 
50
51
  def date_search(text,date_locale)
51
52
  date = nil
52
- if match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}#{END_DATE}/i)
53
+ if match = text.match(/#{DAY}[#{SEP_NOSPACE}]+#{DAY}[#{SEP_NOSPACE}]+#{YEAR}/)
54
+ # US: 12-29-2011
55
+ # Euro: 29-12-2011
56
+ year = valid_year(match[3])
57
+ day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
58
+ month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
59
+
60
+ if month && day && year
61
+ puts "Basing the date off the discovered string (3): #{match[0]}"
62
+ begin
63
+ date = DateTime.new(year,month,day)
64
+ rescue
65
+ puts "WARNING: Unable to create date object. #{$!}"
66
+ date = nil
67
+ end
68
+ else
69
+ puts "WARNING: The discovered date string does not validate: #{match[0]}"
70
+ end
71
+ elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}/i)
53
72
  # December 29, 2011
54
73
  if valid_day(match[2]) && valid_year(match[3])
55
74
  puts "Basing the date off the discovered string (1): #{match[0]}"
@@ -60,7 +79,7 @@ module DateSearch
60
79
  date = nil
61
80
  end
62
81
  end
63
- elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}#{END_DATE}/i)
82
+ elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
64
83
  # 29 December 2011
65
84
  if valid_day(match[1]) && valid_year(match[3])
66
85
  puts "Basing the date off the discovered string (2): #{match[0]}"
@@ -71,23 +90,16 @@ module DateSearch
71
90
  date = nil
72
91
  end
73
92
  end
74
- elsif match = text.match(/#{DAY}[#{SEP}]+#{DAY}[#{SEP}]+#{YEAR}#{END_DATE}/)
75
- # US: 12-29-2011
76
- # Euro: 29-12-2011
77
- year = valid_year(match[3])
78
- day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
79
- month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
80
-
81
- if month && day && year
82
- puts "Basing the date off the discovered string (3): #{match[0]}"
93
+ elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
94
+ # December 2011
95
+ if valid_year(match[2])
96
+ puts "Basing the date off the discovered string (2): #{match[0]}"
83
97
  begin
84
- date = DateTime.new(year,month,day)
98
+ date = DateTime.parse(repair_ocr_string(match[0]))
85
99
  rescue
86
100
  puts "WARNING: Unable to create date object. #{$!}"
87
101
  date = nil
88
102
  end
89
- else
90
- puts "WARNING: The discovered date string does not validate: #{match[0]}"
91
103
  end
92
104
  end
93
105
  date
@@ -8,6 +8,7 @@ module Paperless
8
8
  PDF_EXT = 'pdf'
9
9
  DATE_VAR = '<date>'
10
10
  MATCH_VAR = '<match>'
11
+ FILENAME_VAR = '<filename>'
11
12
  FILEDATE = 'filedate'
12
13
  TODAY = 'today'
13
14
 
@@ -15,6 +16,7 @@ module Paperless
15
16
 
16
17
  PDFPEN_ENGINE = 'pdfpen'
17
18
  PDFPENPRO_ENGINE = 'pdfpenpro'
19
+ PDFPENPRO6_ENGINE = 'pdfpenpro6'
18
20
  ACROBAT_ENGINE = 'acrobat'
19
21
  DEVONTHINKPRO_ENGINE = 'devonthinkpro'
20
22
  DEVONTHINKPRO_SERVICE = 'devonthinkpro'
@@ -25,7 +27,7 @@ module Paperless
25
27
 
26
28
  def initialize(options)
27
29
  @destination = nil
28
- @service = nil
30
+ @service = options[:default_service]
29
31
  @title = nil
30
32
  @date = DateTime.now
31
33
  @tags = Array.new()
@@ -119,7 +121,7 @@ module Paperless
119
121
  # First check if there are actually any date rules
120
122
  @rules.each do |rule|
121
123
  if rule.condition == Paperless::DATE_VAR
122
- @date = date_search(text,@date_locale)
124
+ @date = date_search(text,@date_locale) || date_search(@file,@date_locale)
123
125
  end
124
126
  end
125
127
 
@@ -141,6 +143,8 @@ module Paperless
141
143
  reader.pages.each do |page|
142
144
  break if @date = date_search(page.text,@date_locale)
143
145
  end
146
+ # Check for the date in the file name if not found in the content
147
+ @date = date_search(@file,@date_locale) if @date.nil?
144
148
  break
145
149
  end
146
150
  end
@@ -151,9 +155,21 @@ module Paperless
151
155
  end
152
156
  end
153
157
 
154
- def ocr
158
+ def ocr(dump = false)
159
+ reader = PDF::Reader.new(@file)
160
+ if reader.pages.length > 0
161
+ text = reader.pages[0].text
162
+ if !text.nil? && text != ''
163
+ puts text if dump
164
+ puts "This doc already seems to be OCR'd. Not processing through #{@ocr_engine}"
165
+ return
166
+ end
167
+ end
168
+
155
169
  puts "Running OCR on file with #{@ocr_engine}"
156
170
  ocr_engine = case @ocr_engine
171
+ when /^#{PDFPENPRO6_ENGINE}$/i then PaperlessOCR::PDFpenPro6.new
172
+ when /^#{PDFPEN6_ENGINE}$/i then PaperlessOCR::PDFpen6.new
157
173
  when /^#{PDFPENPRO_ENGINE}$/i then PaperlessOCR::PDFpenPro.new
158
174
  when /^#{PDFPEN_ENGINE}$/i then PaperlessOCR::PDFpen.new
159
175
  when /^#{ACROBAT_ENGINE}$/i then PaperlessOCR::Acrobat.new
@@ -163,6 +179,16 @@ module Paperless
163
179
 
164
180
  if ocr_engine
165
181
  ocr_engine.ocr({:file => @file})
182
+
183
+ if dump
184
+ puts "Dumping Page Content..."
185
+ # Print the contents of the doc
186
+ reader = PDF::Reader.new(@file)
187
+ reader.pages.each do |page|
188
+ puts page.text
189
+ end
190
+ end
191
+
166
192
  else
167
193
  puts "WARNING: No valid OCR engine was defined."
168
194
  end
@@ -180,15 +206,16 @@ module Paperless
180
206
  if service
181
207
  self.print
182
208
 
183
- destination = @destination.nil? ? @default_destination : @destination
184
- # :created => @date
209
+ destination = @destination.nil? ? @default_destination : @destination
210
+ title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
211
+
185
212
  service.create({
186
213
  :delete => options[:delete],
187
214
  :destination => destination,
188
215
  :text_ext => @text_ext,
189
216
  :file => @file,
190
217
  :date => @date,
191
- :title => @title,
218
+ :title => title,
192
219
  :tags => @tags
193
220
  })
194
221
  else
@@ -198,7 +225,7 @@ module Paperless
198
225
 
199
226
  def print
200
227
  service = @service.nil? ? @default_service : @service
201
- title = @title.nil? ? File.basename(@file) : @title
228
+ title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
202
229
 
203
230
  destination = @destination.nil? ? @default_destination : @destination
204
231
  if destination == PaperlessService::Finder::NO_MOVE && service == PaperlessService::FINDER.downcase
@@ -23,6 +23,7 @@ module PaperlessOCR
23
23
  sleep 1
24
24
  end
25
25
  doc.close(:saving => :yes)
26
+ sleep 3
26
27
  rescue
27
28
  puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
28
29
  end
@@ -0,0 +1,33 @@
1
+ require 'appscript'
2
+ include Appscript
3
+
4
+ module PaperlessOCR
5
+
6
+ PDFPEN6 = 'PDFpen 6.app'
7
+
8
+ class PDFpen6
9
+ def initialize
10
+ @engine = PaperlessOCR::PDFPEN6
11
+ @app = app(@engine)
12
+ @app.activate
13
+ end
14
+
15
+ def ocr(options)
16
+ begin
17
+ doc = @app.open MacTypes::Alias.path(options[:file])
18
+ doc.ocr
19
+
20
+ app("System Events").processes['PDFpen 6'].visible.set(false)
21
+
22
+ while doc.performing_ocr.get
23
+ sleep 1
24
+ end
25
+ doc.close(:saving => :yes)
26
+ sleep 3
27
+ rescue
28
+ puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
29
+ end
30
+ end
31
+
32
+ end
33
+ end
@@ -23,6 +23,7 @@ module PaperlessOCR
23
23
  sleep 1
24
24
  end
25
25
  doc.close(:saving => :yes)
26
+ sleep 3
26
27
  rescue
27
28
  puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
28
29
  end
@@ -0,0 +1,33 @@
1
+ require 'appscript'
2
+ include Appscript
3
+
4
+ module PaperlessOCR
5
+
6
+ PDFPENPRO6 = 'PDFpenPro 6.app'
7
+
8
+ class PDFpenPro6
9
+ def initialize
10
+ @engine = PaperlessOCR::PDFPENPRO6
11
+ @app = app(@engine)
12
+ @app.activate
13
+ end
14
+
15
+ def ocr(options)
16
+ begin
17
+ doc = @app.open MacTypes::Alias.path(options[:file])
18
+ doc.ocr
19
+
20
+ app("System Events").processes['PDFpenPro 6'].visible.set(false)
21
+
22
+ while doc.performing_ocr.get
23
+ sleep 1
24
+ end
25
+ doc.close(:saving => :yes)
26
+ sleep 3
27
+ rescue
28
+ puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
29
+ end
30
+ end
31
+
32
+ end
33
+ end
@@ -14,6 +14,7 @@ module Paperless
14
14
  @description = options['description']
15
15
  @tags = options['tags'].nil? ? Array.new : options['tags'].split
16
16
  @date_stamp = DateTime.now
17
+ @filename = ''
17
18
  @date_default_format = '%Y-%m-%d'
18
19
  @matched = false
19
20
  end
@@ -28,6 +29,9 @@ module Paperless
28
29
  def match(file,text)
29
30
  return @matched if @matched
30
31
 
32
+ file_ext = File.extname(file)
33
+ @filename = File.basename(file, file_ext)
34
+
31
35
  if @condition == Paperless::DATE_VAR
32
36
  @date = date
33
37
  @matched = true
@@ -57,6 +61,7 @@ module Paperless
57
61
 
58
62
  def sub_var(attribute, value)
59
63
  unless attribute.nil?
64
+ attribute.gsub!(/#{Paperless::FILENAME_VAR}/, @filename)
60
65
  attribute.gsub!(/#{Paperless::MATCH_VAR}/, value)
61
66
  attribute.gsub!(/#{Paperless::DATE_VAR}/, @date_stamp.strftime(@date_default_format))
62
67
 
@@ -24,17 +24,28 @@ module PaperlessService
24
24
  text_ext = options[:text_ext]
25
25
 
26
26
  create_options = { :created => date }
27
- file_ext = File.extname(from_file.gsub(/\./,''))
27
+ file_ext = File.extname(from_file)
28
+ file_dir = File.dirname(from_file)
29
+ file_name = File.basename(from_file)
28
30
 
29
- if text_ext.index file_ext
31
+ if file_name != title
32
+ new_filename = File.join(file_dir, title + file_ext)
33
+ File.rename(from_file, new_filename)
34
+ from_file = new_filename
35
+ end
36
+
37
+ if text_ext.index file_ext.gsub!(/\./,'')
38
+ puts "Adding text note into Evernote"
30
39
  create_options[:with_text] = File.open(from_file, "rb") {|io| io.read}
31
40
  else
32
41
  if file_ext.match(/md$/i)
33
42
  # If this is a mardown file insert it into Evernote as html
43
+ puts "Converting Markdown to HTML"
34
44
  text = File.open(from_file, "rb") {|io| io.read}
35
45
  create_options[:with_html] = Markdown.new(text).to_html
36
46
  else
37
47
  # Create a note from a file and let Evernote choose how to attach the file
48
+ puts "Adding note into Evernote"
38
49
  create_options[:from_file] = MacTypes::FileURL.path(from_file)
39
50
  end
40
51
  end
@@ -18,28 +18,34 @@ module PaperlessService
18
18
  destination = options[:destination]
19
19
  date = options[:date]
20
20
  from_file = options[:file]
21
- title = options[:title]
21
+ title = options[:title] || File.basename(from_file, File.extname(from_file))
22
22
  tags = options[:tags].collect!{|x| x="'#{x}'"} # Add quotes around each tag in case there is a space
23
23
 
24
24
  if destination == NO_MOVE || destination == File.dirname(from_file)
25
25
  new_filename = File.join(File.dirname(from_file), title + File.extname(from_file))
26
+ puts "New filename (1): #{new_filename}"
26
27
  else
27
28
  FileUtils.mkdir_p destination unless File.exists?(destination)
28
29
  new_filename = File.join(destination, title + File.extname(from_file))
30
+ puts "New filename (2): #{new_filename}"
29
31
  end
30
32
 
31
- FileUtils.cp from_file, new_filename, :force => true
33
+ puts "Copying File..."
34
+ FileUtils.cp from_file, new_filename, :verbose => true
32
35
 
33
36
  time = Time.new(date.year, date.month, date.day)
37
+ puts "Modifying the time of the file to be #{time.to_s}"
34
38
  FileUtils.touch new_filename, {:mtime => time}
35
39
 
36
40
  if tags.length > 0
37
41
  # Add open meta tags to file
42
+ puts "Tagging file"
38
43
  system("#{OPENMETA} -p '#{new_filename}' -a #{tags.join(' ')}")
39
44
  end
40
45
 
41
46
  if options[:delete] && from_file != new_filename
42
- FileUtils.rm from_file, :force => true
47
+ puts "Removing original file"
48
+ FileUtils.rm from_file, :force => true, :verbose => true
43
49
  end
44
50
  end
45
51
 
@@ -1,3 +1,3 @@
1
1
  module Paperless
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.0'
3
3
  end
data/lib/paperless.rb CHANGED
@@ -6,6 +6,8 @@ require 'paperless/services/evernote.rb'
6
6
  require 'paperless/services/devonthinkpro.rb'
7
7
  require 'paperless/services/finder.rb'
8
8
  require 'paperless/ocr_engines/acrobat.rb'
9
+ require 'paperless/ocr_engines/pdfpen6.rb'
10
+ require 'paperless/ocr_engines/pdfpenpro6.rb'
9
11
  require 'paperless/ocr_engines/pdfpen.rb'
10
12
  require 'paperless/ocr_engines/pdfpenpro.rb'
11
13
  require 'paperless/ocr_engines/devonthinkpro.rb'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paperless
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-07 00:00:00.000000000 Z
12
+ date: 2013-03-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -190,6 +190,8 @@ files:
190
190
  - lib/paperless/services/finder.rb
191
191
  - lib/paperless/services/devonthinkpro.rb
192
192
  - lib/paperless/ocr_engines/acrobat.rb
193
+ - lib/paperless/ocr_engines/pdfpen6.rb
194
+ - lib/paperless/ocr_engines/pdfpenpro6.rb
193
195
  - lib/paperless/ocr_engines/pdfpen.rb
194
196
  - lib/paperless/ocr_engines/pdfpenpro.rb
195
197
  - lib/paperless/ocr_engines/devonthinkpro.rb
@@ -222,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
224
  version: '0'
223
225
  segments:
224
226
  - 0
225
- hash: 763194951971081562
227
+ hash: -3000972400223417895
226
228
  requirements: []
227
229
  rubyforge_project:
228
230
  rubygems_version: 1.8.24