paperless 0.1.0 → 0.2.0

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
data/bin/paperless CHANGED
@@ -131,6 +131,9 @@ desc 'Create a new note from a file'
131
131
  arg_name 'file_name'
132
132
  command :create do |c|
133
133
 
134
+ c.desc 'Dump the OCR text for the document to the terminal.'
135
+ c.switch :dump, :negatable => false, :default_value => false
136
+
134
137
  c.desc 'Open a prompt to rename the file before its processed through the rules.'
135
138
  c.switch :prompt, :negatable => false, :default_value => false
136
139
 
@@ -147,14 +150,15 @@ command :create do |c|
147
150
 
148
151
  args.each do |file|
149
152
 
150
- if File.exists?(File.expand_path file)
153
+ if File.exists?(File.expand_path file) && !File.directory?(File.expand_path file)
151
154
  file = File.expand_path file
152
155
  else
153
- raise "File does not exist (#{file})"
156
+ raise "File does not exist or is a directory (#{file})"
154
157
  end
155
158
 
156
- if options[:prompt] && !global_options[:simulate]
157
- # Cannot rename the file in simulate mode since we dont want to change the file name
159
+ old_filename = nil
160
+
161
+ if options[:prompt]
158
162
  file_ext = File.extname(file)
159
163
  filename = File.basename(file, file_ext)
160
164
  user_input = `#{COCOADIALOG} standard-inputbox --title "Paperless Prompt" --informative-text "Rename your file before its processed with rules..." --text "#{filename}" --no-newline --string-output`.split("\n")
@@ -162,6 +166,10 @@ command :create do |c|
162
166
  if user_input[0].match(/ok/i) && user_input[1] != File.basename(file, file_ext)
163
167
  new_filename = File.join(File.dirname(file), user_input[1] + file_ext)
164
168
  puts "Renaming file based on input to #{new_filename}"
169
+ if global_options[:simulate]
170
+ # save the file name to rename it back later
171
+ old_filename = file
172
+ end
165
173
  File.rename(file, new_filename)
166
174
  file = new_filename
167
175
  end
@@ -182,7 +190,7 @@ command :create do |c|
182
190
  file_ext = File.extname(file).gsub(/\./,'')
183
191
  if file_ext == Paperless::PDF_EXT && options[:ocr]
184
192
  puts "OCRing file..."
185
- engine.ocr
193
+ engine.ocr(options[:dump])
186
194
  end
187
195
 
188
196
  if options[:proc_rules]
@@ -193,6 +201,11 @@ command :create do |c|
193
201
  if global_options[:simulate]
194
202
  puts "Simulating changes..."
195
203
  engine.print
204
+
205
+ if global_options[:simulate] && options[:prompt] && !old_filename.nil?
206
+ puts "Renaming file back to #{old_filename}"
207
+ File.rename(file, old_filename)
208
+ end
196
209
  else
197
210
  puts "Saving #{file} to #{engine.service}"
198
211
  engine.create({:delete => options[:delete]})
@@ -2,6 +2,7 @@ require 'date'
2
2
 
3
3
  module DateSearch
4
4
 
5
+ SEP_NOSPACE = '\.\/\-\,'
5
6
  SEP = '\. \/\-\,'
6
7
  DAY = '(\d{1,2})'
7
8
  MONTH = '([a-zA-Z]{3,15})'
@@ -49,7 +50,25 @@ module DateSearch
49
50
 
50
51
  def date_search(text,date_locale)
51
52
  date = nil
52
- if match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}#{END_DATE}/i)
53
+ if match = text.match(/#{DAY}[#{SEP_NOSPACE}]+#{DAY}[#{SEP_NOSPACE}]+#{YEAR}/)
54
+ # US: 12-29-2011
55
+ # Euro: 29-12-2011
56
+ year = valid_year(match[3])
57
+ day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
58
+ month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
59
+
60
+ if month && day && year
61
+ puts "Basing the date off the discovered string (3): #{match[0]}"
62
+ begin
63
+ date = DateTime.new(year,month,day)
64
+ rescue
65
+ puts "WARNING: Unable to create date object. #{$!}"
66
+ date = nil
67
+ end
68
+ else
69
+ puts "WARNING: The discovered date string does not validate: #{match[0]}"
70
+ end
71
+ elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}/i)
53
72
  # December 29, 2011
54
73
  if valid_day(match[2]) && valid_year(match[3])
55
74
  puts "Basing the date off the discovered string (1): #{match[0]}"
@@ -60,7 +79,7 @@ module DateSearch
60
79
  date = nil
61
80
  end
62
81
  end
63
- elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}#{END_DATE}/i)
82
+ elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
64
83
  # 29 December 2011
65
84
  if valid_day(match[1]) && valid_year(match[3])
66
85
  puts "Basing the date off the discovered string (2): #{match[0]}"
@@ -71,23 +90,16 @@ module DateSearch
71
90
  date = nil
72
91
  end
73
92
  end
74
- elsif match = text.match(/#{DAY}[#{SEP}]+#{DAY}[#{SEP}]+#{YEAR}#{END_DATE}/)
75
- # US: 12-29-2011
76
- # Euro: 29-12-2011
77
- year = valid_year(match[3])
78
- day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
79
- month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
80
-
81
- if month && day && year
82
- puts "Basing the date off the discovered string (3): #{match[0]}"
93
+ elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
94
+ # December 2011
95
+ if valid_year(match[2])
96
+ puts "Basing the date off the discovered string (2): #{match[0]}"
83
97
  begin
84
- date = DateTime.new(year,month,day)
98
+ date = DateTime.parse(repair_ocr_string(match[0]))
85
99
  rescue
86
100
  puts "WARNING: Unable to create date object. #{$!}"
87
101
  date = nil
88
102
  end
89
- else
90
- puts "WARNING: The discovered date string does not validate: #{match[0]}"
91
103
  end
92
104
  end
93
105
  date
@@ -8,6 +8,7 @@ module Paperless
8
8
  PDF_EXT = 'pdf'
9
9
  DATE_VAR = '<date>'
10
10
  MATCH_VAR = '<match>'
11
+ FILENAME_VAR = '<filename>'
11
12
  FILEDATE = 'filedate'
12
13
  TODAY = 'today'
13
14
 
@@ -15,6 +16,7 @@ module Paperless
15
16
 
16
17
  PDFPEN_ENGINE = 'pdfpen'
17
18
  PDFPENPRO_ENGINE = 'pdfpenpro'
19
+ PDFPENPRO6_ENGINE = 'pdfpenpro6'
18
20
  ACROBAT_ENGINE = 'acrobat'
19
21
  DEVONTHINKPRO_ENGINE = 'devonthinkpro'
20
22
  DEVONTHINKPRO_SERVICE = 'devonthinkpro'
@@ -25,7 +27,7 @@ module Paperless
25
27
 
26
28
  def initialize(options)
27
29
  @destination = nil
28
- @service = nil
30
+ @service = options[:default_service]
29
31
  @title = nil
30
32
  @date = DateTime.now
31
33
  @tags = Array.new()
@@ -119,7 +121,7 @@ module Paperless
119
121
  # First check if there are actually any date rules
120
122
  @rules.each do |rule|
121
123
  if rule.condition == Paperless::DATE_VAR
122
- @date = date_search(text,@date_locale)
124
+ @date = date_search(text,@date_locale) || date_search(@file,@date_locale)
123
125
  end
124
126
  end
125
127
 
@@ -141,6 +143,8 @@ module Paperless
141
143
  reader.pages.each do |page|
142
144
  break if @date = date_search(page.text,@date_locale)
143
145
  end
146
+ # Check for the date in the file name if not found in the content
147
+ @date = date_search(@file,@date_locale) if @date.nil?
144
148
  break
145
149
  end
146
150
  end
@@ -151,9 +155,21 @@ module Paperless
151
155
  end
152
156
  end
153
157
 
154
- def ocr
158
+ def ocr(dump = false)
159
+ reader = PDF::Reader.new(@file)
160
+ if reader.pages.length > 0
161
+ text = reader.pages[0].text
162
+ if !text.nil? && text != ''
163
+ puts text if dump
164
+ puts "This doc already seems to be OCR'd. Not processing through #{@ocr_engine}"
165
+ return
166
+ end
167
+ end
168
+
155
169
  puts "Running OCR on file with #{@ocr_engine}"
156
170
  ocr_engine = case @ocr_engine
171
+ when /^#{PDFPENPRO6_ENGINE}$/i then PaperlessOCR::PDFpenPro6.new
172
+ when /^#{PDFPEN6_ENGINE}$/i then PaperlessOCR::PDFpen6.new
157
173
  when /^#{PDFPENPRO_ENGINE}$/i then PaperlessOCR::PDFpenPro.new
158
174
  when /^#{PDFPEN_ENGINE}$/i then PaperlessOCR::PDFpen.new
159
175
  when /^#{ACROBAT_ENGINE}$/i then PaperlessOCR::Acrobat.new
@@ -163,6 +179,16 @@ module Paperless
163
179
 
164
180
  if ocr_engine
165
181
  ocr_engine.ocr({:file => @file})
182
+
183
+ if dump
184
+ puts "Dumping Page Content..."
185
+ # Print the contents of the doc
186
+ reader = PDF::Reader.new(@file)
187
+ reader.pages.each do |page|
188
+ puts page.text
189
+ end
190
+ end
191
+
166
192
  else
167
193
  puts "WARNING: No valid OCR engine was defined."
168
194
  end
@@ -180,15 +206,16 @@ module Paperless
180
206
  if service
181
207
  self.print
182
208
 
183
- destination = @destination.nil? ? @default_destination : @destination
184
- # :created => @date
209
+ destination = @destination.nil? ? @default_destination : @destination
210
+ title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
211
+
185
212
  service.create({
186
213
  :delete => options[:delete],
187
214
  :destination => destination,
188
215
  :text_ext => @text_ext,
189
216
  :file => @file,
190
217
  :date => @date,
191
- :title => @title,
218
+ :title => title,
192
219
  :tags => @tags
193
220
  })
194
221
  else
@@ -198,7 +225,7 @@ module Paperless
198
225
 
199
226
  def print
200
227
  service = @service.nil? ? @default_service : @service
201
- title = @title.nil? ? File.basename(@file) : @title
228
+ title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
202
229
 
203
230
  destination = @destination.nil? ? @default_destination : @destination
204
231
  if destination == PaperlessService::Finder::NO_MOVE && service == PaperlessService::FINDER.downcase
@@ -23,6 +23,7 @@ module PaperlessOCR
23
23
  sleep 1
24
24
  end
25
25
  doc.close(:saving => :yes)
26
+ sleep 3
26
27
  rescue
27
28
  puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
28
29
  end
@@ -0,0 +1,33 @@
1
+ require 'appscript'
2
+ include Appscript
3
+
4
+ module PaperlessOCR
5
+
6
+ PDFPEN6 = 'PDFpen 6.app'
7
+
8
+ class PDFpen6
9
+ def initialize
10
+ @engine = PaperlessOCR::PDFPEN6
11
+ @app = app(@engine)
12
+ @app.activate
13
+ end
14
+
15
+ def ocr(options)
16
+ begin
17
+ doc = @app.open MacTypes::Alias.path(options[:file])
18
+ doc.ocr
19
+
20
+ app("System Events").processes['PDFpen 6'].visible.set(false)
21
+
22
+ while doc.performing_ocr.get
23
+ sleep 1
24
+ end
25
+ doc.close(:saving => :yes)
26
+ sleep 3
27
+ rescue
28
+ puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
29
+ end
30
+ end
31
+
32
+ end
33
+ end
@@ -23,6 +23,7 @@ module PaperlessOCR
23
23
  sleep 1
24
24
  end
25
25
  doc.close(:saving => :yes)
26
+ sleep 3
26
27
  rescue
27
28
  puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
28
29
  end
@@ -0,0 +1,33 @@
1
+ require 'appscript'
2
+ include Appscript
3
+
4
+ module PaperlessOCR
5
+
6
+ PDFPENPRO6 = 'PDFpenPro 6.app'
7
+
8
+ class PDFpenPro6
9
+ def initialize
10
+ @engine = PaperlessOCR::PDFPENPRO6
11
+ @app = app(@engine)
12
+ @app.activate
13
+ end
14
+
15
+ def ocr(options)
16
+ begin
17
+ doc = @app.open MacTypes::Alias.path(options[:file])
18
+ doc.ocr
19
+
20
+ app("System Events").processes['PDFpenPro 6'].visible.set(false)
21
+
22
+ while doc.performing_ocr.get
23
+ sleep 1
24
+ end
25
+ doc.close(:saving => :yes)
26
+ sleep 3
27
+ rescue
28
+ puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
29
+ end
30
+ end
31
+
32
+ end
33
+ end
@@ -14,6 +14,7 @@ module Paperless
14
14
  @description = options['description']
15
15
  @tags = options['tags'].nil? ? Array.new : options['tags'].split
16
16
  @date_stamp = DateTime.now
17
+ @filename = ''
17
18
  @date_default_format = '%Y-%m-%d'
18
19
  @matched = false
19
20
  end
@@ -28,6 +29,9 @@ module Paperless
28
29
  def match(file,text)
29
30
  return @matched if @matched
30
31
 
32
+ file_ext = File.extname(file)
33
+ @filename = File.basename(file, file_ext)
34
+
31
35
  if @condition == Paperless::DATE_VAR
32
36
  @date = date
33
37
  @matched = true
@@ -57,6 +61,7 @@ module Paperless
57
61
 
58
62
  def sub_var(attribute, value)
59
63
  unless attribute.nil?
64
+ attribute.gsub!(/#{Paperless::FILENAME_VAR}/, @filename)
60
65
  attribute.gsub!(/#{Paperless::MATCH_VAR}/, value)
61
66
  attribute.gsub!(/#{Paperless::DATE_VAR}/, @date_stamp.strftime(@date_default_format))
62
67
 
@@ -24,17 +24,28 @@ module PaperlessService
24
24
  text_ext = options[:text_ext]
25
25
 
26
26
  create_options = { :created => date }
27
- file_ext = File.extname(from_file.gsub(/\./,''))
27
+ file_ext = File.extname(from_file)
28
+ file_dir = File.dirname(from_file)
29
+ file_name = File.basename(from_file)
28
30
 
29
- if text_ext.index file_ext
31
+ if file_name != title
32
+ new_filename = File.join(file_dir, title + file_ext)
33
+ File.rename(from_file, new_filename)
34
+ from_file = new_filename
35
+ end
36
+
37
+ if text_ext.index file_ext.gsub!(/\./,'')
38
+ puts "Adding text note into Evernote"
30
39
  create_options[:with_text] = File.open(from_file, "rb") {|io| io.read}
31
40
  else
32
41
  if file_ext.match(/md$/i)
33
42
  # If this is a mardown file insert it into Evernote as html
43
+ puts "Converting Markdown to HTML"
34
44
  text = File.open(from_file, "rb") {|io| io.read}
35
45
  create_options[:with_html] = Markdown.new(text).to_html
36
46
  else
37
47
  # Create a note from a file and let Evernote choose how to attach the file
48
+ puts "Adding note into Evernote"
38
49
  create_options[:from_file] = MacTypes::FileURL.path(from_file)
39
50
  end
40
51
  end
@@ -18,28 +18,34 @@ module PaperlessService
18
18
  destination = options[:destination]
19
19
  date = options[:date]
20
20
  from_file = options[:file]
21
- title = options[:title]
21
+ title = options[:title] || File.basename(from_file, File.extname(from_file))
22
22
  tags = options[:tags].collect!{|x| x="'#{x}'"} # Add quotes around each tag in case there is a space
23
23
 
24
24
  if destination == NO_MOVE || destination == File.dirname(from_file)
25
25
  new_filename = File.join(File.dirname(from_file), title + File.extname(from_file))
26
+ puts "New filename (1): #{new_filename}"
26
27
  else
27
28
  FileUtils.mkdir_p destination unless File.exists?(destination)
28
29
  new_filename = File.join(destination, title + File.extname(from_file))
30
+ puts "New filename (2): #{new_filename}"
29
31
  end
30
32
 
31
- FileUtils.cp from_file, new_filename, :force => true
33
+ puts "Copying File..."
34
+ FileUtils.cp from_file, new_filename, :verbose => true
32
35
 
33
36
  time = Time.new(date.year, date.month, date.day)
37
+ puts "Modifying the time of the file to be #{time.to_s}"
34
38
  FileUtils.touch new_filename, {:mtime => time}
35
39
 
36
40
  if tags.length > 0
37
41
  # Add open meta tags to file
42
+ puts "Tagging file"
38
43
  system("#{OPENMETA} -p '#{new_filename}' -a #{tags.join(' ')}")
39
44
  end
40
45
 
41
46
  if options[:delete] && from_file != new_filename
42
- FileUtils.rm from_file, :force => true
47
+ puts "Removing original file"
48
+ FileUtils.rm from_file, :force => true, :verbose => true
43
49
  end
44
50
  end
45
51
 
@@ -1,3 +1,3 @@
1
1
  module Paperless
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.0'
3
3
  end
data/lib/paperless.rb CHANGED
@@ -6,6 +6,8 @@ require 'paperless/services/evernote.rb'
6
6
  require 'paperless/services/devonthinkpro.rb'
7
7
  require 'paperless/services/finder.rb'
8
8
  require 'paperless/ocr_engines/acrobat.rb'
9
+ require 'paperless/ocr_engines/pdfpen6.rb'
10
+ require 'paperless/ocr_engines/pdfpenpro6.rb'
9
11
  require 'paperless/ocr_engines/pdfpen.rb'
10
12
  require 'paperless/ocr_engines/pdfpenpro.rb'
11
13
  require 'paperless/ocr_engines/devonthinkpro.rb'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: paperless
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-03-07 00:00:00.000000000 Z
12
+ date: 2013-03-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
@@ -190,6 +190,8 @@ files:
190
190
  - lib/paperless/services/finder.rb
191
191
  - lib/paperless/services/devonthinkpro.rb
192
192
  - lib/paperless/ocr_engines/acrobat.rb
193
+ - lib/paperless/ocr_engines/pdfpen6.rb
194
+ - lib/paperless/ocr_engines/pdfpenpro6.rb
193
195
  - lib/paperless/ocr_engines/pdfpen.rb
194
196
  - lib/paperless/ocr_engines/pdfpenpro.rb
195
197
  - lib/paperless/ocr_engines/devonthinkpro.rb
@@ -222,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
222
224
  version: '0'
223
225
  segments:
224
226
  - 0
225
- hash: 763194951971081562
227
+ hash: -3000972400223417895
226
228
  requirements: []
227
229
  rubyforge_project:
228
230
  rubygems_version: 1.8.24