paperless 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/paperless +18 -5
- data/lib/paperless/date_search.rb +26 -14
- data/lib/paperless/engine.rb +34 -7
- data/lib/paperless/ocr_engines/pdfpen.rb +1 -0
- data/lib/paperless/ocr_engines/pdfpen6.rb +33 -0
- data/lib/paperless/ocr_engines/pdfpenpro.rb +1 -0
- data/lib/paperless/ocr_engines/pdfpenpro6.rb +33 -0
- data/lib/paperless/rule.rb +5 -0
- data/lib/paperless/services/evernote.rb +13 -2
- data/lib/paperless/services/finder.rb +9 -3
- data/lib/paperless/version.rb +1 -1
- data/lib/paperless.rb +2 -0
- metadata +5 -3
data/bin/paperless
CHANGED
@@ -131,6 +131,9 @@ desc 'Create a new note from a file'
|
|
131
131
|
arg_name 'file_name'
|
132
132
|
command :create do |c|
|
133
133
|
|
134
|
+
c.desc 'Dump the OCR text for the document to the terminal.'
|
135
|
+
c.switch :dump, :negatable => false, :default_value => false
|
136
|
+
|
134
137
|
c.desc 'Open a prompt to rename the file before its processed through the rules.'
|
135
138
|
c.switch :prompt, :negatable => false, :default_value => false
|
136
139
|
|
@@ -147,14 +150,15 @@ command :create do |c|
|
|
147
150
|
|
148
151
|
args.each do |file|
|
149
152
|
|
150
|
-
if File.exists?(File.expand_path file)
|
153
|
+
if File.exists?(File.expand_path file) && !File.directory?(File.expand_path file)
|
151
154
|
file = File.expand_path file
|
152
155
|
else
|
153
|
-
raise "File does not exist (#{file})"
|
156
|
+
raise "File does not exist or is a directory (#{file})"
|
154
157
|
end
|
155
158
|
|
156
|
-
|
157
|
-
|
159
|
+
old_filename = nil
|
160
|
+
|
161
|
+
if options[:prompt]
|
158
162
|
file_ext = File.extname(file)
|
159
163
|
filename = File.basename(file, file_ext)
|
160
164
|
user_input = `#{COCOADIALOG} standard-inputbox --title "Paperless Prompt" --informative-text "Rename your file before its processed with rules..." --text "#{filename}" --no-newline --string-output`.split("\n")
|
@@ -162,6 +166,10 @@ command :create do |c|
|
|
162
166
|
if user_input[0].match(/ok/i) && user_input[1] != File.basename(file, file_ext)
|
163
167
|
new_filename = File.join(File.dirname(file), user_input[1] + file_ext)
|
164
168
|
puts "Renaming file based on input to #{new_filename}"
|
169
|
+
if global_options[:simulate]
|
170
|
+
# save the file name to rename it back later
|
171
|
+
old_filename = file
|
172
|
+
end
|
165
173
|
File.rename(file, new_filename)
|
166
174
|
file = new_filename
|
167
175
|
end
|
@@ -182,7 +190,7 @@ command :create do |c|
|
|
182
190
|
file_ext = File.extname(file).gsub(/\./,'')
|
183
191
|
if file_ext == Paperless::PDF_EXT && options[:ocr]
|
184
192
|
puts "OCRing file..."
|
185
|
-
engine.ocr
|
193
|
+
engine.ocr(options[:dump])
|
186
194
|
end
|
187
195
|
|
188
196
|
if options[:proc_rules]
|
@@ -193,6 +201,11 @@ command :create do |c|
|
|
193
201
|
if global_options[:simulate]
|
194
202
|
puts "Simulating changes..."
|
195
203
|
engine.print
|
204
|
+
|
205
|
+
if global_options[:simulate] && options[:prompt] && !old_filename.nil?
|
206
|
+
puts "Renaming file back to #{old_filename}"
|
207
|
+
File.rename(file, old_filename)
|
208
|
+
end
|
196
209
|
else
|
197
210
|
puts "Saving #{file} to #{engine.service}"
|
198
211
|
engine.create({:delete => options[:delete]})
|
@@ -2,6 +2,7 @@ require 'date'
|
|
2
2
|
|
3
3
|
module DateSearch
|
4
4
|
|
5
|
+
SEP_NOSPACE = '\.\/\-\,'
|
5
6
|
SEP = '\. \/\-\,'
|
6
7
|
DAY = '(\d{1,2})'
|
7
8
|
MONTH = '([a-zA-Z]{3,15})'
|
@@ -49,7 +50,25 @@ module DateSearch
|
|
49
50
|
|
50
51
|
def date_search(text,date_locale)
|
51
52
|
date = nil
|
52
|
-
if match = text.match(/#{
|
53
|
+
if match = text.match(/#{DAY}[#{SEP_NOSPACE}]+#{DAY}[#{SEP_NOSPACE}]+#{YEAR}/)
|
54
|
+
# US: 12-29-2011
|
55
|
+
# Euro: 29-12-2011
|
56
|
+
year = valid_year(match[3])
|
57
|
+
day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
|
58
|
+
month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
|
59
|
+
|
60
|
+
if month && day && year
|
61
|
+
puts "Basing the date off the discovered string (3): #{match[0]}"
|
62
|
+
begin
|
63
|
+
date = DateTime.new(year,month,day)
|
64
|
+
rescue
|
65
|
+
puts "WARNING: Unable to create date object. #{$!}"
|
66
|
+
date = nil
|
67
|
+
end
|
68
|
+
else
|
69
|
+
puts "WARNING: The discovered date string does not validate: #{match[0]}"
|
70
|
+
end
|
71
|
+
elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}/i)
|
53
72
|
# December 29, 2011
|
54
73
|
if valid_day(match[2]) && valid_year(match[3])
|
55
74
|
puts "Basing the date off the discovered string (1): #{match[0]}"
|
@@ -60,7 +79,7 @@ module DateSearch
|
|
60
79
|
date = nil
|
61
80
|
end
|
62
81
|
end
|
63
|
-
elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}
|
82
|
+
elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
|
64
83
|
# 29 December 2011
|
65
84
|
if valid_day(match[1]) && valid_year(match[3])
|
66
85
|
puts "Basing the date off the discovered string (2): #{match[0]}"
|
@@ -71,23 +90,16 @@ module DateSearch
|
|
71
90
|
date = nil
|
72
91
|
end
|
73
92
|
end
|
74
|
-
elsif match = text.match(/#{
|
75
|
-
#
|
76
|
-
|
77
|
-
|
78
|
-
day = date_locale == 'us' ? valid_day(match[2]) : valid_day(match[1])
|
79
|
-
month = date_locale == 'us' ? valid_month(match[1]) : valid_month(match[2])
|
80
|
-
|
81
|
-
if month && day && year
|
82
|
-
puts "Basing the date off the discovered string (3): #{match[0]}"
|
93
|
+
elsif match = text.match(/#{MONTH}[#{SEP}]{0,3}#{YEAR}/i)
|
94
|
+
# December 2011
|
95
|
+
if valid_year(match[2])
|
96
|
+
puts "Basing the date off the discovered string (2): #{match[0]}"
|
83
97
|
begin
|
84
|
-
date = DateTime.
|
98
|
+
date = DateTime.parse(repair_ocr_string(match[0]))
|
85
99
|
rescue
|
86
100
|
puts "WARNING: Unable to create date object. #{$!}"
|
87
101
|
date = nil
|
88
102
|
end
|
89
|
-
else
|
90
|
-
puts "WARNING: The discovered date string does not validate: #{match[0]}"
|
91
103
|
end
|
92
104
|
end
|
93
105
|
date
|
data/lib/paperless/engine.rb
CHANGED
@@ -8,6 +8,7 @@ module Paperless
|
|
8
8
|
PDF_EXT = 'pdf'
|
9
9
|
DATE_VAR = '<date>'
|
10
10
|
MATCH_VAR = '<match>'
|
11
|
+
FILENAME_VAR = '<filename>'
|
11
12
|
FILEDATE = 'filedate'
|
12
13
|
TODAY = 'today'
|
13
14
|
|
@@ -15,6 +16,7 @@ module Paperless
|
|
15
16
|
|
16
17
|
PDFPEN_ENGINE = 'pdfpen'
|
17
18
|
PDFPENPRO_ENGINE = 'pdfpenpro'
|
19
|
+
PDFPENPRO6_ENGINE = 'pdfpenpro6'
|
18
20
|
ACROBAT_ENGINE = 'acrobat'
|
19
21
|
DEVONTHINKPRO_ENGINE = 'devonthinkpro'
|
20
22
|
DEVONTHINKPRO_SERVICE = 'devonthinkpro'
|
@@ -25,7 +27,7 @@ module Paperless
|
|
25
27
|
|
26
28
|
def initialize(options)
|
27
29
|
@destination = nil
|
28
|
-
@service =
|
30
|
+
@service = options[:default_service]
|
29
31
|
@title = nil
|
30
32
|
@date = DateTime.now
|
31
33
|
@tags = Array.new()
|
@@ -119,7 +121,7 @@ module Paperless
|
|
119
121
|
# First check if there are actually any date rules
|
120
122
|
@rules.each do |rule|
|
121
123
|
if rule.condition == Paperless::DATE_VAR
|
122
|
-
@date = date_search(text,@date_locale)
|
124
|
+
@date = date_search(text,@date_locale) || date_search(@file,@date_locale)
|
123
125
|
end
|
124
126
|
end
|
125
127
|
|
@@ -141,6 +143,8 @@ module Paperless
|
|
141
143
|
reader.pages.each do |page|
|
142
144
|
break if @date = date_search(page.text,@date_locale)
|
143
145
|
end
|
146
|
+
# Check for the date in the file name if not found in the content
|
147
|
+
@date = date_search(@file,@date_locale) if @date.nil?
|
144
148
|
break
|
145
149
|
end
|
146
150
|
end
|
@@ -151,9 +155,21 @@ module Paperless
|
|
151
155
|
end
|
152
156
|
end
|
153
157
|
|
154
|
-
def ocr
|
158
|
+
def ocr(dump = false)
|
159
|
+
reader = PDF::Reader.new(@file)
|
160
|
+
if reader.pages.length > 0
|
161
|
+
text = reader.pages[0].text
|
162
|
+
if !text.nil? && text != ''
|
163
|
+
puts text if dump
|
164
|
+
puts "This doc already seems to be OCR'd. Not processing through #{@ocr_engine}"
|
165
|
+
return
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
155
169
|
puts "Running OCR on file with #{@ocr_engine}"
|
156
170
|
ocr_engine = case @ocr_engine
|
171
|
+
when /^#{PDFPENPRO6_ENGINE}$/i then PaperlessOCR::PDFpenPro6.new
|
172
|
+
when /^#{PDFPEN6_ENGINE}$/i then PaperlessOCR::PDFpen6.new
|
157
173
|
when /^#{PDFPENPRO_ENGINE}$/i then PaperlessOCR::PDFpenPro.new
|
158
174
|
when /^#{PDFPEN_ENGINE}$/i then PaperlessOCR::PDFpen.new
|
159
175
|
when /^#{ACROBAT_ENGINE}$/i then PaperlessOCR::Acrobat.new
|
@@ -163,6 +179,16 @@ module Paperless
|
|
163
179
|
|
164
180
|
if ocr_engine
|
165
181
|
ocr_engine.ocr({:file => @file})
|
182
|
+
|
183
|
+
if dump
|
184
|
+
puts "Dumping Page Content..."
|
185
|
+
# Print the contents of the doc
|
186
|
+
reader = PDF::Reader.new(@file)
|
187
|
+
reader.pages.each do |page|
|
188
|
+
puts page.text
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
166
192
|
else
|
167
193
|
puts "WARNING: No valid OCR engine was defined."
|
168
194
|
end
|
@@ -180,15 +206,16 @@ module Paperless
|
|
180
206
|
if service
|
181
207
|
self.print
|
182
208
|
|
183
|
-
destination = @destination.nil? ? @default_destination
|
184
|
-
|
209
|
+
destination = @destination.nil? ? @default_destination : @destination
|
210
|
+
title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
|
211
|
+
|
185
212
|
service.create({
|
186
213
|
:delete => options[:delete],
|
187
214
|
:destination => destination,
|
188
215
|
:text_ext => @text_ext,
|
189
216
|
:file => @file,
|
190
217
|
:date => @date,
|
191
|
-
:title =>
|
218
|
+
:title => title,
|
192
219
|
:tags => @tags
|
193
220
|
})
|
194
221
|
else
|
@@ -198,7 +225,7 @@ module Paperless
|
|
198
225
|
|
199
226
|
def print
|
200
227
|
service = @service.nil? ? @default_service : @service
|
201
|
-
title = @title.nil? ? File.basename(@file) : @title
|
228
|
+
title = @title.nil? ? File.basename(@file, File.extname(@file)) : @title
|
202
229
|
|
203
230
|
destination = @destination.nil? ? @default_destination : @destination
|
204
231
|
if destination == PaperlessService::Finder::NO_MOVE && service == PaperlessService::FINDER.downcase
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'appscript'
|
2
|
+
include Appscript
|
3
|
+
|
4
|
+
module PaperlessOCR
|
5
|
+
|
6
|
+
PDFPEN6 = 'PDFpen 6.app'
|
7
|
+
|
8
|
+
class PDFpen6
|
9
|
+
def initialize
|
10
|
+
@engine = PaperlessOCR::PDFPEN6
|
11
|
+
@app = app(@engine)
|
12
|
+
@app.activate
|
13
|
+
end
|
14
|
+
|
15
|
+
def ocr(options)
|
16
|
+
begin
|
17
|
+
doc = @app.open MacTypes::Alias.path(options[:file])
|
18
|
+
doc.ocr
|
19
|
+
|
20
|
+
app("System Events").processes['PDFpen 6'].visible.set(false)
|
21
|
+
|
22
|
+
while doc.performing_ocr.get
|
23
|
+
sleep 1
|
24
|
+
end
|
25
|
+
doc.close(:saving => :yes)
|
26
|
+
sleep 3
|
27
|
+
rescue
|
28
|
+
puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'appscript'
|
2
|
+
include Appscript
|
3
|
+
|
4
|
+
module PaperlessOCR
|
5
|
+
|
6
|
+
PDFPENPRO6 = 'PDFpenPro 6.app'
|
7
|
+
|
8
|
+
class PDFpenPro6
|
9
|
+
def initialize
|
10
|
+
@engine = PaperlessOCR::PDFPENPRO6
|
11
|
+
@app = app(@engine)
|
12
|
+
@app.activate
|
13
|
+
end
|
14
|
+
|
15
|
+
def ocr(options)
|
16
|
+
begin
|
17
|
+
doc = @app.open MacTypes::Alias.path(options[:file])
|
18
|
+
doc.ocr
|
19
|
+
|
20
|
+
app("System Events").processes['PDFpenPro 6'].visible.set(false)
|
21
|
+
|
22
|
+
while doc.performing_ocr.get
|
23
|
+
sleep 1
|
24
|
+
end
|
25
|
+
doc.close(:saving => :yes)
|
26
|
+
sleep 3
|
27
|
+
rescue
|
28
|
+
puts "WARNING: There was an error OCRing the document with #{@engine}: #{$!}"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
end
|
data/lib/paperless/rule.rb
CHANGED
@@ -14,6 +14,7 @@ module Paperless
|
|
14
14
|
@description = options['description']
|
15
15
|
@tags = options['tags'].nil? ? Array.new : options['tags'].split
|
16
16
|
@date_stamp = DateTime.now
|
17
|
+
@filename = ''
|
17
18
|
@date_default_format = '%Y-%m-%d'
|
18
19
|
@matched = false
|
19
20
|
end
|
@@ -28,6 +29,9 @@ module Paperless
|
|
28
29
|
def match(file,text)
|
29
30
|
return @matched if @matched
|
30
31
|
|
32
|
+
file_ext = File.extname(file)
|
33
|
+
@filename = File.basename(file, file_ext)
|
34
|
+
|
31
35
|
if @condition == Paperless::DATE_VAR
|
32
36
|
@date = date
|
33
37
|
@matched = true
|
@@ -57,6 +61,7 @@ module Paperless
|
|
57
61
|
|
58
62
|
def sub_var(attribute, value)
|
59
63
|
unless attribute.nil?
|
64
|
+
attribute.gsub!(/#{Paperless::FILENAME_VAR}/, @filename)
|
60
65
|
attribute.gsub!(/#{Paperless::MATCH_VAR}/, value)
|
61
66
|
attribute.gsub!(/#{Paperless::DATE_VAR}/, @date_stamp.strftime(@date_default_format))
|
62
67
|
|
@@ -24,17 +24,28 @@ module PaperlessService
|
|
24
24
|
text_ext = options[:text_ext]
|
25
25
|
|
26
26
|
create_options = { :created => date }
|
27
|
-
file_ext = File.extname(from_file
|
27
|
+
file_ext = File.extname(from_file)
|
28
|
+
file_dir = File.dirname(from_file)
|
29
|
+
file_name = File.basename(from_file)
|
28
30
|
|
29
|
-
if
|
31
|
+
if file_name != title
|
32
|
+
new_filename = File.join(file_dir, title + file_ext)
|
33
|
+
File.rename(from_file, new_filename)
|
34
|
+
from_file = new_filename
|
35
|
+
end
|
36
|
+
|
37
|
+
if text_ext.index file_ext.gsub!(/\./,'')
|
38
|
+
puts "Adding text note into Evernote"
|
30
39
|
create_options[:with_text] = File.open(from_file, "rb") {|io| io.read}
|
31
40
|
else
|
32
41
|
if file_ext.match(/md$/i)
|
33
42
|
# If this is a mardown file insert it into Evernote as html
|
43
|
+
puts "Converting Markdown to HTML"
|
34
44
|
text = File.open(from_file, "rb") {|io| io.read}
|
35
45
|
create_options[:with_html] = Markdown.new(text).to_html
|
36
46
|
else
|
37
47
|
# Create a note from a file and let Evernote choose how to attach the file
|
48
|
+
puts "Adding note into Evernote"
|
38
49
|
create_options[:from_file] = MacTypes::FileURL.path(from_file)
|
39
50
|
end
|
40
51
|
end
|
@@ -18,28 +18,34 @@ module PaperlessService
|
|
18
18
|
destination = options[:destination]
|
19
19
|
date = options[:date]
|
20
20
|
from_file = options[:file]
|
21
|
-
title = options[:title]
|
21
|
+
title = options[:title] || File.basename(from_file, File.extname(from_file))
|
22
22
|
tags = options[:tags].collect!{|x| x="'#{x}'"} # Add quotes around each tag in case there is a space
|
23
23
|
|
24
24
|
if destination == NO_MOVE || destination == File.dirname(from_file)
|
25
25
|
new_filename = File.join(File.dirname(from_file), title + File.extname(from_file))
|
26
|
+
puts "New filename (1): #{new_filename}"
|
26
27
|
else
|
27
28
|
FileUtils.mkdir_p destination unless File.exists?(destination)
|
28
29
|
new_filename = File.join(destination, title + File.extname(from_file))
|
30
|
+
puts "New filename (2): #{new_filename}"
|
29
31
|
end
|
30
32
|
|
31
|
-
|
33
|
+
puts "Copying File..."
|
34
|
+
FileUtils.cp from_file, new_filename, :verbose => true
|
32
35
|
|
33
36
|
time = Time.new(date.year, date.month, date.day)
|
37
|
+
puts "Modifying the time of the file to be #{time.to_s}"
|
34
38
|
FileUtils.touch new_filename, {:mtime => time}
|
35
39
|
|
36
40
|
if tags.length > 0
|
37
41
|
# Add open meta tags to file
|
42
|
+
puts "Tagging file"
|
38
43
|
system("#{OPENMETA} -p '#{new_filename}' -a #{tags.join(' ')}")
|
39
44
|
end
|
40
45
|
|
41
46
|
if options[:delete] && from_file != new_filename
|
42
|
-
|
47
|
+
puts "Removing original file"
|
48
|
+
FileUtils.rm from_file, :force => true, :verbose => true
|
43
49
|
end
|
44
50
|
end
|
45
51
|
|
data/lib/paperless/version.rb
CHANGED
data/lib/paperless.rb
CHANGED
@@ -6,6 +6,8 @@ require 'paperless/services/evernote.rb'
|
|
6
6
|
require 'paperless/services/devonthinkpro.rb'
|
7
7
|
require 'paperless/services/finder.rb'
|
8
8
|
require 'paperless/ocr_engines/acrobat.rb'
|
9
|
+
require 'paperless/ocr_engines/pdfpen6.rb'
|
10
|
+
require 'paperless/ocr_engines/pdfpenpro6.rb'
|
9
11
|
require 'paperless/ocr_engines/pdfpen.rb'
|
10
12
|
require 'paperless/ocr_engines/pdfpenpro.rb'
|
11
13
|
require 'paperless/ocr_engines/devonthinkpro.rb'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: paperless
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-03-
|
12
|
+
date: 2013-03-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
@@ -190,6 +190,8 @@ files:
|
|
190
190
|
- lib/paperless/services/finder.rb
|
191
191
|
- lib/paperless/services/devonthinkpro.rb
|
192
192
|
- lib/paperless/ocr_engines/acrobat.rb
|
193
|
+
- lib/paperless/ocr_engines/pdfpen6.rb
|
194
|
+
- lib/paperless/ocr_engines/pdfpenpro6.rb
|
193
195
|
- lib/paperless/ocr_engines/pdfpen.rb
|
194
196
|
- lib/paperless/ocr_engines/pdfpenpro.rb
|
195
197
|
- lib/paperless/ocr_engines/devonthinkpro.rb
|
@@ -222,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
222
224
|
version: '0'
|
223
225
|
segments:
|
224
226
|
- 0
|
225
|
-
hash:
|
227
|
+
hash: -3000972400223417895
|
226
228
|
requirements: []
|
227
229
|
rubyforge_project:
|
228
230
|
rubygems_version: 1.8.24
|