paperless 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/README.rdoc +30 -0
  2. data/bin/CocoaDialog.app/Contents/Info.plist +28 -0
  3. data/bin/CocoaDialog.app/Contents/MacOS/CocoaDialog +0 -0
  4. data/bin/CocoaDialog.app/Contents/PkgInfo +1 -0
  5. data/bin/CocoaDialog.app/Contents/Resources/COPYING +281 -0
  6. data/bin/CocoaDialog.app/Contents/Resources/Changelog +73 -0
  7. data/bin/CocoaDialog.app/Contents/Resources/Info.plist +28 -0
  8. data/bin/CocoaDialog.app/Contents/Resources/InfoPlist.strings +0 -0
  9. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/classes.nib +51 -0
  10. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/info.nib +16 -0
  11. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/keyedobjects.nib +0 -0
  12. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/classes.nib +7 -0
  13. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/info.nib +21 -0
  14. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/info.nib.orig +21 -0
  15. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/objects.nib +0 -0
  16. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/objects.nib.orig +0 -0
  17. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/classes.nib +27 -0
  18. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/info.nib +16 -0
  19. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/keyedobjects.nib +0 -0
  20. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/classes.nib +55 -0
  21. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/info.nib +16 -0
  22. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/keyedobjects.nib +0 -0
  23. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/classes.nib +13 -0
  24. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/info.nib +16 -0
  25. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/keyedobjects.nib +0 -0
  26. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/classes.nib +40 -0
  27. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/info.nib +16 -0
  28. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/keyedobjects.nib +0 -0
  29. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/classes.nib +26 -0
  30. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/info.nib +16 -0
  31. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/keyedobjects.nib +0 -0
  32. data/bin/CocoaDialog.app/Contents/Resources/atom.icns +0 -0
  33. data/bin/CocoaDialog.app/Contents/Resources/cocoadialog.icns +0 -0
  34. data/bin/CocoaDialog.app/Contents/Resources/computer.icns +0 -0
  35. data/bin/CocoaDialog.app/Contents/Resources/document.icns +0 -0
  36. data/bin/CocoaDialog.app/Contents/Resources/find.icns +0 -0
  37. data/bin/CocoaDialog.app/Contents/Resources/finder.icns +0 -0
  38. data/bin/CocoaDialog.app/Contents/Resources/firewire.icns +0 -0
  39. data/bin/CocoaDialog.app/Contents/Resources/folder.icns +0 -0
  40. data/bin/CocoaDialog.app/Contents/Resources/gear.icns +0 -0
  41. data/bin/CocoaDialog.app/Contents/Resources/globe.icns +0 -0
  42. data/bin/CocoaDialog.app/Contents/Resources/hazard.icns +0 -0
  43. data/bin/CocoaDialog.app/Contents/Resources/heart.icns +0 -0
  44. data/bin/CocoaDialog.app/Contents/Resources/hourglass.icns +0 -0
  45. data/bin/CocoaDialog.app/Contents/Resources/info.icns +0 -0
  46. data/bin/CocoaDialog.app/Contents/Resources/ipod.icns +0 -0
  47. data/bin/CocoaDialog.app/Contents/Resources/person.icns +0 -0
  48. data/bin/CocoaDialog.app/Contents/Resources/sound.icns +0 -0
  49. data/bin/CocoaDialog.app/Contents/Resources/x.icns +0 -0
  50. data/bin/openmeta +0 -0
  51. data/bin/paperless +234 -0
  52. data/lib/paperless/date_search.rb +96 -0
  53. data/lib/paperless/engine.rb +220 -0
  54. data/lib/paperless/ocr_engines/acrobat.rb +51 -0
  55. data/lib/paperless/ocr_engines/devonthinkpro.rb +26 -0
  56. data/lib/paperless/ocr_engines/pdfpen.rb +32 -0
  57. data/lib/paperless/ocr_engines/pdfpenpro.rb +32 -0
  58. data/lib/paperless/rule.rb +82 -0
  59. data/lib/paperless/services/devonthinkpro.rb +54 -0
  60. data/lib/paperless/services/evernote.rb +55 -0
  61. data/lib/paperless/services/finder.rb +47 -0
  62. data/lib/paperless/version.rb +3 -0
  63. data/lib/paperless.rb +14 -0
  64. data/paperless.rdoc +5 -0
  65. metadata +233 -0
@@ -0,0 +1,55 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
6
+ CLASS = CDInputboxControl;
7
+ LANGUAGE = ObjC;
8
+ OUTLETS = {
9
+ button1 = NSButton;
10
+ button2 = NSButton;
11
+ button3 = NSButton;
12
+ label = NSTextField;
13
+ panel = NSPanel;
14
+ textField = NSTextField;
15
+ };
16
+ SUPERCLASS = CDControl;
17
+ },
18
+ {
19
+ ACTIONS = {selectionChanged = id; };
20
+ CLASS = CDPopUpButtonControl;
21
+ LANGUAGE = ObjC;
22
+ OUTLETS = {popup = NSPopUpButton; };
23
+ SUPERCLASS = CDThreeButtonControl;
24
+ },
25
+ {
26
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
27
+ CLASS = CDTextboxControl;
28
+ LANGUAGE = ObjC;
29
+ OUTLETS = {
30
+ button1 = NSButton;
31
+ button2 = NSButton;
32
+ button3 = NSButton;
33
+ label = NSTextField;
34
+ panel = NSPanel;
35
+ textView = NSTextView;
36
+ };
37
+ SUPERCLASS = CDControl;
38
+ },
39
+ {
40
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
41
+ CLASS = CDThreeButtonControl;
42
+ LANGUAGE = ObjC;
43
+ OUTLETS = {
44
+ button1 = NSButton;
45
+ button2 = NSButton;
46
+ button3 = NSButton;
47
+ expandingLabel = NSTextField;
48
+ panel = NSPanel;
49
+ };
50
+ SUPERCLASS = CDControl;
51
+ },
52
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
53
+ );
54
+ IBVersion = 1;
55
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>120 158 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,13 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDProgressbarControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {label = NSTextField; panel = NSPanel; progressBar = NSProgressIndicator; };
8
+ SUPERCLASS = CDControl;
9
+ },
10
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
11
+ );
12
+ IBVersion = 1;
13
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>128 15 356 240 0 0 1600 1002 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8F46</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,40 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDInputboxControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {textField = NSTextField; };
8
+ SUPERCLASS = CDThreeButtonControl;
9
+ },
10
+ {
11
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
12
+ CLASS = CDTextboxControl;
13
+ LANGUAGE = ObjC;
14
+ OUTLETS = {
15
+ button1 = NSButton;
16
+ button2 = NSButton;
17
+ button3 = NSButton;
18
+ label = NSTextField;
19
+ panel = NSPanel;
20
+ textView = NSTextView;
21
+ };
22
+ SUPERCLASS = CDControl;
23
+ },
24
+ {
25
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
26
+ CLASS = CDThreeButtonControl;
27
+ LANGUAGE = ObjC;
28
+ OUTLETS = {
29
+ button1 = NSButton;
30
+ button2 = NSButton;
31
+ button3 = NSButton;
32
+ expandingLabel = NSTextField;
33
+ panel = NSPanel;
34
+ };
35
+ SUPERCLASS = CDControl;
36
+ },
37
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
38
+ );
39
+ IBVersion = 1;
40
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>82 210 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,26 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDTextboxControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {textView = NSTextView; };
8
+ SUPERCLASS = CDThreeButtonControl;
9
+ },
10
+ {
11
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
12
+ CLASS = CDThreeButtonControl;
13
+ LANGUAGE = ObjC;
14
+ OUTLETS = {
15
+ button1 = NSButton;
16
+ button2 = NSButton;
17
+ button3 = NSButton;
18
+ expandingLabel = NSTextField;
19
+ panel = NSPanel;
20
+ };
21
+ SUPERCLASS = CDControl;
22
+ },
23
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
24
+ );
25
+ IBVersion = 1;
26
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>117 70 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
data/bin/openmeta ADDED
Binary file
data/bin/paperless ADDED
@@ -0,0 +1,234 @@
1
+ #!/usr/bin/env ruby
2
+ require 'gli'
3
+ require 'yaml'
4
+ require 'paperless'
5
+
6
+ # Set the path to the CocoaDialog.app and openmeta
7
+ COCOADIALOG = File.dirname(__FILE__) +'/CocoaDialog.app/Contents/MacOS/CocoaDialog'
8
+ OPENMETA = File.dirname(__FILE__) +'/openmeta'
9
+
10
+ include GLI::App
11
+
12
+ program_desc 'A command-line interface for Paperless workflows that apply rules in order to auto-sort notes into notebooks/folders.'
13
+
14
+ version Paperless::VERSION
15
+
16
+ config_file '.paperless.rc'
17
+
18
+ desc 'Only print what would be done. Nothing actually gets modified.'
19
+ switch :simulate, :negatable => false, :default_value => false
20
+
21
+ desc 'The name of the app to OCR pdf documents (pdfpen|pdfpenpro|none)'
22
+ default_value 'none'
23
+ arg_name 'OCR App'
24
+ flag :ocr_engine
25
+
26
+ desc 'A list of file extenstions that will be treated as text when added to services like Evernote.'
27
+ default_value 'txt md mmd'
28
+ arg_name 'Text extensions'
29
+ flag :text_ext
30
+
31
+ desc 'Do not use.'
32
+ flag :rules
33
+
34
+ desc 'The path to a new rules file. If not defined, the global rules from the config file will be used.'
35
+ arg_name 'Rules file'
36
+ flag :rules_file
37
+
38
+ desc 'The application where the document will be added to.'
39
+ default_value 'evernote'
40
+ arg_name 'Services'
41
+ flag :service
42
+
43
+ desc 'The default format for the date when inserted using <date> variable.'
44
+ default_value '%Y-%m-%d'
45
+ arg_name 'Date Format'
46
+ flag :date_format
47
+
48
+ desc 'The locale format of the date: "us" or "euro"'
49
+ default_value 'us'
50
+ arg_name 'Date Locale'
51
+ flag :date_locale
52
+
53
+ desc 'If the date cannot be discovered within the doucment contents, then use "filedate" or "today" as the default.'
54
+ default_value 'filedate'
55
+ arg_name 'Date Default'
56
+ flag :date_default
57
+
58
+ desc 'Default destination to add notes into'
59
+ default_value 'Inbox'
60
+ arg_name 'Notebook'
61
+ flag :destination
62
+
63
# GLI pre-hook: runs before the chosen command.
# Normalizes the global options and, when a rules file is given and exists,
# replaces the rules from the config file with the ones found in that file.
# Returns true so that GLI proceeds to the command (false would abort it).
# Use skips_pre before a command to skip this block on that command only.
pre do |global_options,command,options,args|
  # The extensions arrive as one whitespace-separated string; callers want a list.
  global_options[:text_ext] = global_options[:text_ext].split

  # Load a new rules file if one was passed
  if global_options[:rules_file]
    rules_path = File.expand_path(global_options[:rules_file])
    # File.exist? is the supported spelling (File.exists? was removed in Ruby 3.2).
    if File.exist?(rules_path)
      # load_file parses the file's *contents*; YAML.load would parse the
      # path string itself and hand back a String instead of the rules hash.
      yaml = YAML.load_file(rules_path)
      global_options[:rules] = yaml[:rules]
    end
  end

  # puts global_options.inspect
  true
end
82
+
83
+ post do |global_options,command,options,args|
84
+ # Post logic here
85
+ # Use skips_post before a command to skip this
86
+ # block on that command only
87
+
88
+ # Run Sync if its requested
89
+ end
90
+
91
+ on_error do |exception|
92
+ # Error logic here
93
+ # return false to skip default error handling
94
+
95
+ puts "There was an error processing the command."
96
+ true
97
+ end
98
+
99
+ # Assign Command
100
+ desc 'Edit the configuration file in TextEdit'
101
+ command :editconfig do |c|
102
+
103
+ c.action do |global_options,options,args|
104
+ system("open -a TextEdit ~/.paperless.rc")
105
+ end
106
+ end
107
+
108
+
109
+ # Append Command
110
+ # desc 'Append data to the end of an existing note.'
111
+ # arg_name 'note_name'
112
+ # command :append do |c|
113
+
114
+ # c.action do |global_options,options,args|
115
+
116
+ # end
117
+ # end
118
+
119
+ # Assign Command
120
+ desc 'Assign a tag to an existing note. (Not yet implemented)'
121
+ arg_name 'tag_name', :multiple
122
+ command :assign do |c|
123
+
124
+ c.action do |global_options,options,args|
125
+
126
+ end
127
+ end
128
+
129
+ # Create Command
130
+ desc 'Create a new note from a file'
131
+ arg_name 'file_name'
132
# Processes every file argument in turn: optionally prompts for a new file
# name, builds a Paperless::Engine from the global options, optionally OCRs
# and applies the rules, then either prints the simulated result or creates
# the note. Raises if a given file does not exist.
command :create do |c|

  c.desc 'Open a prompt to rename the file before its processed through the rules.'
  c.switch :prompt, :negatable => false, :default_value => false

  c.desc 'Process the file through the rules.'
  c.switch :proc_rules, :default_value => true

  c.desc 'Delete the original file after its been imported into the target service.'
  c.switch :delete, :default_value => true

  c.desc 'OCR the document if it is a PDF'
  c.switch :ocr, :negatable => false, :default_value => false

  c.action do |global_options,options,args|

    args.each do |file|

      expanded = File.expand_path(file)
      # File.exist? is the supported spelling (File.exists? was removed in Ruby 3.2).
      raise "File does not exist (#{file})" unless File.exist?(expanded)
      file = expanded

      if options[:prompt] && !global_options[:simulate]
        # Cannot rename the file in simulate mode since we dont want to change the file name
        file_ext = File.extname(file)
        filename = File.basename(file, file_ext)

        # Argument-vector form: no shell is involved, so quotes, backticks or
        # "$" inside the file name (or spaces in the CocoaDialog path) are
        # passed through literally instead of being interpreted.
        dialog_cmd = [
          COCOADIALOG, 'standard-inputbox',
          '--title', 'Paperless Prompt',
          '--informative-text', 'Rename your file before its processed with rules...',
          '--text', filename,
          '--no-newline', '--string-output'
        ]
        user_input = IO.popen(dialog_cmd) { |io| io.read }.split("\n")

        # The first output line is treated as the pressed button and the
        # second as the entered text. Either may be missing (dialog closed,
        # text cleared), so guard against nil/empty before renaming.
        button, new_basename = user_input
        if button.to_s.match(/ok/i) && new_basename && !new_basename.empty? && new_basename != filename
          new_filename = File.join(File.dirname(file), new_basename + file_ext)
          puts "Renaming file based on input to #{new_filename}"
          File.rename(file, new_filename)
          file = new_filename
        end
      end

      engine = Paperless::Engine.new({
        :file                => file,
        :ocr_engine          => global_options[:ocr_engine],
        :text_ext            => global_options[:text_ext],
        :default_destination => global_options[:destination],
        :date_format         => global_options[:date_format],
        :date_locale         => global_options[:date_locale],
        :date_default        => global_options[:date_default],
        :default_service     => global_options[:service],
        :rules               => global_options[:rules]
      })

      # Extension without the dot, for comparison against Paperless::PDF_EXT.
      file_ext = File.extname(file).gsub(/\./,'')
      if file_ext == Paperless::PDF_EXT && options[:ocr]
        puts "OCRing file..."
        engine.ocr
      end

      if options[:proc_rules]
        puts "Processing rules..."
        engine.process_rules
      end

      if global_options[:simulate]
        puts "Simulating changes..."
        engine.print
      else
        puts "Saving #{file} to #{engine.service}"
        engine.create({:delete => options[:delete]})
      end
    end
  end
end
203
+
204
+ # Info Command
205
+ # desc 'Get Evernote account Info'
206
+ # arg_name 'note_name'
207
+ # command :info do |c|
208
+
209
+ # c.action do |global_options,options,args|
210
+
211
+ # end
212
+ # end
213
+
214
+ # Search Command
215
+ desc 'Search for a note in Evernote and perform actions on them. (Not yet implemented)'
216
+ arg_name 'note_name'
217
+ command :search do |c|
218
+
219
+ c.action do |global_options,options,args|
220
+
221
+ end
222
+ end
223
+
224
+ # Unassign Command
225
+ desc 'Remove tags from a note. (Not yet implemented)'
226
+ arg_name 'note_name'
227
+ command :unassign do |c|
228
+
229
+ c.action do |global_options,options,args|
230
+
231
+ end
232
+ end
233
+
234
+ exit run(ARGV)
@@ -0,0 +1,96 @@
1
+ require 'date'
2
+
3
# Mixin with helpers for discovering a date inside free-form text
# (for example the OCR output of a scanned document).
module DateSearch

  # Regex fragments, interpolated into the patterns used by #date_search.
  SEP = '\. \/\-\,'           # separator characters; used inside a character class
  DAY = '(\d{1,2})'           # one or two digits (also reused for numeric months)
  MONTH = '([a-zA-Z]{3,15})'  # a run of 3-15 letters; not checked against month names here
  YEAR = '(\d{4}|\d{2})'      # four- or two-digit year
  END_DATE = '(\s|$)'         # the date must be followed by whitespace or end of line

  # Converts +num+ with to_i and returns it when it is a day of month (1..31),
  # otherwise nil. Non-numeric input converts to 0 and is therefore rejected.
  def valid_day(num)
    day = num.to_i
    day.between?(1, 31) ? day : nil
  end

  # Converts +num+ with to_i and returns it when it is a month number (1..12),
  # otherwise nil.
  def valid_month(num)
    month = num.to_i
    month.between?(1, 12) ? month : nil
  end

  # Converts +num+ with to_i and returns a four-digit year, or nil when the
  # year is outside 1970..current year. Values below 100 are expanded:
  # anything greater than the current two-digit year is placed in the previous
  # century, everything else in the current one.
  def valid_year(num)
    year = num.to_i
    return nil if year < 0

    now = DateTime.now

    if year < 100
      # transform 2 digit date into 4 digit date
      now_two_digit_year = now.year % 100
      century = now.year - now_two_digit_year
      year += year > now_two_digit_year ? century - 100 : century
    end

    # No file can have a date prior to 1970 (1970 itself is accepted),
    # and dates in a future year are rejected.
    year >= 1970 && year <= now.year ? year : nil
  end

  # Returns a lowercased copy of +string+ in which every run of the same
  # letter is collapsed to a single letter. Letters tend to get duplicated
  # during OCR; digits are left alone because repeated digits are legitimate
  # (e.g. "2011"). The argument itself is not modified.
  def repair_ocr_string(string)
    string.downcase.squeeze('a-z')
  end

  # Searches +text+ for a date and returns it as a DateTime, or nil.
  #
  # Three formats are tried in order; only the FIRST pattern that matches is
  # evaluated, even if its values then fail validation:
  #   1. "December 29, 2011"  (month name, day, year)
  #   2. "29 December 2011"   (day, month name, year)
  #   3. "12-29-2011" / "29-12-2011" (all numeric)
  #
  # +date_locale+ only matters for the numeric format: 'us' reads it as
  # month-day-year, any other value as day-month-year.
  # Progress and warnings are written to stdout.
  def date_search(text,date_locale)
    date = nil
    if match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}#{END_DATE}/i)
      # December 29, 2011
      if valid_day(match[2]) && valid_year(match[3])
        puts "Basing the date off the discovered string (1): #{match[0]}"
        begin
          # The month name is validated by the parser, after OCR repair.
          date = DateTime.parse(repair_ocr_string(match[0]))
        rescue => e
          puts "WARNING: Unable to create date object. #{e}"
          date = nil
        end
      end
    elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}#{END_DATE}/i)
      # 29 December 2011
      if valid_day(match[1]) && valid_year(match[3])
        puts "Basing the date off the discovered string (2): #{match[0]}"
        begin
          date = DateTime.parse(repair_ocr_string(match[0]))
        rescue => e
          puts "WARNING: Unable to create date object. #{e}"
          date = nil
        end
      end
    elsif match = text.match(/#{DAY}[#{SEP}]+#{DAY}[#{SEP}]+#{YEAR}#{END_DATE}/)
      # US: 12-29-2011
      # Euro: 29-12-2011
      us_order = date_locale == 'us'
      year  = valid_year(match[3])
      day   = valid_day(us_order ? match[2] : match[1])
      month = valid_month(us_order ? match[1] : match[2])

      if month && day && year
        puts "Basing the date off the discovered string (3): #{match[0]}"
        begin
          # Can still fail for impossible combinations such as 31 February.
          date = DateTime.new(year,month,day)
        rescue => e
          puts "WARNING: Unable to create date object. #{e}"
          date = nil
        end
      else
        puts "WARNING: The discovered date string does not validate: #{match[0]}"
      end
    end
    date
  end

end