paperless 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (65) hide show
  1. data/README.rdoc +30 -0
  2. data/bin/CocoaDialog.app/Contents/Info.plist +28 -0
  3. data/bin/CocoaDialog.app/Contents/MacOS/CocoaDialog +0 -0
  4. data/bin/CocoaDialog.app/Contents/PkgInfo +1 -0
  5. data/bin/CocoaDialog.app/Contents/Resources/COPYING +281 -0
  6. data/bin/CocoaDialog.app/Contents/Resources/Changelog +73 -0
  7. data/bin/CocoaDialog.app/Contents/Resources/Info.plist +28 -0
  8. data/bin/CocoaDialog.app/Contents/Resources/InfoPlist.strings +0 -0
  9. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/classes.nib +51 -0
  10. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/info.nib +16 -0
  11. data/bin/CocoaDialog.app/Contents/Resources/Inputbox.nib/keyedobjects.nib +0 -0
  12. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/classes.nib +7 -0
  13. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/info.nib +21 -0
  14. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/info.nib.orig +21 -0
  15. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/objects.nib +0 -0
  16. data/bin/CocoaDialog.app/Contents/Resources/MainMenu.nib/objects.nib.orig +0 -0
  17. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/classes.nib +27 -0
  18. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/info.nib +16 -0
  19. data/bin/CocoaDialog.app/Contents/Resources/Msgbox.nib/keyedobjects.nib +0 -0
  20. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/classes.nib +55 -0
  21. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/info.nib +16 -0
  22. data/bin/CocoaDialog.app/Contents/Resources/PopUpButton.nib/keyedobjects.nib +0 -0
  23. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/classes.nib +13 -0
  24. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/info.nib +16 -0
  25. data/bin/CocoaDialog.app/Contents/Resources/Progressbar.nib/keyedobjects.nib +0 -0
  26. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/classes.nib +40 -0
  27. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/info.nib +16 -0
  28. data/bin/CocoaDialog.app/Contents/Resources/SecureInputbox.nib/keyedobjects.nib +0 -0
  29. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/classes.nib +26 -0
  30. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/info.nib +16 -0
  31. data/bin/CocoaDialog.app/Contents/Resources/Textbox.nib/keyedobjects.nib +0 -0
  32. data/bin/CocoaDialog.app/Contents/Resources/atom.icns +0 -0
  33. data/bin/CocoaDialog.app/Contents/Resources/cocoadialog.icns +0 -0
  34. data/bin/CocoaDialog.app/Contents/Resources/computer.icns +0 -0
  35. data/bin/CocoaDialog.app/Contents/Resources/document.icns +0 -0
  36. data/bin/CocoaDialog.app/Contents/Resources/find.icns +0 -0
  37. data/bin/CocoaDialog.app/Contents/Resources/finder.icns +0 -0
  38. data/bin/CocoaDialog.app/Contents/Resources/firewire.icns +0 -0
  39. data/bin/CocoaDialog.app/Contents/Resources/folder.icns +0 -0
  40. data/bin/CocoaDialog.app/Contents/Resources/gear.icns +0 -0
  41. data/bin/CocoaDialog.app/Contents/Resources/globe.icns +0 -0
  42. data/bin/CocoaDialog.app/Contents/Resources/hazard.icns +0 -0
  43. data/bin/CocoaDialog.app/Contents/Resources/heart.icns +0 -0
  44. data/bin/CocoaDialog.app/Contents/Resources/hourglass.icns +0 -0
  45. data/bin/CocoaDialog.app/Contents/Resources/info.icns +0 -0
  46. data/bin/CocoaDialog.app/Contents/Resources/ipod.icns +0 -0
  47. data/bin/CocoaDialog.app/Contents/Resources/person.icns +0 -0
  48. data/bin/CocoaDialog.app/Contents/Resources/sound.icns +0 -0
  49. data/bin/CocoaDialog.app/Contents/Resources/x.icns +0 -0
  50. data/bin/openmeta +0 -0
  51. data/bin/paperless +234 -0
  52. data/lib/paperless/date_search.rb +96 -0
  53. data/lib/paperless/engine.rb +220 -0
  54. data/lib/paperless/ocr_engines/acrobat.rb +51 -0
  55. data/lib/paperless/ocr_engines/devonthinkpro.rb +26 -0
  56. data/lib/paperless/ocr_engines/pdfpen.rb +32 -0
  57. data/lib/paperless/ocr_engines/pdfpenpro.rb +32 -0
  58. data/lib/paperless/rule.rb +82 -0
  59. data/lib/paperless/services/devonthinkpro.rb +54 -0
  60. data/lib/paperless/services/evernote.rb +55 -0
  61. data/lib/paperless/services/finder.rb +47 -0
  62. data/lib/paperless/version.rb +3 -0
  63. data/lib/paperless.rb +14 -0
  64. data/paperless.rdoc +5 -0
  65. metadata +233 -0
@@ -0,0 +1,55 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
6
+ CLASS = CDInputboxControl;
7
+ LANGUAGE = ObjC;
8
+ OUTLETS = {
9
+ button1 = NSButton;
10
+ button2 = NSButton;
11
+ button3 = NSButton;
12
+ label = NSTextField;
13
+ panel = NSPanel;
14
+ textField = NSTextField;
15
+ };
16
+ SUPERCLASS = CDControl;
17
+ },
18
+ {
19
+ ACTIONS = {selectionChanged = id; };
20
+ CLASS = CDPopUpButtonControl;
21
+ LANGUAGE = ObjC;
22
+ OUTLETS = {popup = NSPopUpButton; };
23
+ SUPERCLASS = CDThreeButtonControl;
24
+ },
25
+ {
26
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
27
+ CLASS = CDTextboxControl;
28
+ LANGUAGE = ObjC;
29
+ OUTLETS = {
30
+ button1 = NSButton;
31
+ button2 = NSButton;
32
+ button3 = NSButton;
33
+ label = NSTextField;
34
+ panel = NSPanel;
35
+ textView = NSTextView;
36
+ };
37
+ SUPERCLASS = CDControl;
38
+ },
39
+ {
40
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
41
+ CLASS = CDThreeButtonControl;
42
+ LANGUAGE = ObjC;
43
+ OUTLETS = {
44
+ button1 = NSButton;
45
+ button2 = NSButton;
46
+ button3 = NSButton;
47
+ expandingLabel = NSTextField;
48
+ panel = NSPanel;
49
+ };
50
+ SUPERCLASS = CDControl;
51
+ },
52
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
53
+ );
54
+ IBVersion = 1;
55
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>120 158 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,13 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDProgressbarControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {label = NSTextField; panel = NSPanel; progressBar = NSProgressIndicator; };
8
+ SUPERCLASS = CDControl;
9
+ },
10
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
11
+ );
12
+ IBVersion = 1;
13
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>128 15 356 240 0 0 1600 1002 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8F46</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,40 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDInputboxControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {textField = NSTextField; };
8
+ SUPERCLASS = CDThreeButtonControl;
9
+ },
10
+ {
11
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; };
12
+ CLASS = CDTextboxControl;
13
+ LANGUAGE = ObjC;
14
+ OUTLETS = {
15
+ button1 = NSButton;
16
+ button2 = NSButton;
17
+ button3 = NSButton;
18
+ label = NSTextField;
19
+ panel = NSPanel;
20
+ textView = NSTextView;
21
+ };
22
+ SUPERCLASS = CDControl;
23
+ },
24
+ {
25
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
26
+ CLASS = CDThreeButtonControl;
27
+ LANGUAGE = ObjC;
28
+ OUTLETS = {
29
+ button1 = NSButton;
30
+ button2 = NSButton;
31
+ button3 = NSButton;
32
+ expandingLabel = NSTextField;
33
+ panel = NSPanel;
34
+ };
35
+ SUPERCLASS = CDControl;
36
+ },
37
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
38
+ );
39
+ IBVersion = 1;
40
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>82 210 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
@@ -0,0 +1,26 @@
1
+ {
2
+ IBClasses = (
3
+ {CLASS = CDControl; LANGUAGE = ObjC; SUPERCLASS = NSObject; },
4
+ {
5
+ CLASS = CDTextboxControl;
6
+ LANGUAGE = ObjC;
7
+ OUTLETS = {textView = NSTextView; };
8
+ SUPERCLASS = CDThreeButtonControl;
9
+ },
10
+ {
11
+ ACTIONS = {button1Pressed = id; button2Pressed = id; button3Pressed = id; timeout = id; };
12
+ CLASS = CDThreeButtonControl;
13
+ LANGUAGE = ObjC;
14
+ OUTLETS = {
15
+ button1 = NSButton;
16
+ button2 = NSButton;
17
+ button3 = NSButton;
18
+ expandingLabel = NSTextField;
19
+ panel = NSPanel;
20
+ };
21
+ SUPERCLASS = CDControl;
22
+ },
23
+ {CLASS = FirstResponder; LANGUAGE = ObjC; SUPERCLASS = NSObject; }
24
+ );
25
+ IBVersion = 1;
26
+ }
@@ -0,0 +1,16 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
3
+ <plist version="1.0">
4
+ <dict>
5
+ <key>IBDocumentLocation</key>
6
+ <string>117 70 356 240 0 0 1440 878 </string>
7
+ <key>IBFramework Version</key>
8
+ <string>443.0</string>
9
+ <key>IBOpenObjects</key>
10
+ <array>
11
+ <integer>5</integer>
12
+ </array>
13
+ <key>IBSystem Version</key>
14
+ <string>8H14</string>
15
+ </dict>
16
+ </plist>
data/bin/openmeta ADDED
Binary file
data/bin/paperless ADDED
@@ -0,0 +1,234 @@
1
#!/usr/bin/env ruby
#
# Command-line entry point for Paperless, built on the GLI DSL.
#
# Global flags are declared first, then the pre/post/on_error hooks, then one
# `command` block per sub-command. Only `editconfig` and `create` do real work;
# `assign`, `search` and `unassign` are placeholders with empty actions.
require 'gli'
require 'yaml'
require 'paperless'

# Set the path to the CocoaDialog.app and openmeta (both ship next to this script)
COCOADIALOG = File.join(File.dirname(__FILE__), 'CocoaDialog.app/Contents/MacOS/CocoaDialog')
OPENMETA = File.join(File.dirname(__FILE__), 'openmeta')

include GLI::App

program_desc 'A command-line interface for Paperless workflows that apply rules in order to auto-sort notes into notebooks/folders.'

version Paperless::VERSION

config_file '.paperless.rc'

desc 'Only print what would be done. Nothing actually gets modified.'
switch :simulate, :negatable => false, :default_value => false

desc 'The name of the app to OCR pdf documents (pdfpen|pdfpenpro|none)'
default_value 'none'
arg_name 'OCR App'
flag :ocr_engine

desc 'A list of file extenstions that will be treated as text when added to services like Evernote.'
default_value 'txt md mmd'
arg_name 'Text extensions'
flag :text_ext

desc 'Do not use.'
flag :rules

desc 'The path to a new rules file. If not defined, the global rules from the config file will be used.'
arg_name 'Rules file'
flag :rules_file

desc 'The application where the document will be added to.'
default_value 'evernote'
arg_name 'Services'
flag :service

desc 'The default format for the date when inserted using <date> variable.'
default_value '%Y-%m-%d'
arg_name 'Date Format'
flag :date_format

desc 'The locale format of the date: "us" or "euro"'
default_value 'us'
arg_name 'Date Locale'
flag :date_locale

desc 'If the date cannot be discovered within the doucment contents, then use "filedate" or "today" as the default.'
default_value 'filedate'
arg_name 'Date Default'
flag :date_default

desc 'Default destination to add notes into'
default_value 'Inbox'
arg_name 'Notebook'
flag :destination

pre do |global_options,command,options,args|
  # Pre logic here
  # Return true to proceed; false to abort and not call the
  # chosen command
  # Use skips_pre before a command to skip this block
  # on that command only

  # The extensions arrive as one whitespace-separated string; commands expect an array.
  global_options[:text_ext] = global_options[:text_ext].split

  # Load a new rules file if one was passed
  if global_options[:rules_file]
    rules_path = File.expand_path(global_options[:rules_file])
    if File.exist?(rules_path)
      # load_file parses the file's contents; plain YAML.load would parse the
      # path string itself and never see the rules.
      yaml = YAML.load_file(rules_path)
      # An empty or non-mapping file yields a non-Hash; keep the global rules then.
      global_options[:rules] = yaml[:rules] if yaml.is_a?(Hash)
    end
  end

  # puts global_options.inspect
  true
end

post do |global_options,command,options,args|
  # Post logic here
  # Use skips_post before a command to skip this
  # block on that command only

  # Run Sync if its requested
end

on_error do |exception|
  # Error logic here
  # return false to skip default error handling

  puts "There was an error processing the command."
  true
end

# Editconfig Command
desc 'Edit the configuration file in TextEdit'
command :editconfig do |c|

  c.action do |global_options,options,args|
    system("open -a TextEdit ~/.paperless.rc")
  end
end


# Append Command
# desc 'Append data to the end of an existing note.'
# arg_name 'note_name'
# command :append do |c|

# c.action do |global_options,options,args|

# end
# end

# Assign Command
desc 'Assign a tag to an existing note. (Not yet implemented)'
arg_name 'tag_name', :multiple
command :assign do |c|

  c.action do |global_options,options,args|

  end
end

# Create Command
desc 'Create a new note from a file'
arg_name 'file_name'
command :create do |c|

  c.desc 'Open a prompt to rename the file before its processed through the rules.'
  c.switch :prompt, :negatable => false, :default_value => false

  c.desc 'Process the file through the rules.'
  c.switch :proc_rules, :default_value => true

  c.desc 'Delete the original file after its been imported into the target service.'
  c.switch :delete, :default_value => true

  c.desc 'OCR the document if it is a PDF'
  c.switch :ocr, :negatable => false, :default_value => false

  c.action do |global_options,options,args|

    args.each do |file|

      expanded = File.expand_path(file)
      raise "File does not exist (#{file})" unless File.exist?(expanded)
      file = expanded

      if options[:prompt] && !global_options[:simulate]
        # Cannot rename the file in simulate mode since we dont want to change the file name
        file_ext = File.extname(file)
        filename = File.basename(file, file_ext)

        # Array form of popen bypasses the shell, so quotes, spaces or `$` in the
        # file name (or in the install path) cannot break or inject into the command.
        dialog_output = IO.popen([
          COCOADIALOG, 'standard-inputbox',
          '--title', 'Paperless Prompt',
          '--informative-text', 'Rename your file before its processed with rules...',
          '--text', filename,
          '--no-newline', '--string-output'
        ], &:read)

        # First line is the pressed button, second line the entered text.
        # Either may be missing if the dialog failed or the field was left empty.
        button, new_name = dialog_output.to_s.split("\n")

        if button.to_s.match(/ok/i) && new_name && new_name != filename
          new_filename = File.join(File.dirname(file), new_name + file_ext)
          puts "Renaming file based on input to #{new_filename}"
          File.rename(file, new_filename)
          file = new_filename
        end
      end

      engine = Paperless::Engine.new({
        :file => file,
        :ocr_engine => global_options[:ocr_engine],
        :text_ext => global_options[:text_ext],
        :default_destination => global_options[:destination],
        :date_format => global_options[:date_format],
        :date_locale => global_options[:date_locale],
        :date_default => global_options[:date_default],
        :default_service => global_options[:service],
        :rules => global_options[:rules]
      })

      # Extension without the dot, for comparison against Paperless::PDF_EXT
      extension = File.extname(file).gsub(/\./,'')
      if extension == Paperless::PDF_EXT && options[:ocr]
        puts "OCRing file..."
        engine.ocr
      end

      if options[:proc_rules]
        puts "Processing rules..."
        engine.process_rules
      end

      if global_options[:simulate]
        puts "Simulating changes..."
        engine.print
      else
        puts "Saving #{file} to #{engine.service}"
        engine.create({:delete => options[:delete]})
      end
    end
  end
end

# Info Command
# desc 'Get Evernote account Info'
# arg_name 'note_name'
# command :info do |c|

# c.action do |global_options,options,args|

# end
# end

# Search Command
desc 'Search for a note in Evernote and perform actions on them. (Not yet implemented)'
arg_name 'note_name'
command :search do |c|

  c.action do |global_options,options,args|

  end
end

# Unassign Command
desc 'Remove tags from a note. (Not yet implemented)'
arg_name 'note_name'
command :unassign do |c|

  c.action do |global_options,options,args|

  end
end

exit run(ARGV)
@@ -0,0 +1,96 @@
1
+ require 'date'
2
+
3
# Mixin that searches free-form text (typically OCR output) for a calendar date.
#
# Three layouts are recognised by #date_search, tried in this order:
#   1. "December 29, 2011"           (month name, day, year)
#   2. "29 December 2011"            (day, month name, year)
#   3. "12-29-2011" / "29-12-2011"   (all numeric; field order chosen by locale)
module DateSearch

  # Regex fragments (kept as strings) that are interpolated into the patterns below.
  SEP = '\. \/\-\,'           # body of a character class: separators allowed between date parts
  DAY = '(\d{1,2})'           # one or two digits; also used for the numeric month
  MONTH = '([a-zA-Z]{3,15})'  # a run of letters standing in for a month name
  YEAR = '(\d{4}|\d{2})'      # four- or two-digit year
  END_DATE = '(\s|$)'         # the date must be followed by whitespace or end of line

  # Returns +num+ as an Integer when it is a plausible day of the month (1..31),
  # otherwise nil. Zero is rejected explicitly because 0 is truthy in Ruby.
  def valid_day(num)
    day = num.to_i
    (1..31).cover?(day) ? day : nil
  end

  # Returns +num+ as an Integer when it is a month number (1..12), otherwise nil.
  def valid_month(num)
    month = num.to_i
    (1..12).cover?(month) ? month : nil
  end

  # Returns +num+ as a four-digit Integer year, or nil when it falls outside
  # 1970..current year.
  #
  # Two-digit years are expanded first: values greater than the current
  # two-digit year are taken to be in the 1900s, the rest in the 2000s.
  def valid_year(num)
    year = num.to_i
    current_year = DateTime.now.year

    if year < 100
      # In the 1900s? Need to add 1900. Else add 2000
      year += year > current_year - 2000 ? 1900 : 2000
    end

    # No file can have a date prior to 1970 (1970 itself is allowed)
    (1970..current_year).cover?(year) ? year : nil
  end

  # Returns a lower-cased copy of +string+ with every run of a repeated letter
  # collapsed to a single letter. The argument itself is left untouched.
  #
  # Letters tend to get duplicated during OCR ("Deecember"); digits are left
  # alone because repeated digits are legitimate ("11", "2011").
  def repair_ocr_string(string)
    string.downcase.squeeze('a-z')
  end

  # Looks for the first date in +text+ and returns it as a DateTime, or nil when
  # nothing usable is found.
  #
  # text        - the String to scan (nil is treated as an empty string).
  # date_locale - 'us' reads all-numeric dates as month-day-year; any other
  #               value reads them as day-month-year.
  #
  # Only the first layout whose pattern matches is considered; if that match
  # fails validation the later layouts are not tried. Progress and warnings are
  # written to standard output.
  def date_search(text, date_locale)
    text = text.to_s
    date = nil

    if match = text.match(/#{MONTH}[#{SEP}]{0,3}#{DAY}[#{SEP}]{1,3}#{YEAR}#{END_DATE}/i)
      # December 29, 2011
      if valid_day(match[2]) && valid_year(match[3])
        puts "Basing the date off the discovered string (1): #{match[0]}"
        date = date_or_warn { DateTime.parse(repair_ocr_string(match[0])) }
      end
    elsif match = text.match(/#{DAY}[#{SEP}]{0,3}#{MONTH}[#{SEP}]{0,3}#{YEAR}#{END_DATE}/i)
      # 29 December 2011
      if valid_day(match[1]) && valid_year(match[3])
        puts "Basing the date off the discovered string (2): #{match[0]}"
        date = date_or_warn { DateTime.parse(repair_ocr_string(match[0])) }
      end
    elsif match = text.match(/#{DAY}[#{SEP}]+#{DAY}[#{SEP}]+#{YEAR}#{END_DATE}/)
      # US: 12-29-2011
      # Euro: 29-12-2011
      us_order = date_locale == 'us'
      year = valid_year(match[3])
      day = valid_day(us_order ? match[2] : match[1])
      month = valid_month(us_order ? match[1] : match[2])

      if month && day && year
        puts "Basing the date off the discovered string (3): #{match[0]}"
        date = date_or_warn { DateTime.new(year, month, day) }
      else
        puts "WARNING: The discovered date string does not validate: #{match[0]}"
      end
    end

    date
  end

  private

  # Runs the block that builds a date; on any StandardError (for example an
  # unparseable string or an impossible day such as February 30) prints a
  # warning and returns nil instead of raising.
  def date_or_warn
    yield
  rescue StandardError => e
    puts "WARNING: Unable to create date object. #{e}"
    nil
  end

end