pseudo_date 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest ADDED
@@ -0,0 +1,11 @@
1
+ Manifest
2
+ README.mdown
3
+ Rakefile
4
+ lib/core_extensions/object.rb
5
+ lib/core_extensions/string.rb
6
+ lib/pseudo_date.rb
7
+ lib/pseudo_date/parser.rb
8
+ lib/pseudo_date/pseudo_date.rb
9
+ pseudo_date.gemspec
10
+ test/test_helper.rb
11
+ test/test_parser.rb
data/README.mdown ADDED
@@ -0,0 +1,69 @@
1
+ # Pseudo Date
2
+
3
+ ## What is a Pseudo Date?
4
+
5
+ It's a date, but not really. A PseudoDate object has a day, month, and year, but unlike the built-in Ruby date classes it does not require all of them. This allows you to parse obscure date strings that may or may not be complete.
6
+
7
+ ## What Is This For?
8
+
9
+ PseudoDate was created to parse odd dates in odd formats and attempt to extract as much information from them as possible. It's especially handy when you're trying to convert a date string that has come from an OCR'd source.
10
+
11
+ ## Assumptions
12
+
13
+ As with all parsing, one needs to make assumptions. The main assumption made here is that all dates will be in the past.
14
+
15
+ ## Other Notes
16
+
17
+ PseudoDate stores date attributes in strings instead of integers to avoid losing the preceding '0' on various attributes. This was a decision made when first creating the class because of the way things were being output in the project it was created for.
18
+
19
+ ## Compatibility
20
+
21
+ PseudoDates are not really compatible with other built-in date/time objects. They do support some of the basic methods for extracting the individual numbers (year, month, day), though. PseudoDates that are of exact precision can be turned into Ruby Date objects.
22
+
23
+ >> p = PseudoDate.new('19850625')
24
+ => #<PseudoDate:0x10190eff0 @month="06", @date_hash={:day=>"25", :month=>"06", :year=>"1985"}, @year="1985", @day="25">
25
+ >> p.precision
26
+ => "exact"
27
+ >> p.year
28
+ => "1985"
29
+ >> p.month
30
+ => "06"
31
+ >> p.day
32
+ => "25"
33
+ >> p.to_date
34
+ => #<Date: 4892483/2,0,2299161>
35
+
36
+ ## Examples
37
+
38
+ >> PseudoDate.new('19850625').to_hash
39
+ => {:day=>"25", :month=>"06", :year=>"1985"}
40
+
41
+ >> PseudoDate.new('1985-25-06').to_hash
42
+ => {:day=>"25", :month=>"06", :year=>"1985"}
43
+
44
+ >> PseudoDate.new('06-25-1985').to_hash
45
+ => {:day=>"25", :month=>"06", :year=>"1985"}
46
+
47
+ >> PseudoDate.new('25-06-1985').to_hash
48
+ => {:day=>"25", :month=>"06", :year=>"1985"}
49
+
50
+ >> PseudoDate.new('06/25/1985').to_hash
51
+ => {:day=>"25", :month=>"06", :year=>"1985"}
52
+
53
+ >> PseudoDate.new('06/1985').to_hash
54
+ => {:day=>"00", :month=>"06", :year=>"1985"}
55
+
56
+ >> PseudoDate.new('85').to_hash
57
+ => {:day=>"00", :month=>"00", :year=>"1985"}
58
+
59
+ >> PseudoDate.new('1985').to_hash
60
+ => {:day=>"00", :month=>"00", :year=>"1985"}
61
+
62
+ >> PseudoDate.new('Jun 25, 1985').to_hash
63
+ => {:day=>"25", :month=>"06", :year=>"1985"}
64
+
65
+ ## Patches, Bugfixes, Additions...
66
+
67
+ Feel free to fork and add stuff as you see fit. This was written quickly to solve some problems so it could certainly use some more structure and organization.
68
+
69
+ If you add something or fix something, make sure you run the very basic tests and add any new ones as required by your changes.
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'echoe'
4
+
5
# Gem packaging settings handed to Echoe for pseudo_date 0.1.0.
Echoe.new('pseudo_date', '0.1.0') do |gem|
  gem.author         = 'Patrick Tulskie'
  gem.email          = 'PatrickTulskie@gmail.com'
  gem.url            = 'http://github.com/PatrickTulskie/pseudo_date'
  gem.description    = 'Date parser and container for partial or incomplete dates.'
  gem.ignore_pattern = ['tmp/*', 'script/*', 'lib/main.rb']
end

# Load every extra rake file found under tasks/, in sorted (alphabetical) order.
extra_tasks = Dir["#{File.dirname(__FILE__)}/tasks/*.rake"]
extra_tasks.sort.each do |task_file|
  load task_file
end
@@ -0,0 +1,7 @@
1
class Object

  # Reports whether +n+ can be read as a whole number.
  #
  # Strings are parsed strictly as base 10, so zero-padded values such as
  # "08" or "09" (typical for months and days) count as numeric instead of
  # being rejected as malformed octal literals. As a consequence, prefixed
  # forms such as "0x1F" are no longer treated as numeric. Non-string values
  # are passed to Kernel#Integer unchanged.
  #
  # n - the value to test (String, Numeric, nil, ...).
  #
  # Returns the Integer value of +n+ when it is numeric (always truthy in
  # Ruby, even when it is 0), or false for nil and for anything that cannot
  # be converted.
  def is_numeric?(n)
    return false if n.nil?
    n.is_a?(String) ? Integer(n, 10) : Integer(n)
  rescue StandardError
    # Best-effort predicate: any conversion failure means "not numeric".
    false
  end

end
@@ -0,0 +1,11 @@
1
class String

  # Parses this string into a date hash by delegating to Parser.parse.
  #
  # The parser receives a copy, so any in-place cleanup it performs cannot
  # modify the receiver or fail because the receiver is frozen.
  #
  # Returns whatever Parser.parse returns for this string's content.
  def to_date_hash
    Parser.parse(dup)
  end

  # Builds a PseudoDate from this string.
  #
  # Returns a new PseudoDate.
  def to_pseudo_date
    PseudoDate.new(self)
  end

end
@@ -0,0 +1,90 @@
1
+ require 'date'
2
# Turns loosely formatted (possibly partial or OCR-damaged) date text into a
# hash of zero-padded :year, :month and :day strings.
class Parser

  # Parses a date string into its components.
  #
  # input - the raw date text. It is converted with to_s and cleaned on a
  #         copy (surrounding whitespace and '~' characters removed), so the
  #         caller's object is never modified and frozen strings are fine.
  #
  # Returns a Hash with :year, :month and :day String values. Parts that
  # could not be determined are '0000' (year) or '00' (month/day).
  def self.parse(input)
    text = input.to_s.strip.delete('~')

    # Only complete three-part dates are handed to Date.parse; everything
    # else (and anything Date.parse rejects) goes through parse_string.
    date = parse_full_date(text)
    year, month, day = date ? [date.year, date.month, date.day] : parse_string(text)

    date_hash = {
      :year  => normalize_part(year, '0000'),
      :month => normalize_part(month, '00'),
      :day   => normalize_part(day, '00')
    }

    # Two character years
    if date_hash[:year].length == 2
      date_hash[:year] = expand_two_digit_year(date_hash[:year])
    end

    # Attempt to correct some known OCR issues: a year containing "00" gets
    # its first character replaced by "2" (e.g. "1005" becomes "2005").
    # NOTE(review): this also rewrites years such as "1900" to "2900";
    # behavior kept as-is, confirm whether that is intended.
    if date_hash[:year].include?('00') && date_hash[:year] != '0000'
      date_hash[:year] = "2#{date_hash[:year].slice(1..3)}"
    end

    date_hash
  end

  # Internal helper for parse: applies the hand-written format rules.
  # It stays publicly callable because the original bare `private` keyword
  # never applied to `def self.` methods.
  #
  # input - an already cleaned date String.
  #
  # Returns [year, month, day]. Unrecognised parts keep their defaults
  # ("0000"/"00"); year is nil when a dash-separated input has no
  # four-character part.
  def self.parse_string(input)
    day, month, year = "00", "00", "0000"
    if input.include?('/') # 02/25/2008
      parts = input.split('/')
      if parts.length == 3
        month, day, year = parts
      elsif parts.length == 2
        month, year = parts
      end
    elsif input =~ /\A\d{8}\z/ # 20080225
      year, month, day = input.slice(0..3), input.slice(4..5), input.slice(6..7)
    elsif input.include?('-') # 1985-09-25 or 02-25-2008
      parts = input.split('-')
      year = parts.find { |part| part.length == 4 }
      if year && parts.length == 3
        # With the year first the month is taken from the last part,
        # otherwise from the first; the middle part is the day.
        month = parts.first == year ? parts.last : parts.first
        day = parts[1]
        # A "month" above 12 that is also larger than the day means the
        # two were the other way round.
        month, day = day, month if month.to_i > 12 && month.to_i > day.to_i
      end
    elsif input.length == 4 # 2004
      year = input.to_s if input.slice(0..1) == '19' || input.slice(0..1) == '20'
    elsif input.length == 2 # 85
      year = expand_two_digit_year(input)
    elsif input =~ /\w/ # Jun 23, 2004
      begin
        parsed = Date.parse(input)
        year, month, day = parsed.year.to_s, parsed.month.to_s, parsed.day.to_s
      rescue StandardError
        # Unparseable free text: keep the "unknown" defaults.
      end
    end
    [year, month, day]
  end

  # Returns a Date when +text+ has at least three '/'- or '-'-separated
  # parts and Date.parse accepts it, otherwise nil.
  def self.parse_full_date(text)
    return nil if text.split(/\/|-/).length < 3
    Date.parse(text)
  rescue StandardError
    nil
  end

  # Returns +missing+ for nil, otherwise the stripped string form of
  # +value+, prefixed with "0" when it is a single character.
  def self.normalize_part(value, missing)
    return missing if value.nil?
    part = value.to_s.strip
    part.length == 1 ? "0#{part}" : part
  end

  # Expands a two-digit year: values above the current two-digit year are
  # placed in the 1900s, everything else in the 2000s.
  def self.expand_two_digit_year(digits)
    digits.to_i > Date.today.year % 100 ? "19#{digits}" : "20#{digits}"
  end

  private_class_method :parse_full_date, :normalize_part, :expand_two_digit_year

end
@@ -0,0 +1,75 @@
1
# A date whose day and/or month may be unknown. Year, month and day are kept
# as zero-padded strings; '0000' / '00' stand for "unknown".
class PseudoDate
  attr_accessor :year, :day, :month, :date_hash

  # input - either a Hash-like object with :year/:month/:day entries (String
  #         or Symbol keys; hash subclasses such as a Mash are flattened
  #         with to_hash), or anything whose to_s is a date string that
  #         String#to_date_hash can parse.
  def initialize(input)
    @date_hash = if input.is_a?(Hash)
      # Keys are symbolized locally so plain Ruby (without ActiveSupport's
      # Hash#symbolize_keys) can handle hash input too.
      symbolize_keys(input.to_hash)
    else
      # '19000000' is mapped to the all-zero (unknown) date.
      input = '00000000' if input.to_s.strip == '19000000'
      input.to_s.strip.to_date_hash
    end

    if @date_hash
      # A year containing "1900" is also treated as unknown.
      @year  = @date_hash[:year].to_s.match('1900') ? '0000' : @date_hash[:year].to_s.strip
      @month = @date_hash[:month].to_s.strip
      @day   = @date_hash[:day].to_s.strip
    else
      @year = @month = @day = nil
    end

    # A three-character year containing '~' (e.g. "85~" or "~85") is a
    # two-digit year: drop the tilde and pick the century.
    if @year && @year.match('~') && @year.length == 3
      @year = expand_two_digit_year(@year.delete('~'))
    end

    correct_digits
    @year = @year.to_s.delete('~')
  end

  # Describes how much of the date is known.
  #
  # Returns 'exact' (a real calendar date), 'year_month', 'year' or
  # 'invalid'.
  def precision
    correct_digits
    if @year.nil? || (@year.to_s == '0000' && @month.to_s == '00') || @year.to_s == '8888'
      'invalid'
    elsif to_date
      'exact'
    elsif @month != '00' && @day == '00'
      'year_month'
    elsif @month == '00' && @day == '00'
      'year'
    else
      'invalid'
    end
  end

  # Returns true when a non-empty date hash was obtained from the input.
  def valid?
    !(@date_hash.nil? || @date_hash.empty?)
  end

  # Returns a Date built from "year-month-day", or nil when the object is
  # not valid or those parts do not form a date Date.parse accepts.
  def to_date
    return nil unless valid?
    Date.parse("#{@year}-#{@month}-#{@day}")
  rescue StandardError
    # Deliberately best-effort: partial dates simply have no Date form.
    nil
  end

  # Returns "mm/dd/yyyy", "mm/yyyy" or "yyyy" depending on precision, and
  # an empty string for invalid dates.
  def to_s
    return '' unless valid?

    case precision
    when 'exact'      then "#{month}/#{day}/#{year}"
    when 'year_month' then "#{month}/#{year}"
    when 'year'       then year
    else ''
    end
  end

  # Returns the date hash obtained from the input (see initialize).
  def to_hash
    @date_hash
  end

  private

  # Replaces blank components with their "unknown" placeholders.
  def correct_digits
    @year = '0000' if @year.to_s.strip.length == 0
    @month = '00' if @month.to_s.strip.length == 0
    @day = '00' if @day.to_s.strip.length == 0
  end

  # Returns a copy of +hash+ whose keys are symbols wherever the key
  # supports to_sym; other keys are kept unchanged.
  def symbolize_keys(hash)
    hash.inject({}) do |result, (key, value)|
      result[key.respond_to?(:to_sym) ? key.to_sym : key] = value
      result
    end
  end

  # Expands a two-digit year: values above the current two-digit year are
  # placed in the 1900s, everything else in the 2000s.
  def expand_two_digit_year(digits)
    digits.to_i > Date.today.year % 100 ? "19#{digits}" : "20#{digits}"
  end

end
@@ -0,0 +1,4 @@
1
+ require 'pseudo_date/pseudo_date'
2
+ require 'pseudo_date/parser'
3
+ require 'core_extensions/string'
4
+ require 'core_extensions/object'
@@ -0,0 +1,31 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for pseudo_date 0.1.0.
#
# Setters that are not available in every RubyGems release are guarded with
# respond_to?, so the file loads on old and new RubyGems alike. The former
# empty version check (which referenced Gem::RubyGemsVersion) was dropped:
# it declared nothing.
Gem::Specification.new do |s|
  s.name = %q{pseudo_date}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
  s.authors = ["Patrick Tulskie"]
  s.date = %q{2010-01-08}
  s.description = %q{Date parser and container for partial or incomplete dates.}
  s.email = %q{PatrickTulskie@gmail.com}
  s.extra_rdoc_files = ["README.mdown", "lib/core_extensions/object.rb", "lib/core_extensions/string.rb", "lib/pseudo_date.rb", "lib/pseudo_date/parser.rb", "lib/pseudo_date/pseudo_date.rb"]
  s.files = ["Manifest", "README.mdown", "Rakefile", "lib/core_extensions/object.rb", "lib/core_extensions/string.rb", "lib/pseudo_date.rb", "lib/pseudo_date/parser.rb", "lib/pseudo_date/pseudo_date.rb", "test/test_helper.rb", "test/test_parser.rb", "pseudo_date.gemspec"]
  s.homepage = %q{http://github.com/PatrickTulskie/pseudo_date}
  s.rdoc_options = ["--line-numbers", "--inline-source", "--title", "Pseudo_date", "--main", "README.mdown"]
  s.require_paths = ["lib"]
  s.rubyforge_project = %q{pseudo_date} if s.respond_to? :rubyforge_project=
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Date parser and container for partial or incomplete dates.}
  s.test_files = ["test/test_helper.rb", "test/test_parser.rb"]

  # The gem has no dependencies, so only the specification version is set.
  s.specification_version = 3 if s.respond_to? :specification_version=
end
@@ -0,0 +1,4 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require File.dirname(__FILE__) + '/../lib/pseudo_date'
@@ -0,0 +1,137 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Checks that PseudoDate reports the right precision and components for each
# supported spelling of the same date (25 June 1985).
#
# assert_equal is always called as (expected, actual) so that failure
# messages label the two values correctly.
class TestParser < Test::Unit::TestCase

  context "Date formats" do

    setup do
      @day = '25'
      @month = '06'
      @year = '1985'
      @string_date = 'Jun 25, 1985'
    end

    # 19850625
    context "yearmonthday" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new("#{@year}#{@month}#{@day}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@year}#{@month}#{@day}")
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 1985-25-06
    context "year-day-month" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new("#{@year}-#{@day}-#{@month}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@year}-#{@day}-#{@month}")
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 06-25-1985
    context "month-day-year" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new("#{@month}-#{@day}-#{@year}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@month}-#{@day}-#{@year}")
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 25-06-1985
    context "day-month-year" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new("#{@day}-#{@month}-#{@year}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@day}-#{@month}-#{@year}")
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 06/25/1985
    context "month/day/year" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new("#{@month}/#{@day}/#{@year}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@month}/#{@day}/#{@year}")
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 06/1985
    context "month/year" do
      should 'be partial precision' do
        assert_equal 'year_month', PseudoDate.new("#{@month}/#{@year}").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("#{@month}/#{@year}")
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

    # 85
    context "two digit year" do
      should 'be year precision' do
        assert_equal 'year', PseudoDate.new("85").precision
      end

      should 'match original input' do
        pd = PseudoDate.new("85")
        assert_equal @year, pd.year
      end
    end

    # 1985
    context "four digit year" do
      should 'be year precision' do
        assert_equal 'year', PseudoDate.new(@year).precision
      end

      should 'match original input' do
        pd = PseudoDate.new(@year)
        assert_equal @year, pd.year
      end
    end

    # Jun 25, 1985
    context "string date" do
      should 'be exact precision' do
        assert_equal 'exact', PseudoDate.new(@string_date).precision
      end

      should 'match original input' do
        pd = PseudoDate.new(@string_date)
        assert_equal @day, pd.day
        assert_equal @month, pd.month
        assert_equal @year, pd.year
      end
    end

  end

end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pseudo_date
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Patrick Tulskie
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-01-08 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Date parser and container for partial or incomplete dates.
17
+ email: PatrickTulskie@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.mdown
24
+ - lib/core_extensions/object.rb
25
+ - lib/core_extensions/string.rb
26
+ - lib/pseudo_date.rb
27
+ - lib/pseudo_date/parser.rb
28
+ - lib/pseudo_date/pseudo_date.rb
29
+ files:
30
+ - Manifest
31
+ - README.mdown
32
+ - Rakefile
33
+ - lib/core_extensions/object.rb
34
+ - lib/core_extensions/string.rb
35
+ - lib/pseudo_date.rb
36
+ - lib/pseudo_date/parser.rb
37
+ - lib/pseudo_date/pseudo_date.rb
38
+ - test/test_helper.rb
39
+ - test/test_parser.rb
40
+ - pseudo_date.gemspec
41
+ has_rdoc: true
42
+ homepage: http://github.com/PatrickTulskie/pseudo_date
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --line-numbers
48
+ - --inline-source
49
+ - --title
50
+ - Pseudo_date
51
+ - --main
52
+ - README.mdown
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - ">="
64
+ - !ruby/object:Gem::Version
65
+ version: "1.2"
66
+ version:
67
+ requirements: []
68
+
69
+ rubyforge_project: pseudo_date
70
+ rubygems_version: 1.3.5
71
+ signing_key:
72
+ specification_version: 3
73
+ summary: Date parser and container for partial or incomplete dates.
74
+ test_files:
75
+ - test/test_helper.rb
76
+ - test/test_parser.rb