pseudo_date 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile.lock +20 -10
- data/README.mdown +5 -3
- data/lib/pseudo_date/parser.rb +21 -1
- data/lib/pseudo_date/pseudo_date.rb +6 -6
- data/lib/pseudo_date/version.rb +1 -1
- data/pseudo_date.gemspec +3 -1
- data/spec/parser_spec.rb +14 -0
- data/spec/pseudo_date_spec.rb +4 -4
- data/spec/spec_helper.rb +7 -0
- metadata +26 -15
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 62d190d71ddd979da81fbea52eb5e2df961dcb4c
|
4
|
+
data.tar.gz: ec4fe5b5c8739ad59631bce20bb5bfd320a57b2e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7fb8dd8c6bc227e74d89190d8870071235823db1b6810acc8596eed61b5ae4745e7960ad57b6f76fec45f76375858d0f8385c2361d8a78de9cde91c8a7cdf5a3
|
7
|
+
data.tar.gz: 2298c31062c1295cca27a575e977a82b7e1b881eddf2f0dfb2858dffbbcd6d09eea2cb358b3f48a3348ec140336d70236702a3f9fda96ed88fbbaa30c6c1119e
|
data/Gemfile.lock
CHANGED
@@ -1,24 +1,34 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pseudo_date (0.1.
|
4
|
+
pseudo_date (0.1.6)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
rspec
|
16
|
-
|
17
|
-
|
9
|
+
byebug (3.1.2)
|
10
|
+
columnize (~> 0.8)
|
11
|
+
debugger-linecache (~> 1.2)
|
12
|
+
columnize (0.8.9)
|
13
|
+
debugger-linecache (1.2.0)
|
14
|
+
diff-lcs (1.2.5)
|
15
|
+
rspec (3.0.0)
|
16
|
+
rspec-core (~> 3.0.0)
|
17
|
+
rspec-expectations (~> 3.0.0)
|
18
|
+
rspec-mocks (~> 3.0.0)
|
19
|
+
rspec-core (3.0.1)
|
20
|
+
rspec-support (~> 3.0.0)
|
21
|
+
rspec-expectations (3.0.1)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.0.0)
|
24
|
+
rspec-mocks (3.0.1)
|
25
|
+
rspec-support (~> 3.0.0)
|
26
|
+
rspec-support (3.0.0)
|
18
27
|
|
19
28
|
PLATFORMS
|
20
29
|
ruby
|
21
30
|
|
22
31
|
DEPENDENCIES
|
32
|
+
byebug
|
23
33
|
pseudo_date!
|
24
34
|
rspec
|
data/README.mdown
CHANGED
@@ -6,15 +6,17 @@ It's a date but not really. A PseudoDate object has a day, month, and year but
|
|
6
6
|
|
7
7
|
## What Is This For?
|
8
8
|
|
9
|
-
PseudoDate was created to parse odd dates in odd formats and attempt to extract as much information from them as possible. It's especially handy when you're trying to convert a date string that has come from an OCR'd source.
|
9
|
+
PseudoDate was created to parse odd dates in odd formats and attempt to extract as much information from them as possible. It's especially handy when you're trying to convert a date string that has come from an OCR'd source. It was primarily written to parse dates in American public record data in an effort to have a common date format when doing record matching.
|
10
10
|
|
11
11
|
## Assumptions
|
12
12
|
|
13
|
-
As with all parsing, one needs to make assumptions. The main assumption made here is that all dates will be in the past.
|
13
|
+
As with all parsing, one needs to make assumptions. The main assumption made here is that all dates will be in the past. Dates that appear to be far-future are generally labeled as "invalid."
|
14
|
+
|
15
|
+
Since this gem was built for trying to wrangle OCR'd dates we have to make some assumptions when it comes to date formats. As of `0.2.0` the gem now assumes that dates separated by a "/" are American dates and those that are separated by a "-" are European dates. Future versions may allow some configuration for this depending on your usage but in my experience there has not been a need for that. This mimics the behavior from Ruby 1.8.7 which was changed in Ruby 1.9+.
|
14
16
|
|
15
17
|
## Other Notes
|
16
18
|
|
17
|
-
PseudoDate stores date attributes in strings instead of integers to avoid losing the preceding '0' on various attributes. This was a decision made when first creating the class because of the way things were being output in the project it was created for.
|
19
|
+
PseudoDate stores date attributes in strings instead of integers to avoid losing the preceding '0' on various attributes. This was a decision made when first creating the class because of the way things were being output in the project it was created for. There has been some discussion about switching these to integers in order to help save on memory but no decision has been made here either way.
|
18
20
|
|
19
21
|
## Compatibility
|
20
22
|
|
data/lib/pseudo_date/parser.rb
CHANGED
@@ -1,12 +1,16 @@
|
|
1
1
|
require 'date'
|
2
2
|
class Parser
|
3
3
|
|
4
|
+
AMERICAN_DATE_FORMAT = '%m/%d/%Y'
|
5
|
+
EUROPEAN_DATE_FORMAT = '%Y-%m-%d'
|
6
|
+
|
4
7
|
def self.parse(input)
|
5
8
|
date_hash = {}
|
6
9
|
# Minor Pre Cleanup
|
7
10
|
input.strip!; input.gsub!('~','')
|
8
11
|
|
9
|
-
date =
|
12
|
+
date = parse_with_poro_date(input)
|
13
|
+
|
10
14
|
if date
|
11
15
|
date_hash = { :year => date.year.to_s, :month => date.month.to_s, :day => date.day.to_s }
|
12
16
|
else
|
@@ -45,6 +49,22 @@ class Parser
|
|
45
49
|
|
46
50
|
private
|
47
51
|
|
52
|
+
def self.parse_with_poro_date(string)
|
53
|
+
# If our date has 3 parts then let's try to parse it with Date::strptime
|
54
|
+
if string.split(/\/|-/).length >= 3
|
55
|
+
case string
|
56
|
+
when /-/ # Europeans generally use hyphens to separate date pieces
|
57
|
+
Date.strptime(string, EUROPEAN_DATE_FORMAT)
|
58
|
+
when /\// # Americans usually use a / to separate date pieces
|
59
|
+
Date.strptime(string, AMERICAN_DATE_FORMAT)
|
60
|
+
end
|
61
|
+
else
|
62
|
+
nil # Not enough parts so just return nil
|
63
|
+
end
|
64
|
+
rescue
|
65
|
+
nil # We don't actually care why Date is complaining. We'll fall back to slower parsing later.
|
66
|
+
end
|
67
|
+
|
48
68
|
def self.parse_string(input)
|
49
69
|
day, month, year = "00", "00", "0000"
|
50
70
|
if input.match('/') # 02/25/2008
|
data/lib/pseudo_date/pseudo_date.rb
CHANGED
@@ -86,22 +86,22 @@ class PseudoDate
|
|
86
86
|
when 'exact'
|
87
87
|
self.to_date < other.to_date
|
88
88
|
when 'year_month'
|
89
|
-
self.year == other.year ? (self.month < other.month) : (self.year < other.year)
|
89
|
+
self.year == other.year ? (self.month.to_i < other.month.to_i) : (self.year.to_i < other.year.to_i)
|
90
90
|
when 'year'
|
91
|
-
self.year < other.year
|
91
|
+
self.year.to_i < other.year.to_i
|
92
92
|
when 'mixed'
|
93
93
|
if self.precision == 'invalid'
|
94
94
|
true
|
95
95
|
elsif other.precision == 'invalid'
|
96
96
|
false
|
97
|
-
elsif self.year == other.year
|
98
|
-
if self.month == other.month
|
97
|
+
elsif self.year.to_i == other.year.to_i
|
98
|
+
if self.month.to_i == other.month.to_i
|
99
99
|
self.day.to_i < other.day.to_i
|
100
100
|
else
|
101
|
-
self.month < other.month
|
101
|
+
self.month.to_i < other.month.to_i
|
102
102
|
end
|
103
103
|
else
|
104
|
-
self.year < other.year
|
104
|
+
self.year.to_i < other.year.to_i
|
105
105
|
end
|
106
106
|
else
|
107
107
|
false
|
data/lib/pseudo_date/version.rb
CHANGED
data/pseudo_date.gemspec
CHANGED
@@ -16,6 +16,8 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
|
+
gem.license = 'MIT'
|
19
20
|
|
20
|
-
gem.add_development_dependency("rspec")
|
21
|
+
gem.add_development_dependency("rspec", "~> 0")
|
22
|
+
gem.add_development_dependency("byebug", "~> 0")
|
21
23
|
end
|
data/spec/parser_spec.rb
CHANGED
@@ -95,6 +95,20 @@ describe "PseudoDate Parsing" do
|
|
95
95
|
end
|
96
96
|
end
|
97
97
|
|
98
|
+
# 06/7/1985
|
99
|
+
context "month/day/year" do
|
100
|
+
it 'should be exact precision' do
|
101
|
+
PseudoDate.new("#{@month}/7/#{@year}").precision.should == 'exact'
|
102
|
+
end
|
103
|
+
|
104
|
+
it 'should match original input' do
|
105
|
+
pd = PseudoDate.new("#{@month}/7/#{@year}")
|
106
|
+
pd.day.should == "07"
|
107
|
+
pd.month.should == @month
|
108
|
+
pd.year.should == @year
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
98
112
|
# 06/1985
|
99
113
|
context "month/year" do
|
100
114
|
it 'should be partial precision' do
|
data/spec/pseudo_date_spec.rb
CHANGED
@@ -22,7 +22,7 @@ describe "PseudoDate" do
|
|
22
22
|
it "should demonstrate later dates as greater than older dates" do
|
23
23
|
old_date = PseudoDate.new(:year => @year, :month => @month)
|
24
24
|
new_date = PseudoDate.new(:year => 1996, :month => @month)
|
25
|
-
(old_date < new_date).should
|
25
|
+
(old_date < new_date).should == true
|
26
26
|
end
|
27
27
|
it "should respond properly with the spaceship operator" do
|
28
28
|
old_date = PseudoDate.new(:year => @year, :month => @month)
|
@@ -37,7 +37,7 @@ describe "PseudoDate" do
|
|
37
37
|
it "should demonstrate later dates as greater than older dates" do
|
38
38
|
old_date = PseudoDate.new(:year => @year, :month => @month, :day => @day)
|
39
39
|
new_date = PseudoDate.new(:year => 1996, :month => @month, :day => @day)
|
40
|
-
(old_date < new_date).should
|
40
|
+
(old_date < new_date).should == true
|
41
41
|
end
|
42
42
|
it "should respond properly with the spaceship operator" do
|
43
43
|
old_date = PseudoDate.new(:year => @year, :month => @month, :day => @day)
|
@@ -52,12 +52,12 @@ describe "PseudoDate" do
|
|
52
52
|
it "should demonstrate later dates as greater than older dates" do
|
53
53
|
old_date = PseudoDate.new(:year => @year, :month => @month)
|
54
54
|
new_date = PseudoDate.new(:year => 1996, :month => @month, :day => @day)
|
55
|
-
(old_date < new_date).should
|
55
|
+
(old_date < new_date).should == true
|
56
56
|
end
|
57
57
|
it "should demonstrate invalid dates as less than complete dates" do
|
58
58
|
complete = PseudoDate.new(:year => @year, :month => @month)
|
59
59
|
invalid = PseudoDate.new("")
|
60
|
-
(complete > invalid).should
|
60
|
+
(complete > invalid).should == true
|
61
61
|
end
|
62
62
|
it "should respond properly with the spaceship operator" do
|
63
63
|
old_date = PseudoDate.new(:year => @year, :month => @month)
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,30 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pseudo_date
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.2.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Patrick Tulskie
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-
|
11
|
+
date: 2014-06-19 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - "~>"
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: byebug
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
28
39
|
- !ruby/object:Gem::Version
|
29
40
|
version: '0'
|
30
41
|
description: Date parser and container for partial or incomplete dates.
|
@@ -34,7 +45,7 @@ executables: []
|
|
34
45
|
extensions: []
|
35
46
|
extra_rdoc_files: []
|
36
47
|
files:
|
37
|
-
- .gitignore
|
48
|
+
- ".gitignore"
|
38
49
|
- Gemfile
|
39
50
|
- Gemfile.lock
|
40
51
|
- Manifest
|
@@ -52,28 +63,28 @@ files:
|
|
52
63
|
- spec/pseudo_date_spec.rb
|
53
64
|
- spec/spec_helper.rb
|
54
65
|
homepage: http://github.com/PatrickTulskie/pseudo_date
|
55
|
-
licenses:
|
66
|
+
licenses:
|
67
|
+
- MIT
|
68
|
+
metadata: {}
|
56
69
|
post_install_message:
|
57
70
|
rdoc_options: []
|
58
71
|
require_paths:
|
59
72
|
- lib
|
60
73
|
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
-
none: false
|
62
74
|
requirements:
|
63
|
-
- -
|
75
|
+
- - ">="
|
64
76
|
- !ruby/object:Gem::Version
|
65
77
|
version: '0'
|
66
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
-
none: false
|
68
79
|
requirements:
|
69
|
-
- -
|
80
|
+
- - ">="
|
70
81
|
- !ruby/object:Gem::Version
|
71
82
|
version: '0'
|
72
83
|
requirements: []
|
73
84
|
rubyforge_project:
|
74
|
-
rubygems_version:
|
85
|
+
rubygems_version: 2.2.2
|
75
86
|
signing_key:
|
76
|
-
specification_version:
|
87
|
+
specification_version: 4
|
77
88
|
summary: Date parser and container for partial or incomplete dates.
|
78
89
|
test_files:
|
79
90
|
- spec/parser_spec.rb
|