cultural_dates 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +21 -0
- data/README.md +86 -0
- data/Rakefile +10 -0
- data/cultural_dates.gemspec +40 -0
- data/lib/cultural_dates.rb +11 -0
- data/lib/cultural_dates/cultural_date.rb +156 -0
- data/lib/cultural_dates/cultural_interval.rb +164 -0
- data/lib/cultural_dates/date_parser.rb +44 -0
- data/lib/cultural_dates/date_string_parser.rb +50 -0
- data/lib/cultural_dates/date_transform.rb +177 -0
- data/lib/cultural_dates/date_word_helpers.rb +34 -0
- data/lib/cultural_dates/version.rb +3 -0
- metadata +172 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: fed7386233bac77b2728f75eb1368b8a2c6ee29c
|
4
|
+
data.tar.gz: 4001fe2cedd271f848de78fc629677d8703ff79b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9706a8615ea4c54a54377ef1f31016cb529ba633423e8a6ba8cbe3aeac8d5fced68c4fff1a88eaa4930a47477a324e48f87f890b5cf3c5198c49c836f2d6ef88
|
7
|
+
data.tar.gz: ada2411bdc35b4efe20a9bf2a1fc2070074e4b745cb86e8da5d84a2f59375e20c4ea0db42ea1a070ae48c5240c1f9627495961f32dd657533de34378abfa6bde
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2017 Carnegie Museum of Art
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
# Cultural Dates
|
2
|
+
|
3
|
+
Have you ever wished that there was an easy way for computers to make sense of the complications around the vague, imprecise dates cultural historians use? Perhaps you've gone looking for concepts like [Allen Interval algebra](https://en.wikipedia.org/wiki/Allen%27s_interval_algebra), [XML Schema dates](https://www.w3.org/TR/xmlschema-2/) or [CIDOC-CRM](http://www.cidoc-crm.org), but you've backed away from the edge of that abyss, shaking your head and hoping your sanity remains intact.
|
4
|
+
|
5
|
+
`cultural_dates` is a library written from the *other side*, providing sensible defaults and formats for humans and comprehensive schemas and abstractions for computers. At its core, it's a parser that takes human-readable expressions like "Sometime after the 1880s until at least October 1920" and converts them into concrete dates in various forms, while still trying to maintain the underlying precision and expression. It also goes the other way, taking the underlying data model and humanizing it into strings that have meaning for humans.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'cultural_dates'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install cultural_dates
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require "cultural_dates"
|
27
|
+
|
28
|
+
string = "January 2001"
|
29
|
+
date = CulturalDates::CulturalDate.new(string)
|
30
|
+
|
31
|
+
puts "String: #{string}"
|
32
|
+
puts "as EDTF: #{date.to_edtf}" # <-- 2001-01-uu
|
33
|
+
puts "as a Ruby Date: #{date.value}" # <-- 2001-01-01
|
34
|
+
puts "as a String: #{date.to_s}" # <-- January 2001
|
35
|
+
puts "Earliest Bound: #{date.earliest}" # <-- 2001-01-01
|
36
|
+
puts "Latest Bound: #{date.latest}" # <-- 2001-01-31
|
37
|
+
|
38
|
+
puts "\n-------------------------------------------------------------------\n\n"
|
39
|
+
|
40
|
+
interval_string ="sometime between 1650 and January 2001 until October 15, 2006"
|
41
|
+
interval = CulturalDates::CulturalInterval.new(interval_string)
|
42
|
+
|
43
|
+
puts "Interval to Parse:"
|
44
|
+
puts " #{interval_string}"
|
45
|
+
|
46
|
+
puts "\n Four-point dates as strings:"
|
47
|
+
puts " Begin of the Begin: #{interval.botb}" # <-- 1650
|
48
|
+
puts " End of the Begin: #{interval.eotb}" # <-- January 2001
|
49
|
+
puts " Begin of the End: #{interval.bote}" # <-- October 15, 2006
|
50
|
+
puts " End of the End: #{interval.eote}" # <-- October 15, 2006
|
51
|
+
|
52
|
+
puts "\n and as EDTF:"
|
53
|
+
puts " Begin of the Begin: #{interval.botb.to_edtf}" # <-- 1650-uu-uu
|
54
|
+
puts " End of the Begin: #{interval.eotb.to_edtf}" # <-- 2001-01-uu
|
55
|
+
puts " Begin of the End: #{interval.bote.to_edtf}" # <-- 2006-10-15
|
56
|
+
puts " End of the End: #{interval.eote.to_edtf}" # <-- 2006-10-15
|
57
|
+
|
58
|
+
puts "\nas EDTF Intervals"
|
59
|
+
puts " Beginning: #{interval.begin_interval}" # <-- 1650-01-01/2001-01-31
|
60
|
+
puts " Ending: #{interval.end_interval}" # <-- 2006-10-15/2006-10-15
|
61
|
+
puts " Possible: #{interval.possible_interval}" # <-- 1650-01-01/2006-10-15
|
62
|
+
puts " Definite: #{interval.definite_interval}" # <-- 2001-01-31/2006-10-15
|
63
|
+
|
64
|
+
puts "\nas Bounds"
|
65
|
+
puts " Earliest Bound: #{interval.earliest}" # <-- 1650-01-01
|
66
|
+
puts " Latest Bound: #{interval.latest}" # <-- 2006-10-15
|
67
|
+
puts " Earliest Def. Bound: #{interval.earliest_definite}" # <-- 2001-01-31
|
68
|
+
puts " Latest Definite Bound: #{interval.latest_definite}" # <-- 2006-10-15
|
69
|
+
|
70
|
+
puts "\nBack to String:"
|
71
|
+
puts " #{interval.to_s}" # <-- sometime between 1650 and January 2001 until October 15, 2006
|
72
|
+
```
|
73
|
+
|
74
|
+
## Implementation Details
|
75
|
+
|
76
|
+
Under the hood, it wraps the wonderful [edtf-ruby](https://github.com/inukshuk/edtf-ruby) gem and uses a useful subset of EDTF as a data model. It uses [parslet](http://kschiess.github.io/parslet/) to handle the string parsing, and the [ruby-rdf](https://github.com/ruby-rdf) suite of tools to generate RDF.
|
77
|
+
|
78
|
+
|
79
|
+
## Contributing
|
80
|
+
|
81
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/arttracks/cultural_dates.
|
82
|
+
|
83
|
+
## License
|
84
|
+
|
85
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
86
|
+
|
data/Rakefile
ADDED
data/cultural_dates.gemspec
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'cultural_dates/version'
|
5
|
+
|
6
|
+
# Gem specification for cultural_dates: identity, packaged files and
# dependency constraints.
Gem::Specification.new do |spec|
  spec.name     = "cultural_dates"
  spec.version  = CulturalDates::VERSION
  spec.authors  = ["David Newbury"]
  spec.email    = ["david.newbury@gmail.com"]
  spec.summary  = "Parse and validate dates and intervals for cultural heritage applications."
  spec.homepage = "http://www.museumprovenance.org"
  spec.license  = "MIT"

  # Restrict `gem push` to a single host (here: rubygems.org). The
  # 'allowed_push_host' metadata key needs RubyGems 2.0+, so refuse to build
  # on anything older rather than silently dropping the restriction.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against " \
      "public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except tests/specs/features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_runtime_dependency 'edtf', "~> 3.0.2"
  spec.add_runtime_dependency 'parslet', "~> 1.7"
  spec.add_runtime_dependency 'activesupport', '~> 4.2'
  spec.add_runtime_dependency 'linkeddata'

  # Development-only dependencies.
  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "m", '~> 1.5.0'
end
|
data/lib/cultural_dates.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require "cultural_dates/version"
|
2
|
+
require "cultural_dates/cultural_date"
|
3
|
+
require "cultural_dates/cultural_interval"
|
4
|
+
require "cultural_dates/date_parser"
|
5
|
+
require "cultural_dates/date_string_parser"
|
6
|
+
require "cultural_dates/date_transform"
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
|
10
|
+
# Top-level namespace for the gem. It is intentionally empty here; the
# classes that live in it are defined by the files required above.
module CulturalDates
|
11
|
+
end
|
data/lib/cultural_dates/cultural_date.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
module CulturalDates

  # A single, possibly imprecise or uncertain date such as "January 2001",
  # "the 1880s" or "the 19th century".
  #
  # The parsed result is kept in +value+, which is one of:
  # * an EDTF-extended Date when the input could be parsed,
  # * an EDTF::Unknown when built through CulturalDate.edtf from a string
  #   that Date.edtf could not read,
  # * nil when nothing could be parsed.
  class CulturalDate
    include Comparable

    attr_reader :value

    class << self
      # Convenience constructor; identical to CulturalDate.new(val).
      def parse(val)
        CulturalDate.new(val)
      end

      # Builds a CulturalDate directly from an EDTF string, bypassing the
      # natural-language grammar.
      #
      # @param edtf_date [String, nil] EDTF text
      # @return [CulturalDate] its value is EDTF::Unknown when Date.edtf
      #   yields nothing
      def edtf(edtf_date)
        # Start from an explicitly empty instance (nil skips the parser).
        val = CulturalDate.new(nil)
        date = Date.edtf(edtf_date) || EDTF::Unknown.new
        val.instance_variable_set(:@value, date)
        val
      end
    end

    # Delegates to the underlying EDTF date's +values+.
    # Raises NoMethodError when +value+ is nil.
    def values
      @value.values
    end

    # Orders by the underlying values. +other+ may be another CulturalDate
    # (its +value+ is used) or anything +value+ itself can be compared with.
    def <=>(other)
      comparable = other.is_a?(CulturalDates::CulturalDate) ? other.value : other
      @value <=> comparable
    end

    def inspect
      @value.inspect
    end

    # @param val [String, nil] human-readable date; nil (or an unparsable
    #   string) leaves +value+ nil
    def initialize(val="")
      @value = nil
      return unless val

      parse_result = DateParser.new.parse(val)
      transformed_result = DateTransform.new.apply(parse_result)
      @value = Date.edtf(transformed_result) if transformed_result
    rescue Parslet::ParseFailed
      # Unparsable input is not an error for callers: the date is just unknown.
      @value = nil
    end

    # @return [Boolean] false when +value+ is nil or an EDTF::Unknown
    def known?
      !(@value.nil? || @value.instance_of?(EDTF::Unknown))
    end

    # @return [Boolean] the inverse of #known?
    def unknown?
      !known?
    end

    # Earliest day the date can refer to (e.g. 2001-01-01 for "January 2001").
    #
    # @return [Date, EDTF::Unknown, nil] nil when there is no value; the
    #   EDTF::Unknown itself when the value is unknown
    def earliest
      return nil if @value.nil?
      return @value if @value.instance_of? EDTF::Unknown

      # Re-parsing the plain Date#to_s text gives an independent copy.
      new_d = EDTF.parse(@value.to_s)
      # A negative-year century is stored at its latest year, so its earliest
      # year is 99 years further back.
      if new_d.year < 0 && @value.unspecified.year[2]
        new_d = new_d.advance(:years => -99)
      end
      new_d
    end

    # Latest day the date can refer to (e.g. 2001-01-31 for "January 2001").
    #
    # @return [Date, EDTF::Unknown, nil] nil when there is no value; the
    #   EDTF::Unknown itself when the value is unknown
    def latest
      return nil if @value.nil?
      return @value if @value.instance_of? EDTF::Unknown

      new_d = @value.clone
      if new_d.unspecified.year[2]
        # Century: move to its last year (non-negative years only).
        new_d = new_d.advance(:years => 99) if new_d.year >= 0
        new_d.year_precision!
      elsif new_d.unspecified.year[3]
        # Decade: move to its last year (non-negative years only).
        new_d = new_d.advance(:years => 9) if new_d.year >= 0
        new_d.year_precision!
      elsif new_d.unspecified? :day
        new_d.month_precision!
        new_d.year_precision! if new_d.unspecified? :month
      end
      # NOTE(review): relies on edtf-ruby's #succ stepping by the current
      # precision (next day/month/year); one day before that successor is the
      # last day covered. Confirm against the edtf-ruby documentation.
      new_d = new_d.succ
      new_d.day_precision!
      new_d - 1
    end

    # @return [String, nil] EDTF text of the value, or nil when there is none
    def to_edtf
      return nil if @value.nil?
      @value.edtf
    end

    # Human-readable form, e.g. "January 2001", "the 1880s",
    # "the 19th century"; a trailing "?" marks an uncertain date.
    #
    # @return [String, nil] nil (not "") when the value is not a Date
    def to_s
      date = @value
      return nil unless date.is_a? Date

      str = ""
      if !date.unspecified?(:day)
        # Day precision: "October 15, 2006"
        str = date.strftime("%B %-d, ")
        if date.year >= 0
          year_str = date.year.to_s
          year_str += " CE" if date.year < 1000
        else
          year_str = "#{-date.year} BCE"
        end
        str += year_str

      elsif !date.unspecified?(:month)
        # Month precision: "January 2001"
        str = date.strftime("%B ")
        # The original tested a bare `year` here, which is not defined at
        # this point and raised NameError for every year below 1.
        if date.year >= 1
          year_str = date.year.to_s
          year_str += " CE" if date.year < 1000
        elsif date.year == 0
          year_str = "1 BCE"
        else
          year_str = "#{-date.year} BCE"
        end
        str += year_str

      elsif !date.unspecified?(:year)
        # Year precision: "1650"
        if date.year >= 1
          str = date.year.to_s
          str += " CE" if date.year < 1000
        else
          str = "#{-date.year + 1} BCE"
        end

      elsif !date.unspecified.year[2]
        # Decade precision: "the 1880s"
        str = "the #{date.year}s"

      else
        # Century precision: "the 19th century"
        bce = false
        century = (date.year / 100 + 1)
        if century <= 0
          century = -(century - 2)
          bce = true
        end
        str = "the #{century.ordinalize} century"
        str += " CE" if century >= 1 && century < 10 && !bce
        str += " BCE" if bce
      end

      str += "?" unless date.certain?
      str
    end
  end
end
|
data/lib/cultural_dates/cultural_interval.rb
ADDED
@@ -0,0 +1,164 @@
|
|
1
|
+
module CulturalDates

  # A span of time described by up to four CulturalDates:
  #
  # * botb - begin of the begin
  # * eotb - end of the begin
  # * bote - begin of the end
  # * eote - end of the end
  #
  # Any of the four may be unknown. Built from strings such as
  # "sometime between 1650 and January 2001 until October 15, 2006".
  #
  # NOTE(review): Comparable is included but no <=> is defined in this class,
  # so the comparison operators are not usable as written.
  class CulturalInterval
    include Comparable

    class << self
      # Convenience constructor; identical to CulturalInterval.new(val, debug).
      def parse(val, debug=false)
        CulturalInterval.new(val, debug)
      end
    end

    # @param val [String, nil] interval text; nil, "no date" or unparsable
    #   text produce an interval whose four dates are all unknown
    # @param debug [Boolean] when true, print the parse tree, the transformed
    #   result and (on failure) parslet's error tree to stdout
    def initialize(val="", debug=false)
      # Always a Hash, so the four accessors never index into nil.
      @value = {}
      return unless val

      parser = DateStringParser.new
      reporter = Parslet::ErrorReporter::Deepest.new
      parse_result =
        if debug
          # Prints the failure tree and returns nil instead of raising.
          parser.parse_with_debug(val, reporter: reporter)
        else
          # Raises Parslet::ParseFailed, handled silently below.
          parser.parse(val, reporter: reporter)
        end
      puts "parse_result: #{parse_result.inspect}" if debug

      transformed_result = DateTransform.new.apply(parse_result)
      puts "transformed_result: #{transformed_result.inspect}" if debug

      @value = transformed_result if transformed_result
    rescue Parslet::ParseFailed
      @value = {}
    end

    # @return [CulturalDate] begin of the begin (memoized)
    def botb
      @botb ||= CulturalDate.edtf @value[:botb]
    end

    # @return [CulturalDate] begin of the end (memoized)
    def bote
      @bote ||= CulturalDate.edtf @value[:bote]
    end

    # @return [CulturalDate] end of the begin (memoized)
    def eotb
      @eotb ||= CulturalDate.edtf @value[:eotb]
    end

    # @return [CulturalDate] end of the end (memoized)
    def eote
      @eote ||= CulturalDate.edtf @value[:eote]
    end

    # Earliest possible day of the interval, or nil when botb is unknown.
    def earliest
      return nil if botb.unknown?
      botb.earliest
    end

    # Latest possible day of the interval, or nil when eote is unknown.
    def latest
      return nil if eote.unknown?
      eote.latest
    end

    # First day the interval definitely covers, or nil when neither eotb nor
    # bote is known.
    def earliest_definite
      return nil if eotb.unknown? && bote.unknown?
      return latest_definite if eotb.unknown?
      return eotb.earliest if eotb == bote
      eotb.latest
    end

    # Last day the interval definitely covers, or nil when neither eotb nor
    # bote is known.
    def latest_definite
      return nil if bote.unknown? && eotb.unknown?
      return earliest_definite if bote.unknown?
      return bote.latest if eotb == bote
      bote.earliest
    end

    # @return [EDTF::Interval] earliest possible start .. earliest definite day
    def begin_interval
      from = earliest || :unknown
      to = earliest_definite || :unknown
      EDTF::Interval.new(from, to)
    end
    alias :beginning_interval :begin_interval
    alias :beginning :begin_interval

    # @return [EDTF::Interval] latest definite day .. latest possible end
    def end_interval
      from = latest_definite || :unknown
      to = latest || :unknown
      EDTF::Interval.new(from, to)
    end
    alias :ending_interval :end_interval
    alias :ending :end_interval

    # @return [EDTF::Interval] widest span the interval could cover
    def possible_interval
      from = earliest || :unknown
      to = latest || :unknown
      EDTF::Interval.new(from, to)
    end

    # @return [EDTF::Interval] span the interval certainly covers
    def definite_interval
      from = earliest_definite || :unknown
      to = latest_definite || :unknown
      EDTF::Interval.new(from, to)
    end

    # EDTF text of #possible_interval.
    def to_edtf
      possible_interval.edtf
    end

    # EDTF text of #definite_interval.
    def to_definite_edtf
      definite_interval.edtf
    end

    # Generate a textual representation of the timeframe of the period.
    # @return [String] empty when none of the four dates is known
    def to_s
      same_inner = eotb == bote

      # Handle special "throughout" case
      if (eotb.known? && bote.known?) && !(botb.known? || eote.known?) && same_inner
        return "throughout #{eotb}"
      end

      # Handle special "throughout, until" case
      if (eotb.known? && bote.known? && eote.known?) && !botb.known? && same_inner
        return "throughout #{eotb} until no later than #{eote}"
      end

      # Handle special "on" case: all four points are the same single day
      if (botb.known? && eotb.known? && bote.known? && eote.known?) &&
         (botb == eotb && bote == eote && botb == eote) &&
         botb.earliest == botb.latest
        return "on #{botb}"
      end

      first_string = ""
      if botb.known? && eotb.known?
        first_string = botb == eotb ? botb : "sometime between #{botb} and #{eotb}"
      elsif botb.known?
        first_string = "after #{botb}"
      elsif eotb.known?
        first_string = "by #{eotb}"
      end

      second_string = nil
      if bote.known? && eote.known?
        second_string = bote == eote ? bote : "sometime between #{bote} and #{eote}"
      elsif bote.known?
        second_string = "at least #{bote}"
      elsif eote.known?
        second_string = "no later than #{eote}"
      end

      [first_string, second_string].compact.join(" until ").strip
    end
  end
end
|
data/lib/cultural_dates/date_parser.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
require_relative "date_word_helpers"
|
4
|
+
|
5
|
+
module CulturalDates

  # Parslet grammar for a single date expression.
  #
  # A successful parse yields a tree under :date whose entries are a subset
  # of the following captures, depending on which form matched:
  #
  # * :era
  # * :century
  # * :decade
  # * :year
  # * :month
  # * :day
  # * :timezone
  #
  # plus :certainty, which records whether a trailing "?" was present.
  class DateParser < Parslet::Parser
    include DateWordHelpers
    include Parslet

    # "the 19th century", "19th Century BCE?"
    rule(:century) { (the.maybe >> century_number >> century_word) >> era.maybe >> certainty }
    # "the 1880s"
    rule(:decade)  { (the.maybe >> decade_year) >> era.maybe >> certainty }
    # "1650", "44 BCE"
    rule(:year)    { year_year >> era.maybe >> certainty }
    # "January 2001"
    rule(:month)   { month_name >> year_year >> era.maybe >> certainty }
    # "October 15, 2006"
    rule(:day)     { month_name >> day_number >> (comma | space) >> year_year >> era.maybe >> certainty }
    # "15 October 2006"
    rule(:euroday) { day_number >> space >> month_name >> year_year >> era.maybe >> certainty }
    # "10/15/2006"
    rule(:numdate) { month_number >> str("/") >> day_number >> str("/") >> year_year >> era.maybe >> certainty }
    # "2006-10-15", optionally followed by a timezone
    rule(:isodate) { year_year >> str("-") >> month_number >> str("-") >> day_number >> timezone.maybe >> era.maybe >> certainty }

    # Ordered choice: the first alternative that matches wins and the others
    # are never retried. `year` accepts any leading run of digits, so it must
    # come after every other form that starts with digits (euroday, numdate,
    # isodate); listed earlier it would consume just the leading number and
    # make those forms unparsable.
    rule(:date) {
      (century |
       decade |
       day |
       month |
       euroday |
       numdate |
       isodate |
       year).as(:date)
    }

    root(:date)
  end
end
|
data/lib/cultural_dates/date_string_parser.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
require_relative "date_parser"
|
4
|
+
|
5
|
+
module CulturalDates

  # Parslet grammar for a whole interval sentence, e.g.
  # "sometime between 1650 and January 2001 until October 15, 2006".
  #
  # Each date in the sentence is parsed by DateParser and tagged with the
  # role it plays (:botb, :eotb, :bote, :eote, :begin, :end, :in, :on).
  class DateStringParser < Parslet::Parser
    include DateWordHelpers

    # Single-date sub-parser. This is a class-body local that the rule
    # blocks below capture as closures.
    date = DateParser.new

    # KEYWORDS
    rule(:sometime)            { str("sometime") >> space }
    rule(:no_date)             { str("no date").as(:nodate) | str("").as(:nodate) }
    rule(:begin_end_separator) { space.maybe >> str("until") >> space }
    rule(:after)               { sometime.maybe >> str("after") >> space }
    rule(:by)                  { str("by") >> space }
    rule(:at_least)            { str("at least") >> space }
    rule(:before)              { str("no later than") >> space }
    rule(:between)             { sometime.maybe >> str("between") >> space }
    rule(:in_kw)               { str("throughout") >> space }
    rule(:on_kw)               { str("on") >> space }
    rule(:and_kw)              { space? >> str("and") >> space? }

    # CLAUSES
    # "after X" fixes the begin of the begin; "by X" fixes the end of the begin.
    rule(:begin_date) {
      (after >> date.as(:botb)) |
        (by >> date.as(:eotb))
    }
    # "at least X" fixes the begin of the end; "no later than X" the end of the end.
    rule(:end_date) {
      (at_least >> date.as(:bote)) |
        (before >> date.as(:eote))
    }
    rule(:in_date)       { in_kw >> date.as(:in) }
    rule(:on_date)       { on_kw >> date.as(:on) }
    rule(:between_begin) { between >> date.as(:botb) >> and_kw >> date.as(:eotb) }
    rule(:between_end)   { between >> date.as(:bote) >> and_kw >> date.as(:eote) }

    rule(:start_clause) { in_date | between_begin | begin_date | date.as(:begin) }
    rule(:end_clause)   { between_end | end_date | date.as(:end) }

    # SENTENCE GRAMMARS
    # Start and end, start only, or end only (introduced by "until").
    rule(:one_date) {
      (start_clause >> begin_end_separator >> end_clause) |
        (start_clause >> begin_end_separator.absent?) |
        (begin_end_separator >> end_clause)
    }

    rule(:date_string) { one_date | on_date | no_date }
    root(:date_string)
  end
end
|
data/lib/cultural_dates/date_transform.rb
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
require "edtf"
|
2
|
+
require 'active_support/core_ext/integer/inflections'
|
3
|
+
|
4
|
+
|
5
|
+
module CulturalDates

  # Parslet transform that turns the parse trees produced by DateParser and
  # DateStringParser into EDTF strings. Single dates become one EDTF string;
  # interval sentences become a hash keyed by :botb, :eotb, :bote and :eote.
  class DateTransform < Parslet::Transform

    # First three letters of a month name (lower case) => month number.
    MONTH_NUMBERS = {
      "jan" => 1, "feb" => 2,  "mar" => 3,  "apr" => 4,
      "may" => 5, "jun" => 6,  "jul" => 7,  "aug" => 8,
      "sep" => 9, "oct" => 10, "nov" => 11, "dec" => 12
    }.freeze

    # A date is certain unless it carried a trailing "?".
    rule(:certainty_value => simple(:x)) { x != "?" }

    # A bare start date pins both ends of the "begin" range.
    rule(begin: subtree(:x)) { { botb: x, eotb: x } }
    rule(begin: subtree(:x), bote: subtree(:y)) { { botb: x, eotb: x, bote: y } }
    rule(begin: subtree(:x), eote: subtree(:z)) { { botb: x, eotb: x, eote: z } }
    rule(begin: subtree(:x), bote: subtree(:y), eote: subtree(:z)) { { botb: x, eotb: x, bote: y, eote: z } }

    # A bare end date pins both ends of the "end" range.
    rule(end: subtree(:x)) { { bote: x, eote: x } }
    rule(end: subtree(:x), botb: subtree(:y)) { { bote: x, eote: x, botb: y } }
    rule(end: subtree(:x), eotb: subtree(:z)) { { bote: x, eote: x, eotb: z } }
    rule(end: subtree(:x), botb: subtree(:y), eotb: subtree(:z)) { { bote: x, eote: x, botb: y, eotb: z } }

    # "throughout X": X is the end of the begin and the begin of the end.
    rule(:in => subtree(:x)) { { eotb: x, bote: x } }
    rule(:in => subtree(:x), eote: subtree(:y)) { { eotb: x, bote: x, eote: y } }

    # This should never happen.
    # rule(:in => subtree(:x), botb: subtree(:y)) { {eotb: x, bote: x, botb: y}}

    # TODO: This should check for day precision, but currently does not.
    rule(:on => subtree(:x)) { { eotb: x, bote: x, botb: x, eote: x } }
    rule(:nodate => simple(:x)) { nil }

    # One-argument block form: receives the bindings hash and can therefore
    # call the class-level helpers defined below.
    rule(:date => subtree(:x)) do |bindings|
      to_edtf_date(regularize(bindings[:x])).edtf
    end

    class << self

      # Builds an EDTF-extended Date from a regularized component hash, using
      # the most precise component present (day, month, year, decade, then
      # century) and marking everything finer as unspecified.
      #
      # NOTE(review): only the year-precision branch shifts BCE years by one;
      # the month and day branches use the negated year as-is. Confirm this
      # asymmetry is intended.
      def to_edtf_date(d)
        bce = d[:era] == "BCE"

        if d[:day]
          date = Date.new(d[:year], d[:month], d[:day])
          date.day_precision!
        elsif d[:month]
          date = Date.new(d[:year], d[:month])
          date.unspecified! :day
        elsif d[:year]
          date = Date.new(bce ? d[:year] + 1 : d[:year])
          date.unspecified! :month
          date.unspecified! :day
        elsif d[:decade]
          date = Date.new(d[:decade])
          date.unspecified.year[3] = true
          date.unspecified! :month
          date.unspecified! :day
        elsif d[:century]
          start_year = d[:century] * 100
          start_year -= 99 if bce
          date = Date.new(start_year)
          date.unspecified.year[3] = true
          date.unspecified.year[2] = true
          date.unspecified! :month
          date.unspecified! :day
        end

        date.uncertain! unless d[:certainty]
        date
      end

      # Normalizes every component of a parsed date hash in place: era to
      # "BCE"/"CE", the rest to Integers (or nil when absent).
      def regularize(date_obj)
        [:era, :century, :decade, :year, :month, :day].inject(date_obj) do |obj, part|
          send("regularize_#{part}", obj)
        end
      end

      private

      # Anything whose era text starts with "b"/"B" is BCE; all else is CE.
      def regularize_era(date_obj)
        bce = date_obj[:era].to_s.downcase.start_with?("b")
        date_obj[:era] = bce ? "BCE" : "CE"
        date_obj
      end

      # "19th" century => 18 (hundreds of its first year); negated for BCE.
      def regularize_century(date_obj)
        coerce_integer(date_obj, :century, -1, true)
      end

      def regularize_decade(date_obj)
        coerce_integer(date_obj, :decade, 0, true)
      end

      def regularize_year(date_obj)
        coerce_integer(date_obj, :year, 0, true)
      end

      def regularize_day(date_obj)
        coerce_integer(date_obj, :day, 0, false)
      end

      # Accepts a numeric month or a month name; names are resolved by their
      # first three letters.
      def regularize_month(date_obj)
        raw = date_obj[:month]
        return date_obj if raw.is_a? Integer

        month = nil
        if raw
          month = raw.to_i
          month = MONTH_NUMBERS[raw.to_s[0...3].downcase] if month == 0
        end
        date_obj[:month] = month
        date_obj
      end

      # Shared numeric coercion: leaves Integers alone, stores nil for a
      # missing component, otherwise stores to_i + offset, negated when
      # era_signed is set and the era is "BCE".
      def coerce_integer(date_obj, key, offset, era_signed)
        raw = date_obj[key]
        return date_obj if raw.is_a? Integer

        number = nil
        if raw
          number = raw.to_i + offset
          number = -number if era_signed && date_obj[:era] == "BCE"
        end
        date_obj[key] = number
        date_obj
      end
    end
  end
end
|
data/lib/cultural_dates/date_word_helpers.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "parslet"
|
2
|
+
|
3
|
+
module CulturalDates

  # Low-level Parslet rules (whitespace, punctuation, era markers, numbers
  # and month names) shared by DateParser and DateStringParser.
  module DateWordHelpers
    include Parslet

    # Whitespace and punctuation
    rule(:space)     { match('\s').repeat(1) }
    rule(:space?)    { space.maybe }
    rule(:comma)     { str(",") >> space }
    rule(:period)    { str(".") >> space }
    # Optional trailing "?"; always captured, so the key is always present.
    rule(:certainty) { (str("?") | str("")).as(:certainty_value).as(:certainty) }

    # Number words and eras
    rule(:ordinal_suffix) { %w[th st nd rd].map { |suffix| str(suffix) }.reduce(:|) }
    rule(:era) {
      ["CE", "ce", "BCE", "bce", "AD", "ad", "BC", "bc", ""]
        .map { |marker| str(marker) }
        .reduce(:|)
        .as(:era)
    }
    rule(:the)            { match['tT'] >> str("he") >> space }
    rule(:century_word)   { match['cC'] >> str("entury") >> space? }
    rule(:century_number) { match['0-9'].repeat(1,2).as(:century) >> ordinal_suffix.maybe >> space }
    # One to three digits followed by "0" and "s", e.g. "1880s".
    rule(:decade_year)    { (match["0-9"].repeat(1,3) >> str("0")).as(:decade) >> str("s") >> space? }
    rule(:year_year)      { match["0-9"].repeat(1,4).as(:year) >> space? }

    # Month names: full names are tried before abbreviations, and "Sept"
    # before "Sep", so the longest spelling wins.
    rule(:month_names_tc) {
      %w[January February March April May June July August September October November December]
        .map { |name| str(name) }.reduce(:|)
    }
    rule(:month_names_lc) {
      %w[january february march april may june july august september october november december]
        .map { |name| str(name) }.reduce(:|)
    }
    rule(:month_abb_tc) {
      %w[Jan Feb Mar Apr Jun Jul Aug Sept Sep Oct Nov Dec]
        .map { |name| str(name) }.reduce(:|)
    }
    rule(:month_abb_lc) {
      %w[jan feb mar apr jun jul aug sept sep oct nov dec]
        .map { |name| str(name) }.reduce(:|)
    }
    # Common misspelling accepted as-is.
    rule(:month_spelling) { str("febuary") }
    rule(:month_name) {
      spelled = month_names_tc | month_names_lc | month_abb_tc | month_abb_lc | month_spelling
      trailer = period | comma | space | str("., ")
      spelled.as(:month) >> trailer
    }

    # Numeric day and month
    rule(:day_number)   { match['0-9'].repeat(1,2).as(:day) >> ordinal_suffix.maybe }
    rule(:month_number) { (match['0-1'].maybe >> match["0-9"]).as(:month) }

    # "Z" or a +hh:mm / -hh:mm offset
    rule(:timezone) {
      offset = match["-+"] >> match["01"] >> match["0-9"] >> str(":") >> match["0-9"].repeat(2,2)
      str("Z").as(:timezone) | offset.as(:timezone)
    }
  end
end
|
metadata
ADDED
@@ -0,0 +1,172 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cultural_dates
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Newbury
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-01-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: edtf
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.0.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 3.0.2
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: parslet
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.7'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: activesupport
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.2'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.2'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: linkeddata
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: bundler
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.13'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.13'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rake
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '10.0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '10.0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: minitest
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '5.0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '5.0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: m
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 1.5.0
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 1.5.0
|
125
|
+
description:
|
126
|
+
email:
|
127
|
+
- david.newbury@gmail.com
|
128
|
+
executables: []
|
129
|
+
extensions: []
|
130
|
+
extra_rdoc_files: []
|
131
|
+
files:
|
132
|
+
- ".gitignore"
|
133
|
+
- Gemfile
|
134
|
+
- LICENSE.txt
|
135
|
+
- README.md
|
136
|
+
- Rakefile
|
137
|
+
- cultural_dates.gemspec
|
138
|
+
- lib/cultural_dates.rb
|
139
|
+
- lib/cultural_dates/cultural_date.rb
|
140
|
+
- lib/cultural_dates/cultural_interval.rb
|
141
|
+
- lib/cultural_dates/date_parser.rb
|
142
|
+
- lib/cultural_dates/date_string_parser.rb
|
143
|
+
- lib/cultural_dates/date_transform.rb
|
144
|
+
- lib/cultural_dates/date_word_helpers.rb
|
145
|
+
- lib/cultural_dates/version.rb
|
146
|
+
homepage: http://www.museumprovenance.org
|
147
|
+
licenses:
|
148
|
+
- MIT
|
149
|
+
metadata:
|
150
|
+
allowed_push_host: https://rubygems.org
|
151
|
+
post_install_message:
|
152
|
+
rdoc_options: []
|
153
|
+
require_paths:
|
154
|
+
- lib
|
155
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - ">="
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0'
|
160
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
161
|
+
requirements:
|
162
|
+
- - ">="
|
163
|
+
- !ruby/object:Gem::Version
|
164
|
+
version: '0'
|
165
|
+
requirements: []
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 2.2.2
|
168
|
+
signing_key:
|
169
|
+
specification_version: 4
|
170
|
+
summary: Parse and validate dates and intervals for cultural heritage applications.
|
171
|
+
test_files: []
|
172
|
+
has_rdoc:
|