date_parser 0.1.41 → 0.1.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +4 -0
- data/README.md +4 -0
- data/lib/date_parser/natural_date_parsing.rb +57 -16
- data/lib/spec/date_parser_spec.rb +27 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c0959b29b43db4cbc85e1caf41bbc772a982dcd3
|
4
|
+
data.tar.gz: c21a12f1c9db39be7ddbbb0ed04ad2e579dd4990
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3c64ede34f21aa666a6c84af33518649aebcfd89b10810cefa43092a36a3e6bbdcf11feee065485f512a3f811fcb1f147ae61022b74fee26bde6410b607c08f7
|
7
|
+
data.tar.gz: 70792a4a296919f6b3290e04244cc5791db7c75d33b3f9949781233734b7c45dd052cf3c69780286b37cf461c0c0fa5f652125480a27b73d13ac57b72a51a826
|
data/NEWS.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# DateParser 0.1.51
|
2
|
+
* Missed a common case: XX/XX/XXXX and variations. Now resolved.
|
3
|
+
* Strengthened check for dates that could be of the form XX/XX
|
4
|
+
|
1
5
|
# DateParser 0.1.4
|
2
6
|
* Improved namespacing.
|
3
7
|
+ NaturalDateParsing and Utils now part of the DateParser namespace.
|
data/README.md
CHANGED
@@ -40,6 +40,10 @@ DateParser::parse(text, creation_date).to_s
|
|
40
40
|
text = "7-24-2015"
|
41
41
|
DateParser::parse(text).to_s
|
42
42
|
#=> [#<Date: 2015-07-24 ((2457228j,0s,0n),+0s,2299161j)>]
|
43
|
+
|
44
|
+
text = "7/24/2015"
|
45
|
+
DateParser::parse(text).to_s
|
46
|
+
#=> [#<Date: 2015-07-24 ((2457228j,0s,0n),+0s,2299161j)>]
|
43
47
|
|
44
48
|
|
45
49
|
text = "2012-02-12"
|
@@ -263,11 +263,6 @@ module DateParser
|
|
263
263
|
end
|
264
264
|
end
|
265
265
|
|
266
|
-
# Parsing strings of the form XX/XX
|
267
|
-
if word.include? '/'
|
268
|
-
return slash_date(word, creation_date)
|
269
|
-
end
|
270
|
-
|
271
266
|
# Parsing strings like "23rd"
|
272
267
|
if (SUFFIXED_NUMERIC_DAY.include? word) && parse_ambiguous_dates
|
273
268
|
return numeric_single_day(word, creation_date)
|
@@ -283,11 +278,15 @@ module DateParser
|
|
283
278
|
return default_year(word)
|
284
279
|
end
|
285
280
|
|
286
|
-
# Parsing XX-XX-XXXX
|
281
|
+
# Parsing XX-XX-XXXX, XXXX-XX-XX, XX/XX/XXXX, or XXXX/XX/XX
|
287
282
|
if full_numeric_date?(word)
|
288
283
|
return full_numeric_date(word)
|
289
284
|
end
|
290
285
|
|
286
|
+
# Parsing strings of the form XX/XX
|
287
|
+
if slash_date?(word)
|
288
|
+
return slash_date(word, creation_date)
|
289
|
+
end
|
291
290
|
end
|
292
291
|
|
293
292
|
|
@@ -389,8 +388,11 @@ module DateParser
|
|
389
388
|
end
|
390
389
|
|
391
390
|
# Parses a single word of the form XXXX-XX-XX, DD-MM-YYYY or MM-DD-YYYY
|
391
|
+
# Also accepts the same forms with '/' as the separator (e.g. XXXX/XX/XX or XX/XX/XXXX)
|
392
392
|
def NaturalDateParsing.full_numeric_date(word)
|
393
|
-
|
393
|
+
demarcating_token = get_demarcating_token(word)
|
394
|
+
|
395
|
+
subparts = word.split(demarcating_token)
|
394
396
|
|
395
397
|
# This is a weak check to see where the year is
|
396
398
|
year_index = (subparts[0].to_i).abs > 31 ? 0 : 2
|
@@ -456,22 +458,43 @@ module DateParser
|
|
456
458
|
return ((date1 - date2) / 7).to_i
|
457
459
|
end
|
458
460
|
|
459
|
-
#
|
461
|
+
# Determines if a given date could be a slash date.
|
462
|
+
# I.e., of the form XX/XX
|
463
|
+
# Reports whether +word+ could be a slash date, i.e. of the form XX/XX.
#
# Returns true only when splitting +word+ on '/' yields exactly two pieces
# and Utils.is_int? accepts both of them; returns false otherwise.
def NaturalDateParsing.slash_date?(word)
  pieces = word.split("/")
  return false unless pieces.size == 2

  pieces.all? { |piece| Utils.is_int?(piece) }
end
|
478
|
+
|
479
|
+
# Is it generally of the form XXXX-XX-XX or XXXX/XX/XX?
|
460
480
|
# Reports whether +word+ is a fully numeric date: three integer fields joined
# by '-' or '/', e.g. XXXX-XX-XX, XX-XX-XXXX, XXXX/XX/XX or XX/XX/XXXX.
#
# Returns false when +word+ contains no separator at all. Without that guard
# the empty token returned by get_demarcating_token would make String#split
# break the word into single characters, so a bare three-digit number such
# as "123" would be split into three pieces and mistaken for a date.
def NaturalDateParsing.full_numeric_date?(word)
  demarcating_token = get_demarcating_token(word)
  return false if demarcating_token.empty?

  substrings = word.split(demarcating_token)
  return false unless substrings.length == 3

  substrings.all? { |substring| Utils.is_int?(substring) }
end
|
474
496
|
|
497
|
+
# Converts a numeric month to a string.
|
475
498
|
def NaturalDateParsing.numeric_month_to_string(numeric)
|
476
499
|
months = ["january", "february", "march", "april", "may", "june",
|
477
500
|
"july", "august", "september", "october", "november",
|
@@ -480,5 +503,23 @@ module DateParser
|
|
480
503
|
return months[numeric - 1]
|
481
504
|
end
|
482
505
|
|
506
|
+
# Given a string, tries to determine if the word
|
507
|
+
# contains a demarcating token such as '-' or '/'
|
508
|
+
# If so, returns that demarcating token. Assumes that
|
509
|
+
# only one such token is present.
|
510
|
+
#
|
511
|
+
# If no such token is found, returns an empty string.
|
512
|
+
# Finds the separator used inside +word+.
#
# Looks for '-' first and then '/', returning the first of the two that
# occurs in +word+. If both kinds are present only '-' is reported.
# Returns an empty string when +word+ contains neither.
def NaturalDateParsing.get_demarcating_token(word)
  return "-" if word.include?("-")
  return "/" if word.include?("/")

  ""
end
|
523
|
+
|
483
524
|
end
|
484
525
|
end
|
@@ -114,6 +114,33 @@ describe DateParser do
|
|
114
114
|
expect(DateParser::parse(text)).to eql(answer)
|
115
115
|
end
|
116
116
|
end
|
117
|
+
|
118
|
+
context "Parse date separated by /" do
|
119
|
+
text = "2012/02/12"
|
120
|
+
answer = [Date.parse("2012-02-12")]
|
121
|
+
|
122
|
+
it "correctly grabs the date" do
|
123
|
+
expect(DateParser::parse(text)).to eql(answer)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
127
|
+
context "Parse American date separated by /" do
|
128
|
+
text = "7/24/2015"
|
129
|
+
answer = [Date.parse("July 24, 2015")]
|
130
|
+
|
131
|
+
it "correctly grabs the date" do
|
132
|
+
expect(DateParser::parse(text)).to eql(answer)
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
context "Parse International Standard fully numeric date separated by /" do
|
137
|
+
text = "24/07/2015"
|
138
|
+
answer = [Date.parse("24-07-2015")]
|
139
|
+
|
140
|
+
it "correctly grabs the date" do
|
141
|
+
expect(DateParser::parse(text)).to eql(answer)
|
142
|
+
end
|
143
|
+
end
|
117
144
|
end
|
118
145
|
|
119
146
|
#########################################################
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: date_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.51
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Kwon
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-02-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: DateParser is a simple, fast, and effective way to parse dates from natural
|
14
14
|
language text.
|