timetwister 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OWYzMGVhZTE3NjE4ZDE4M2ZhMmQwZDJlNzQ4OWE2MTVmOWU5NzAzNQ==
4
+ NWRiMWE3MzYwNGZiM2M3NWY5MzIxYmVkMjdhNTI0Njk0YmNhZTVkNw==
5
5
  data.tar.gz: !binary |-
6
- ZTAxNzI5OGJhMGJjM2M0ZTc1ODEzZGI4OWQ2OTgyZTRlOTRhNWNiNw==
6
+ NTg1OTRiNDA2Y2E0ODE0YjBiOTg5M2RjZmM0Mzg0YWVlZTVlNDQ0Zg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ODMxNTEzYzViMTU2MmE2NjIzYjI1MTllODRiNzQ3NDE5NDA2OWIzNWQ3ZjIy
10
- YzI1NzNiNDhiMWQ5M2FlODE1N2VlN2IxYjI1ZDJmZDI2ZTRhYzNjOGMwZDI2
11
- ZjI1NWJhZDJiNDkwN2MyNDVjY2Q5NmU5MGZkZWE3ZWFmNGEwNGY=
9
+ YWQxOTQxYTdkNjExYTdjMTBhNjMzODViMTllYmY0Y2YyNjQ2NmEzZDMzODBj
10
+ MzExN2VkY2RmNzQ0M2RjNmQ5ZDAyMjEwZDA1ZGNlODYzY2JhODBiODRhNzNl
11
+ ZjZhZDk1ZDA0YzAwODEyNjEyODU3ZTUwYjBhYWVkYTk0NzU0MWY=
12
12
  data.tar.gz: !binary |-
13
- MWNhYWYxMDE3M2E2NjFmZTQ2OTc3ZGY5YmE4ZjViOTA4ZTkxMmZhZDc2MDY1
14
- ZGY4ZmVmMWZjNzViNmExMThlZjRiZTMzZTc2MzEyNzYwYTg3ZTliZTcyNWQ4
15
- YWQ2ZWYzMjYzMDcxODU3ZTNiNmEyMjRjZWMyODQyZjhmYWJjMDI=
13
+ NWUzOGYzNGNlZWZhZWFjZjZlM2I4MzcyMWYwZmFkMDg2ODgyNmFkOTk4NTY2
14
+ NzQyZjkzYWRiZWJmM2JhZDlmZjI1NGM3YjdhZGY0YjAzNjM3MGFjMjM2NWQ5
15
+ OTNlZGM0ZWUwODhiYTY4NmIxODQ5NGVhNWRkN2M5MDVjZDM1Nzg=
data/Gemfile CHANGED
@@ -4,6 +4,7 @@ source 'https://rubygems.org'
4
4
 
5
5
  group :development, :test do
6
6
  gem 'rspec'
7
+ gem 'coveralls', require: false
7
8
  end
8
9
 
9
10
  gemspec
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
1
  [![Build Status](https://travis-ci.org/alexduryee/timetwister.svg?branch=master)](https://travis-ci.org/alexduryee/timetwister)
2
+ [![Coverage Status](https://coveralls.io/repos/alexduryee/timetwister/badge.svg?branch=master&service=github)](https://coveralls.io/github/alexduryee/timetwister?branch=master)
2
3
 
3
4
  # Timetwister
4
5
 
@@ -26,12 +27,25 @@ Or install it yourself as:
26
27
 
27
28
  Takes a date (or list of dates) as a string, and returns a list of hashes with parsed date data.
28
29
 
30
+ ### Ruby
31
+
32
+ Returns a list of hashes of parsed date data.
33
+
29
34
  ```ruby
30
35
  require 'timetwister'
31
36
  Timetwister.parse("Jun 1898 - [July 4 1900]")
32
37
  => [{:original_string=>"Jun 1898 - July 4 1900", :index_dates=>[1898, 1899, 1900], :date_start=>"1898-06-01", :date_end=>"1900-07-04", :date_start_full=>"1898-06-01", :date_end_full=>"1900-07-04", :inclusive_range=>true, :certainty=>"inferred", :test_data=>"330"}]
33
38
  ```
34
39
 
40
+ ### Command Line
41
+
42
+ Returns a JSON array of parsed date data (one object per parsed date).
43
+
44
+ ```bash
45
+ $ timetwister 'december 21 2015'
46
+ [{"original_string":"december 21 2015","index_dates":[2015],"date_start":"2015-12-21","date_end":"2015-12-21","date_start_full":"2015-12-21","date_end_full":"2015-12-21","inclusive_range":null,"certainty":null,"test_data":"200"}]
47
+ ```
48
+
35
49
  Output explanation:
36
50
 
37
51
  - `:original_string` is the original input
@@ -41,6 +55,9 @@ Output explanation:
41
55
  - `:inclusive_range` is whether or not the input value is a range
42
56
  - `:certainty` is the certainty of the provided date, based on use of flags and punctuation
43
57
 
58
+
59
+
60
+
44
61
  ## Contributing
45
62
 
46
63
  1. Fork it ( https://github.com/[my-github-username]/timetwister/fork )
data/lib/timetwister.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require "timetwister/version"
2
2
  require "timetwister/parser"
3
+ require "timetwister/utilities"
3
4
 
4
5
  module Timetwister
5
6
 
@@ -13,7 +14,7 @@ module Timetwister
13
14
 
14
15
  # check for dates of form "Month Day(-Day), Year" before splitting on commas
15
16
  # (removes certainty markers as to not jam the regex)
16
- if Parser.replace_ordinals(conj).gsub(/[\?\[\]]/, '').match(/[a-z]*\.?\s[0-9]{1,2}(\s?-[0-9]{1,2})?\,\s[0-9]{4}/i)
17
+ if Utilities.replace_ordinals(conj).gsub(/[\?\[\]]/, '').match(/[a-z]*\.?\s[0-9]{1,2}(\s?-[0-9]{1,2})?\,\s[0-9]{4}/i)
17
18
  out << Parser.string_to_dates(conj, options)
18
19
  else
19
20
  conj.split(',').each do |comma|
@@ -1,6 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  require 'chronic'
4
+ require 'timetwister/utilities'
4
5
 
5
6
  class Parser
6
7
 
@@ -11,7 +12,7 @@ class Parser
11
12
  @dates = { :original_string => str, :index_dates => [], :date_start => nil, :date_end => nil,
12
13
  :date_start_full => nil, :date_end_full => nil, :inclusive_range => nil, :certainty => nil }
13
14
 
14
- @regex_tokens = regex_tokens
15
+ @regex_tokens = Utilities.regex_tokens
15
16
 
16
17
  # defensive checks against very malformed date strings
17
18
  if str.include?('??')
@@ -19,12 +20,12 @@ class Parser
19
20
  end
20
21
 
21
22
  # perform this here, before the string gets purged of certainty indicators
22
- @dates[:certainty] = return_certainty(@string)
23
+ @dates[:certainty] = Utilities.return_certainty(@string)
23
24
 
24
25
  # normalize the string into the parser's preferred form
25
- @string = clean_string(@string)
26
- @string = language_to_english(@string)
27
- @string = replace_ordinals(@string)
26
+ @string = Utilities.clean_string(@string)
27
+ @string = Utilities.language_to_english(@string)
28
+ @string = Utilities.replace_ordinals(@string)
28
29
 
29
30
  # parse!
30
31
  self.match_replace
@@ -39,7 +40,7 @@ class Parser
39
40
  @dates[:date_end] = @dates[:date_start]
40
41
  end
41
42
 
42
- stringify_values
43
+ @dates = Utilities.stringify_values(@dates)
43
44
  add_full_dates
44
45
 
45
46
  return @dates
@@ -372,6 +373,7 @@ class Parser
372
373
  end
373
374
 
374
375
 
376
+ # 1999
375
377
  def self.proc_year_range
376
378
  proc = Proc.new do |string|
377
379
  # Only supports years from 1000
@@ -389,9 +391,7 @@ class Parser
389
391
  end
390
392
  end
391
393
 
392
-
393
-
394
-
394
+ # 1999 - 2010s
395
395
  def self.proc_range_year_to_decade
396
396
  proc = Proc.new do |string|
397
397
  range = year_range(string)
@@ -409,6 +409,7 @@ class Parser
409
409
  end
410
410
 
411
411
 
412
+ # 1990-91
412
413
  def self.proc_year_range_short
413
414
  proc = Proc.new do |string|
414
415
  range = string.split('-')
@@ -426,6 +427,7 @@ class Parser
426
427
  end
427
428
  end
428
429
 
430
+ # this may be obsolete - however, keep it just in case
429
431
  def self.proc_year_range_list_combo
430
432
  proc = Proc.new do |string|
431
433
  ranges = []
@@ -460,7 +462,7 @@ class Parser
460
462
  end
461
463
  end
462
464
 
463
-
465
+ # 1990s
464
466
  def self.proc_decade_s
465
467
  proc = Proc.new do |string|
466
468
  decade = string.match(/[0-9]{3}0/).to_s
@@ -472,7 +474,7 @@ class Parser
472
474
  end
473
475
  end
474
476
 
475
-
477
+ # 19--
476
478
  def self.proc_century_with_placeholders
477
479
  proc = Proc.new do |string|
478
480
  century = string.match(/[0-9]{2}/).to_s
@@ -485,7 +487,7 @@ class Parser
485
487
  end
486
488
  end
487
489
 
488
-
490
+ # early 1990s
489
491
  def self.proc_decade_s_qualified
490
492
  proc = Proc.new do |string|
491
493
  decade = string.match(/[0-9]{3}0/).to_s
@@ -506,7 +508,7 @@ class Parser
506
508
  end
507
509
  end
508
510
 
509
-
511
+ # 1990s-2000s
510
512
  def self.proc_decade_s_range
511
513
  proc = Proc.new do |string|
512
514
  decades = string.scan(/[0-9]{3}0/)
@@ -520,18 +522,21 @@ class Parser
520
522
  end
521
523
  end
522
524
 
523
-
525
+ # 1999-09-09
526
+ # September 9, 1999
527
+ # 1999 September 9
524
528
  def self.proc_full_date_single
525
529
  proc = Proc.new do |string|
526
530
  datetime = full_date_single_to_datetime(string)
527
531
  if datetime
528
- full_date_single_keydates(string,datetime,'%Y-%m-%d')
532
+ @dates[:date_start] = datetime.strftime('%Y-%m-%d')
529
533
  @dates[:index_dates] << datetime.strftime('%Y').to_i
530
534
  end
531
535
  end
532
536
  end
533
537
 
534
-
538
+ # September 1999
539
+ # 1999 September
535
540
  def self.proc_month_year_single
536
541
  proc = Proc.new do |string|
537
542
  string.gsub!(/\?/,'')
@@ -549,16 +554,12 @@ class Parser
549
554
 
550
555
  datetime = Chronic.parse(string)
551
556
  if datetime
552
- full_date_single_keydates(string,datetime, '%Y-%m')
557
+ @dates[:date_start] = datetime.strftime('%Y-%m')
553
558
  @dates[:index_dates] << datetime.strftime('%Y').to_i
554
559
  end
555
560
  end
556
561
  end
557
562
 
558
-
559
-
560
-
561
-
562
563
  # "1976 July 4 - 1981 October 1", etc.
563
564
  # call with second argument 'month' if no day value is present
564
565
  def self.proc_full_date_single_range
@@ -584,7 +585,7 @@ class Parser
584
585
  month_date_end = datetime_end.strftime('%Y-%m')
585
586
  month_date_end_parts = month_date_end.split('-')
586
587
 
587
- month_date_end_last = days_in_month(month_date_end_parts[1],month_date_end_parts[0]).to_s
588
+ month_date_end_last = Utilities.days_in_month(month_date_end_parts[1],month_date_end_parts[0]).to_s
588
589
  month_date_full = month_date_end + "-#{month_date_end_last}"
589
590
 
590
591
  datetime_end = Chronic.parse(month_date_full)
@@ -604,7 +605,7 @@ class Parser
604
605
  # 1980 1-20 Feb.
605
606
  def self.proc_single_month_date_range
606
607
  proc = Proc.new do |string|
607
- year = extract_year(string)
608
+ year = Utilities.extract_year(string)
608
609
  day_range = string.match(/\d{1,2}\-\d{1,2}/).to_s
609
610
  string.gsub!(Regexp.new(day_range),'')
610
611
  month = string.strip
@@ -638,7 +639,7 @@ class Parser
638
639
  year_start = datetime_start.strftime('%Y').to_i
639
640
  year_end = datetime_end.strftime('%Y').to_i
640
641
 
641
- if datetime_comparitor(datetime_end) < datetime_comparitor(datetime_start)
642
+ if Utilities.datetime_comparitor(datetime_end) < Utilities.datetime_comparitor(datetime_start)
642
643
  # this range is reversed in error
643
644
  years = [year_end,year_start]
644
645
  year_start, year_end = years[0], years[1]
@@ -647,8 +648,8 @@ class Parser
647
648
  end
648
649
 
649
650
  @dates[:index_dates] += (year_start..year_end).to_a
650
- @dates[:date_start] = datetime_start.strftime(is8601_string_format dates[0])
651
- @dates[:date_end] = datetime_end.strftime(is8601_string_format dates[1])
651
+ @dates[:date_start] = datetime_start.strftime(iso8601_string_format dates[0])
652
+ @dates[:date_end] = datetime_end.strftime(iso8601_string_format dates[1])
652
653
  @dates[:inclusive_range] = true
653
654
 
654
655
  end
@@ -665,8 +666,8 @@ class Parser
665
666
  first_month = string.match(@regex_tokens[:named_month]).to_s
666
667
  last_month = string.match(@regex_tokens[:named_month] + '$').to_s
667
668
 
668
- # chronic is fiddly about short months with periods
669
- # (e.g. "may.") so we remove them
669
+ # chronic is fiddly about short months with periods
670
+ # (e.g. "may.") so we remove them
670
671
  date_string_first = first_month.delete('.') + ' 1,' + year
671
672
  datetime_first = Chronic.parse(date_string_first)
672
673
  if !last_month.empty?
@@ -703,7 +704,7 @@ class Parser
703
704
  else
704
705
  datetime_start = full_date_single_to_datetime(dates[0] + "-01-01")
705
706
  datetime_end_tmp = full_date_single_to_datetime(dates[1] + "-28")
706
- datetime_end = full_date_single_to_datetime(dates[1] + "-" + days_in_month(datetime_end_tmp.month, datetime_end_tmp.year).to_s)
707
+ datetime_end = full_date_single_to_datetime(dates[1] + "-" + Utilities.days_in_month(datetime_end_tmp.month, datetime_end_tmp.year).to_s)
707
708
  end
708
709
 
709
710
  if datetime_start && datetime_end
@@ -781,7 +782,7 @@ class Parser
781
782
  month_date_end = datetime_end.strftime('%Y-%m')
782
783
  month_date_end_parts = month_date_end.split('-')
783
784
 
784
- month_date_end_last = days_in_month(month_date_end_parts[1],month_date_end_parts[0]).to_s
785
+ month_date_end_last = Utilities.days_in_month(month_date_end_parts[1],month_date_end_parts[0]).to_s
785
786
  month_date_full = month_date_end + "-#{month_date_end_last}"
786
787
 
787
788
  datetime_end = Chronic.parse(month_date_full)
@@ -820,46 +821,9 @@ class Parser
820
821
  end
821
822
  end
822
823
 
823
- def self.regex_tokens
824
- return {
825
- # 1969, [1969], c1969
826
- :year => '[\[\sc\(]{0,3}[0-2][0-9]{3}[\]\s\.\,;\?\)]{0,3}',
827
- # - or 'to'
828
- :range_delimiter => '\s*((\-)|(to))\s*',
829
- # , or ;
830
- :list_delimiter => '\s*[\,\;]\s*',
831
- # , or ;
832
- :range_or_list_delimiter => '\s*([\,\;]|((\-)|(to)))\s*',
833
- # n.d., undated, etc.
834
- :nd => '[\[\s]{0,2}\b([Uu]+ndated\.?)|([nN]o?\.?\s*[dD](ate)?\.?)\b[\s\]\.]{0,3}',
835
- # 1960s, 1960's
836
- :decade_s => '[\[\s]{0,2}[0-9]{3}0\'?s[\]\s]{0,2}',
837
-
838
- # 1970-75
839
- :year_range_short => '\s*[0-9]{4}\s?\-\s*(([2-9][0-9])|(1[3-9]))\s*',
840
-
841
- # 196-
842
- :decade_aacr => '[0-9]{3}\-',
843
- # named months, including abbreviations (case insensitive)
844
- :named_month => '\s*(?i)\b((jan(uary)?)|(feb(ruary)?)|(mar(ch)?)|(apr(il)?)|(may)|(jun(e)?)|(jul(y)?)|(aug(ust)?)|(sep(t|tember)?)|(oct(ober)?)|(nov(ember)?)|(dec(ember)?))\b\.?\s*',
845
- # circa, ca. - also matches 'c.', which is actually 'copyright', but is still not something we need to deal with
846
- :circa => '\s*[Cc](irc)?a?\.?\s*',
847
- # early, late, mid-
848
- :decade_qualifier => '([Ee]arly)|([Mm]id)|([Ll]ate)\-?',
849
- # 06-16-1972, 6-16-1972
850
- :numeric_date_us => '(0?1)|(0?2)|(0?3)|(0?4)|(0?5)|(0?6)|(0?7)|(0?8)|(0?9)|1[0-2][\-\/](([0-2]?[0-9])|3[01])[\-\/])?[12][0-9]{3}',
851
- # 1972-06-16
852
- :iso8601 => '[0-9]{4}\-[0-9]{2}\-[0-9]{2}',
853
- :iso8601_full => '[0-9]{4}((\-[0-9]{2})(\-[0-9]{2})?)?',
854
- :iso8601_month => '[0-9]{4}\-[0-9]{2}',
855
- :anchor_start => '^[^\w\d]*',
856
- :anchor_end => '[^\w\d]*$',
857
- :optional_comma => '[\s\,]*',
858
- :day_of_month => '\s*(([0-2]?[0-9])|(3[0-1]))\s*'
859
- }
860
- end
861
-
862
824
 
825
+ # Transform full date strings into parsed datetime objects
826
+ # e.g. "September 9, 1999" -> datetime
863
827
  def self.full_date_single_to_datetime(string)
864
828
  new_string = string.clone
865
829
  if new_string.match(/\d{4}\-\d{2}\-\d{2}/)
@@ -895,7 +859,7 @@ class Parser
895
859
  year_start = datetime_start.strftime('%Y').to_i
896
860
  year_end = datetime_end.strftime('%Y').to_i
897
861
 
898
- if datetime_comparitor(datetime_end) > datetime_comparitor(datetime_start)
862
+ if Utilities.datetime_comparitor(datetime_end) > Utilities.datetime_comparitor(datetime_start)
899
863
 
900
864
  @dates[:index_dates] += (year_start..year_end).to_a
901
865
 
@@ -907,12 +871,7 @@ class Parser
907
871
  end
908
872
  end
909
873
 
910
-
911
- def self.full_date_single_keydates(string,datetime,format)
912
- @dates[:date_start] = datetime.strftime(format)
913
- end
914
-
915
-
874
+ # generates date_start and date_end from index_dates list
916
875
  def self.process_year_range
917
876
  @dates[:index_dates].sort!
918
877
  @dates[:index_dates].uniq!
@@ -921,7 +880,8 @@ class Parser
921
880
  end
922
881
 
923
882
 
924
- def self.is8601_string_format(iso_8601_date)
883
+ # detects format of ISO8601 date to pass to strftime
884
+ def self.iso8601_string_format(iso_8601_date)
925
885
  if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)
926
886
  return '%Y-%m-%d'
927
887
  elsif iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}$/)
@@ -931,7 +891,7 @@ class Parser
931
891
  end
932
892
  end
933
893
 
934
-
894
+ # generates datetime from ISO8601-formatted date
935
895
  def self.iso8601_datetime(iso_8601_date)
936
896
  if iso_8601_date.match(/^[0-9]{4}\-[0-9]{2}\-[0-9]{2}$/)
937
897
  Chronic.parse(iso_8601_date)
@@ -942,112 +902,13 @@ class Parser
942
902
  end
943
903
  end
944
904
 
945
-
946
- # Removes the first 4-digit number found in the string and returns it
947
- def self.extract_year(string)
948
- year = string.match(/\d{4}/).to_s
949
- string.gsub!(Regexp.new(year),'')
950
- year
951
- end
952
-
953
-
954
- # removes sub-strings that do not contain parsable data
955
- def self.clean_string(string)
956
- r = @regex_tokens
957
- # remove n.y. and variants from beginning of string
958
- substrings = [
959
- /\[n\.?y\.?\]/,
960
- /[\[\]\(\)]/,
961
- /[\.\,\)\;\:]*$/,
962
- /\?/,
963
- /approx\.?(imately)?/i,
964
- /\s#{regex_tokens[:circa]}\s/,
965
- /^#{regex_tokens[:circa]}\s/,
966
- Regexp.new("([\,\;\s(and)]{0,4}#{regex_tokens[:nd]})?$")
967
- ]
968
-
969
- # transform seasons to months
970
- string.gsub!(/[Ww]inter/, " January 1 - March 20 ")
971
- string.gsub!(/[Ss]pring/, " March 20 - June 21 ")
972
- string.gsub!(/[Ss]ummer/, " June 21 - September 23 ")
973
- string.gsub!(/[Aa]utumn/, " September 23 - December 22 ")
974
- string.gsub!(/[Ff]all/, " September 23 - December 22 ")
975
-
976
- # remove days of the week
977
- dow = [/[Ss]unday,?\s+/, /[Mm]onday,?\s+/, /[Tt]uesday,?\s+/, /[Ww]ednesday,?\s+/, /[Tt]hursday,?\s+/, /[Ff]riday,?\s+/, /[Ss]aturday,?\s+/]
978
- dow.each {|d| string.gsub!(d, '')}
979
-
980
- # remove times of day
981
- tod = [/[Mm]orning,?\s+/, /[Aa]fternoon,?\s+/, /[Ee]vening,?\s+/, /[Nn]ight,?\s+/]
982
- tod.each {|t| string.gsub!(t, '')}
983
-
984
- # remove single question marks
985
- string.gsub!(/([0-9])\?([^\?])/,'\1\2')
986
-
987
- substrings.each { |s| string.gsub!(s,'') }
988
- string.strip!
989
- string
990
- end
991
-
992
905
  def self.year_range(string)
993
906
  range = string.scan(Regexp.new(@regex_tokens[:year]))
994
907
  range.each { |d| d.gsub!(/[^0-9]*/,'') }
995
908
  range.map { |y| y.to_i }
996
909
  end
997
910
 
998
-
999
- def self.datetime_comparitor(datetime)
1000
- d = datetime.to_s
1001
- d.gsub!(/[^\d]/,'')
1002
- return d.to_i
1003
- end
1004
-
1005
-
1006
- def self.leap_year?(year)
1007
- year = (year.kind_of? String) ? year.to_i : year
1008
- if year % 400 == 0
1009
- return true
1010
- elsif year % 100 == 0
1011
- return false
1012
- elsif year % 4 == 0
1013
- return true
1014
- else
1015
- return false
1016
- end
1017
- end
1018
-
1019
-
1020
- # month and year must be numeric
1021
- def self.days_in_month(month,year)
1022
- month = month.kind_of?(String) ? month.to_i : month
1023
- year = year.kind_of?(String) ? year.to_i : year
1024
- days = {
1025
- 1 => 31,
1026
- 2 => leap_year?(year) ? 29 : 28,
1027
- 3 => 31,
1028
- 4 => 30,
1029
- 5 => 31,
1030
- 6 => 30,
1031
- 7 => 31,
1032
- 8 => 31,
1033
- 9 => 30,
1034
- 10 => 31,
1035
- 11 => 30,
1036
- 12 => 31
1037
- }
1038
- days[month]
1039
- end
1040
-
1041
-
1042
- def self.stringify_values
1043
- @dates.each do |k,v|
1044
- if v.is_a?(Fixnum)
1045
- @dates[k] = v.to_s
1046
- end
1047
- end
1048
- end
1049
-
1050
-
911
+ # enrich the final output hash with more comprehensive date metadata
1051
912
  def self.add_full_dates
1052
913
  if @dates[:date_start] && !@dates[:date_start_full]
1053
914
  if @dates[:date_start].match(/\d{4}\-\d{2}\-\d{2}/)
@@ -1065,7 +926,7 @@ class Parser
1065
926
  year = @dates[:date_end].match(/^\d{4}/).to_s
1066
927
  if @dates[:date_end].match(/\d{4}\-\d{2}/)
1067
928
  month = @dates[:date_end].match(/\d{2}$/).to_s
1068
- day = days_in_month(month,year).to_s
929
+ day = Utilities.days_in_month(month,year).to_s
1069
930
  @dates[:date_end_full] = @dates[:date_end] + "-#{day}"
1070
931
  elsif @dates[:date_end].match(/\d{4}/)
1071
932
  @dates[:date_end_full] = @dates[:date_end] + "-12-31"
@@ -1073,120 +934,4 @@ class Parser
1073
934
  end
1074
935
  end
1075
936
  end
1076
-
1077
- def self.return_certainty(str)
1078
-
1079
- # order of precedence, from least to most certain:
1080
- # 1) questionable dates
1081
- # 2) approximate dates
1082
- # 3) inferred dates
1083
-
1084
- if str.include?('?')
1085
- return 'questionable'
1086
- end
1087
-
1088
- if str.downcase.include?('ca') || \
1089
- str.downcase.include?('approx')
1090
- return 'approximate'
1091
- end
1092
-
1093
- if str.include?('[') || str.include?(']')
1094
- return 'inferred'
1095
- end
1096
-
1097
- return nil
1098
- end
1099
-
1100
- def self.replace_ordinals(str)
1101
-
1102
- work_str = str.clone
1103
-
1104
- ordinals = {
1105
- # replace fulltext ordinals with numbers
1106
- 'first' => '1',
1107
- 'second' => '2',
1108
- 'third' => '3',
1109
- 'fourth' => '4',
1110
- 'fifth' => '5',
1111
- 'sixth' => '6',
1112
- 'seventh' => '7',
1113
- 'eighth' => '8',
1114
- 'ninth' => '9',
1115
- 'tenth' => '10',
1116
- 'eleventh' => '11',
1117
- 'twelfth' => '12',
1118
- 'thirteenth' => '13',
1119
- 'fourteenth' => '14',
1120
- 'fifteenth' => '15',
1121
- 'sixteenth' => '16',
1122
- 'seventeenth' => '17',
1123
- 'eighteenth' => '18',
1124
- 'nineteenth' => '19',
1125
- 'twentieth' => '20',
1126
- 'twenty-' => '2',
1127
- 'thirtieth' => '30',
1128
- 'thirty-' => '3',
1129
-
1130
- # replace numeric ordinals with plain numbers
1131
- '1st' => '1',
1132
- '2nd' => '2',
1133
- '3rd' => '3',
1134
- '3d' => '3',
1135
- '4th' => '4',
1136
- '5th' => '5',
1137
- '6th' => '6',
1138
- '7th' => '7',
1139
- '8th' => '8',
1140
- '9th' => '9',
1141
- '0th' => '0'
1142
- }
1143
-
1144
- ordinals.each do |key, value|
1145
- work_str.gsub!(Regexp.new(key), value)
1146
- end
1147
-
1148
- return work_str
1149
- end
1150
-
1151
- def self.language_to_english(str)
1152
-
1153
- work_str = str.clone
1154
-
1155
- languages = {
1156
-
1157
- # french
1158
- 'janvier' => 'January',
1159
- 'février' => 'February',
1160
- 'mars' => 'March',
1161
- 'avril' => 'April',
1162
- 'mai' => 'May',
1163
- 'juin' => 'June',
1164
- 'juillet' => 'July',
1165
- 'août' => 'August',
1166
- 'septembre' => 'September',
1167
- 'octobre' => 'October',
1168
- 'novembre' => 'November',
1169
- 'décembre' => 'December',
1170
-
1171
- # spanish
1172
- 'enero' => 'January',
1173
- 'febrero' => 'February',
1174
- 'marzo' => 'March',
1175
- 'abril' => 'April',
1176
- 'mayo' => 'May',
1177
- 'junio' => 'June',
1178
- 'julio' => 'July',
1179
- 'agosto' => 'August',
1180
- 'septiembre' => 'September',
1181
- 'octubre' => 'October',
1182
- 'noviembre' => 'November',
1183
- 'diciembre' => 'December'
1184
- }
1185
-
1186
- languages.each do |key, value|
1187
- work_str.gsub!(/#{key}/i, value)
1188
- end
1189
-
1190
- return work_str
1191
- end
1192
937
  end
@@ -0,0 +1,279 @@
1
+ # encoding: utf-8
2
+
3
+ class Utilities
4
+
5
+ # walks through a hash and converts every Fixnum value to a string
6
+ # input: a hash
7
+ # output: same hash, but with all Fixnums converted to strings
8
+ def self.stringify_values(hash)
9
+ hash.each do |k,v|
10
+ if v.is_a?(Fixnum)
11
+ hash[k] = v.to_s
12
+ end
13
+ end
14
+
15
+ return hash
16
+ end
17
+
18
+ # return MODS certainty from a date string
19
+ # input: freetext date string
20
+ # output: string representing the date certainty
21
+ def self.return_certainty(str)
22
+
23
+ # order of precedence, from least to most certain:
24
+ # 1) questionable dates
25
+ # 2) approximate dates
26
+ # 3) inferred dates
27
+
28
+ if str.include?('?')
29
+ return 'questionable'
30
+ end
31
+
32
+ if str.downcase.include?('ca') || \
33
+ str.downcase.include?('approx')
34
+ return 'approximate'
35
+ end
36
+
37
+ if str.include?('[') || str.include?(']')
38
+ return 'inferred'
39
+ end
40
+
41
+ return nil
42
+ end
43
+
44
+ # replaces ordinal numbers in a date string with flat numbers
45
+ # input: freetext date string
46
+ # output: same string, but with ordinals replaced by numbers
47
+ def self.replace_ordinals(str)
48
+
49
+ work_str = str.clone
50
+
51
+ ordinals = {
52
+ # replace fulltext ordinals with numbers
53
+ 'first' => '1',
54
+ 'second' => '2',
55
+ 'third' => '3',
56
+ 'fourth' => '4',
57
+ 'fifth' => '5',
58
+ 'sixth' => '6',
59
+ 'seventh' => '7',
60
+ 'eighth' => '8',
61
+ 'ninth' => '9',
62
+ 'tenth' => '10',
63
+ 'eleventh' => '11',
64
+ 'twelfth' => '12',
65
+ 'thirteenth' => '13',
66
+ 'fourteenth' => '14',
67
+ 'fifteenth' => '15',
68
+ 'sixteenth' => '16',
69
+ 'seventeenth' => '17',
70
+ 'eighteenth' => '18',
71
+ 'nineteenth' => '19',
72
+ 'twentieth' => '20',
73
+ 'twenty-' => '2',
74
+ 'thirtieth' => '30',
75
+ 'thirty-' => '3',
76
+
77
+ # replace numeric ordinals with plain numbers
78
+ '1st' => '1',
79
+ '2nd' => '2',
80
+ '3rd' => '3',
81
+ '3d' => '3',
82
+ '4th' => '4',
83
+ '5th' => '5',
84
+ '6th' => '6',
85
+ '7th' => '7',
86
+ '8th' => '8',
87
+ '9th' => '9',
88
+ '0th' => '0'
89
+ }
90
+
91
+ ordinals.each do |key, value|
92
+ work_str.gsub!(Regexp.new(key), value)
93
+ end
94
+
95
+ return work_str
96
+ end
97
+
98
+ # replaces non-english language months with english months
99
+ # input: freetext date string
100
+ # output: same string, but with months replaced by english months
101
+ def self.language_to_english(str)
102
+
103
+ work_str = str.clone
104
+
105
+ languages = {
106
+
107
+ # french
108
+ 'janvier' => 'January',
109
+ 'février' => 'February',
110
+ 'mars' => 'March',
111
+ 'avril' => 'April',
112
+ 'mai' => 'May',
113
+ 'juin' => 'June',
114
+ 'juillet' => 'July',
115
+ 'août' => 'August',
116
+ 'septembre' => 'September',
117
+ 'octobre' => 'October',
118
+ 'novembre' => 'November',
119
+ 'décembre' => 'December',
120
+
121
+ # spanish
122
+ 'enero' => 'January',
123
+ 'febrero' => 'February',
124
+ 'marzo' => 'March',
125
+ 'abril' => 'April',
126
+ 'mayo' => 'May',
127
+ 'junio' => 'June',
128
+ 'julio' => 'July',
129
+ 'agosto' => 'August',
130
+ 'septiembre' => 'September',
131
+ 'octubre' => 'October',
132
+ 'noviembre' => 'November',
133
+ 'diciembre' => 'December'
134
+ }
135
+
136
+ languages.each do |key, value|
137
+ work_str.gsub!(/#{key}/i, value)
138
+ end
139
+
140
+ return work_str
141
+ end
142
+
143
+ # returns the days in a given month
144
+ # input: a month and year (int, or numeric strings)
145
+ # output: the number of days in that month in that year
146
+ def self.days_in_month(month,year)
147
+ month = month.kind_of?(String) ? month.to_i : month
148
+ year = year.kind_of?(String) ? year.to_i : year
149
+ days = {
150
+ 1 => 31,
151
+ 2 => leap_year?(year) ? 29 : 28,
152
+ 3 => 31,
153
+ 4 => 30,
154
+ 5 => 31,
155
+ 6 => 30,
156
+ 7 => 31,
157
+ 8 => 31,
158
+ 9 => 30,
159
+ 10 => 31,
160
+ 11 => 30,
161
+ 12 => 31
162
+ }
163
+ days[month]
164
+ end
165
+
166
+
167
+ # transforms a datetime object into an int
168
+ # input: datetime
169
+ # output: the digits of the datetime's string form (all non-digit characters removed), read as a single int
170
+ def self.datetime_comparitor(datetime)
171
+ d = datetime.to_s
172
+ d.gsub!(/[^\d]/,'')
173
+ return d.to_i
174
+ end
175
+
176
+
177
+ # determines if a year is leap or not
178
+ # input: a year as an int or string
179
+ # output: boolean of whether the year is leap or not
180
+ def self.leap_year?(year)
181
+ year = (year.kind_of? String) ? year.to_i : year
182
+ if year % 400 == 0
183
+ return true
184
+ elsif year % 100 == 0
185
+ return false
186
+ elsif year % 4 == 0
187
+ return true
188
+ else
189
+ return false
190
+ end
191
+ end
192
+
193
+ # removes sub-strings that do not contain parsable data
194
+ # input: freetext string
195
+ # output: same string, ready for the parser
196
+ def self.clean_string(string)
197
+ r = @regex_tokens
198
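+ # patterns to strip: [n.y.] variants, brackets/parentheses, trailing punctuation, question marks, "approx.", circa markers, and trailing n.d./undated variants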
199
+ substrings = [
200
+ /\[n\.?y\.?\]/,
201
+ /[\[\]\(\)]/,
202
+ /[\.\,\)\;\:]*$/,
203
+ /\?/,
204
+ /approx\.?(imately)?/i,
205
+ /\s#{regex_tokens[:circa]}\s/,
206
+ /^#{regex_tokens[:circa]}\s/,
207
+ Regexp.new("([\,\;\s(and)]{0,4}#{regex_tokens[:nd]})?$")
208
+ ]
209
+
210
+ # transform seasons to months
211
+ string.gsub!(/[Ww]inter/, " January 1 - March 20 ")
212
+ string.gsub!(/[Ss]pring/, " March 20 - June 21 ")
213
+ string.gsub!(/[Ss]ummer/, " June 21 - September 23 ")
214
+ string.gsub!(/[Aa]utumn/, " September 23 - December 22 ")
215
+ string.gsub!(/[Ff]all/, " September 23 - December 22 ")
216
+
217
+ # remove days of the week
218
+ dow = [/[Ss]unday,?\s+/, /[Mm]onday,?\s+/, /[Tt]uesday,?\s+/, /[Ww]ednesday,?\s+/, /[Tt]hursday,?\s+/, /[Ff]riday,?\s+/, /[Ss]aturday,?\s+/]
219
+ dow.each {|d| string.gsub!(d, '')}
220
+
221
+ # remove times of day
222
+ tod = [/[Mm]orning,?\s+/, /[Aa]fternoon,?\s+/, /[Ee]vening,?\s+/, /[Nn]ight,?\s+/]
223
+ tod.each {|t| string.gsub!(t, '')}
224
+
225
+ # remove single question marks
226
+ string.gsub!(/([0-9])\?([^\?])/,'\1\2')
227
+
228
+ substrings.each { |s| string.gsub!(s,'') }
229
+ string.strip!
230
+ return string
231
+ end
232
+
233
+ # Finds the first 4-digit number in the string, removes every occurrence of it from the string (in place), and returns it
234
+ def self.extract_year(string)
235
+ year = string.match(/\d{4}/).to_s
236
+ string.gsub!(Regexp.new(year),'')
237
+ year
238
+ end
239
+
240
+ # regexes used by parser to detect various date forms
241
+ def self.regex_tokens
242
+ return {
243
+ # 1969, [1969], c1969
244
+ :year => '[\[\sc\(]{0,3}[0-2][0-9]{3}[\]\s\.\,;\?\)]{0,3}',
245
+ # - or 'to'
246
+ :range_delimiter => '\s*((\-)|(to))\s*',
247
+ # , or ;
248
+ :list_delimiter => '\s*[\,\;]\s*',
249
+ # , or ; or - or 'to'
250
+ :range_or_list_delimiter => '\s*([\,\;]|((\-)|(to)))\s*',
251
+ # n.d., undated, etc.
252
+ :nd => '[\[\s]{0,2}\b([Uu]+ndated\.?)|([nN]o?\.?\s*[dD](ate)?\.?)\b[\s\]\.]{0,3}',
253
+ # 1960s, 1960's
254
+ :decade_s => '[\[\s]{0,2}[0-9]{3}0\'?s[\]\s]{0,2}',
255
+
256
+ # 1970-75
257
+ :year_range_short => '\s*[0-9]{4}\s?\-\s*(([2-9][0-9])|(1[3-9]))\s*',
258
+
259
+ # 196-
260
+ :decade_aacr => '[0-9]{3}\-',
261
+ # named months, including abbreviations (case insensitive)
262
+ :named_month => '\s*(?i)\b((jan(uary)?)|(feb(ruary)?)|(mar(ch)?)|(apr(il)?)|(may)|(jun(e)?)|(jul(y)?)|(aug(ust)?)|(sep(t|tember)?)|(oct(ober)?)|(nov(ember)?)|(dec(ember)?))\b\.?\s*',
263
+ # circa, ca. - also matches 'c.', which is actually 'copyright', but is still not something we need to deal with
264
+ :circa => '\s*[Cc](irc)?a?\.?\s*',
265
+ # early, late, mid-
266
+ :decade_qualifier => '([Ee]arly)|([Mm]id)|([Ll]ate)\-?',
267
+ # 06-16-1972, 6-16-1972
268
+ :numeric_date_us => '(0?1)|(0?2)|(0?3)|(0?4)|(0?5)|(0?6)|(0?7)|(0?8)|(0?9)|1[0-2][\-\/](([0-2]?[0-9])|3[01])[\-\/])?[12][0-9]{3}',
269
+ # 1972-06-16
270
+ :iso8601 => '[0-9]{4}\-[0-9]{2}\-[0-9]{2}',
271
+ :iso8601_full => '[0-9]{4}((\-[0-9]{2})(\-[0-9]{2})?)?',
272
+ :iso8601_month => '[0-9]{4}\-[0-9]{2}',
273
+ :anchor_start => '^[^\w\d]*',
274
+ :anchor_end => '[^\w\d]*$',
275
+ :optional_comma => '[\s\,]*',
276
+ :day_of_month => '\s*(([0-2]?[0-9])|(3[0-1]))\s*'
277
+ }
278
+ end
279
+ end
@@ -1,3 +1,3 @@
1
1
  module Timetwister
2
- VERSION = "0.2.2"
2
+ VERSION = "0.2.3"
3
3
  end
data/spec/dates_spec.rb CHANGED
@@ -12,11 +12,14 @@ describe Timetwister do
12
12
  end
13
13
 
14
14
  it "parses ISO 8601 date ranges" do
15
- date = Timetwister.parse("1776-07-04/1789-03-01")
16
- expect(date[0][:date_start]).to eq("1776-07-04")
17
- expect(date[0][:date_end]).to eq("1789-03-01")
18
- expect(date[0][:inclusive_range]).to eq(true)
19
- expect(date[0][:test_data]).to eq("40")
15
+ forms = ["1776-07-04/1789-03-01", "1789-03-01/1776-07-04"]
16
+ forms.each do |f|
17
+ date = Timetwister.parse(f)
18
+ expect(date[0][:date_start]).to eq("1776-07-04")
19
+ expect(date[0][:date_end]).to eq("1789-03-01")
20
+ expect(date[0][:inclusive_range]).to eq(true)
21
+ expect(date[0][:test_data]).to eq("40")
22
+ end
20
23
  end
21
24
 
22
25
  it "parses definite and approximate single years" do
@@ -44,7 +47,7 @@ describe Timetwister do
44
47
 
45
48
  it "parses ranges of full dates" do
46
49
 
47
- forms = ["July 4 1776 - March 1 1789", "4 July 1776 - 1 March 1789", "1776 July 4 - 1789 March 1", "1776 4 July - 1789 1 March"]
50
+ forms = ["July 4 1776 - March 1 1789", "4 July 1776 - 1 March 1789", "1776 July 4 - 1789 March 1", "1776 4 July - 1789 1 March", "1776 4 July to 1789 1 March"]
48
51
  forms.each do |f|
49
52
  date = Timetwister.parse(f)
50
53
  expect(date[0][:date_start]).to eq("1776-07-04")
@@ -261,7 +264,7 @@ describe Timetwister do
261
264
  end
262
265
 
263
266
  it "parses day/month ranges within a single year" do
264
- forms = ["4 May - 10 July 1776", "1776 May 4 - July 10", "May 4 - July 10 1776", "4 May - 10 July 1776"]
267
+ forms = ["4 May - 10 July 1776", "1776 May 4 - July 10", "May 4 - July 10 1776", "4 May - 10 July 1776", "4 May to 10 July 1776"]
265
268
  forms.each do |f|
266
269
  date = Timetwister.parse(f)
267
270
  expect(date[0][:date_start]).to eq("1776-05-04")
@@ -275,7 +278,7 @@ describe Timetwister do
275
278
  end
276
279
 
277
280
  it "parses full date + year/month date range" do
278
- forms = ["4 July 1776 - March 1789", "1776 July 4 - 1789 March"]
281
+ forms = ["4 July 1776 - March 1789", "1776 July 4 - 1789 March", "1776 July 4 to 1789 March"]
279
282
  forms.each do |f|
280
283
  date = Timetwister.parse(f)
281
284
  expect(date[0][:date_start]).to eq("1776-07-04")
@@ -303,7 +306,7 @@ describe Timetwister do
303
306
  # the normalized dates returned here are a bit funny
304
307
  # we could do with standardizing them
305
308
  it "parses year + month/year range" do
306
- forms = ["1776 - March 1789", "1776 - 1789 March"]
309
+ forms = ["1776 - March 1789", "1776 - 1789 March", "1776 to 1789 March"]
307
310
  forms.each do |f|
308
311
  date = Timetwister.parse(f)
309
312
  expect(date[0][:date_start]).to eq("1776-01")
data/spec/spec_helper.rb CHANGED
@@ -16,6 +16,10 @@
16
16
  # users commonly want.
17
17
  #
18
18
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+
20
+ require 'coveralls'
21
+ Coveralls.wear!
22
+
19
23
  RSpec.configure do |config|
20
24
  # rspec-expectations config goes here. You can use an alternate
21
25
  # assertion/expectation library such as wrong or the stdlib/minitest
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: timetwister
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Duryee
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-21 00:00:00.000000000 Z
11
+ date: 2015-12-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -69,6 +69,7 @@ files:
69
69
  - bin/timetwister
70
70
  - lib/timetwister.rb
71
71
  - lib/timetwister/parser.rb
72
+ - lib/timetwister/utilities.rb
72
73
  - lib/timetwister/version.rb
73
74
  - spec/dates_spec.rb
74
75
  - spec/spec_helper.rb