gedcom_ruby 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gedcom_ruby/date.rb +245 -0
- data/lib/gedcom_ruby/date_parser.rb +952 -0
- data/spec/date_spec.rb +44 -0
- data/spec/datepart_spec.rb +87 -0
- data/spec/gedcoms/3_indis.ged +6 -0
- data/spec/gedcoms/5_lines.ged +5 -0
- data/spec/gedcoms/linewrap_conc.ged +7 -0
- data/spec/gedcoms/linewrap_cont.ged +5 -0
- data/spec/gedcoms/simple.ged +48 -0
- data/spec/parser_spec.rb +135 -0
- data/spec/spec_helper.rb +7 -0
- metadata +23 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 652c0329a3da725d78fc30443b0e062f3f833d8e
|
4
|
+
data.tar.gz: fa7b0f98c14928554d1b8bb8b74dbd2a2d1be230
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f42a68770f124ee2b8937355b99adcba49625a57e8e5a042ba31284b153163902ae7373112537f6eb73d4555a7b70704e6a200403627d0ba60a881d5fce0cb9
|
7
|
+
data.tar.gz: 6fa98503efb3513e5a931ff360cef94ea25dc53dde29b6644519d0de8e01bd71bd97eb64008c74c6da2715ffcb83eb91d5ce08c7711a2ebc8889b10ec15cd740
|
@@ -0,0 +1,245 @@
|
|
1
|
+
# -------------------------------------------------------------------------
|
2
|
+
# gedcom_date.rb -- module definition for GEDCOM date handler
|
3
|
+
# Copyright (C) 2008 Phillip Davies (binary011010@verizon.net)
|
4
|
+
# -------------------------------------------------------------------------
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
# -------------------------------------------------------------------------
|
19
|
+
#
|
20
|
+
require 'gedcom_ruby/date_parser'
|
21
|
+
module GEDCOM
|
22
|
+
|
23
|
+
# One half of a GEDCOM date value. Extends GEDCOM_DATE_PARSER::GEDDate with
# readers that consult the part's flags before exposing day / month / year,
# and with an ordering (<=>) over year, then month, then day.
class DatePart < GEDCOM_DATE_PARSER::GEDDate

  # Flags
  NONE        = GEDCOM_DATE_PARSER::GFNONE
  PHRASE      = GEDCOM_DATE_PARSER::GFPHRASE
  NONSTANDARD = GEDCOM_DATE_PARSER::GFNONSTANDARD
  NOFLAG      = GEDCOM_DATE_PARSER::GFNOFLAG
  NODAY       = GEDCOM_DATE_PARSER::GFNODAY
  NOMONTH     = GEDCOM_DATE_PARSER::GFNOMONTH
  NOYEAR      = GEDCOM_DATE_PARSER::GFNOYEAR
  YEARSPAN    = GEDCOM_DATE_PARSER::GFYEARSPAN

  def initialize(type = GEDCOM_DATE_PARSER::GCTGREGORIAN, flags = NONE, data = nil)
    super(type, flags, data)
  end

  # Calendar type this part was created with (the GEDDate +type+).
  def calendar
    @type
  end

  # Part-level flags (NONE, PHRASE or NONSTANDARD).
  def compliance
    @flags
  end

  # Returns the stored data of a phrase part.
  # Raises DateFormatException when the part is not flagged PHRASE.
  def phrase
    raise DateFormatException if @flags != PHRASE
    @data
  end

  # False for phrases; otherwise true unless the NODAY bit is set on the data.
  def has_day?
    return false if @flags == PHRASE
    (@data.flags & NODAY) == 0
  end

  # False for phrases; otherwise true unless the NOMONTH bit is set on the data.
  def has_month?
    return false if @flags == PHRASE
    (@data.flags & NOMONTH) == 0
  end

  # False for phrases; otherwise true unless the NOYEAR bit is set on the data.
  def has_year?
    return false if @flags == PHRASE
    (@data.flags & NOYEAR) == 0
  end

  # False for phrases; otherwise true when the YEARSPAN bit is set on the data.
  def has_year_span?
    return false if @flags == PHRASE
    (@data.flags & YEARSPAN) != 0
  end

  # Day number. Raises DateFormatException for phrases and day-less dates.
  def day
    raise DateFormatException, "date has no day" unless has_day?
    @data.day
  end

  # Month number. Raises DateFormatException for phrases and month-less dates.
  def month
    raise DateFormatException, "date has no month" unless has_month?
    @data.month
  end

  # Year number. Raises DateFormatException for phrases and year-less dates.
  def year
    raise DateFormatException, "date has no year" unless has_year?
    @data.year
  end

  # Second year of a year span (the data's +year2+).
  # Raises DateFormatException when the part carries no year span.
  def to_year
    raise DateFormatException, "date has no year span" unless has_year_span?
    @data.year2
  end

  # "BC" or "AD" for a gregorian part; raises DateFormatException for phrases
  # and for every other calendar type.
  def epoch
    if @flags == PHRASE || @type != GEDCOM_DATE_PARSER::GCTGREGORIAN
      raise DateFormatException, "only gregorian dates have epoch"
    end
    @data.adbc == GEDCOM_DATE_PARSER::GEDADBCBC ? "BC" : "AD"
  end

  def to_s
    GEDCOM_DATE_PARSER::DateParser.build_gedcom_date_part_string( self )
  end

  # Orders two parts by year, then month, then day. At each level a part that
  # has the field sorts before one that lacks it; when both have it the values
  # are compared; when neither has it the next field decides.
  #
  # The month and day presence checks used to test +dp+ against itself
  # (`dp.has_month? and !dp.has_month?`), which can never be true; they now
  # compare this part with +dp+, the same way the year check always did.
  def <=>( dp )
    [:year, :month, :day].each do |field|
      predicate = "has_#{field}?"
      mine      = send( predicate )
      theirs    = dp.send( predicate )

      return -1 if mine && !theirs
      return 1 if !mine && theirs
      next unless mine

      rc = ( send( field ) <=> dp.send( field ) )
      return rc unless rc == 0
    end

    0
  end
end #/ DatePart
|
129
|
+
|
130
|
+
# A parsed GEDCOM date value: a kind code (+format+) plus up to two DateParts
# (+first+ / +last+). Parsing is delegated to GEDCOM_DATE_PARSER::DateParser.
class Date < GEDCOM_DATE_PARSER::GEDDateValue
  # Date value kinds (mirrors of the parser's GC* codes)
  NONE        = GEDCOM_DATE_PARSER::GCNONE
  ABOUT       = GEDCOM_DATE_PARSER::GCABOUT
  CALCULATED  = GEDCOM_DATE_PARSER::GCCALCULATED
  ESTIMATED   = GEDCOM_DATE_PARSER::GCESTIMATED
  BEFORE      = GEDCOM_DATE_PARSER::GCBEFORE
  AFTER       = GEDCOM_DATE_PARSER::GCAFTER
  BETWEEN     = GEDCOM_DATE_PARSER::GCBETWEEN
  FROM        = GEDCOM_DATE_PARSER::GCFROM
  TO          = GEDCOM_DATE_PARSER::GCTO
  FROMTO      = GEDCOM_DATE_PARSER::GCFROMTO
  INTERPRETED = GEDCOM_DATE_PARSER::GCINTERPRETED
  CHILD       = GEDCOM_DATE_PARSER::GCCHILD
  CLEARED     = GEDCOM_DATE_PARSER::GCCLEARED
  COMPLETED   = GEDCOM_DATE_PARSER::GCCOMPLETED
  INFANT      = GEDCOM_DATE_PARSER::GCINFANT
  PRE1970     = GEDCOM_DATE_PARSER::GCPRE1970
  QUALIFIED   = GEDCOM_DATE_PARSER::GCQUALIFIED
  STILLBORN   = GEDCOM_DATE_PARSER::GCSTILLBORN
  SUBMITTED   = GEDCOM_DATE_PARSER::GCSUBMITTED
  UNCLEARED   = GEDCOM_DATE_PARSER::GCUNCLEARED
  BIC         = GEDCOM_DATE_PARSER::GCBIC
  DNS         = GEDCOM_DATE_PARSER::GCDNS
  DNSCAN      = GEDCOM_DATE_PARSER::GCDNSCAN
  DEAD        = GEDCOM_DATE_PARSER::GCDEAD

  # Builds a Date without raising on a format error: the error message is
  # handed to an empty block and discarded.
  def Date.safe_new( parm )
    Date.new( parm ) { |errmsg| }
  end

  # Parses +date_str+ into this object.
  #
  # On a DateParseException the message "format error at '<text>'" is built
  # from the data of the offending part. It is yielded when a block was given
  # and raised as DateFormatException otherwise.
  def initialize( date_str, calendar=DateType::DEFAULT )
    @date1 = DatePart.new
    @date2 = DatePart.new
    super(GEDCOM_DATE_PARSER::DateParser::GEDFNONE, @date1, @date2)
    GEDCOM_DATE_PARSER::DateParser.parse_gedcom_date( date_str, self, calendar )
  rescue GEDCOM_DATE_PARSER::DateParseException
    # `flags & NONSTANDARD` is an Integer, and 0 is truthy in Ruby, so the
    # result must be compared with 0; without that the @date2 branch below
    # could never be reached.
    culprit =
      if @date1 && (@date1.flags & DatePart::NONSTANDARD) != 0
        @date1.data.to_s
      elsif @date2
        @date2.data.to_s
      else
        ""
      end
    err_msg = "format error at '" + culprit + "'"

    raise DateFormatException, err_msg unless block_given?
    yield( err_msg )
  end

  # Kind code of this date value (one of the constants above).
  def format
    @flags
  end

  def first
    @date1
  end

  def last
    @date2
  end

  def to_s
    GEDCOM_DATE_PARSER::DateParser.build_gedcom_date_string( self )
  end

  # NOTE(review): the kind constants are sequential codes (0..10 here), not
  # bit flags, so this mask evaluates to 15 and the method is true only when
  # `@flags & 15` is zero. Behaviour is kept exactly as it was because the
  # intended meaning ("plain date" vs. "any dated kind") cannot be determined
  # from this file -- confirm against the specs before changing.
  def is_date?
    (@flags & (NONE | ABOUT | CALCULATED | ESTIMATED | BEFORE | AFTER | BETWEEN \
      | FROM | TO | FROMTO | INTERPRETED)) != 0 ? false : true
  end

  # True for the two kinds listed in the original mask: BETWEEN and FROMTO.
  # The kinds are sequential codes, so they are compared by equality; the
  # former bit mask (BETWEEN | FROMTO) also matched ABOUT, FROM, TO, etc.
  def is_range?
    @flags == BETWEEN || @flags == FROMTO
  end

  # Ordering: when both sides satisfy is_date? the first parts are compared,
  # then (for ranges) the last parts, a range sorting after a non-range.
  # A side that satisfies is_date? sorts before one that does not; otherwise
  # the kind codes are compared.
  def <=>( d )
    if is_date? and d.is_date?
      rc = ( first <=> d.first )
      return rc unless rc == 0

      if is_range? and d.is_range?
        return ( last <=> d.last )
      elsif is_range?
        return 1
      elsif d.is_range?
        return -1
      end

      return 0
    elsif is_date?
      return -1
    elsif d.is_date?
      return 1
    end

    return format <=> d.format
  end
end
|
231
|
+
|
232
|
+
# Calendar type codes, re-exported from GEDCOM_DATE_PARSER under shorter names.
class DateType
  GREGORIAN = GEDCOM_DATE_PARSER::GCTGREGORIAN
  JULIAN    = GEDCOM_DATE_PARSER::GCTJULIAN
  HEBREW    = GEDCOM_DATE_PARSER::GCTHEBREW
  FRENCH    = GEDCOM_DATE_PARSER::GCTFRENCH
  FUTURE    = GEDCOM_DATE_PARSER::GCTFUTURE
  UNKNOWN   = GEDCOM_DATE_PARSER::GCTUNKNOWN

  # Calendar used when the caller does not name one.
  DEFAULT   = GEDCOM_DATE_PARSER::GCTDEFAULT
end
|
241
|
+
|
242
|
+
# Raised when a date string cannot be parsed or when a field is requested
# that the date does not carry.
#
# Derives from StandardError rather than Exception so that a plain `rescue`
# handles it; handlers that name DateFormatException or Exception explicitly
# keep working unchanged.
class DateFormatException < StandardError
end
|
245
|
+
end
|
@@ -0,0 +1,952 @@
|
|
1
|
+
# -------------------------------------------------------------------------
|
2
|
+
# gedcom_date_parser.rb -- module definition for GEDCOM date parser
|
3
|
+
# Copyright (C) 2008 Phillip Davies (binary011010@verizon.net)
|
4
|
+
# -------------------------------------------------------------------------
|
5
|
+
# This library is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU Lesser General Public
|
7
|
+
# License as published by the Free Software Foundation; either
|
8
|
+
# version 2.1 of the License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# This library is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# Lesser General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU Lesser General Public
|
16
|
+
# License along with this library; if not, write to the Free Software
|
17
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
18
|
+
# -------------------------------------------------------------------------
|
19
|
+
#
|
20
|
+
module GEDCOM_DATE_PARSER
|
21
|
+
# Token Constants
# General Tokens -- the coarse category of a token.
TKERROR        = -2
TKEOF          = -1
TKNONE         = 0

TKNUMBER       = 1
TKMONTH        = 2
TKAPPROXIMATED = 3
TKRANGE        = 4
TKPERIOD       = 5
TKINTERPRETED  = 6
TKLPAREN       = 7
TKRPAREN       = 8
TKBC           = 9
TKAND          = 10
TKTO           = 11
TKSLASH        = 12
TKSTATUS       = 13
TKOTHER        = 14
|
41
|
+
|
42
|
+
# Specific Tokens -- the exact keyword within a general category.

# Months of the default month table
TKJANUARY     = 1
TKFEBRUARY    = 2
TKMARCH       = 3
TKAPRIL       = 4
TKMAY         = 5
TKJUNE        = 6
TKJULY        = 7
TKAUGUST      = 8
TKSEPTEMBER   = 9
TKOCTOBER     = 10
TKNOVEMBER    = 11
TKDECEMBER    = 12

# French month names
TKVENDEMIAIRE = 13
TKBRUMAIRE    = 14
TKFRIMAIRE    = 15
TKNIVOSE      = 16
TKPLUVIOSE    = 17
TKVENTOSE     = 18
TKGERMINAL    = 19
TKFLOREAL     = 20
TKPRAIRIAL    = 21
TKMESSIDOR    = 22
TKTHERMIDOR   = 23
TKFRUCTIDOR   = 24
TKJOUR_COMP   = 25
TKJOUR        = 26
TKCOMP        = 27

# Hebrew month names
TKTISHRI      = 28
TKCHESHVAN    = 29
TKKISLEV      = 30
TKTEVET       = 31
TKSHEVAT      = 32
TKADAR        = 33
TKADAR_SHENI  = 34
TKNISAN       = 35
TKIYAR        = 36
TKSIVAN       = 37
TKTAMMUZ      = 38
TKAV          = 39
TKELUL        = 40
TKSHENI       = 41

# Approximation, range and period keywords
TKABOUT       = 80
TKCALCULATED  = 81
TKESTIMATED   = 82
TKBEFORE      = 83
TKAFTER       = 84
TKBETWEEN     = 85
TKFROM        = 86

# Status keywords
TKCHILD       = 87
TKCLEARED     = 88
TKCOMPLETED   = 89
TKINFANT      = 90
TKPRE1970     = 91
TKQUALIFIED   = 92
TKSTILLBORN   = 93
TKSUBMITTED   = 94
TKUNCLEARED   = 95
TKBIC         = 96 #Born In the Covenant
TKDNS         = 97 #Do Not Submit
TKDNSCAN      = 98 #Do Not Submit / Cancelled
TKDEAD        = 99
|
108
|
+
|
109
|
+
# States of the date-value state machine (ST_DV_*)
ST_DV_ERROR       = -1
ST_DV_START       = 1
ST_DV_DATE        = 2
ST_DV_DATE_APPROX = 3
ST_DV_DATE_RANGE  = 4
ST_DV_TO          = 5
ST_DV_DATE_PERIOD = 6
ST_DV_DATE_INTERP = 7
ST_DV_DATE_PHRASE = 8
ST_DV_AND         = 9
ST_DV_STATUS      = 10
ST_DV_END         = 11

# States of the single-date state machine (ST_DT_*)
ST_DT_ERROR  = -1
ST_DT_START  = 1
ST_DT_NUMBER = 2
ST_DT_MONTH  = 3
ST_DT_SLASH  = 4
ST_DT_BC     = 5
ST_DT_END    = 6
|
130
|
+
|
131
|
+
|
132
|
+
# After parsing, all flags should be available as booleans with accessors

# calendar types
GCTGREGORIAN = 0
GCTJULIAN    = 1
GCTHEBREW    = 2
GCTFRENCH    = 3
GCTFUTURE    = 4
GCTUNKNOWN   = 99

GCTDEFAULT   = GCTGREGORIAN

# date constants
GCNONE        = 0

# approximated date constants
GCABOUT       = 1
GCCALCULATED  = 2
GCESTIMATED   = 3

# date range constants
GCBEFORE      = 4
GCAFTER       = 5
GCBETWEEN     = 6

# date period constants
GCFROM        = 7
GCTO          = 8
GCFROMTO      = 9

# other date constants
GCINTERPRETED = 10

# LDS ordinance constants
GCCHILD       = 11
GCCLEARED     = 12
GCCOMPLETED   = 13
GCINFANT      = 14
GCPRE1970     = 15
GCQUALIFIED   = 16
GCSTILLBORN   = 17
GCSUBMITTED   = 18
GCUNCLEARED   = 19
GCBIC         = 20 # Born In the Covenant
GCDNS         = 21 # Do Not Submit
GCDNSCAN      = 22 # Do Not Submit / Cancelled
GCDEAD        = 23

# date flags
GFNONE        = 0
GFPHRASE      = 1
GFNONSTANDARD = 2

# date bit flags
GFNOFLAG   = 0
GFNODAY    = 1
GFNOMONTH  = 2
GFNOYEAR   = 4
GFYEARSPAN = 8

# data type constants
GCMAXPHRASEBUFFERSIZE = 35

# BC / AD
GEDADBCBC = 0
GEDADBCAD = 1
|
205
|
+
|
206
|
+
# Month name tables used when a date is turned back into text. Each table is
# frozen so that the shared constant cannot be modified by accident.

# Used for every calendar type without a table of its own.
Default_Months = [ "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" ].freeze

Hebrew_Months = [ "Tishri", "Cheshvan", "Kislev", "Tevet", "Shevat", "Adar",
                  "Adar Sheni", "Nisan", "Iyar", "Sivan", "Tammuz", "Av",
                  "Elul", "Sheni" ].freeze

French_Months = [ "Vend", "Brum", "Frim", "Niv", "Pluv", "Vent", "Germ", "Flor",
                  "Prair", "Mess", "Therm", "Fruct", "J. Comp", "Jour", "Comp" ].freeze
|
215
|
+
|
216
|
+
# One row of the tokenizer's lookup table: the keyword text (+lexeme+) with
# its general and specific token codes.
class Token
  attr_accessor :lexeme
  attr_accessor :general
  attr_accessor :specific

  def initialize(lex, gen, spec)
    @lexeme, @general, @specific = lex, gen, spec
  end
end
|
224
|
+
# Keyword table consulted by DateParser.get_token. Rows are
# [lexeme, general token, specific token] and are kept in ascending lexeme
# order, which the table walk in get_token relies on. The closing row with
# lexeme 0 marks the end of the table.
TokenTable = [
  [ "(",              TKLPAREN,       0 ],
  [ ")",              TKRPAREN,       0 ],
  [ "-",              TKSLASH,        0 ],
  [ "/",              TKSLASH,        0 ],
  [ "AAV",            TKMONTH,        TKAV ],
  [ "ABOUT",          TKAPPROXIMATED, TKABOUT ],
  [ "ABT",            TKAPPROXIMATED, TKABOUT ],
  [ "ADAR",           TKMONTH,        TKADAR ],
  [ "ADR",            TKMONTH,        TKADAR ],
  [ "AFTER",          TKRANGE,        TKAFTER ],
  [ "AND",            TKAND,          0 ],
  [ "APRIL",          TKMONTH,        TKAPRIL ],
  [ "AUGUST",         TKMONTH,        TKAUGUST ],
  [ "AV",             TKMONTH,        TKAV ],
  [ "BC",             TKBC,           0 ],
  [ "BEFORE",         TKRANGE,        TKBEFORE ],
  [ "BETWEEN",        TKRANGE,        TKBETWEEN ],
  [ "BIC",            TKSTATUS,       TKBIC ],
  [ "BRUMAIRE",       TKMONTH,        TKBRUMAIRE ],
  [ "CALCULATED",     TKAPPROXIMATED, TKCALCULATED ],
  [ "CHESHVAN",       TKMONTH,        TKCHESHVAN ],
  [ "CHILD",          TKSTATUS,       TKCHILD ],
  [ "CLEARED",        TKSTATUS,       TKCLEARED ],
  [ "COMPLETED",      TKSTATUS,       TKCOMPLETED ],
  [ "COMPLIMENTAIRS", TKMONTH,        TKCOMP ],
  [ "CSH",            TKMONTH,        TKCHESHVAN ],
  [ "DEAD",           TKSTATUS,       TKDEAD ],
  [ "DECEMBER",       TKMONTH,        TKDECEMBER ],
  [ "DNS",            TKSTATUS,       TKDNS ],
  [ "DNSCAN",         TKSTATUS,       TKDNSCAN ],
  [ "ELL",            TKMONTH,        TKELUL ],
  [ "ELUL",           TKMONTH,        TKELUL ],
  [ "ESTIMATED",      TKAPPROXIMATED, TKESTIMATED ],
  [ "FEBRUARY",       TKMONTH,        TKFEBRUARY ],
  [ "FLOREAL",        TKMONTH,        TKFLOREAL ],
  [ "FRIMAIRE",       TKMONTH,        TKFRIMAIRE ],
  [ "FROM",           TKPERIOD,       TKFROM ],
  [ "FRUCTIDOR",      TKMONTH,        TKFRUCTIDOR ],
  [ "GERMINAL",       TKMONTH,        TKGERMINAL ],
  [ "INFANT",         TKSTATUS,       TKINFANT ],
  [ "INTERPRETED",    TKINTERPRETED,  0 ],
  [ "IYAR",           TKMONTH,        TKIYAR ],
  [ "IYR",            TKMONTH,        TKIYAR ],
  [ "JANUARY",        TKMONTH,        TKJANUARY ],
  [ "JOUR",           TKMONTH,        TKJOUR ],
  [ "JULY",           TKMONTH,        TKJULY ],
  [ "JUNE",           TKMONTH,        TKJUNE ],
  [ "KISLEV",         TKMONTH,        TKKISLEV ],
  [ "KSL",            TKMONTH,        TKKISLEV ],
  [ "MARCH",          TKMONTH,        TKMARCH ],
  [ "MAY",            TKMONTH,        TKMAY ],
  [ "MESSIDOR",       TKMONTH,        TKMESSIDOR ],
  [ "NISAN",          TKMONTH,        TKNISAN ],
  [ "NIVOSE",         TKMONTH,        TKNIVOSE ],
  [ "NOVEMBER",       TKMONTH,        TKNOVEMBER ],
  [ "NSN",            TKMONTH,        TKNISAN ],
  [ "OCTOBER",        TKMONTH,        TKOCTOBER ],
  [ "PLUVIOSE",       TKMONTH,        TKPLUVIOSE ],
  [ "PRAIRIAL",       TKMONTH,        TKPRAIRIAL ],
  [ "PRE1970",        TKSTATUS,       TKPRE1970 ],
  [ "QUALIFIED",      TKSTATUS,       TKQUALIFIED ],
  [ "SEPTEMBER",      TKMONTH,        TKSEPTEMBER ],
  [ "SHENI",          TKMONTH,        TKSHENI ],
  [ "SHEVAT",         TKMONTH,        TKSHEVAT ],
  [ "SHV",            TKMONTH,        TKSHEVAT ],
  [ "SIVAN",          TKMONTH,        TKSIVAN ],
  [ "STILLBORN",      TKSTATUS,       TKSTILLBORN ],
  [ "SUBMITTED",      TKSTATUS,       TKSUBMITTED ],
  [ "SVN",            TKMONTH,        TKSIVAN ],
  [ "TAMMUZ",         TKMONTH,        TKTAMMUZ ],
  [ "TEVET",          TKMONTH,        TKTEVET ],
  [ "THERMIDOR",      TKMONTH,        TKTHERMIDOR ],
  [ "TISHRI",         TKMONTH,        TKTISHRI ],
  [ "TMZ",            TKMONTH,        TKTAMMUZ ],
  [ "TO",             TKTO,           0 ],
  [ "TSH",            TKMONTH,        TKTISHRI ],
  [ "TVT",            TKMONTH,        TKTEVET ],
  [ "UNCLEARED",      TKSTATUS,       TKUNCLEARED ],
  [ "VENDEMIAIRE",    TKMONTH,        TKVENDEMIAIRE ],
  [ "VENTOSE",        TKMONTH,        TKVENTOSE ],
  [ 0,                0,              0 ]
].map { |lex, gen, spec| Token.new(lex, gen, spec) }
|
306
|
+
|
307
|
+
|
308
|
+
# One transition row of a parser state table: in +state+, on token +input+,
# move to +nextState+ and run the action numbered +action+.
class GEDStateEntry
  attr_accessor :state
  attr_accessor :input
  attr_accessor :nextState
  attr_accessor :action

  def initialize(st, ip, ns, a)
    @state, @input, @nextState, @action = st, ip, ns, a
  end
end
|
317
|
+
|
318
|
+
# Transition table for a whole date value. Rows are
# [state, input token, next state, action]; the all-zero row closes the table.
#
# Action numbers, as annotated on the original rows:
#   0: inc dates read, parse a date
#   1: set the approx type
#   2: set the range type
#   3: set the period type
#   4: set interpreted
#   5: get remaining buffer as phrase
#   6: if 'between' and not second date read, error, else terminate
#   7: if 'interpreted', get remaining buffer as phrase
#   8: if 'between', prepare to read next date
#   9: if 'from', set FROMTO, prepare to read next date
#  10: set status
DateValueStateTable = [
  [ ST_DV_START,       TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_START,       TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_START,       TKAPPROXIMATED, ST_DV_DATE_APPROX, 1 ],
  [ ST_DV_START,       TKRANGE,        ST_DV_DATE_RANGE,  2 ],
  [ ST_DV_START,       TKTO,           ST_DV_TO,          3 ],
  [ ST_DV_START,       TKPERIOD,       ST_DV_DATE_PERIOD, 3 ],
  [ ST_DV_START,       TKINTERPRETED,  ST_DV_DATE_INTERP, 4 ],
  [ ST_DV_START,       TKLPAREN,       ST_DV_DATE_PHRASE, 5 ],
  [ ST_DV_START,       TKSTATUS,       ST_DV_STATUS,      10 ],
  [ ST_DV_START,       TKEOF,          ST_DV_END,         6 ],
  [ ST_DV_DATE,        TKLPAREN,       ST_DV_DATE_PHRASE, 7 ],
  [ ST_DV_DATE,        TKAND,          ST_DV_AND,         8 ],
  [ ST_DV_DATE,        TKTO,           ST_DV_TO,          9 ],
  [ ST_DV_DATE,        TKEOF,          ST_DV_END,         6 ],
  [ ST_DV_DATE_APPROX, TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_DATE_APPROX, TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_DATE_RANGE,  TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_DATE_RANGE,  TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_TO,          TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_TO,          TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_DATE_PERIOD, TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_DATE_PERIOD, TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_DATE_INTERP, TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_DATE_INTERP, TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_DATE_PHRASE, TKEOF,          ST_DV_END,         6 ],
  [ ST_DV_AND,         TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_AND,         TKMONTH,        ST_DV_DATE,        0 ],
  # The two ST_DV_TO rows below repeat rows that already appear above; they
  # are kept so the table contents and row positions stay exactly as before.
  [ ST_DV_TO,          TKNUMBER,       ST_DV_DATE,        0 ],
  [ ST_DV_TO,          TKMONTH,        ST_DV_DATE,        0 ],
  [ ST_DV_STATUS,      TKEOF,          ST_DV_END,         6 ],
  [ 0,                 0,              0,                 0 ]
].map { |row| GEDStateEntry.new( *row ) }
|
350
|
+
|
351
|
+
# Transition table for a single date. Rows are
# [state, input token, next state, action]; the all-zero row closes the table.
#
# Action numbers, as annotated on the original rows:
#   0: store number, set NUMBER
#   1: if MONTH, then error, else set number to be day, set month, set MONTH
#   2: if SLASH, then error, else set SLASH, set number to be year
#   3: if not SLASH set number to be year, set bc
#   4: if not SLASH (or on TO / AND) set number to be year, terminate
#   5: if NUMBER, set number to be day. set number to be year, store number, set NUMBER
#   6: terminate
#   7: set number to be year2
DateStateTable = [
  [ ST_DT_START,  TKNUMBER, ST_DT_NUMBER, 0 ],
  [ ST_DT_START,  TKMONTH,  ST_DT_MONTH,  1 ],
  [ ST_DT_NUMBER, TKMONTH,  ST_DT_MONTH,  1 ],
  [ ST_DT_NUMBER, TKSLASH,  ST_DT_SLASH,  2 ],
  [ ST_DT_NUMBER, TKBC,     ST_DT_BC,     3 ],
  [ ST_DT_NUMBER, TKEOF,    ST_DT_END,    4 ],
  [ ST_DT_NUMBER, TKTO,     ST_DT_END,    4 ],
  [ ST_DT_NUMBER, TKAND,    ST_DT_END,    4 ],
  [ ST_DT_MONTH,  TKNUMBER, ST_DT_NUMBER, 5 ],
  [ ST_DT_MONTH,  TKEOF,    ST_DT_END,    6 ],
  [ ST_DT_MONTH,  TKTO,     ST_DT_END,    6 ],
  [ ST_DT_MONTH,  TKAND,    ST_DT_END,    6 ],
  [ ST_DT_SLASH,  TKNUMBER, ST_DT_NUMBER, 7 ],
  [ ST_DT_BC,     TKEOF,    ST_DT_END,    6 ],
  [ 0,            0,        0,            0 ]
].map { |row| GEDStateEntry.new( *row ) }
|
367
|
+
|
368
|
+
# Tokenizer cursor: the text being scanned (+buffer+), the read position
# (+pos+) and a one-token push-back slot (+lastGeneralToken+ /
# +lastSpecificToken+).
class GEDParserState
  attr_accessor :buffer
  attr_accessor :lastGeneralToken
  attr_accessor :lastSpecificToken
  attr_accessor :pos

  def initialize( buf, lgt, lst, p )
    @buffer, @pos = buf, p
    @lastGeneralToken, @lastSpecificToken = lgt, lst
  end
end
|
377
|
+
|
378
|
+
# Gregorian Date Class
# Holds bit flags, day, month, year, a second year (+year2+) and an
# AD/BC marker (+adbc+).
class GEDDateGreg
  attr_accessor :flags
  attr_accessor :day, :month
  attr_accessor :year, :year2
  attr_accessor :adbc

  def initialize(flg, d, m, y, y2, adbc)
    @flags = flg
    @day, @month = d, m
    @year, @year2 = y, y2
    @adbc = adbc
  end
end
|
390
|
+
|
391
|
+
# General Date Class
# Holds bit flags plus day, month and year only.
class GEDDateGeneral
  attr_accessor :flags
  attr_accessor :day, :month, :year

  def initialize(flg, d, m, y)
    @flags = flg
    @day, @month, @year = d, m, y
  end
end
|
401
|
+
|
402
|
+
# A single date part: calendar +type+, part-level +flags+ and the payload.
class GEDDate
  attr_accessor :type
  attr_accessor :flags
  attr_accessor :data # Data should be either a string, Gregorian date or General Date

  def initialize(type, flags, data)
    @type, @flags, @data = type, flags, data
  end
end
|
410
|
+
|
411
|
+
# This should be the end result of our parsing: value-level +flags+ plus the
# two date parts +date1+ and +date2+.
class GEDDateValue
  attr_accessor :flags
  attr_accessor :date1, :date2

  def initialize(flags, d1, d2)
    @flags = flags
    @date1, @date2 = d1, d2
  end
end
|
419
|
+
|
420
|
+
class DateParser
|
421
|
+
# Parser flag values; each non-zero flag is a distinct power of two.
GEDFNONE    = 0
GEDFBETWEEN = 1 << 0
GEDFFROM    = 1 << 1
GEDFINTERP  = 1 << 2
GEDFNUMBER  = 1 << 3
GEDFMONTH   = 1 << 4
GEDFSLASH   = 1 << 5
|
428
|
+
|
429
|
+
def self.get_token( parser )
  # Get a single token from this parser state (class method)
  # Inputs:  parser - parser state (GEDParserState)
  # Outputs: [general, specific] - general and specific token codes.
  #          [TKEOF, TKNONE] at the end of the buffer; [TKERROR, TKNONE]
  #          (with parser.pos restored) when no table entry matches.
  rewind_to = parser.pos

  # A token handed back through put_token is returned first and the slot cleared.
  unless parser.lastGeneralToken == TKNONE
    pending = [ parser.lastGeneralToken, parser.lastSpecificToken ]
    parser.lastGeneralToken = TKNONE
    parser.lastSpecificToken = TKNONE
    return pending
  end

  # Skip leading blanks.
  parser.pos += 1 while parser.buffer[ parser.pos, 1 ] == " "

  # Nothing left to read.
  current = parser.buffer[ parser.pos, 1 ]
  return [ TKEOF, TKNONE ] if current.nil? || current == ""

  lexeme = ""

  # A run of digits is a number; its value is the specific token.
  if current =~ /[0-9]/
    while parser.buffer[ parser.pos, 1 ] =~ /[0-9]/
      lexeme << parser.buffer[ parser.pos, 1 ]
      parser.pos += 1
    end
    return [ TKNUMBER, lexeme.to_i ]
  end

  # Otherwise walk the token table, consuming one character per pass and
  # moving forward through the table while its entries sort before the
  # characters read so far.
  index   = 0
  matched = 0
  until TokenTable[ index ].lexeme == 0
    lexeme << parser.buffer[ parser.pos, 1 ].upcase
    matched += 1
    parser.pos += 1

    if lexeme[ matched - 1, 1 ] != TokenTable[ index ].lexeme[ matched - 1, 1 ]
      prefix = lexeme[ 0, matched ]
      while TokenTable[ index ].lexeme != 0 &&
            ( TokenTable[ index ].lexeme[ 0, matched ] <=> prefix ) < 0
        index += 1
      end

      # if the lexeme does not appear in the table, exit with an error
      break if TokenTable[ index ].lexeme == 0 ||
               ( TokenTable[ index ].lexeme[ 0, matched ] <=> prefix ) != 0
    end

    # A word ends when the next character is not alphanumeric (so a prefix of
    # a table entry is accepted); a symbol ends once the whole entry is read.
    candidate  = TokenTable[ index ]
    alphabetic = lexeme[ 0, 1 ] =~ /[a-zA-Z]/
    if ( alphabetic && parser.buffer[ parser.pos, 1 ] !~ /[0-9a-zA-Z]/ ) ||
       ( !alphabetic && matched >= candidate.lexeme.length )
      return [ candidate.general, candidate.specific ]
    end

    # if the current token terminates before the lexeme, then we have an error
    break if candidate.lexeme[ matched, 1 ].nil?
  end

  # No match: put the cursor back where it started and report an error.
  parser.pos = rewind_to
  [ TKERROR, TKNONE ]
end
|
505
|
+
|
506
|
+
def self.put_token(parser, general, specific)
  # Record a token pair on the parser state (class method)
  # Inputs:  parser   - parser state (GEDParserState), modified in place
  #          general  - general token to store as the last general token
  #          specific - specific token to store as the last specific token
  # Outputs: None (the parser state is updated)
  parser.lastGeneralToken = general
  parser.lastSpecificToken = specific
end
|
515
|
+
|
516
|
+
def self.get_date_text(date)
  # Stringify a GEDCOM date part (class method)
  # Inputs:  date - date part (GEDDate) exposing +type+, +flags+ and +data+
  # Outputs: a new String; empty when the part carries no data
  #
  # Parts flagged GFPHRASE or GFNONSTANDARD keep their raw text in +data+ and
  # are returned verbatim. +data+ may be nil for an empty phrase, so it is
  # converted with to_s instead of being concatenated directly (which would
  # raise a TypeError).
  return date.data.to_s.dup if (date.flags & (GFPHRASE | GFNONSTANDARD)) != 0

  # Month names depend on the calendar; anything that is not Hebrew or French
  # uses the default table.
  months = case date.type
           when GCTHEBREW then Hebrew_Months
           when GCTFRENCH then French_Months
           else Default_Months
           end

  part = date.data
  return "" unless part

  buffer = "".dup
  bits = part.flags

  # Without flags we cannot tell which fields are populated, so none are printed.
  if bits
    show_day   = (bits & GFNODAY) == 0
    show_month = (bits & GFNOMONTH) == 0
    show_year  = (bits & GFNOYEAR) == 0

    if show_day
      buffer << part.day.to_s
      buffer << " " if show_month || show_year
    end

    if show_month
      # Stored months are 1-based; the name tables are 0-based.
      buffer << months[part.month - 1]
      buffer << " " if show_year
    end

    if show_year
      buffer << part.year.to_s
      # A year span is rendered as "<year>-<year2>".
      buffer << "-" << part.year2.to_s if (bits & GFYEARSPAN) != 0
    end
  end

  # Only Gregorian parts carry an AD/BC marker.
  buffer << " BC" if date.type == GCTGREGORIAN && part.adbc != GEDADBCAD
  buffer
end
|
559
|
+
|
560
|
+
def self.validate_month_for_type(month, calType)
  # Map a month token to a 1-based month number for a calendar (class method)
  # Inputs:  month   - specific month token (one of the TK... month constants)
  #          calType - calendar type (one of the GCT... constants)
  # Outputs: 1-based month number within the calendar, or -1 when the token
  #          is not a month of that calendar (or the calendar is unknown)
  case calType
  # Gregorian and Julian share the same month tokens. They must be listed as
  # two separate `when` values: `A || B` would collapse to just `A`.
  when GCTGREGORIAN, GCTJULIAN
    return month - TKJANUARY + 1 if month >= TKJANUARY && month <= TKDECEMBER

  when GCTHEBREW
    return month - TKTISHRI + 1 if month >= TKTISHRI && month <= TKELUL

  when GCTFRENCH
    return month - TKVENDEMIAIRE + 1 if month >= TKVENDEMIAIRE && month <= TKJOUR_COMP
  end

  -1
end
|
577
|
+
|
578
|
+
def self.parse_date_part(parser, datePart, type)
  # Parse out a single date part (class method)
  # Inputs:  parser   - parser state
  #          datePart - date part (GEDDate), reset and filled in place
  #          type     - calendar type
  # Outputs: None (updated date part)
  # Raises:  DateParseException when the tokenizer reports TKERROR or the
  #          token sequence has no valid transition in DateStateTable
  #
  # Tokens are fed one at a time through DateStateTable; each matching row
  # supplies the next state and an action number handled in the `case` below.
  state = ST_DT_START
  flags = GEDFNONE

  # Initialize the datePart, in case it contains old data
  datePart.type = type
  datePart.flags = GFNONE
  datePart.data = if type == GCTGREGORIAN
                    GEDDateGreg.new(flags, 0, 0, 0, 0, GEDADBCAD)
                  else
                    GEDDateGeneral.new(flags, 0, 0, 0)
                  end
  number = 0

  while state != ST_DT_END && state != ST_DT_ERROR
    general, specific = get_token(parser)
    raise DateParseException, "error parsing datepart, pre-transition" if general == TKERROR
    transitionFound = 0

    case general
    when TKNUMBER, TKMONTH, TKSLASH, TKBC, TKEOF, TKERROR
      # Tokens that belong to a date part: hand them to the state table as-is.
    when TKTO, TKAND
      put_token(parser, general, specific)
    else
      # Any other token ends this date part: give it back to the parser and
      # let the state table see an end-of-input instead. There is deliberately
      # no `break` here -- it would leave the `while` loop before the
      # terminating action stores the year and the NODAY/NOMONTH/NOYEAR flags.
      put_token(parser, general, specific)
      general = TKEOF
      specific = TKNONE
    end

    DateStateTable.each do |dateState|
      # A row with state < 1 marks the end of the table.
      break if dateState.state < 1
      next unless dateState.state == state && dateState.input == general

      state = dateState.nextState
      transitionFound = 1

      case dateState.action
      # 0: store number, set NUMBER
      when 0
        number = specific
        flags |= GEDFNUMBER

      # 1: if MONTH, then error, else set number to be day, set month, set MONTH
      when 1
        if type == GCTFRENCH
          case specific
          when TKJOUR
            # "JOUR" is only valid when followed by (part of) "COMPLEMENTAIRES".
            # Look ahead with separate variables so the month token being
            # processed is not lost.
            nextGeneral, nextSpecific = get_token(parser)
            raise DateParseException, "error parsing datepart, post-JOUR (french calendar)" if nextGeneral == TKERROR
            unless nextGeneral == TKMONTH && nextSpecific == TKCOMP
              state = ST_DT_ERROR
              put_token(parser, nextGeneral, nextSpecific)
            end
            # Ruby `case` does not fall through, so map to the combined
            # month token explicitly.
            specific = TKJOUR_COMP

          when TKCOMP
            specific = TKJOUR_COMP
          end
        elsif type == GCTHEBREW
          # If the token is "ADAR", see if it is followed by "SHENI", and if
          # it is, change the month to "ADAR SHENI". Otherwise the look-ahead
          # token is handed back and the month stays "ADAR".
          if specific == TKADAR
            nextGeneral, nextSpecific = get_token(parser)
            raise DateParseException, "error parsing datepart, post-ADAR" if nextGeneral == TKERROR
            if nextGeneral == TKMONTH && nextSpecific == TKSHENI
              specific = TKADAR_SHENI
            else
              put_token(parser, nextGeneral, nextSpecific)
            end
          end
        end

        if (flags & GEDFMONTH) != 0
          # A second month in the same date part is an error.
          state = ST_DT_ERROR
        else
          month = validate_month_for_type(specific, type)
          if month < 1
            state = ST_DT_ERROR
          else
            # Any number read before the month is the day.
            datePart.data.day = number
            datePart.data.month = month
          end
          flags |= GEDFMONTH
          number = 0
        end

      # 2: if SLASH, then error, else set SLASH, set number to be year
      when 2
        if (flags & GEDFSLASH) != 0 || type != GCTGREGORIAN
          state = ST_DT_ERROR
        else
          datePart.data.year = number if number > 0
          datePart.data.flags |= GFYEARSPAN
          number = 0
          flags |= GEDFSLASH
        end

      # 3: if not SLASH set number to be year, set bc
      # 4: if not SLASH set number to be year, terminate
      # 6: terminate
      when 3, 4, 6
        if dateState.action == 3
          if type != GCTGREGORIAN
            state = ST_DT_ERROR
            break # out of the DateStateTable.each block
          end
          datePart.data.adbc = GEDADBCBC
        end

        if dateState.action == 3 || dateState.action == 4
          if number > 0 && (flags & GEDFSLASH) == 0
            datePart.data.year = number
            number = 0
          end
        end

        # Mark every field that was never filled in.
        datePart.data.flags |= GFNODAY if datePart.data.day < 1
        datePart.data.flags |= GFNOMONTH if datePart.data.month < 1
        datePart.data.flags |= GFNOYEAR if datePart.data.year < 1

      # 5: if NUMBER, set number to be day. set number to be year, store number, set NUMBER
      when 5
        datePart.data.day = number if number > 0 && (flags & GEDFNUMBER) != 0
        datePart.data.year = specific
        number = 0
        flags |= GEDFNUMBER

      # 7: set number to be year2 (Gregorian Calendar)
      when 7
        datePart.data.year2 = specific % 100
        number = 0
      end

      break
    end

    state = ST_DT_ERROR if transitionFound == 0
  end

  raise DateParseException, "error parsing datepart, general" if state == ST_DT_ERROR
end
|
742
|
+
|
743
|
+
|
744
|
+
def self.parse_gedcom_date(dateString, date, type = GCTDEFAULT)
  # Parse out a GEDCOM date (class method)
  # Inputs:  dateString - String containing GEDCOM date
  #          date       - date (GEDDateValue), updated in place
  #          type       - calendar type
  # Outputs: None (updated date)
  # Raises:  DateParseException when the tokenizer reports TKERROR or the
  #          date cannot be parsed. In the latter case the date part being
  #          read is first flagged GFNONSTANDARD and given the unparsed
  #          remainder of the string as its data.
  #
  # Tokens are fed through DateValueStateTable; each matching row supplies
  # the next state and an action number handled in the `case` below.
  parser = GEDParserState.new("", 0, 0, 0)
  parser.buffer = dateString

  # New date 1 if it's nil
  date.date1 = GEDDate.new(type, GFNONE, nil) if not date.date1
  datePart = date.date1

  state = ST_DV_START
  flags = GEDFNONE
  datesRead = 0

  while state != ST_DV_END && state != ST_DV_ERROR
    # Remember where this token started so an error can report the rest
    # of the string from here.
    savePos = parser.pos
    general, specific = get_token(parser)
    raise DateParseException, "error parsing date" if general == TKERROR
    transitionFound = 0

    DateValueStateTable.each do |dateValueState|
      # A row with state < 1 marks the end of the table.
      break if dateValueState.state < 1
      next unless dateValueState.state == state && dateValueState.input == general

      transitionFound = 1
      state = dateValueState.nextState

      case dateValueState.action
      # 0: inc dates read, parse a date
      when 0
        put_token(parser, general, specific)
        begin
          if datesRead != 0
            # New date 2 if it's nil
            date.date2 = GEDDate.new(type, GFNONE, nil) if not date.date2
            datePart = date.date2
          end
          parse_date_part(parser, datePart, type)
          datesRead += 1
        rescue StandardError, DateParseException
          # DateParseException is named explicitly: a bare `rescue` only
          # covers StandardError and would let date-part failures skip the
          # non-standard fallback at the bottom of this method.
          state = ST_DV_ERROR
        end

      # 1: set the approx type
      when 1
        date.flags = case specific
                     when TKABOUT then GCABOUT
                     when TKCALCULATED then GCCALCULATED
                     when TKESTIMATED then GCESTIMATED
                     end

      # 2: set the range type
      when 2
        date.flags = case specific
                     when TKBEFORE then GCBEFORE
                     when TKAFTER then GCAFTER
                     when TKBETWEEN
                       flags |= GEDFBETWEEN
                       GCBETWEEN
                     end

      # 3: set the period type
      when 3
        if general == TKTO
          date.flags = GCTO
        elsif specific == TKFROM
          date.flags = GCFROM
          flags |= GEDFFROM
        end

      # 4: set interpreted
      when 4
        date.flags = GCINTERPRETED
        flags |= GEDFINTERP

      # 5: get remaining buffer as phrase
      # 7: if 'interpreted', get remaining buffer as phrase
      when 5, 7
        if dateValueState.action == 7 && (flags & GEDFINTERP) == 0
          state = ST_DV_ERROR
          break # out of the DateValueStateTable.each block
        end

        # Strip off trailing whitespace and closing parenthesis
        buffer = parser.buffer.slice(parser.pos, parser.buffer.length).rstrip.split(')')[0]
        datePart.data = buffer
        datePart.flags = GFPHRASE
        parser.pos = parser.buffer.length

      # 6: if 'between' and not second date read, error, else terminate
      when 6
        state = ST_DV_ERROR if (flags & GEDFBETWEEN) != 0 && datesRead < 2
        # else -- nextState is ST_DV_END, so we're done!

      # 8: if 'between', prepare to read next date
      when 8
        state = ST_DV_ERROR if (flags & GEDFBETWEEN) == 0

      # 9: if 'from', set FROMTO, prepare to read next date
      when 9
        if (flags & GEDFFROM) == 0
          state = ST_DV_ERROR
        else
          date.flags = GCFROMTO
        end

      # 10: set status
      when 10
        date.flags = case specific
                     when TKCHILD then GCCHILD
                     when TKCLEARED then GCCLEARED
                     when TKCOMPLETED then GCCOMPLETED
                     when TKINFANT then GCINFANT
                     when TKPRE1970 then GCPRE1970
                     when TKQUALIFIED then GCQUALIFIED
                     when TKSTILLBORN then GCSTILLBORN
                     when TKSUBMITTED then GCSUBMITTED
                     when TKUNCLEARED then GCUNCLEARED
                     when TKBIC then GCBIC
                     when TKDNS then GCDNS
                     when TKDNSCAN then GCDNSCAN
                     when TKDEAD then GCDEAD
                     end
      end

      break # ... Out of the DateValueStateTable.each block
    end

    state = ST_DV_ERROR if transitionFound == 0
  end

  if state == ST_DV_ERROR
    # Keep the unparsed text on the date part so callers that rescue the
    # exception can still display the original value.
    parser.pos = savePos
    datePart.flags = GFNONSTANDARD
    datePart.data = parser.buffer.slice(parser.pos, parser.buffer.length)
    raise DateParseException, "error parsing date, general"
  end
end
|
893
|
+
|
894
|
+
def self.build_gedcom_date_string(date)
  # Stringify a GEDCOM date (class method)
  # Inputs:  date - date (GEDDateValue) exposing +flags+, +date1+ and +date2+
  # Outputs: buffer - output string
  #
  # Status values (child, cleared, ...) have no date parts, so their text is
  # returned on its own. Every other value is rendered as an optional prefix,
  # the first date part, an optional joiner and the second date part.
  buffer = ""

  case date.flags
  when GCABOUT       then buffer = "abt "
  when GCCALCULATED  then buffer = "cal "
  when GCESTIMATED   then buffer = "est "
  when GCBEFORE      then buffer = "bef "
  when GCAFTER       then buffer = "aft "
  when GCBETWEEN     then buffer = "bet "
  # An open-ended "from" period and a "from ... to" period share the prefix.
  when GCFROM, GCFROMTO then buffer = "from "
  when GCTO          then buffer = "to "
  when GCINTERPRETED then buffer = "int "

  # A bare `return` here would hand nil back to the caller, so the status
  # text is returned explicitly.
  when GCCHILD       then return "child"
  when GCCLEARED     then return "cleared"
  when GCCOMPLETED   then return "completed"
  when GCINFANT      then return "infant"
  when GCPRE1970     then return "pre-1970"
  when GCQUALIFIED   then return "qualified"
  when GCSTILLBORN   then return "stillborn"
  when GCSUBMITTED   then return "submitted"
  when GCUNCLEARED   then return "uncleared"
  when GCBIC         then return "BIC"
  when GCDNS         then return "DNS"
  when GCDNSCAN      then return "DNSCAN"
  when GCDEAD        then return "dead"
  end

  buffer += get_date_text(date.date1) if date.date1

  case date.flags
  when GCBETWEEN then buffer += " and "
  when GCFROMTO  then buffer += " to "
  end

  buffer += get_date_text(date.date2) if date.date2
  buffer
end
|
937
|
+
|
938
|
+
def self.build_gedcom_date_part_string(date)
  # Stringify a single GEDCOM date part (class method)
  # Inputs:  date - date part (GEDDate)
  # Outputs: a new string holding the text produced by get_date_text
  "" + get_date_text(date)
end
|
946
|
+
|
947
|
+
end
|
948
|
+
|
949
|
+
# Raised by the date parser when a GEDCOM date string cannot be parsed.
#
# Derives from StandardError (not Exception) so that a plain `rescue` or
# `rescue => e` catches it like any other application-level error; code that
# rescues DateParseException or Exception explicitly keeps working unchanged.
class DateParseException < StandardError
end
|
952
|
+
end
|