datte 0.1.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,23 +19,36 @@ module Datte
19
19
  end
20
20
  end
21
21
 
22
- return @date.to_datetime
22
+ ABSOLUTE_TIMES.each do |matcher|
23
+ if md = @body.match(matcher)
24
+ @date.update_time(md)
25
+ p @date
26
+ break
27
+ end
28
+ end
23
29
 
24
- # MATCHER.each do |matcher|
25
- # @body.match(matcher) do |md|
26
- # y = year(md)
27
- # m = month(md)
28
- # d = day(md)
29
- # #h = hour(md)
30
- # #m = minute(md)
31
- #
32
- # @md = md
33
- # return y, m, d
34
- # end
35
- # end
30
+ NOUNS.each do |matcher_s, method|
31
+ matcher = Regexp.new(matcher_s.to_s)
32
+ if md = @body.match(matcher)
33
+ eval(method)
34
+ break
35
+ end
36
+ end
37
+
38
+ AFTERS.each do |matcher|
39
+ if md = @body.match(matcher)
40
+ @date.after(md)
41
+ break
42
+ end
43
+ end
44
+
45
+
46
+ return @date.to_datetime
36
47
  end
37
48
 
38
49
  private
39
50
 
51
+ def next_day(day)
52
+ end
40
53
  end
41
54
  end
@@ -37,17 +37,25 @@ module Datte
37
37
  '(?<min>\d{1,2})分'
38
38
  ].map { |pattern| Regexp.compile(pattern) }.freeze
39
39
 
40
- # DATES = {
41
- # '明日|あした': 'send(:next_day, 1)',
42
- # '明後日|あさって': 'send(:next_day, 2)',
43
- # '': '',
44
- # }
40
# Relative-day nouns. Each key is a regex source (an alternation of spellings)
# and each value is the code string that is evaluated when the key matches.
#
# Order matters: the lookup loop stops at the first entry that matches, and
# 'あさって' is a substring of 'しあさって'. The longer words are therefore
# listed first so that "しあさって" is not mistaken for "あさって".
NOUNS = {
  '明々後日|しあさって': 'send(:next_day, 3)',
  '明後日|あさって': 'send(:next_day, 2)',
  '明日|あした|あす': 'send(:next_day, 1)',
  '今日|きょう': 'send(:next_day, 0)'
}.freeze
45
46
 
46
- # TIMES = {
47
- # '(?<hour>\d{1,2})時(?<min>\d{1,2})分',
48
- # '(?<hour>\d{1,2})',
49
- # ''
50
- # }
47
# Patterns for relative offsets ("N units from now"). Each pattern exposes
# exactly one named group holding only the digits of the offset.
AFTERS = [
  # N years later. The quantifier was malformed (`\d{1)`); one or two digits
  # is used here to match the sibling patterns -- TODO confirm intended width.
  '(?<year>\d{1,2})年後',
  # N months later (the group now captures the digits only, not the suffix).
  '(?<month>\d{1,2})ヶ月後',
  # N days later.
  '(?<day>\d{1,2})日後',
  # N hours later.
  '(?<hour>\d{1,2})時間後',
  # N minutes later.
  '(?<min>\d{1,2})分後'
].map { |pattern| Regexp.compile(pattern) }.freeze
51
59
 
52
60
  class DatetimeTable
53
61
  def initialize
@@ -2,22 +2,20 @@ module Datte
2
2
  class Dattetime
3
3
 
4
4
  DEFAULT_OPTIONS = {
5
- force_update: false
5
+ force_update: false,
6
+ level: 1
6
7
  }
7
8
 
8
- attr_reader :year, :month, :day, :hour, :min, :sec
9
+ attr_reader :year, :month, :day, :hour, :min
9
10
 
10
11
  def initialize(options = {})
11
12
  @options = DEFAULT_OPTIONS.merge(options)
12
- @date = Date.today
13
- end
14
-
15
- def to_s
16
- @date.to_s
13
+ @date = DateTime.now
17
14
  end
18
15
 
19
16
  def to_datetime
20
- DateTime.new(@year, @month, @day, 12, 0, 0) rescue nil
17
+ return nil unless check_level?
18
+ DateTime.new(y, m, d, h, mi, 0) rescue nil
21
19
  end
22
20
 
23
21
  # 年か月か日を更新
@@ -29,18 +27,29 @@ module Datte
29
27
  end
30
28
 
31
29
  # 時か分を更新
32
- def update_time(hour, min)
33
- end
34
-
35
- # 何年後、何ヶ月後、何日後
36
- def after_date(year, month, day)
30
# Updates the hour and minute from a match.
#
# md      - match object handed to hour!/min! to extract the values.
# options - hash; when options[:force_update] is truthy the stored hour and
#           minute are overwritten, otherwise they are only filled in when
#           still unset. Defaults to the instance options. (The parameter was
#           previously accepted but ignored.)
#
# Plain assignments replace the former string `eval`; the behaviour for the
# default options is the same. Returns the resulting minute value.
def update_time(md, options = @options)
  if options[:force_update]
    @hour = hour!(md)
    @min = min!(md)
  else
    @hour ||= hour!(md)
    @min ||= min!(md)
  end
end
38
35
 
39
- # 何時間後、何分後
40
- def after_time(hour, min)
36
+ # 何年後、何ヶ月後、何日後, 何時間後, 何分後
37
# Shifts @date forward by the offset captured in md (N years / months / days /
# hours / minutes later).
#
# md - match object responding to matched?(name) and [](name).
#
# Fixes over the previous version:
# * `>>` and `+` return new objects, so the result is now assigned back to
#   @date instead of being discarded;
# * the hour and minute branches are guarded like the others, so a match
#   without those groups is no longer indexed;
# * the minute offset reads md[:min] (it used md[:hour]).
#
# NOTE(review): this only moves @date; confirm that whatever builds the final
# result actually reads @date.
#
# Returns the updated @date.
def after(md)
  @date = @date >> (md[:year].to_i * 12) if md.matched?(:year)          # N years later
  @date = @date >> md[:month].to_i if md.matched?(:month)               # N months later
  @date += md[:day].to_i if md.matched?(:day)                           # N days later
  @date += Rational(md[:hour].to_i, 24) if md.matched?(:hour)           # N hours later
  @date += Rational(md[:min].to_i, 24 * 60) if md.matched?(:min)        # N minutes later
  @date
end
42
44
 
43
45
  private
46
+
47
+ def y; @year || now[:year] end
48
+ def m; @month || now[:month] end
49
+ def d; @day || now[:day] end
50
+ def h; @hour || now[:hour] end
51
+ def mi; @min || 0 end
52
+
44
53
  def now
45
54
  d = DateTime.now
46
55
  { year: d.year, month: d.month, day: d.day, hour: d.hour, min: d.min }
@@ -57,6 +66,22 @@ module Datte
57
66
  def day!(md)
58
67
  md.matched?(:day) ? md[:day].to_i : now[:day]
59
68
  end
69
+
70
+ def hour!(md)
71
+ md.matched?(:hour) ? md[:hour].to_i : now[:hour]
72
+ end
73
+
74
+ def min!(md)
75
+ md.matched?(:min) ? md[:min].to_i : 0
76
+ end
77
+
78
+ def check_level?
79
+ counter = 0
80
+ [@year, @month, @day, @hour, @min].each do |check|
81
+ counter += 1 unless check.nil?
82
+ end
83
+ @options[:level] < counter
84
+ end
60
85
  end
61
86
  end
62
87
 
@@ -0,0 +1,203 @@
1
module Datte
  # Turns a tagged corpus file into per-token feature lists and label lists.
  #
  # Each non-blank corpus line is split on whitespace into one "morph" array:
  # element 0 is the surface form, the elements after it up to the first '*'
  # are used as part-of-speech tags, and the last element is used as the label.
  # Blank lines separate sentences.
  #
  # SEE: http://qiita.com/Hironsan/items/326b66711eb4196aa9d4
  class Train
    FNAME = File.join(File.dirname(__FILE__), 'data.txt')

    # path - corpus file to load (defaults to the bundled data.txt).
    #
    # The corpus is read once and kept as the training set. The previous
    # version ignored `path`, read the file twice, printed debug output and
    # never stored the sentences that x_train/y_train rely on.
    def initialize(path = FNAME)
      @train_sents = corpus_read(path)
      # NOTE(review): no train/test split is implemented yet, so the test set
      # is empty rather than an undefined name.
      @test_sents = []
    end

    # Feature lists for every training sentence (one array per sentence,
    # one feature array per token).
    def x_train
      train_sents.map { |sent| sent2features(sent) }
    end

    # Label lists for every training sentence.
    def y_train
      train_sents.map { |sent| sent2labels(sent) }
    end

    # Feature lists for every test sentence.
    def x_test
      test_sents.map { |sent| sent2features(sent) }
    end

    # Label lists for every test sentence.
    def y_test
      test_sents.map { |sent| sent2labels(sent) }
    end

    # Placeholder: training is not implemented.
    def train
    end

    private

    attr_reader :train_sents, :test_sents

    # True when the first character of ch is in the Hiragana block.
    def hiragana?(ch)
      ch.ord.between?(0x3040, 0x309F)
    end

    # True when the first character of ch is in the Katakana block.
    # (Was defined as `katakana` while being called as `katakana?`.)
    def katakana?(ch)
      ch.ord.between?(0x30A0, 0x30FF)
    end

    # True when ch consists only of whitespace.
    def space?(ch)
      !(ch =~ /^\s*$/).nil?
    end

    # True when Integer() accepts ch.
    def integer?(ch)
      Integer(ch)
      true
    rescue ArgumentError
      false
    end

    # True for a cased, lower-case character. Caseless characters (kana,
    # kanji, punctuation) equal both their upcase and downcase forms, so they
    # must be excluded explicitly or they would all be classified as letters.
    def lower?(ch)
      ch != ch.upcase && ch == ch.downcase
    end

    # True for a cased, upper-case character (see lower?).
    def upper?(ch)
      ch != ch.downcase && ch == ch.upcase
    end

    # Character class tag for a single character.
    def chara_type(ch)
      if space?(ch) then 'ZSPACE'
      elsif integer?(ch) then 'ZDIGIT'
      elsif lower?(ch) then 'ZLLET'
      elsif upper?(ch) then 'ZULET'
      elsif hiragana?(ch) then 'HIRAG'
      elsif katakana?(ch) then 'KATAK'
      else 'OTHER'
      end
    end

    # Sorted, de-duplicated character class tags of str joined with '-'.
    def chara_types(str)
      str.each_char.map { |ch| chara_type(ch) }.uniq.sort.join('-')
    end

    # Joins the elements between the surface form and the first '*' with '-'.
    # Assumes the morph contains a '*' element.
    def extract_pos(morph)
      idx = morph.index('*')
      morph[1, idx - 1].join('-')
    end

    # Features for the neighbouring token `morph`, labelled with `prefix`
    # ("-2", "-1", "+1", "+2"). The label of the neighbour is included only
    # for preceding tokens.
    def context_features(morph, prefix, with_iob)
      feats = [
        prefix + ':word=' + morph[0],
        prefix + ':type=' + chara_types(morph[0]),
        prefix + ':postag=' + extract_pos(morph)
      ]
      feats.push(prefix + ':iobtag=' + morph[-1]) if with_iob
      feats
    end

    # Feature list for token i of sent: the token itself, then the tokens at
    # offsets -2, -1, +1, +2. A missing preceding neighbour contributes 'BOS',
    # a missing following neighbour contributes 'EOS'.
    def word2features(sent, i)
      morph = sent[i]
      features = [
        'bias',
        'word=' + morph[0],
        'type=' + chara_types(morph[0]),
        'pos_tag=' + extract_pos(morph)
      ]

      [-2, -1].each do |offset|
        if i + offset >= 0
          features.concat(context_features(sent[i + offset], offset.to_s, true))
        else
          features.push('BOS')
        end
      end

      [1, 2].each do |offset|
        if i + offset < sent.length
          features.concat(context_features(sent[i + offset], "+#{offset}", false))
        else
          features.push('EOS')
        end
      end

      features
    end

    # Feature lists for every token of sent. (The previous version returned
    # from inside the block, yielding only the first token's features, and
    # its range ran one index past the end of the sentence.)
    def sent2features(sent)
      sent.each_index.map { |i| word2features(sent, i) }
    end

    # Last element (label) of every morph in sent.
    def sent2labels(sent)
      sent.map { |morph| morph[-1] }
    end

    # First element (surface form) of every morph in sent.
    def sent2tokens(sent)
      sent.map { |morph| morph[0] }
    end

    # Reads the corpus at `path` into an array of sentences, each an array of
    # morph arrays. Blank (or whitespace-only) lines end the current sentence;
    # empty sentences are skipped, and a final sentence without a trailing
    # blank line is kept instead of being dropped.
    def corpus_read(path = FNAME)
      sents = []
      sent = []

      File.foreach(path) do |line|
        morph_info = line.strip.split(' ')
        if morph_info.empty?
          sents.push(sent) unless sent.empty?
          sent = []
        else
          sent.push(morph_info)
        end
      end

      sents.push(sent) unless sent.empty?
      sents
    end
  end
end
data/lib/datte/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Datte
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/datte.rb CHANGED
@@ -4,3 +4,4 @@ require File.join(File.dirname(__FILE__), 'datte', 'parser')
4
4
  require File.join(File.dirname(__FILE__), 'datte', 'date_parser')
5
5
  require File.join(File.dirname(__FILE__), 'datte', 'dattetime')
6
6
  require File.join(File.dirname(__FILE__), 'datte', 'datetime_table')
7
+ require File.join(File.dirname(__FILE__), 'datte', 'train')
data/spec/datte_spec.rb CHANGED
@@ -5,7 +5,13 @@ describe Datte do
5
5
  expect(Datte::VERSION).not_to be nil
6
6
  end
7
7
 
8
- it "does something useful" do
9
- expect(false).to eq(true)
8
+ describe "::Parser" do
9
+ datte = Datte::Parser.new
10
+
11
+ it "2016/11/1日に遊ぼー" do
12
+ body = "2016/11/1日に遊ぼー"
13
+ d = DateTime.now
14
+ expect(datte.parse_date(body)).to eq(DateTime.new(2016, 11, 1, d.hour, 0, 0))
15
+ end
10
16
  end
11
17
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - pokohide
@@ -73,10 +73,12 @@ files:
73
73
  - bin/setup
74
74
  - datte.gemspec
75
75
  - lib/datte.rb
76
+ - lib/datte/data.txt
76
77
  - lib/datte/date_parser.rb
77
78
  - lib/datte/datetime_table.rb
78
79
  - lib/datte/dattetime.rb
79
80
  - lib/datte/parser.rb
81
+ - lib/datte/train.rb
80
82
  - lib/datte/version.rb
81
83
  - spec/datte_spec.rb
82
84
  - spec/spec_helper.rb