datte 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,23 +19,36 @@ module Datte
19
19
  end
20
20
  end
21
21
 
22
- return @date.to_datetime
22
+ ABSOLUTE_TIMES.each do |matcher|
23
+ if md = @body.match(matcher)
24
+ @date.update_time(md)
25
+ p @date
26
+ break
27
+ end
28
+ end
23
29
 
24
- # MATCHER.each do |matcher|
25
- # @body.match(matcher) do |md|
26
- # y = year(md)
27
- # m = month(md)
28
- # d = day(md)
29
- # #h = hour(md)
30
- # #m = minute(md)
31
- #
32
- # @md = md
33
- # return y, m, d
34
- # end
35
- # end
30
+ NOUNS.each do |matcher_s, method|
31
+ matcher = Regexp.new(matcher_s.to_s)
32
+ if md = @body.match(matcher)
33
+ eval(method)
34
+ break
35
+ end
36
+ end
37
+
38
+ AFTERS.each do |matcher|
39
+ if md = @body.match(matcher)
40
+ @date.after(md)
41
+ break
42
+ end
43
+ end
44
+
45
+
46
+ return @date.to_datetime
36
47
  end
37
48
 
38
49
  private
39
50
 
51
+ def next_day(day)
52
+ end
40
53
  end
41
54
  end
@@ -37,17 +37,25 @@ module Datte
37
37
  '(?<min>\d{1,2})分'
38
38
  ].map { |pattern| Regexp.compile(pattern) }.freeze
39
39
 
40
- # DATES = {
41
- # '明日|あした': 'send(:next_day, 1)',
42
- # '明後日|あさって': 'send(:next_day, 2)',
43
- # '': '',
44
- # }
40
+ NOUNS = {
41
+ '明日|あした|あす': 'send(:next_day, 1)',
42
+ '明後日|あさって': 'send(:next_day, 2)',
43
+ '明々後日|しあさって': 'send(:next_day, 3)',
44
+ '今日|きょう': 'send(:next_day, 0)'
45
+ }
45
46
 
46
- # TIMES = {
47
- # '(?<hour>\d{1,2})時(?<min>\d{1,2})分',
48
- # '(?<hour>\d{1,2})',
49
- # ''
50
- # }
47
+ AFTERS = [
48
+ # 何年後
49
+ '(?<year>\d{1)年後',
50
+ # 何ヶ月後
51
+ '(?<month>\d{1,2}ヶ月後)',
52
+ # 何日後
53
+ '(?<day>\d{1,2})日後',
54
+ # 何時間後
55
+ '(?<hour>\d{1,2})時間後',
56
+ # 何分後
57
+ '(?<min>\d{1,2})分後'
58
+ ].map { |pattern| Regexp.compile(pattern) }.freeze
51
59
 
52
60
  class DatetimeTable
53
61
  def initialize
@@ -2,22 +2,20 @@ module Datte
2
2
  class Dattetime
3
3
 
4
4
  DEFAULT_OPTIONS = {
5
- force_update: false
5
+ force_update: false,
6
+ level: 1
6
7
  }
7
8
 
8
- attr_reader :year, :month, :day, :hour, :min, :sec
9
+ attr_reader :year, :month, :day, :hour, :min
9
10
 
10
11
  def initialize(options = {})
11
12
  @options = DEFAULT_OPTIONS.merge(options)
12
- @date = Date.today
13
- end
14
-
15
- def to_s
16
- @date.to_s
13
+ @date = DateTime.now
17
14
  end
18
15
 
19
16
  def to_datetime
20
- DateTime.new(@year, @month, @day, 12, 0, 0) rescue nil
17
+ return nil unless check_level?
18
+ DateTime.new(y, m, d, h, mi, 0) rescue nil
21
19
  end
22
20
 
23
21
  # 年か月か日を更新
@@ -29,18 +27,29 @@ module Datte
29
27
  end
30
28
 
31
29
  # 時か分を更新
32
- def update_time(hour, min)
33
- end
34
-
35
- # 何年後、何ヶ月後、何日後
36
- def after_date(year, month, day)
30
+ def update_time(md, options = @options)
31
+ op = @options[:force_update] ? '=' : '||='
32
+ eval("@hour #{op} hour!(md)")
33
+ eval("@min #{op} min!(md)")
37
34
  end
38
35
 
39
- # 何時間後、何分後
40
- def after_time(hour, min)
36
+ # 何年後、何ヶ月後、何日後, 何時間後, 何分後
37
+ def after(md)
38
+ @date >> (md[:year].to_i * 12) if md.matched?(:year) # 何年後
39
+ @date >> md[:month].to_i if md.matched?(:month) # 何ヶ月後
40
+ @date + md[:day].to_i if md.matched?(:day) # 何日後
41
+ @date + Rational(md[:hour].to_i, 24) # 何時間後
42
+ @date + Rational(md[:hour].to_i, 24 * 60) # 何分後
41
43
  end
42
44
 
43
45
  private
46
+
47
+ def y; @year || now[:year] end
48
+ def m; @month || now[:month] end
49
+ def d; @day || now[:day] end
50
+ def h; @hour || now[:hour] end
51
+ def mi; @min || 0 end
52
+
44
53
  def now
45
54
  d = DateTime.now
46
55
  { year: d.year, month: d.month, day: d.day, hour: d.hour, min: d.min }
@@ -57,6 +66,22 @@ module Datte
57
66
  def day!(md)
58
67
  md.matched?(:day) ? md[:day].to_i : now[:day]
59
68
  end
69
+
70
+ def hour!(md)
71
+ md.matched?(:hour) ? md[:hour].to_i : now[:hour]
72
+ end
73
+
74
+ def min!(md)
75
+ md.matched?(:min) ? md[:min].to_i : 0
76
+ end
77
+
78
+ def check_level?
79
+ counter = 0
80
+ [@year, @month, @day, @hour, @min].each do |check|
81
+ counter += 1 unless check.nil?
82
+ end
83
+ @options[:level] < counter
84
+ end
60
85
  end
61
86
  end
62
87
 
@@ -0,0 +1,203 @@
1
+ module Datte
2
+ class Train
3
+ FNAME = File.join(File.dirname(__FILE__), 'data.txt')
4
+
5
+ # SEE: http://qiita.com/Hironsan/items/326b66711eb4196aa9d4
6
+
7
+ def initialize(path = FNAME)
8
+ train_sents = corpus_read
9
+ x = corpus_read[0]
10
+ p x
11
+ p '==='
12
+ p sent2features(x)
13
+ #sent2features(corpus_read[0])[0]
14
+ #sent2features(train_sents[0])[0]
15
+ end
16
+
17
+ def x_train
18
+ train_sents.each do |s|
19
+ return sent2features(s)
20
+ end
21
+ end
22
+
23
+ def y_train
24
+ train_sents.each do |s|
25
+ return sent2labels(s)
26
+ end
27
+ end
28
+
29
+ def x_test
30
+ test_sents.each do |s|
31
+ return sent2features(s)
32
+ end
33
+ end
34
+
35
+ def y_test
36
+ test_sents.each do |s|
37
+ return sent2labels(s)
38
+ end
39
+ end
40
+
41
+ def train()
42
+ end
43
+
44
+ private
45
+ def hiragana?(ch)
46
+ 0x3040 <= ch.ord && ch.ord <= 0x309F
47
+ end
48
+
49
+ def katakana(ch)
50
+ 0x30A0 <= ch.ord && ch.ord <= 0x30FF
51
+ end
52
+
53
+ def space?(ch)
54
+ !(ch =~ /^\s*$/).nil?
55
+ end
56
+
57
+ def integer?(ch)
58
+ Integer(ch)
59
+ Integer(ch)
60
+ true
61
+ rescue ArgumentError
62
+ false
63
+ end
64
+
65
+ def lower?(ch)
66
+ ch == ch.downcase
67
+ end
68
+
69
+ def upper?(ch)
70
+ ch == ch.upcase
71
+ end
72
+
73
+ def chara_type(ch)
74
+ if space?(ch) then 'ZSPACE'
75
+ elsif integer?(ch) then 'ZDIGIT'
76
+ elsif lower?(ch) then 'ZLLET'
77
+ elsif upper?(ch) then 'ZULET'
78
+ elsif hiragana?(ch) then 'HIRAG'
79
+ elsif katakana?(ch) then 'KATAK'
80
+ else 'OTHER'
81
+ end
82
+ end
83
+
84
+ def chara_types(str)
85
+ types = str.each_char.to_a.map do |ch|
86
+ chara_type(ch)
87
+ end
88
+ types.uniq.sort().join('-')
89
+ end
90
+
91
+ def extract_pos(morph)
92
+ idx = morph.index('*')
93
+ morph[1, idx-1].join('-')
94
+ end
95
+
96
+ def word2features(sent, i)
97
+ word = sent[i][0]
98
+ chtype = chara_types(sent[i][0])
99
+ postag = extract_pos(sent[i])
100
+
101
+ features = [
102
+ 'bias',
103
+ 'word=' + word,
104
+ 'type=' + chtype,
105
+ 'pos_tag=' + postag
106
+ ]
107
+
108
+ if i >= 2
109
+ word2 = sent[i-2][0]
110
+ chtype2 = chara_types(sent[i-2][0])
111
+ postag2 = extract_pos(sent[i-2])
112
+ iobtag2 = sent[i-2][-1]
113
+ features.push(*[
114
+ '-2:word=' + word2,
115
+ '-2:type=' + chtype2,
116
+ '-2:postag=' + postag2,
117
+ '-2:iobtag=' + iobtag2
118
+ ])
119
+ else
120
+ features.push('BOS')
121
+ end
122
+
123
+ if i >= 1
124
+ word1 = sent[i-1][0]
125
+ chtype1 = chara_types(sent[i-1][0])
126
+ postag1 = extract_pos(sent[i-1])
127
+ iobtag1 = sent[i-1][-1]
128
+ features.push(*[
129
+ '-1:word=' + word1,
130
+ '-1:type=' + chtype1,
131
+ '-1:postag=' + postag1,
132
+ '-1:iobtag=' + iobtag1
133
+ ])
134
+ else
135
+ features.push('BOS')
136
+ end
137
+
138
+ if i < sent.length - 1
139
+ word1 = sent[i+1][0]
140
+ chtype1 = chara_types(sent[i+1][0])
141
+ postag1 = extract_pos(sent[i+1])
142
+ features.push(*[
143
+ '+1:word=' + word1,
144
+ '+1:type=' + chtype1,
145
+ '+1:postag=' + postag1
146
+ ])
147
+ else
148
+ features.push('EOS')
149
+ end
150
+
151
+ if i < sent.length - 2
152
+ word2 = sent[i+2][0]
153
+ chtype2 = chara_types(sent[i+2][0])
154
+ postag2 = extract_pos(sent[i+2])
155
+ features.push(*[
156
+ '+2:word=' + word2,
157
+ '+2:type=' + chtype2,
158
+ '+2:postag=' + postag2
159
+ ])
160
+ else
161
+ features.push('EOS')
162
+ end
163
+
164
+ return features
165
+ end
166
+
167
+ def sent2features(sent)
168
+ (0..(sent.length)).to_a.map do |i|
169
+ return word2features(sent, i)
170
+ end
171
+ end
172
+
173
+ def sent2labels(sent)
174
+ sent.map do |morph|
175
+ morph[-1]
176
+ end
177
+ end
178
+
179
+ def sent2tokens(sent)
180
+ sent.map do |morph|
181
+ morph[0]
182
+ end
183
+ end
184
+
185
+ def corpus_read
186
+ sents, sent = [], []
187
+
188
+ File.open(FNAME, 'r') do |file|
189
+ file.each_line do |line|
190
+ if line == "\n"
191
+ sents.push(sent)
192
+ sent = []
193
+ next
194
+ end
195
+ morph_info = line.strip().split(' ')
196
+ sent.push(morph_info)
197
+ end
198
+ end
199
+ sents
200
+ end
201
+
202
+ end
203
+ end
data/lib/datte/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Datte
2
- VERSION = "0.1.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/datte.rb CHANGED
@@ -4,3 +4,4 @@ require File.join(File.dirname(__FILE__), 'datte', 'parser')
4
4
  require File.join(File.dirname(__FILE__), 'datte', 'date_parser')
5
5
  require File.join(File.dirname(__FILE__), 'datte', 'dattetime')
6
6
  require File.join(File.dirname(__FILE__), 'datte', 'datetime_table')
7
+ require File.join(File.dirname(__FILE__), 'datte', 'train')
data/spec/datte_spec.rb CHANGED
@@ -5,7 +5,13 @@ describe Datte do
5
5
  expect(Datte::VERSION).not_to be nil
6
6
  end
7
7
 
8
- it "does something useful" do
9
- expect(false).to eq(true)
8
+ describe "::Parser" do
9
+ datte = Datte::Parser.new
10
+
11
+ it "2016/11/1日に遊ぼー" do
12
+ body = "2016/11/1日に遊ぼー"
13
+ d = DateTime.now
14
+ expect(datte.parse_date(body)).to eq(DateTime.new(2016, 11, 1, d.hour, 0, 0))
15
+ end
10
16
  end
11
17
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datte
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - pokohide
@@ -73,10 +73,12 @@ files:
73
73
  - bin/setup
74
74
  - datte.gemspec
75
75
  - lib/datte.rb
76
+ - lib/datte/data.txt
76
77
  - lib/datte/date_parser.rb
77
78
  - lib/datte/datetime_table.rb
78
79
  - lib/datte/dattetime.rb
79
80
  - lib/datte/parser.rb
81
+ - lib/datte/train.rb
80
82
  - lib/datte/version.rb
81
83
  - spec/datte_spec.rb
82
84
  - spec/spec_helper.rb