RubyGems - datte - Versions diffs - 0.1.0 → 0.3.0 - Mend

datte 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

checksums.yaml +4 -4
data/lib/datte/data.txt +16020 -0
data/lib/datte/date_parser.rb +26 -13
data/lib/datte/datetime_table.rb +18 -10
data/lib/datte/dattetime.rb +40 -15
data/lib/datte/train.rb +203 -0
data/lib/datte/version.rb +1 -1
data/lib/datte.rb +1 -0
data/spec/datte_spec.rb +8 -2
metadata +3 -1

data/lib/datte/date_parser.rb CHANGED

@@ -19,23 +19,36 @@ module Datte
         end
       end
-      return @date.to_datetime
+      ABSOLUTE_TIMES.each do |matcher|
+        if md = @body.match(matcher)
+          @date.update_time(md)
+          p @date
+          break
+        end
+      end
-      # MATCHER.each do |matcher|
-      #   @body.match(matcher) do |md|
-      #     y = year(md)
-      #     m = month(md)
-      #     d = day(md)
-      #     #h = hour(md)
-      #     #m = minute(md)
-      #
-      #     @md = md
-      #     return y, m, d
-      #   end
-      # end
+      NOUNS.each do |matcher_s, method|
+        matcher = Regexp.new(matcher_s.to_s)
+        if md = @body.match(matcher)
+          eval(method)
+          break
+        end
+      end
+      AFTERS.each do |matcher|
+        if md = @body.match(matcher)
+          @date.after(md)
+          break
+        end
+      end
+      return @date.to_datetime
     end
     private
+    def next_day(day)
+    end
   end
 end

data/lib/datte/datetime_table.rb CHANGED

@@ -37,17 +37,25 @@ module Datte
     '(?<min>\d{1,2})分'
   ].map { |pattern| Regexp.compile(pattern) }.freeze
-  # DATES = {
-  #   '明日|あした': 'send(:next_day, 1)',
-  #   '明後日|あさって': 'send(:next_day, 2)',
-  #   '': '',
-  # }
+  NOUNS = {
+    '明日|あした|あす': 'send(:next_day, 1)',
+    '明後日|あさって': 'send(:next_day, 2)',
+    '明々後日|しあさって': 'send(:next_day, 3)',
+    '今日|きょう': 'send(:next_day, 0)'
+  }
-  # TIMES = {
-  #   '(?<hour>\d{1,2})時(?<min>\d{1,2})分',
-  #   '(?<hour>\d{1,2})時',
-  #   ''
-  # }
+  AFTERS = [
+    # 何年後
+    '(?<year>\d{1)年後',
+    # 何ヶ月後
+    '(?<month>\d{1,2}ヶ月後)',
+    # 何日後
+    '(?<day>\d{1,2})日後',
+    # 何時間後
+    '(?<hour>\d{1,2})時間後',
+    # 何分後
+    '(?<min>\d{1,2})分後'
+  ].map { |pattern| Regexp.compile(pattern) }.freeze
   class DatetimeTable
     def initialize

data/lib/datte/dattetime.rb CHANGED

@@ -2,22 +2,20 @@ module Datte
   class Dattetime
     DEFAULT_OPTIONS = {
-      force_update: false
+      force_update: false,
+      level: 1
     }
-    attr_reader :year, :month, :day, :hour, :min, :sec
+    attr_reader :year, :month, :day, :hour, :min
     def initialize(options = {})
       @options = DEFAULT_OPTIONS.merge(options)
-      @date = Date.today
-    end
-    def to_s
-      @date.to_s
+      @date = DateTime.now
     end
     def to_datetime
-      DateTime.new(@year, @month, @day, 12, 0, 0) rescue nil
+      return nil unless check_level?
+      DateTime.new(y, m, d, h, mi, 0) rescue nil
     end
     # 年か月か日を更新
@@ -29,18 +27,29 @@ module Datte
     end
     # 時か分を更新
-    def update_time(hour, min)
-    end
-    # 何年後、何ヶ月後、何日後
-    def after_date(year, month, day)
+    def update_time(md, options = @options)
+      op = @options[:force_update] ? '=' : '||='
+      eval("@hour #{op} hour!(md)")
+      eval("@min #{op} min!(md)")
     end
-    # 何時間後、何分後
-    def after_time(hour, min)
+    # 何年後、何ヶ月後、何日後, 何時間後, 何分後
+    def after(md)
+      @date >> (md[:year].to_i * 12) if md.matched?(:year) # 何年後
+      @date >> md[:month].to_i if md.matched?(:month) # 何ヶ月後
+      @date + md[:day].to_i if md.matched?(:day) # 何日後
+      @date + Rational(md[:hour].to_i, 24) # 何時間後
+      @date + Rational(md[:hour].to_i, 24 * 60) # 何分後
     end
     private
+    def y; @year || now[:year] end
+    def m; @month || now[:month] end
+    def d; @day || now[:day] end
+    def h; @hour || now[:hour] end
+    def mi; @min || 0 end
     def now
       d = DateTime.now
       { year: d.year, month: d.month, day: d.day, hour: d.hour, min: d.min }
@@ -57,6 +66,22 @@ module Datte
     def day!(md)
       md.matched?(:day) ? md[:day].to_i : now[:day]
     end
+    def hour!(md)
+      md.matched?(:hour) ? md[:hour].to_i : now[:hour]
+    end
+    def min!(md)
+      md.matched?(:min) ? md[:min].to_i : 0
+    end
+    def check_level?
+      counter = 0
+      [@year, @month, @day, @hour, @min].each do |check|
+        counter += 1 unless check.nil?
+      end
+      @options[:level] < counter
+    end
   end
 end

data/lib/datte/train.rb ADDED

@@ -0,0 +1,203 @@
+module Datte
+  class Train
+    FNAME = File.join(File.dirname(__FILE__), 'data.txt')
+    # SEE: http://qiita.com/Hironsan/items/326b66711eb4196aa9d4
+    def initialize(path = FNAME)
+      train_sents = corpus_read
+      x = corpus_read[0]
+      p x
+      p '==='
+      p sent2features(x)
+      #sent2features(corpus_read[0])[0]
+      #sent2features(train_sents[0])[0]
+    end
+    def x_train
+      train_sents.each do |s|
+        return sent2features(s)
+      end
+    end
+    def y_train
+      train_sents.each do |s|
+        return sent2labels(s)
+      end
+    end
+    def x_test
+      test_sents.each do |s|
+        return sent2features(s)
+      end
+    end
+    def y_test
+      test_sents.each do |s|
+        return sent2labels(s)
+      end
+    end
+    def train()
+    end
+    private
+    def hiragana?(ch)
+      0x3040 <= ch.ord && ch.ord <= 0x309F
+    end
+    def katakana(ch)
+      0x30A0 <= ch.ord && ch.ord <= 0x30FF
+    end
+    def space?(ch)
+      !(ch =~ /^\s*$/).nil?
+    end
+    def integer?(ch)
+      Integer(ch)
+      Integer(ch)
+      true
+    rescue ArgumentError
+      false
+    end
+    def lower?(ch)
+      ch == ch.downcase
+    end
+    def upper?(ch)
+      ch == ch.upcase
+    end
+    def chara_type(ch)
+      if space?(ch) then 'ZSPACE'
+      elsif integer?(ch) then 'ZDIGIT'
+      elsif lower?(ch) then 'ZLLET'
+      elsif upper?(ch) then 'ZULET'
+      elsif hiragana?(ch) then 'HIRAG'
+      elsif katakana?(ch) then 'KATAK'
+      else 'OTHER'
+      end
+    end
+    def chara_types(str)
+      types = str.each_char.to_a.map do |ch|
+        chara_type(ch)
+      end
+      types.uniq.sort().join('-')
+    end
+    def extract_pos(morph)
+      idx = morph.index('*')
+      morph[1, idx-1].join('-')
+    end
+    def word2features(sent, i)
+      word = sent[i][0]
+      chtype = chara_types(sent[i][0])
+      postag = extract_pos(sent[i])
+      features = [
+        'bias',
+        'word=' + word,
+        'type=' + chtype,
+        'pos_tag=' + postag
+      ]
+      if i >= 2
+        word2 = sent[i-2][0]
+        chtype2 = chara_types(sent[i-2][0])
+        postag2 = extract_pos(sent[i-2])
+        iobtag2 = sent[i-2][-1]
+        features.push(*[
+          '-2:word=' + word2,
+          '-2:type=' + chtype2,
+          '-2:postag=' + postag2,
+          '-2:iobtag=' + iobtag2
+        ])
+      else
+        features.push('BOS')
+      end
+      if i >= 1
+        word1 = sent[i-1][0]
+        chtype1 = chara_types(sent[i-1][0])
+        postag1 = extract_pos(sent[i-1])
+        iobtag1 = sent[i-1][-1]
+        features.push(*[
+          '-1:word=' + word1,
+          '-1:type=' + chtype1,
+          '-1:postag=' + postag1,
+          '-1:iobtag=' + iobtag1
+        ])
+      else
+        features.push('BOS')
+      end
+      if i < sent.length - 1
+        word1 = sent[i+1][0]
+        chtype1 = chara_types(sent[i+1][0])
+        postag1 = extract_pos(sent[i+1])
+        features.push(*[
+          '+1:word=' + word1,
+          '+1:type=' + chtype1,
+          '+1:postag=' + postag1
+        ])
+      else
+        features.push('EOS')
+      end
+      if i < sent.length - 2
+        word2 = sent[i+2][0]
+        chtype2 = chara_types(sent[i+2][0])
+        postag2 = extract_pos(sent[i+2])
+        features.push(*[
+          '+2:word=' + word2,
+          '+2:type=' + chtype2,
+          '+2:postag=' + postag2
+        ])
+      else
+        features.push('EOS')
+      end
+      return features
+    end
+    def sent2features(sent)
+      (0..(sent.length)).to_a.map do |i|
+        return word2features(sent, i)
+      end
+    end
+    def sent2labels(sent)
+      sent.map do |morph|
+        morph[-1]
+      end
+    end
+    def sent2tokens(sent)
+      sent.map do |morph|
+        morph[0]
+      end
+    end
+    def corpus_read
+      sents, sent = [], []
+      File.open(FNAME, 'r') do |file|
+        file.each_line do |line|
+          if line == "\n"
+            sents.push(sent)
+            sent = []
+            next
+          end
+          morph_info = line.strip().split(' ')
+          sent.push(morph_info)
+        end
+      end
+      sents
+    end
+  end
+end

data/lib/datte/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Datte
-  VERSION = "0.1.0"
+  VERSION = "0.3.0"
 end

data/lib/datte.rb CHANGED

@@ -4,3 +4,4 @@ require File.join(File.dirname(__FILE__), 'datte', 'parser')
 require File.join(File.dirname(__FILE__), 'datte', 'date_parser')
 require File.join(File.dirname(__FILE__), 'datte', 'dattetime')
 require File.join(File.dirname(__FILE__), 'datte', 'datetime_table')
+require File.join(File.dirname(__FILE__), 'datte', 'train')

data/spec/datte_spec.rb CHANGED

@@ -5,7 +5,13 @@ describe Datte do
     expect(Datte::VERSION).not_to be nil
   end
-  it "does something useful" do
-    expect(false).to eq(true)
+  describe "::Parser" do
+    datte = Datte::Parser.new
+    it "2016/11/1日に遊ぼー" do
+      body = "2016/11/1日に遊ぼー"
+      d = DateTime.now
+      expect(datte.parse_date(body)).to eq(DateTime.new(2016, 11, 1, d.hour, 0, 0))
+    end
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: datte
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.3.0
 platform: ruby
 authors:
 - pokohide
@@ -73,10 +73,12 @@ files:
 - bin/setup
 - datte.gemspec
 - lib/datte.rb
+- lib/datte/data.txt
 - lib/datte/date_parser.rb
 - lib/datte/datetime_table.rb
 - lib/datte/dattetime.rb
 - lib/datte/parser.rb
+- lib/datte/train.rb
 - lib/datte/version.rb
 - spec/datte_spec.rb
 - spec/spec_helper.rb