date_extractor 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6ccaf1114d7c608b8f2cccb38d902db374839807
4
+ data.tar.gz: f101665c1df1a41c6c4b7eeda73de833e5ff6ebf
5
+ SHA512:
6
+ metadata.gz: 46b468a50ed84fbf06888fff0447acd7ad602aaf2919e89e9d686cd098240ed28b4be8d740553370a28d2d70a9299479e1d5912f1aba4246b4a3327eb2f1b8a1
7
+ data.tar.gz: 58f2827dc897ee64acde531cd6dbd77a5fbb144abcf7a6c24a5e19aa188d57cad7d7557fffc92e60f2058fcf7500141a997bb55616249bfaeda54b7e4f109868
@@ -0,0 +1,8 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.4.1
5
+ before_install: gem install bundler -v 1.16.0.pre.2
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
# Resolve gems from the public RubyGems index.
source "https://rubygems.org"

# Expand `github: "owner/repo"` shorthands into HTTPS clone URLs.
git_source(:github) do |repo_name|
  "https://github.com/#{repo_name}"
end

# The gem's dependencies are declared in date_extractor.gemspec.
gemspec
@@ -0,0 +1,41 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ date_extractor (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ coderay (1.1.2)
10
+ diff-lcs (1.2.5)
11
+ method_source (0.9.0)
12
+ pry (0.11.1)
13
+ coderay (~> 1.1.0)
14
+ method_source (~> 0.9.0)
15
+ rake (10.5.0)
16
+ rspec (3.5.0)
17
+ rspec-core (~> 3.5.0)
18
+ rspec-expectations (~> 3.5.0)
19
+ rspec-mocks (~> 3.5.0)
20
+ rspec-core (3.5.4)
21
+ rspec-support (~> 3.5.0)
22
+ rspec-expectations (3.5.0)
23
+ diff-lcs (>= 1.2.0, < 2.0)
24
+ rspec-support (~> 3.5.0)
25
+ rspec-mocks (3.5.0)
26
+ diff-lcs (>= 1.2.0, < 2.0)
27
+ rspec-support (~> 3.5.0)
28
+ rspec-support (3.5.0)
29
+
30
+ PLATFORMS
31
+ ruby
32
+
33
+ DEPENDENCIES
34
+ bundler (~> 1.16.a)
35
+ date_extractor!
36
+ pry
37
+ rake (~> 10.0)
38
+ rspec (~> 3.0)
39
+
40
+ BUNDLED WITH
41
+ 1.16.0.pre.2
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Nao Minami
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,49 @@
1
+ # DateExtractor
2
+
3
+ Extract dates from a string.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'date_extractor'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install date_extractor
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+ body = "6月27日16時以降、28日14時~16時、29日13時~17時、30日13時以降"
25
+ DateExtractor.extract(body, fallback_year: 2017)
26
+ #=> [
27
+ # ["6月27日16時以降", "28日14時~16時", "29日13時~17時", "30日13時以降"],
28
+ # [
29
+ # [Date.new(2017, 6, 27), DateTime.new(2017, 6, 27, 16, 0), nil],
30
+ # [Date.new(2017, 6, 28), DateTime.new(2017, 6, 28, 14, 0), DateTime.new(2017, 6, 28, 16, 0)],
31
+ # [Date.new(2017, 6, 29), DateTime.new(2017, 6, 29, 13, 0), DateTime.new(2017, 6, 29, 17, 0)],
32
+ # [Date.new(2017, 6, 30), DateTime.new(2017, 6, 30, 13, 0), nil],
33
+ # ]
34
+ # ]
35
+ ```
36
+
37
+ ## Development
38
+
39
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
40
+
41
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
42
+
43
+ ## Contributing
44
+
45
+ Bug reports and pull requests are welcome on GitHub at https://github.com/south37/date_extractor.
46
+
47
+ ## License
48
+
49
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,2 @@
1
# Rake tasks for building/releasing the gem and running its specs.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Define the `spec` task; without it the default task below fails with
# "Don't know how to build task 'spec'".
RSpec::Core::RakeTask.new(:spec)

# Running bare `rake` runs the test suite.
task :default => :spec
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Opens an interactive session with the gem already loaded.
#
# Fixtures or other initialization code can be added below to make
# experimenting easier. To use Pry instead of IRB, add pry to the Gemfile and
# swap the last two statements for:
#
#   require "pry"
#   Pry.start

require "bundler/setup"
require "date_extractor"
require "irb"

IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Abort on any error, on use of an unset variable, and on a failure anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail
# Split words on newlines and tabs only, never on spaces.
IFS=$'\n\t'
# Echo every line as it is read and again as it is executed.
set -o verbose -o xtrace

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,28 @@
1
+
2
# Gem specification for date_extractor.

# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require "date_extractor/version"

Gem::Specification.new do |spec|
  spec.name          = "date_extractor"
  spec.version       = DateExtractor::VERSION
  spec.authors       = ["Nao Minami"]
  spec.email         = ["south37777@gmail.com"]

  spec.summary       = %q{Extract dates from a text.}
  spec.description   = %q{Extract dates from a text.}
  # Point at the project repository (the URL the README gives for bug reports
  # and pull requests) rather than at the author's profile page.
  spec.homepage      = "https://github.com/south37/date_extractor"
  spec.license       = "MIT"

  # Package every git-tracked file except the test suites.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/})
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.16.a"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
  spec.add_development_dependency "pry"
end
@@ -0,0 +1,305 @@
1
+ require "date"
2
+ require "date_extractor/version"
3
+
4
# Extracts calendar dates, optionally with a start and an end time, from
# free-form (mostly Japanese) text such as "6月27日16時以降" or
# "2017/6/27 10:00~11:30". Digits, colons, tildes and parentheses are
# recognized in both their ASCII and their full-width forms.
module DateExtractor
  # Separator between the start and the end of a time range.
  RANGE_RE = /[\-~〜~ー]/

  # One or more ASCII or full-width decimal digits.
  NUMBER_RE = /[0-90-9]+/

  # NOTE: Use `(?!間)` to reject `"〜時間"` (a duration, not a time of day).
  START_CHINESE_CHARACTER_TIME_RE = /
    (?<start_hour>#{NUMBER_RE})時(?!間)
    (?:
      (?<start_min>#{NUMBER_RE})分
      |
      (?<start_half_hour_unit>半)
    )?
  /x
  END_CHINESE_CHARACTER_TIME_RE = /
    (?<end_hour>#{NUMBER_RE})時(?!間)
    (?:
      (?<end_min>#{NUMBER_RE})分
      |
      (?<end_half_hour_unit>半)
    )?
  /x

  # "10:00" or "10:00~11:30"
  TIMESLOT_RE1 = /
    (?<start_hour>#{NUMBER_RE})[::;](?<start_min>#{NUMBER_RE})
    \s*
    #{RANGE_RE}?
    \s*
    (?:
      (?<end_hour>#{NUMBER_RE})[::;](?<end_min>#{NUMBER_RE})
    )?
  /x

  # "16時以降" (from 16:00 onwards)
  TIMESLOT_RE2 = /
    #{START_CHINESE_CHARACTER_TIME_RE}以降
  /x

  # "14時", "14時半~16時"
  TIMESLOT_RE3 = /
    #{START_CHINESE_CHARACTER_TIME_RE}
    \s*
    #{RANGE_RE}?
    \s*
    (?:#{END_CHINESE_CHARACTER_TIME_RE})?
  /x

  # "~10:00" or "朝~10:00" (an end time only)
  TIMESLOT_RE4 = /
    (?:朝)?
    #{RANGE_RE}
    \s*
    (?:
      (?<end_hour>#{NUMBER_RE})[::;](?<end_min>#{NUMBER_RE})
    )
  /x

  TIMESLOT_RE = /
    (?:#{TIMESLOT_RE1})|(?:#{TIMESLOT_RE2})|(?:#{TIMESLOT_RE3})|(?:#{TIMESLOT_RE4})
  /x

  # Parenthesized annotation after a date, typically the weekday: "(火)".
  # The brackets are mandatory; without them this would swallow arbitrary text.
  WDAY_RE = /
    \([^()]+\)
    |
    （[^（）]+）
  /x

  # "2017/6/27"
  DAY_RE1 = /
    (?<year>#{NUMBER_RE})\/(?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE})
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
  /x

  # "6/27"
  DAY_RE2 = /
    (?<month>#{NUMBER_RE})\/(?<day>#{NUMBER_RE})
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
  /x

  # "6月27日"
  DAY_RE3 = /
    (?<month>#{NUMBER_RE})月(?<day>#{NUMBER_RE})日
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
  /x

  DAY_RE = /(?:#{DAY_RE1})|(?:#{DAY_RE2})|(?:#{DAY_RE3})/x

  # "27日" (day of month only).
  # NOTE: Use `(?!(?:間)|(?:ほど))` to reject `~日間` and `~日ほど`
  ONLY_DAY_RE = /
    (?<day>#{NUMBER_RE})日
    (?!(?:間)|(?:ほど))
    \s*
    (?:#{WDAY_RE})?
    \s*
    (?:#{TIMESLOT_RE})?
  /x

  RE = /(?:#{DAY_RE})|(?:#{ONLY_DAY_RE})/x

  # Scans +body+ with +re+ and returns every match with its character offsets.
  #
  # @param body [String]
  # @param re [Regexp]
  # @return [Array<Array(MatchData, Integer, Integer)>] `[match, begin, end]` triples
  def self.get_match_and_positions(body, re)
    body.to_enum(:scan, re).map do
      match = Regexp.last_match
      [match, match.begin(0), match.end(0)]
    end
  end

  # Reads the start/end hour and minute captures of a time-slot match.
  #
  # Hours and minutes are returned as ASCII digit strings; a "半" (half past)
  # suffix yields the Integer 30. Components that were not matched are nil.
  #
  # @param match [MatchData]
  # @return [Array] `[start_hour, start_min, end_hour, end_min]`
  def self.get_hour_from_timeslot_match(match)
    %w[start end].flat_map do |edge|
      hour = named_capture(match, "#{edge}_hour")
      [hour && to_downer_letter(hour), minute_for(match, edge)]
    end
  end

  # Builds a DateTime, or returns nil when no hour is given or the components
  # do not form a valid date/time.
  #
  # @param year [Integer]
  # @param month [Integer]
  # @param day [Integer]
  # @param hour [#to_i, nil]
  # @param min [#to_i, nil] nil is treated as 0
  # @return [DateTime, nil]
  def self.create_datetime_if_exists(year, month, day, hour, min)
    return nil if hour.nil?

    DateTime.new(year, month, day, hour.to_i, min.to_i)
  rescue StandardError
    # Deliberately best-effort: an impossible time simply yields no value.
    nil
  end

  # Converts day matches into dates.
  #
  # If month is not specified, fallback_month is used as month. This value is
  # updated by discovering other month specification. Same for fallback_year.
  #
  # @param matches [Array<MatchData>]
  # @param fallback_month [Integer]
  # @param fallback_year [Integer]
  # @param debug [Boolean] currently unused
  # @return [Array<Array(MatchData, Date, DateTime, DateTime)>] one
  #   `[match, date, start, end]` entry per match; date, start and end are nil
  #   when the match does not form a valid date
  def self.days_from_matches(matches, fallback_month, fallback_year, debug: false)
    matches.map do |match|
      # An explicit year/month replaces the fallback for this and later matches.
      # When ONLY_DAY_RE is used, month is nil and the fallback applies.
      fallback_year = integer_capture(match, "year") || fallback_year
      fallback_month = integer_capture(match, "month") || fallback_month
      year = fallback_year
      month = fallback_month
      day = integer_capture(match, "day")

      date = begin
        Date.new(year, month, day)
      rescue StandardError
        nil
      end
      next [match, nil, nil, nil] if date.nil?

      start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(match)
      [
        match,
        date,
        create_datetime_if_exists(year, month, day, start_hour, start_min),
        create_datetime_if_exists(year, month, day, end_hour, end_min)
      ]
    end
  end

  # Interleaves each day with the stand-alone time slots attached to it.
  #
  # @param timeslots_container [Array<Array<MatchData>>] time-slot matches per
  #   day, at the same index as the day in +day_with_hours+
  # @param day_with_hours [Array<Array>] entries from {days_from_matches}
  # @return [Array<Array>] `[match, date, start, end]` entries: every day entry
  #   followed by one entry for each of its additional time slots
  def self.days_from_timeslot_matches(timeslots_container, day_with_hours)
    day_with_hours.each_with_index.flat_map do |day_with_hour, index|
      day = day_with_hour[1]
      next [day_with_hour] if day.nil?

      slots = Array(timeslots_container[index]).map do |timeslot_match|
        start_hour, start_min, end_hour, end_min = get_hour_from_timeslot_match(timeslot_match)
        [
          timeslot_match,
          day,
          create_datetime_if_exists(day.year, day.month, day.day, start_hour, start_min),
          create_datetime_if_exists(day.year, day.month, day.day, end_hour, end_min)
        ]
      end
      [day_with_hour, *slots]
    end
  end

  # Replaces full-width digits with their ASCII counterparts; every other
  # character is kept as is.
  #
  # @param upper_or_downer_letter [String]
  # @return [String]
  def self.to_downer_letter(upper_or_downer_letter)
    upper_or_downer_letter.tr("0-9", "0-9")
  end

  # Extracts dates (and optional start/end times) from +body+.
  #
  # @param [String] body
  # @param [Integer | NilClass] fallback_month month used until the text
  #   specifies one (defaults to the current month)
  # @param [Integer | NilClass] fallback_year year used until the text
  #   specifies one (defaults to the current year)
  # @param [Boolean] debug when true, matches that do not form a valid date
  #   are kept in the result with nil values
  # @return [[String], [[Date, DateTime | NilClass, DateTime | NilClass]]] matched strings and dates
  def self.extract(body, fallback_month: nil, fallback_year: nil, debug: false)
    fallback_month ||= Date.today.month
    fallback_year ||= Date.today.year

    day_matches = get_match_and_positions(body, RE).map(&:first)
    day_with_hours = days_from_matches(day_matches, fallback_month, fallback_year, debug: debug)

    # timeslots_container[i] collects the stand-alone time slots of day i.
    timeslots_container = Array.new(day_with_hours.size) { [] }
    # Matches that did not yield a valid date never own or bound a time slot.
    valid_day_indexes = day_with_hours.each_index.reject { |i| day_with_hours[i][1].nil? }

    get_match_and_positions(body, TIMESLOT_RE).each do |timeslot_match, start_pos, end_pos|
      valid_day_indexes.each_with_index do |day_index, nth|
        # The slot must start after this day's own match (slots embedded in
        # the day match are already part of the day entry) ...
        next if day_with_hours[day_index][0].end(0) > start_pos

        # ... and end before the next valid day begins, if there is one.
        following = valid_day_indexes[nth + 1]
        next if following && end_pos > day_with_hours[following][0].begin(0)

        timeslots_container[day_index] << timeslot_match
      end
    end

    entries = days_from_timeslot_matches(timeslots_container, day_with_hours)
    entries = entries.reject { |_, day, _, _| day.nil? } unless debug

    result_strs = entries.map { |match, *| match && match[0] }
    result_datetimes = entries.map { |_, day, start_t, end_t| [day, start_t, end_t] }
    [result_strs, result_datetimes]
  end

  # Returns the text of the named group, or nil when the group does not exist
  # in the pattern or did not take part in the match.
  def self.named_capture(match, name)
    match.names.include?(name) ? match[name] : nil
  end

  # Returns the named group as an Integer (full-width digits accepted), or nil.
  def self.integer_capture(match, name)
    raw = named_capture(match, name)
    raw && to_downer_letter(raw).to_i
  end

  # Returns the minute of the "start" or "end" edge: the captured digits,
  # 30 for a "半" suffix, or nil when neither is present.
  def self.minute_for(match, edge)
    min = named_capture(match, "#{edge}_min")
    return to_downer_letter(min) if min

    30 if named_capture(match, "#{edge}_half_hour_unit") == "半"
  end

  private_class_method :named_capture, :integer_capture, :minute_for
end
@@ -0,0 +1,3 @@
1
module DateExtractor
  # Gem version string. Frozen so the constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: date_extractor
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Nao Minami
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-09-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.16.a
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.16.a
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: Extract dates from a text.
70
+ email:
71
+ - south37777@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".travis.yml"
78
+ - Gemfile
79
+ - Gemfile.lock
80
+ - LICENSE.txt
81
+ - README.md
82
+ - Rakefile
83
+ - bin/console
84
+ - bin/setup
85
+ - date_extractor.gemspec
86
+ - lib/date_extractor.rb
87
+ - lib/date_extractor/version.rb
88
+ homepage: https://github.com/south37
89
+ licenses:
90
+ - MIT
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.5.1
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Extract dates from a text.
112
+ test_files: []