daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,569 @@
|
|
1
|
+
module DaruLite
|
2
|
+
# Private module for storing helper functions for DateTimeIndex.
|
3
|
+
# @private
|
4
|
+
module DateTimeIndexHelper
|
5
|
+
class << self
|
6
|
+
# Frequency alias (as written in a frequency string) => offset class.
OFFSETS_HASH = {
  'S' => DaruLite::Offsets::Second,
  'M' => DaruLite::Offsets::Minute,
  'H' => DaruLite::Offsets::Hour,
  'D' => DaruLite::Offsets::Day,
  'W' => DaruLite::Offsets::Week,
  'MONTH' => DaruLite::Offsets::Month,
  'MB' => DaruLite::Offsets::MonthBegin,
  'ME' => DaruLite::Offsets::MonthEnd,
  'YEAR' => DaruLite::Offsets::Year,
  'YB' => DaruLite::Offsets::YearBegin,
  'YE' => DaruLite::Offsets::YearEnd
}.freeze

# Gap between two consecutive dates, as a fraction of a day => offset class.
# Looked up by infer_offset with the difference of neighbouring entries.
TIME_INTERVALS = {
  Rational(1, 1) => DaruLite::Offsets::Day,
  Rational(1, 24) => DaruLite::Offsets::Hour,
  Rational(1, 1440) => DaruLite::Offsets::Minute,
  Rational(1, 86_400) => DaruLite::Offsets::Second
}.freeze

# Alternation built from the keys of DaruLite::DAYS_OF_WEEK.
DOW_REGEXP = Regexp.new(DaruLite::DAYS_OF_WEEK.keys.join('|'))

# Grammar of a frequency string: an optional integer multiplier followed by
# either a plain alias or 'W' with an optional '-<weekday>' anchor.
FREQUENCY_PATTERN = /^
  (?<multiplier>[0-9]+)?
  (
    (?<offset>MONTH|YEAR|S|H|MB|ME|M|D|YB|YE) |
    (?<offset>W)(-(?<weekday>#{DOW_REGEXP}))?
  )$/x.freeze
|
34
|
+
|
35
|
+
# Generates a DaruLite::DateOffset object for generic offsets or one of the
|
36
|
+
# specialized classes within DaruLite::Offsets depending on the 'frequency'
|
37
|
+
# string.
|
38
|
+
# @param frequency [String, DaruLite::DateOffset, nil] frequency string such
#   as 'D', '2ME' or 'W-MON'; an offset object is returned unchanged and
#   nil is treated as 'D'.
# @return [Object] an instance of the class registered in OFFSETS_HASH for
#   the alias, built with the parsed multiplier (1 when omitted).
# @raise [ArgumentError] when the string does not match FREQUENCY_PATTERN.
def offset_from_frequency(frequency)
  return frequency if frequency.is_a?(DaruLite::DateOffset)

  frequency ||= 'D'

  matched = FREQUENCY_PATTERN.match(frequency)
  raise ArgumentError, "Invalid frequency string #{frequency}" unless matched

  n = (matched[:multiplier] || 1).to_i
  offset_string = matched[:offset]
  offset_klass = OFFSETS_HASH[offset_string]
  raise ArgumentError, "Cannot interpret offset #{offset_string}" unless offset_klass

  # Only the weekly offset takes an anchor weekday; the lookup yields nil
  # when the string carried no '-<weekday>' suffix.
  if offset_string == 'W'
    offset_klass.new(n, weekday: DaruLite::DAYS_OF_WEEK[matched[:weekday]])
  else
    offset_klass.new(n)
  end
end
|
57
|
+
|
58
|
+
# Converts a date-like String into a DateTime using the precision detected
# from the string; any non-String argument is handed back untouched.
def coerce_date(date)
  if date.is_a?(String)
    precision = determine_date_precision_of(date)
    date_time_from(date, precision)
  else
    date
  end
end
|
63
|
+
|
64
|
+
# Tells whether generated data may begin exactly at +start+: always for
# Tick offsets, otherwise only when the offset responds to #on_offset? and
# reports +start+ as lying on it.
def begin_from_offset?(offset, start)
  return true if offset.is_a?(DaruLite::Offsets::Tick)

  offset.respond_to?(:on_offset?) && offset.on_offset?(start)
end
|
68
|
+
|
69
|
+
# Builds the list of dates for a range.
#
# @param start [DateTime] first candidate date; used as-is when
#   begin_from_offset? accepts it, otherwise advanced once by +offset+.
# @param en [DateTime, nil] inclusive upper limit, used only when
#   +periods+ is nil.
# @param offset [#+] object whose `offset + date` yields the next date.
# @param periods [Integer, nil] number of dates to produce; takes
#   precedence over +en+.
# @return [Array] generated dates in ascending order.
# @raise [ArgumentError] when neither +en+ nor +periods+ is given.
def generate_data(start, en, offset, periods)
  data = []
  new_date = begin_from_offset?(offset, start) ? start : offset + start

  if periods.nil?
    # Without this guard the comparison below fails with an opaque
    # "comparison of DateTime with nil failed" ArgumentError.
    raise ArgumentError, 'Either :end or :periods is required to generate dates' if en.nil?

    until new_date > en
      data << new_date
      new_date = offset + new_date
    end
  else
    periods.times do
      data << new_date
      new_date = offset + new_date
    end
  end

  data
end
|
89
|
+
|
90
|
+
# Validates the two ends of a date range. Raises ArgumentError when they
# are equal, when start is later than end, or when their zones differ;
# returns nil otherwise.
def verify_start_and_end(start, en)
  problem =
    if start == en then 'Start and end cannot be the same'
    elsif start > en then 'Start must be lesser than end'
    elsif start.zone != en.zone then 'Only same time zones are allowed'
    end

  raise ArgumentError, problem if problem
end
|
95
|
+
|
96
|
+
# Tries to deduce an offset from evenly spaced data. Returns nil when the
# gaps between neighbours are not all identical (or there are none), a
# TIME_INTERVALS offset for a known gap, and otherwise a Second offset
# when the gap is a whole multiple of the Second multiplier.
def infer_offset(data)
  gaps = data.each_cons(2).map { |earlier, later| later - earlier }
  return nil unless gaps.uniq.count == 1

  gap = gaps.first
  return TIME_INTERVALS[gap].new if TIME_INTERVALS.include?(gap)

  seconds = gap / DaruLite::Offsets::Second.new.multiplier
  DaruLite::Offsets::Second.new(seconds.numerator) if seconds.denominator == 1
end
|
106
|
+
|
107
|
+
# Binary-searches +data+ (pairs of [date, position] sorted by date) for an
# exact match of +date_time+ and returns the stored position. Raises
# ArgumentError when no entry equals +date_time+.
def find_index_of_date(data, date_time)
  hit = data.bsearch { |entry| entry[0] >= date_time }
  found = !hit.nil? && hit[0] == date_time
  raise(ArgumentError, "Cannot find #{date_time}") unless found

  hit[1]
end
|
113
|
+
|
114
|
+
# Returns the pair produced by generate_bounds for a (possibly partial)
# date string, using the precision detected from the string itself.
def find_date_string_bounds(date_string)
  precision = determine_date_precision_of(date_string)
  generate_bounds(date_time_from(date_string, precision), precision)
end
|
119
|
+
|
120
|
+
# Builds a DateTime from +date_string+ according to +date_precision+:
# year-only and year-month strings are assembled from their digits, any
# other precision is delegated to DateTime.parse.
def date_time_from(date_string, date_precision)
  case date_precision
  when :year
    year = date_string.gsub(/[^0-9]/, '').to_i
    DateTime.new(year)
  when :month
    year = date_string.match(/\d\d\d\d/).to_s.to_i
    month = date_string.match(/-\d?\d/).to_s.delete('-').to_i
    DateTime.new(year, month)
  else
    DateTime.parse(date_string)
  end
end
|
133
|
+
|
134
|
+
# Nested optional groups: every further date/time component that is present
# adds one more non-nil capture.
DATE_PRECISION_REGEXP = /^(\d\d\d\d)(-\d{1,2}(-\d{1,2}( \d{1,2}(:\d{1,2}(:\d{1,2})?)?)?)?)?$/.freeze
# Indexed by the number of captured components (0 means "no match").
DATE_PRECISIONS = [nil, :year, :month, :day, :hour, :min, :sec].freeze

# Returns the precision symbol (:year .. :sec) of a date string, counting
# how many components of DATE_PRECISION_REGEXP it fills. Raises
# ArgumentError when the string does not match at all.
def determine_date_precision_of(date_string)
  matched_groups = date_string.scan(DATE_PRECISION_REGEXP).flatten.compact
  precision = DATE_PRECISIONS[matched_groups.count]
  raise ArgumentError, "Unacceptable date string #{date_string}" unless precision

  precision
end
|
142
|
+
|
143
|
+
# Returns [date_time, upper] where +upper+ is the last second of the
# period that +date_precision+ designates (year, month, day, hour or
# minute). For any other precision both elements are +date_time+ itself.
def generate_bounds(date_time, date_precision) # rubocop:disable Metrics/MethodLength
  # FIXME: about that ^ disable: I'd like to use my zverok/time_boots here, which will simplify things
  upper =
    case date_precision
    when :year
      DateTime.new(date_time.year, 12, 31, 23, 59, 59)
    when :month
      # One month ahead minus one day lands on the last day of this month.
      last_day = ((date_time >> 1) - 1).day
      DateTime.new(date_time.year, date_time.month, last_day, 23, 59, 59)
    when :day
      DateTime.new(date_time.year, date_time.month, date_time.day, 23, 59, 59)
    when :hour
      DateTime.new(date_time.year, date_time.month, date_time.day, date_time.hour, 59, 59)
    when :min
      DateTime.new(date_time.year, date_time.month, date_time.day, date_time.hour, date_time.min, 59)
    else # second or when precision is same as offset
      date_time
    end

  [date_time, upper]
end
|
178
|
+
|
179
|
+
# When the first element is a String, parses every element with
# DateTime.parse IN PLACE (the caller's array is modified) and returns that
# array; otherwise returns +data+ untouched.
def possibly_convert_to_date_time(data)
  return data unless data[0].is_a?(String)

  data.map! { |text| DateTime.parse(text) }
end
|
182
|
+
|
183
|
+
# Returns the entry of +data+ whose second element (the stored position)
# is the greatest, or nil for empty data.
def last_date(data)
  data.max_by { |_date, position| position }
end
|
186
|
+
|
187
|
+
# Checks a partial date string against the first and last dates stored in
# +data+. Only year and year-month keys are examined; for every other
# precision the method returns nil.
def key_out_of_bounds?(key, data)
  dates = data.transpose.first

  precision = determine_date_precision_of(key)
  date_time = date_time_from(key, precision)

  # FIXME: I'm pretty suspicious about logic here:
  # why only year & month? - zverok 2016-05-16

  case precision
  when :year then year_out_of_bounds?(date_time, dates)
  when :month then year_month_out_of_bounds?(date_time, dates)
  end
end
|
203
|
+
|
204
|
+
private
|
205
|
+
|
206
|
+
# True when the year of +date_time+ is earlier than the year of the first
# date or later than the year of the last date.
def year_out_of_bounds?(date_time, dates)
  !date_time.year.between?(dates.first.year, dates.last.year)
end
|
209
|
+
|
210
|
+
# Tells whether the (year, month) of +date_time+ falls before the month of
# the first date or after the month of the last date.
#
# The pairs are compared as tuples: year decides, month breaks ties.
# Comparing year and month independently (and requiring both to be
# smaller/greater) misses keys such as 2011-12 against a range starting in
# 2012-01, or 2012-01 against a range starting in 2012-03.
#
# @param date_time [#year, #month] date built from the key.
# @param dates [Array<#year, #month>] dates of the index, earliest first.
# @return [Boolean]
def year_month_out_of_bounds?(date_time, dates)
  key = [date_time.year, date_time.month]
  lower = [dates.first.year, dates.first.month]
  upper = [dates.last.year, dates.last.month]

  (key <=> lower).negative? || (key <=> upper).positive?
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
class DateTimeIndex < Index
|
218
|
+
include Enumerable
|
219
|
+
Helper = DateTimeIndexHelper
|
220
|
+
|
221
|
+
# Builds an index with an inferred frequency when +source+ is truthy and
# ArrayHelper.array_of? accepts it as DateTimes; returns nil otherwise.
def self.try_create(source)
  return unless source && ArrayHelper.array_of?(source, ::DateTime)

  new(source, freq: :infer)
end
|
224
|
+
|
225
|
+
# Iterates over the values returned by #to_a, forwarding the given block
# to Array#each.
def each(&block)
  to_a.each(&block)
end
|
228
|
+
|
229
|
+
attr_reader :frequency, :offset, :periods, :keys
|
230
|
+
|
231
|
+
# Create a DateTimeIndex with or without a frequency in data. The constructor
|
232
|
+
# should be used for creating DateTimeIndex by directly passing in DateTime
|
233
|
+
# objects or date-like strings, typically in cases where values with frequency
|
234
|
+
# are not needed.
|
235
|
+
#
|
236
|
+
# @param [Array<String>, Array<DateTime>] data Array of date-like Strings or
|
237
|
+
# actual DateTime objects for creating the DateTimeIndex.
|
238
|
+
# @param [Hash] opts Hash of options for configuring index.
|
239
|
+
# @option opts [Symbol, NilClass, String, DaruLite::DateOffset, DaruLite::Offsets::*] freq
|
240
|
+
# Option for specifying the frequency of data, if applicable. If `:infer` is
|
241
|
+
# passed to this option, daru will try to infer the frequency of the data
|
242
|
+
# by itself.
|
243
|
+
#
|
244
|
+
# @example Usage of DateTimeIndex constructor
|
245
|
+
# index = DaruLite::DateTimeIndex.new(
|
246
|
+
# [DateTime.new(2012,4,5), DateTime.new(2012,4,6),
|
247
|
+
# DateTime.new(2012,4,7), DateTime.new(2012,4,8)])
|
248
|
+
# #=>#<DateTimeIndex:84232240 offset=nil periods=4 data=[2012-04-05T00:00:00+00:00...2012-04-08T00:00:00+00:00]>
|
249
|
+
#
|
250
|
+
# index = DaruLite::DateTimeIndex.new([
|
251
|
+
# DateTime.new(2012,4,5), DateTime.new(2012,4,6), DateTime.new(2012,4,7),
|
252
|
+
# DateTime.new(2012,4,8), DateTime.new(2012,4,9), DateTime.new(2012,4,10),
|
253
|
+
# DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer)
|
254
|
+
# #=>#<DateTimeIndex:84198340 offset=D periods=8 data=[2012-04-05T00:00:00+00:00...2012-04-12T00:00:00+00:00]>
|
255
|
+
def initialize(data, opts = { freq: nil })
  super(data)
  # Date-like strings are parsed in place, so +data+ holds the converted
  # values from here on.
  Helper.possibly_convert_to_date_time(data)

  requested = opts[:freq]
  @offset =
    case requested
    when :infer then Helper.infer_offset(data)
    when nil then nil
    else Helper.offset_from_frequency(requested)
    end

  @frequency = @offset&.freq_string
  # Stored as [date, original_position] pairs ordered by date.
  @data = data.each_with_index.to_a.sort_by(&:first)

  @periods = data.size
end
|
271
|
+
|
272
|
+
# Custom dup method for DateTimeIndex
|
273
|
+
def dup
  dates = @data.transpose[0]
  DaruLite::DateTimeIndex.new(dates, freq: @offset)
end
|
276
|
+
|
277
|
+
# Create a date range by specifying the start, end, periods and frequency
|
278
|
+
# of the data.
|
279
|
+
#
|
280
|
+
# @param [Hash] opts Options hash to create the date range with
|
281
|
+
# @option opts [String, DateTime] :start A DateTime object or date-like
|
282
|
+
# string that defines the start of the date range.
|
283
|
+
# @option opts [String, DateTime] :end A DateTime object or date-like string
|
284
|
+
# that defines the end of the date range.
|
285
|
+
# @option opts [String, DaruLite::DateOffset, DaruLite::Offsets::*] :freq ('D') The interval
|
286
|
+
# between each date in the index. This can either be a string specifying
|
287
|
+
# the frequency (i.e. one of the frequency aliases) or an offset object.
|
288
|
+
# @option opts [Integer] :periods The number of periods that should go into
|
289
|
+
# this index. Takes precedence over `:end`.
|
290
|
+
# @return [DateTimeIndex] DateTimeIndex object of the specified parameters.
|
291
|
+
#
|
292
|
+
# == Notes
|
293
|
+
#
|
294
|
+
# If you specify :start and :end options as strings, they can be complete or
|
295
|
+
# partial dates and daru will intelligently infer the date from the string
|
296
|
+
# directly. However, note that the date-like string must be in the format
|
297
|
+
# `YYYY-MM-DD HH:MM:SS`.
|
298
|
+
#
|
299
|
+
# The string aliases supported by the :freq option are as follows:
|
300
|
+
#
|
301
|
+
# * 'S' - seconds
|
302
|
+
# * 'M' - minutes
|
303
|
+
# * 'H' - hours
|
304
|
+
# * 'D' - days
|
305
|
+
# * 'W' - Week (default) anchored on sunday
|
306
|
+
# * 'W-SUN' - Same as 'W'
|
307
|
+
# * 'W-MON' - Week anchored on monday
|
308
|
+
# * 'W-TUE' - Week anchored on tuesday
|
309
|
+
# * 'W-WED' - Week anchored on wednesday
|
310
|
+
# * 'W-THU' - Week anchored on thursday
|
311
|
+
# * 'W-FRI' - Week anchored on friday
|
312
|
+
# * 'W-SAT' - Week anchored on saturday
|
313
|
+
# * 'MONTH' - Month
|
314
|
+
# * 'YEAR' - One year
|
315
|
+
# * 'MB' - month begin
|
316
|
+
# * 'ME' - month end
|
317
|
+
# * 'YB' - year begin
|
318
|
+
# * 'YE' - year end
|
319
|
+
#
|
320
|
+
# Multiples of these can also be specified. For example '2S' for 2 seconds
|
321
|
+
# or '2ME' for two month end offsets.
|
322
|
+
#
|
323
|
+
# Currently the precision of DateTimeIndex is up to seconds only, though this
|
324
|
+
# will improve in the future.
|
325
|
+
#
|
326
|
+
# @example Creating date ranges
|
327
|
+
# DaruLite::DateTimeIndex.date_range(
|
328
|
+
# :start => DateTime.new(2014,5,1),
|
329
|
+
# :end => DateTime.new(2014,5,2), :freq => '6H')
|
330
|
+
# #=>#<DateTimeIndex:83600130 offset=H periods=5 data=[2014-05-01T00:00:00+00:00...2014-05-02T00:00:00+00:00]>
|
331
|
+
#
|
332
|
+
# DaruLite::DateTimeIndex.date_range(
|
333
|
+
# :start => '2012-5-2', :periods => 50, :freq => 'ME')
|
334
|
+
# #=> #<DateTimeIndex:83549940 offset=ME periods=50 data=[2012-05-31T00:00:00+00:00...2016-06-30T00:00:00+00:00]>
|
335
|
+
def self.date_range(opts = {})
  first = Helper.coerce_date(opts[:start])
  last = Helper.coerce_date(opts[:end])
  # An end date is optional; the pair is validated only when it was given.
  Helper.verify_start_and_end(first, last) unless last.nil?

  offset = Helper.offset_from_frequency(opts[:freq])
  dates = Helper.generate_data(first, last, offset, opts[:periods])

  DateTimeIndex.new(dates, freq: offset)
end
|
344
|
+
|
345
|
+
# Retrieve a slice or an individual index number from the index.
|
346
|
+
#
|
347
|
+
# @param key [String, DateTime] Specify a date partially (as a String) or
|
348
|
+
# completely to retrieve.
|
349
|
+
def [](*key)
  # Anything but exactly one argument is treated as a (first, last) slice.
  return slice(*key) unless key.size == 1

  single = key.first
  case single
  when Numeric
    single
  when DateTime
    Helper.find_index_of_date(@data, single)
  when Range
    # FIXME: get_by_range is suspiciously close to just #slice,
    # but one of specs fails when replacing it with just slice
    get_by_range(single.first, single.last)
  else
    if Helper.key_out_of_bounds?(single, @data)
      raise ArgumentError, "Key #{single} is out of bounds"
    end

    slice(*Helper.find_date_string_bounds(single))
  end
end
|
369
|
+
|
370
|
+
# Looks the arguments up with #[]; a Numeric result is returned as is,
# any other result is mapped element by element through #[] again.
def pos(*args)
  result = self[*args]
  result.is_a?(Numeric) ? result : result.map { |date| self[date] }
end
|
377
|
+
|
378
|
+
# Delegates to #[] with the same arguments and returns its result.
def subset(*args)
  self[*args]
end
|
381
|
+
|
382
|
+
# Tells whether the given key(s) can be looked up with #[].
#
# Lookup failures in this file are signalled with ArgumentError (missing
# date, out-of-bounds key, unacceptable date string), so that class is
# rescued alongside IndexError; otherwise the predicate would raise
# instead of answering false.
#
# @param args [Array] the same arguments #[] accepts.
# @return [Boolean] true when #[] succeeds, false when it raises
#   IndexError or ArgumentError.
def valid?(*args)
  self[*args]
  true
rescue IndexError, ArgumentError
  false
end
|
388
|
+
|
389
|
+
# Retrieve a slice of the index by specifying first and last members of the slice.
|
390
|
+
#
|
391
|
+
# @param [String, DateTime] first Start of the slice as a string or DateTime.
|
392
|
+
# @param [String, DateTime] last End of the slice as a string or DateTime.
|
393
|
+
def slice(first, last)
  # Two integers are positions into #to_a.
  if first.is_a?(Integer) && last.is_a?(Integer)
    return DateTimeIndex.new(to_a[first..last], freq: @offset)
  end

  # Partial date strings expand to the start (lower end) or the end (upper
  # end) of the period they name.
  lower = first.is_a?(String) ? Helper.find_date_string_bounds(first)[0] : first
  upper = last.is_a?(String) ? Helper.find_date_string_bounds(last)[1] : last

  slice_between_dates(lower, upper)
end
|
403
|
+
|
404
|
+
# Return the DateTimeIndex as an Array of DateTime objects.
|
405
|
+
# @return [Array<DateTime>] Array of containing DateTimes.
|
406
|
+
def to_a
  # With an offset the date-sorted order is used directly; without one the
  # entries are put back into their original positions first.
  pairs = @offset ? @data : @data.sort_by(&:last)
  pairs.transpose.first || []
end
|
413
|
+
|
414
|
+
# Size of index.
|
415
|
+
def size
  # Recorded once in the constructor from the size of the input data.
  @periods
end
|
418
|
+
|
419
|
+
# Two indexes are equal when their #to_a arrays are equal.
#
# @param other [Object] object to compare with.
# @return [Boolean] false (rather than a NoMethodError) when +other+
#   cannot be converted with #to_a.
def ==(other)
  other.respond_to?(:to_a) && to_a == other.to_a
end
|
422
|
+
|
423
|
+
# Short description: class name, number of periods, the frequency string
# when there is one, and the first and last stored dates when not empty.
def inspect
  parts = [@periods]
  parts << "frequency=#{@frequency}" if @frequency
  meta = parts.compact.join(', ')

  header = "#<#{self.class}(#{meta})"
  return "#{header}>" if @data.empty?

  "#{header} #{@data.first[0]}...#{@data.last[0]}>"
end
|
430
|
+
|
431
|
+
# Shift all dates in the index by a positive number in the future. The dates
|
432
|
+
# are shifted by the same amount as that specified in the offset.
|
433
|
+
#
|
434
|
+
# @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Distance by
|
435
|
+
# which each date should be shifted. Passing an offset object to #shift
|
436
|
+
# will offset each data point by the offset value. Passing a positive
|
437
|
+
# integer will offset each data point by the same offset that it was
|
438
|
+
# created with.
|
439
|
+
# @return [DateTimeIndex] Returns a new, shifted DateTimeIndex object.
|
440
|
+
# @example Using the shift method
|
441
|
+
# index = DaruLite::DateTimeIndex.date_range(
|
442
|
+
# :start => '2012', :periods => 10, :freq => 'YEAR')
|
443
|
+
#
|
444
|
+
# # Passing a offset to shift
|
445
|
+
# index.shift(DaruLite::Offsets::Hour.new(3))
|
446
|
+
# #=>#<DateTimeIndex:84038960 offset=nil periods=10 data=[2012-01-01T03:00:00+00:00...2021-01-01T03:00:00+00:00]>
|
447
|
+
#
|
448
|
+
# # Pass an integer to shift
|
449
|
+
# index.shift(4)
|
450
|
+
# #=>#<DateTimeIndex:83979630 offset=YEAR periods=10 data=[2016-01-01T00:00:00+00:00...2025-01-01T00:00:00+00:00]>
|
451
|
+
def shift(distance)
  # Only integer distances are sign-checked; anything else goes straight
  # to _shift.
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  _shift(distance)
end
|
457
|
+
|
458
|
+
# Shift all dates in the index to the past. The dates are shifted by the same
|
459
|
+
# amount as that specified in the offset.
|
460
|
+
#
|
461
|
+
# @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Integer or
|
462
|
+
# DaruLite::DateOffset. Distance by which each date should be shifted. Passing
|
463
|
+
# an offset object to #lag will offset each data point by the offset value.
|
464
|
+
# Passing a positive integer will offset each data point by the same offset
|
465
|
+
# that it was created with.
|
466
|
+
# @return [DateTimeIndex] A new lagged DateTimeIndex object.
|
467
|
+
def lag(distance)
  # Only integer distances are sign-checked; the negated distance is then
  # handed to _shift.
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  _shift(-distance)
end
|
473
|
+
|
474
|
+
# :nocov:
|
475
|
+
def _dump(_depth)
|
476
|
+
Marshal.dump(data: to_a, freq: @offset)
|
477
|
+
end
|
478
|
+
|
479
|
+
# Marshal hook: rebuilds an index from the hash written by #_dump.
def self._load(data)
  restored = Marshal.load(data)
  DaruLite::DateTimeIndex.new(restored[:data], freq: restored[:freq])
end
|
484
|
+
# :nocov:
|
485
|
+
|
486
|
+
# @!method year
|
487
|
+
# @return [Array<Integer>] Array containing year of each index.
|
488
|
+
# @!method month
|
489
|
+
# @return [Array<Integer>] Array containing month of each index.
|
490
|
+
# @!method day
|
491
|
+
# @return [Array<Integer>] Array containing day of each index.
|
492
|
+
# @!method hour
|
493
|
+
# @return [Array<Integer>] Array containing hour of each index.
|
494
|
+
# @!method min
|
495
|
+
# @return [Array<Integer>] Array containing minutes of each index.
|
496
|
+
# @!method sec
|
497
|
+
# @return [Array<Integer>] Array containing seconds of each index.
|
498
|
+
# Each generated reader collects the named component from every date
# returned by #to_a, in that order.
%i[year month day hour min sec].each do |component|
  define_method(component) do
    to_a.map { |date| date.send(component) }
  end
end
|
505
|
+
|
506
|
+
# Check if a date exists in the index. Will be inferred from string in case
|
507
|
+
# you pass a string. Passing the full date as a DateTime object is recommended.
|
508
|
+
# @param date_time [String, DateTime] date to look for; a String is
#   converted with the precision detected from it.
# @return [Boolean] true only when an entry equal to the date is stored;
#   false for absent dates (including dates past the last entry) and for
#   arguments that are neither String nor DateTime.
# @raise [ArgumentError] when a String is not an acceptable date string.
def include?(date_time)
  return false unless date_time.is_a?(String) || date_time.is_a?(DateTime)

  if date_time.is_a?(String)
    date_precision = Helper.determine_date_precision_of(date_time)
    date_time = Helper.date_time_from(date_time, date_precision)
  end

  # bsearch yields nil when every stored date is earlier; comparing nil
  # with == gives false, so the predicate never returns nil.
  found, = @data.bsearch { |d| d[0] >= date_time }
  found == date_time
end
|
519
|
+
|
520
|
+
# Return true if the DateTimeIndex is empty.
|
521
|
+
def empty?
  # No stored [date, position] pairs means an empty index.
  @data.empty?
end
|
524
|
+
|
525
|
+
private
|
526
|
+
|
527
|
+
# Range lookup used by #[]: integer ends are sliced by position, other
# ends are rejected with ArgumentError only when BOTH are reported out of
# bounds, and are otherwise passed to #slice.
def get_by_range(first, last)
  return slice(first, last) if first.is_a?(Integer) && last.is_a?(Integer)

  both_outside = Helper.key_out_of_bounds?(first, @data) &&
                 Helper.key_out_of_bounds?(last, @data)
  raise ArgumentError, "Keys #{first} and #{last} are out of bounds" if both_outside

  slice(first, last)
end
|
535
|
+
|
536
|
+
# Returns the part of the index lying between two dates (inclusive), or a
# single position when exactly one entry lies in that span.
# Offsets logic reference:
# https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250
def slice_between_dates(first, last)
  # First entry not earlier than +first+, and first entry later than +last+.
  lower = @data.bsearch { |entry| entry[0] >= first }
  past_upper = @data.bsearch { |entry| entry[0] > last }

  unless @offset
    from = @data.index(lower)
    to = past_upper ? @data.index(past_upper) - 1 : Helper.last_date(@data)[1]
    return lower[1] if from == to

    return DateTimeIndex.new(@data[from..to].transpose[0] || []) # empty slice guard
  end

  upper = past_upper ? @data[past_upper[1] - 1] : @data.last
  return lower[1] if lower == upper

  DateTimeIndex.date_range(start: lower[0], end: upper[0], freq: @offset)
end
|
555
|
+
|
556
|
+
# Shared implementation of #shift and #lag. A non-integer distance is
# added to every date and the frequency re-inferred; an integer distance
# moves the first date by that many offset steps (backwards when not
# positive) and regenerates the range with the same number of periods.
def _shift(distance)
  unless distance.is_a?(Integer)
    return DateTimeIndex.new(to_a.map { |e| distance + e }, freq: :infer)
  end

  raise IndexError, 'To lag non-freq date time index pass an offset.' unless @offset

  step = distance.positive? ? @offset : -@offset
  start = distance.abs.times.reduce(@data[0][0]) { |date, _| step + date }

  DateTimeIndex.date_range(start: start, periods: @periods, freq: @offset)
end
|
568
|
+
end
|
569
|
+
end
|