daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,569 @@
|
|
|
1
|
+
module DaruLite
|
|
2
|
+
# Private module for storing helper functions for DateTimeIndex.
|
|
3
|
+
# @private
|
|
4
|
+
module DateTimeIndexHelper
|
|
5
|
+
class << self
|
|
6
|
+
# Maps each offset alias that may appear in a frequency string to the
# DaruLite::Offsets class that implements it. Looked up by
# offset_from_frequency after the string has been parsed.
OFFSETS_HASH = {
  'S' => DaruLite::Offsets::Second,
  'M' => DaruLite::Offsets::Minute,
  'H' => DaruLite::Offsets::Hour,
  'D' => DaruLite::Offsets::Day,
  'W' => DaruLite::Offsets::Week,
  'MONTH' => DaruLite::Offsets::Month,
  'MB' => DaruLite::Offsets::MonthBegin,
  'ME' => DaruLite::Offsets::MonthEnd,
  'YEAR' => DaruLite::Offsets::Year,
  'YB' => DaruLite::Offsets::YearBegin,
  'YE' => DaruLite::Offsets::YearEnd
}.freeze
|
|
19
|
+
|
|
20
|
+
# Maps a constant gap between consecutive dates, expressed as a Rational
# fraction of one day, to the offset class with that period. Consulted by
# infer_offset before it falls back to a multiple of seconds.
TIME_INTERVALS = {
  Rational(1, 1) => DaruLite::Offsets::Day,
  Rational(1, 24) => DaruLite::Offsets::Hour,
  Rational(1, 1440) => DaruLite::Offsets::Minute,
  Rational(1, 86_400) => DaruLite::Offsets::Second
}.freeze
|
|
26
|
+
|
|
27
|
+
# Alternation of every key of DaruLite::DAYS_OF_WEEK; used as the weekday
# suffix of the weekly alias below.
DOW_REGEXP = Regexp.new(DaruLite::DAYS_OF_WEEK.keys.join('|'))
# Grammar of a frequency string: an optional integer multiplier followed by
# one offset alias; the 'W' alias may carry a '-<weekday>' suffix.
# Named captures: multiplier, offset, weekday.
# NOTE(review): the pattern is anchored with ^ and $, which match at line
# boundaries, so a multi-line string is accepted when any one of its lines
# matches. Confirm whether \A and \z were intended.
FREQUENCY_PATTERN = /^
  (?<multiplier>[0-9]+)?
  (
    (?<offset>MONTH|YEAR|S|H|MB|ME|M|D|YB|YE) |
    (?<offset>W)(-(?<weekday>#{DOW_REGEXP}))?
  )$/x.freeze
|
|
34
|
+
|
|
35
|
+
# Generates a DaruLite::DateOffset object for generic offsets or one of the
# specialized classes within DaruLite::Offsets depending on the 'frequency'
# string.
#
# @param frequency [String, DaruLite::DateOffset, nil] a frequency string
#   matching FREQUENCY_PATTERN (e.g. '2S', 'W-MON'), an existing offset
#   object (returned unchanged), or nil (treated as 'D').
# @return [Object] `frequency` itself when it is a DaruLite::DateOffset,
#   otherwise a new instance of the class OFFSETS_HASH maps the alias to.
# @raise [ArgumentError] when the string does not match FREQUENCY_PATTERN
#   or its alias has no entry in OFFSETS_HASH.
def offset_from_frequency(frequency)
  return frequency if frequency.is_a?(DaruLite::DateOffset)

  frequency ||= 'D'

  matched = FREQUENCY_PATTERN.match(frequency)
  raise ArgumentError, "Invalid frequency string #{frequency}" unless matched

  # A missing multiplier means a single step of the offset.
  n = (matched[:multiplier] || 1).to_i
  offset_string = matched[:offset]
  offset_klass = OFFSETS_HASH[offset_string]
  raise ArgumentError, "Cannot interpret offset #{offset_string}" unless offset_klass

  if offset_string == 'W'
    # The weekday capture is nil for a bare 'W'; the DAYS_OF_WEEK lookup
    # result is handed to the offset class as-is.
    offset_klass.new(n, weekday: DaruLite::DAYS_OF_WEEK[matched[:weekday]])
  else
    offset_klass.new(n)
  end
end
|
|
57
|
+
|
|
58
|
+
# Turns a date-like String into a date object; any other value is returned
# untouched.
#
# @param date [String, Object] value to coerce.
# @return [Object] the parsed date for a String, otherwise `date` itself.
def coerce_date(date)
  if date.is_a?(String)
    precision = determine_date_precision_of(date)
    date_time_from(date, precision)
  else
    date
  end
end
|
|
63
|
+
|
|
64
|
+
# Tells whether generated data may begin at `start` itself.
#
# @param offset [Object] offset object being applied.
# @param start [Object] candidate first date.
# @return [Boolean] true for any DaruLite::Offsets::Tick; otherwise the
#   answer of `offset.on_offset?(start)` when the offset responds to it,
#   and false when it does not.
def begin_from_offset?(offset, start)
  return true if offset.is_a?(DaruLite::Offsets::Tick)
  return false unless offset.respond_to?(:on_offset?)

  offset.on_offset?(start)
end
|
|
68
|
+
|
|
69
|
+
# Builds the list of dates for a range.
#
# @param start [Object] first candidate date.
# @param en [Object, nil] inclusive upper bound; only read when `periods`
#   is nil.
# @param offset [Object] step; applied as `offset + date`.
# @param periods [Integer, nil] number of dates to produce; when given it
#   is used instead of `en`.
# @return [Array] the generated dates.
def generate_data(start, en, offset, periods)
  # Begin on `start` only when it already sits on the offset; otherwise
  # advance one step first.
  current = begin_from_offset?(offset, start) ? start : offset + start
  dates = []

  unless periods.nil?
    periods.times do
      dates << current
      current = offset + current
    end
    return dates
  end

  until current > en
    dates << current
    current = offset + current
  end

  dates
end
|
|
89
|
+
|
|
90
|
+
# Validates the two ends of a date range.
#
# @param start [DateTime] start of the range.
# @param en [DateTime] end of the range.
# @return [nil]
# @raise [ArgumentError] when both ends are equal, when start is after
#   end, or when their zones differ (checked in that order).
def verify_start_and_end(start, en)
  problem =
    if start == en
      'Start and end cannot be the same'
    elsif start > en
      'Start must be lesser than end'
    elsif start.zone != en.zone
      'Only same time zones are allowed'
    end

  raise ArgumentError, problem if problem
end
|
|
95
|
+
|
|
96
|
+
# Tries to deduce a fixed offset from already materialised dates.
#
# @param data [Array] dates supporting subtraction of neighbours.
# @return [Object, nil] an instance of the TIME_INTERVALS class for the
#   common gap, or a DaruLite::Offsets::Second built with the whole number
#   of seconds in the gap; nil when the gaps are not all equal (including
#   fewer than two dates) or the gap is not a whole number of seconds.
def infer_offset(data)
  gaps = data.each_cons(2).map { |earlier, later| later - earlier }
  return nil unless gaps.uniq.count == 1

  gap = gaps.first
  return TIME_INTERVALS[gap].new if TIME_INTERVALS.include?(gap)

  seconds = gap / DaruLite::Offsets::Second.new.multiplier
  return nil unless seconds.denominator == 1

  DaruLite::Offsets::Second.new(seconds.numerator)
end
|
|
106
|
+
|
|
107
|
+
# Looks up the position stored next to an exact date.
#
# @param data [Array<Array>] [date, position] pairs; binary-searched on the
#   date, so it is assumed to be ordered by date.
# @param date_time [Object] date to locate.
# @return [Object] second element of the matching pair.
# @raise [ArgumentError] when no pair carries exactly `date_time`.
def find_index_of_date(data, date_time)
  entry = data.bsearch { |pair| pair[0] >= date_time }
  raise(ArgumentError, "Cannot find #{date_time}") unless entry && entry[0] == date_time

  entry[1]
end
|
|
113
|
+
|
|
114
|
+
# Converts a (possibly partial) date string into the pair of dates that
# generate_bounds produces for its precision.
#
# @param date_string [String] date-like string.
# @return [Array] whatever generate_bounds returns for the parsed date.
def find_date_string_bounds(date_string)
  precision = determine_date_precision_of(date_string)
  generate_bounds(date_time_from(date_string, precision), precision)
end
|
|
119
|
+
|
|
120
|
+
# Builds a DateTime from a date string of known precision.
#
# @param date_string [String] date-like string.
# @param date_precision [Symbol] :year and :month are assembled by hand
#   from the digits in the string; every other value is delegated to
#   DateTime.parse.
# @return [DateTime]
def date_time_from(date_string, date_precision)
  if date_precision == :year
    # Every non-digit is dropped and the remainder is read as the year.
    DateTime.new(date_string.gsub(/[^0-9]/, '').to_i)
  elsif date_precision == :month
    year = date_string.match(/\d\d\d\d/).to_s.to_i
    month = date_string.match(/-\d?\d/).to_s.delete('-').to_i
    DateTime.new(year, month)
  else
    DateTime.parse(date_string)
  end
end
|
|
133
|
+
|
|
134
|
+
# Matches 'YYYY[-M[-D[ H[:M[:S]]]]]'. Each successive component sits in a
# nested optional group, so the number of non-nil captures equals the
# number of components present.
DATE_PRECISION_REGEXP = /^(\d\d\d\d)(-\d{1,2}(-\d{1,2}( \d{1,2}(:\d{1,2}(:\d{1,2})?)?)?)?)?$/.freeze
# Precision name indexed by the number of components matched above; index 0
# (no match) is nil so that determine_date_precision_of can reject it.
DATE_PRECISIONS = [nil, :year, :month, :day, :hour, :min, :sec].freeze
|
|
136
|
+
|
|
137
|
+
# Works out how precise a date string is by counting the components that
# DATE_PRECISION_REGEXP captured.
#
# @param date_string [String] date-like string.
# @return [Symbol] the DATE_PRECISIONS entry for that component count.
# @raise [ArgumentError] when the count maps to no precision (for example
#   when the string does not match at all).
def determine_date_precision_of(date_string)
  captured = date_string.scan(DATE_PRECISION_REGEXP).flatten.compact
  precision = DATE_PRECISIONS[captured.count]
  raise ArgumentError, "Unacceptable date string #{date_string}" unless precision

  precision
end
|
|
142
|
+
|
|
143
|
+
# Expands a date of the given precision into the [first, last] pair that
# delimits it, with second resolution.
#
# @param date_time [DateTime] lower bound; returned unchanged as the first
#   element.
# @param date_precision [Symbol] :year, :month, :day, :hour or :min; any
#   other value yields `date_time` for both ends.
# @return [Array<DateTime>] two-element array [lower, upper].
#
# FIXME (carried over from the original): the author intended to simplify
# this with zverok/time_boots.
def generate_bounds(date_time, date_precision)
  year = date_time.year
  month = date_time.month
  day = date_time.day

  upper =
    case date_precision
    when :year
      DateTime.new(year, 12, 31, 23, 59, 59)
    when :month
      # Step one month ahead, then one day back, and take that day number.
      DateTime.new(year, month, ((date_time >> 1) - 1).day, 23, 59, 59)
    when :day
      DateTime.new(year, month, day, 23, 59, 59)
    when :hour
      DateTime.new(year, month, day, date_time.hour, 59, 59)
    when :min
      DateTime.new(year, month, day, date_time.hour, date_time.min, 59)
    else # second or when precision is same as offset
      date_time
    end

  [date_time, upper]
end
|
|
178
|
+
|
|
179
|
+
# Parses an array of date strings into DateTime objects *in place*.
#
# @param data [Array] array whose first element decides the conversion.
# @return [Array] the same array object: converted when its first element
#   is a String, untouched otherwise.
def possibly_convert_to_date_time(data)
  return data unless data[0].is_a?(String)

  # map! is deliberate: the receiver itself is rewritten.
  data.map! { |text| DateTime.parse(text) }
end
|
|
182
|
+
|
|
183
|
+
# Picks the pair whose second element is the largest.
#
# @param data [Array<Array>] two-element pairs.
# @return [Array, nil] the pair with the greatest second element, or nil
#   for empty input.
def last_date(data)
  data.max_by { |(_date, position)| position }
end
|
|
186
|
+
|
|
187
|
+
# Checks whether a partial date string falls outside the dates in `data`.
#
# @param key [String] date-like string.
# @param data [Array<Array>] [date, position] pairs.
# @return [Boolean, nil] the year or year-month check for :year / :month
#   keys; nil for every finer precision, which is not checked at all.
#
# FIXME (kept from the original, zverok 2016-05-16): it is unclear why only
# year and month precision are examined.
def key_out_of_bounds?(key, data)
  dates = data.transpose.first

  precision = determine_date_precision_of(key)
  date_time = date_time_from(key, precision)

  if precision == :year
    year_out_of_bounds?(date_time, dates)
  elsif precision == :month
    year_month_out_of_bounds?(date_time, dates)
  end
end
|
|
203
|
+
|
|
204
|
+
private
|
|
205
|
+
|
|
206
|
+
# True when the year of `date_time` lies outside the years of the first
# and last entries of `dates`.
#
# @param date_time [DateTime] date under test.
# @param dates [Array<DateTime>] dates whose first and last entries bound
#   the range.
# @return [Boolean]
def year_out_of_bounds?(date_time, dates)
  !date_time.year.between?(dates.first.year, dates.last.year)
end
|
|
209
|
+
|
|
210
|
+
# True when the (year, month) of `date_time` lies before the month of the
# first entry of `dates` or after the month of the last entry.
#
# Year and month are compared together as a pair. Comparing them
# independently (year earlier AND month earlier) misses, for example,
# December of the previous year or an earlier month of the first year.
#
# @param date_time [DateTime] date under test.
# @param dates [Array<DateTime>] dates whose first and last entries bound
#   the range.
# @return [Boolean]
def year_month_out_of_bounds?(date_time, dates)
  key = [date_time.year, date_time.month]
  lower = [dates.first.year, dates.first.month]
  upper = [dates.last.year, dates.last.month]

  # Array#<=> compares element by element, i.e. year first, then month.
  (key <=> lower).negative? || (key <=> upper).positive?
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
class DateTimeIndex < Index
|
|
218
|
+
include Enumerable
|
|
219
|
+
Helper = DateTimeIndexHelper
|
|
220
|
+
|
|
221
|
+
# Builds an index with an inferred frequency, but only from a source made
# up of ::DateTime objects.
#
# @param source [Array, nil] candidate index data.
# @return [Object, nil] the new index, or nil when `source` is falsy or
#   ArrayHelper.array_of? rejects it.
def self.try_create(source)
  return unless source && ArrayHelper.array_of?(source, ::DateTime)

  new(source, freq: :infer)
end
|
|
224
|
+
|
|
225
|
+
# Iterates over the elements of #to_a, handing each one to the block.
# This is the hook the included Enumerable module builds on.
#
# @return whatever Array#each returns for the given block.
def each(&block)
  to_a.each(&block)
end
|
|
228
|
+
|
|
229
|
+
# Read-only accessors. @frequency, @offset and @periods are assigned in
# #initialize. NOTE(review): @keys is not assigned anywhere in this class —
# presumably it comes from the Index superclass; confirm.
attr_reader :frequency, :offset, :periods, :keys
|
|
230
|
+
|
|
231
|
+
# Create a DateTimeIndex with or without a frequency in data. The constructor
|
|
232
|
+
# should be used for creating DateTimeIndex by directly passing in DateTime
|
|
233
|
+
# objects or date-like strings, typically in cases where values with frequency
|
|
234
|
+
# are not needed.
|
|
235
|
+
#
|
|
236
|
+
# @param [Array<String>, Array<DateTime>] data Array of date-like Strings or
|
|
237
|
+
# actual DateTime objects for creating the DateTimeIndex.
|
|
238
|
+
# @param [Hash] opts Hash of options for configuring index.
|
|
239
|
+
# @option opts [Symbol, NilClass, String, DaruLite::DateOffset, DaruLite::Offsets::*] freq
|
|
240
|
+
# Option for specifying the frequency of data, if applicable. If `:infer` is
|
|
241
|
+
# passed to this option, daru will try to infer the frequency of the data
|
|
242
|
+
# by itself.
|
|
243
|
+
#
|
|
244
|
+
# @example Usage of DateTimeIndex constructor
|
|
245
|
+
# index = DaruLite::DateTimeIndex.new(
|
|
246
|
+
# [DateTime.new(2012,4,5), DateTime.new(2012,4,6),
|
|
247
|
+
# DateTime.new(2012,4,7), DateTime.new(2012,4,8)])
|
|
248
|
+
# #=>#<DateTimeIndex:84232240 offset=nil periods=4 data=[2012-04-05T00:00:00+00:00...2012-04-08T00:00:00+00:00]>
|
|
249
|
+
#
|
|
250
|
+
# index = DaruLite::DateTimeIndex.new([
|
|
251
|
+
# DateTime.new(2012,4,5), DateTime.new(2012,4,6), DateTime.new(2012,4,7),
|
|
252
|
+
# DateTime.new(2012,4,8), DateTime.new(2012,4,9), DateTime.new(2012,4,10),
|
|
253
|
+
# DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer)
|
|
254
|
+
# #=>#<DateTimeIndex:84198340 offset=D periods=8 data=[2012-04-05T00:00:00+00:00...2012-04-12T00:00:00+00:00]>
|
|
255
|
+
def initialize(data, opts = { freq: nil })
  super(data)
  # The return value is ignored on purpose: when the first element is a
  # String the helper rewrites `data` in place, so from here on `data`
  # (the very array the caller passed) holds DateTime objects.
  Helper.possibly_convert_to_date_time data

  # :infer derives the offset from the dates, nil leaves the index without
  # an offset, and any other value is interpreted as a frequency string or
  # offset object.
  @offset =
    case opts[:freq]
    when :infer then Helper.infer_offset(data)
    when nil then nil
    else Helper.offset_from_frequency(opts[:freq])
    end

  @frequency = @offset&.freq_string
  # [date, position in `data`] pairs ordered by date, so lookups can use
  # binary search while the original position stays recoverable.
  @data = data.each_with_index.to_a.sort_by(&:first)

  @periods = data.size
end
|
|
271
|
+
|
|
272
|
+
# Custom dup method for DateTimeIndex.
#
# The copy is built from #to_a rather than from the internal date-sorted
# pairs, so that an index without an offset keeps the order in which its
# dates were supplied, and an empty index is copied as an empty array
# instead of nil.
#
# @return [DaruLite::DateTimeIndex] a new index with the same dates and
#   the same offset.
def dup
  DaruLite::DateTimeIndex.new(to_a, freq: @offset)
end
|
|
276
|
+
|
|
277
|
+
# Create a date range by specifying the start, end, periods and frequency
|
|
278
|
+
# of the data.
|
|
279
|
+
#
|
|
280
|
+
# @param [Hash] opts Options hash to create the date range with
|
|
281
|
+
# @option opts [String, DateTime] :start A DateTime object or date-like
|
|
282
|
+
# string that defines the start of the date range.
|
|
283
|
+
# @option opts [String, DateTime] :end A DateTime object or date-like string
|
|
284
|
+
# that defines the end of the date range.
|
|
285
|
+
# @option opts [String, DaruLite::DateOffset, DaruLite::Offsets::*] :freq ('D') The interval
|
|
286
|
+
# between each date in the index. This can either be a string specifying
|
|
287
|
+
# the frequency (i.e. one of the frequency aliases) or an offset object.
|
|
288
|
+
# @option opts [Integer] :periods The number of periods that should go into
|
|
289
|
+
# this index. Takes precedence over `:end`.
|
|
290
|
+
# @return [DateTimeIndex] DateTimeIndex object of the specified parameters.
|
|
291
|
+
#
|
|
292
|
+
# == Notes
|
|
293
|
+
#
|
|
294
|
+
# If you specify :start and :end options as strings, they can be complete or
|
|
295
|
+
# partial dates and daru will intelligently infer the date from the string
|
|
296
|
+
# directly. However, note that the date-like string must be in the format
|
|
297
|
+
# `YYYY-MM-DD HH:MM:SS`.
|
|
298
|
+
#
|
|
299
|
+
# The string aliases supported by the :freq option are as follows:
|
|
300
|
+
#
|
|
301
|
+
# * 'S' - seconds
|
|
302
|
+
# * 'M' - minutes
|
|
303
|
+
# * 'H' - hours
|
|
304
|
+
# * 'D' - days
|
|
305
|
+
# * 'W' - Week (default) anchored on sunday
|
|
306
|
+
# * 'W-SUN' - Same as 'W'
|
|
307
|
+
# * 'W-MON' - Week anchored on monday
|
|
308
|
+
# * 'W-TUE' - Week anchored on tuesday
|
|
309
|
+
# * 'W-WED' - Week anchored on wednesday
|
|
310
|
+
# * 'W-THU' - Week anchored on thursday
|
|
311
|
+
# * 'W-FRI' - Week anchored on friday
|
|
312
|
+
# * 'W-SAT' - Week anchored on saturday
|
|
313
|
+
# * 'MONTH' - Month
|
|
314
|
+
# * 'YEAR' - One year
|
|
315
|
+
# * 'MB' - month begin
|
|
316
|
+
# * 'ME' - month end
|
|
317
|
+
# * 'YB' - year begin
|
|
318
|
+
# * 'YE' - year end
|
|
319
|
+
#
|
|
320
|
+
# Multiples of these can also be specified. For example '2S' for 2 seconds
|
|
321
|
+
# or '2ME' for two month end offsets.
|
|
322
|
+
#
|
|
323
|
+
# Currently the precision of DateTimeIndex is up to seconds only, though this
|
|
324
|
+
# will improve in the future.
|
|
325
|
+
#
|
|
326
|
+
# @example Creating date ranges
|
|
327
|
+
# DaruLite::DateTimeIndex.date_range(
|
|
328
|
+
# :start => DateTime.new(2014,5,1),
|
|
329
|
+
# :end => DateTime.new(2014,5,2), :freq => '6H')
|
|
330
|
+
# #=>#<DateTimeIndex:83600130 offset=H periods=5 data=[2014-05-01T00:00:00+00:00...2014-05-02T00:00:00+00:00]>
|
|
331
|
+
#
|
|
332
|
+
# DaruLite::DateTimeIndex.date_range(
|
|
333
|
+
# :start => '2012-5-2', :periods => 50, :freq => 'ME')
|
|
334
|
+
# #=> #<DateTimeIndex:83549940 offset=ME periods=50 data=[2012-05-31T00:00:00+00:00...2016-06-30T00:00:00+00:00]>
|
|
335
|
+
def self.date_range(opts = {})
  # String bounds are parsed; other values pass through the helper as-is.
  start, en = opts.values_at(:start, :end).map { |bound| Helper.coerce_date(bound) }
  # The two ends are only validated against each other when an end exists.
  Helper.verify_start_and_end(start, en) unless en.nil?

  offset = Helper.offset_from_frequency(opts[:freq])
  dates = Helper.generate_data(start, en, offset, opts[:periods])

  DateTimeIndex.new(dates, freq: offset)
end
|
|
344
|
+
|
|
345
|
+
# Retrieve a slice or an individual index number from the index.
#
# Any number of arguments other than one is forwarded to #slice. A single
# Numeric is returned as is, a DateTime is resolved to its position, a
# Range is sliced by its two ends, and anything else is treated as a
# (possibly partial) date string.
#
# @param key [String, DateTime] Specify a date partially (as a String) or
#   completely to retrieve.
# @raise [ArgumentError] when a date string is reported out of bounds.
def [](*key)
  return slice(*key) unless key.size == 1

  lookup = key.first
  case lookup
  when Numeric then lookup
  when DateTime then Helper.find_index_of_date(@data, lookup)
  when Range
    # FIXME: get_by_range is suspiciously close to just #slice,
    #   but one of specs fails when replacing it with just slice
    get_by_range(lookup.first, lookup.last)
  else
    if Helper.key_out_of_bounds?(lookup, @data)
      raise ArgumentError, "Key #{lookup} is out of bounds"
    end

    slice(*Helper.find_date_string_bounds(lookup))
  end
end
|
|
369
|
+
|
|
370
|
+
# Positional lookup built on #[].
#
# @param args [Array] forwarded unchanged to #[].
# @return [Numeric, Array] the result of #[] when it is a Numeric;
#   otherwise each element of that result looked up again through #[].
def pos(*args)
  located = self[*args]
  located.is_a?(Numeric) ? located : located.map { |date| self[date] }
end
|
|
377
|
+
|
|
378
|
+
# Alias-style wrapper: forwards every argument to #[] and returns its
# result unchanged.
def subset(*args)
  self[*args]
end
|
|
381
|
+
|
|
382
|
+
# Reports whether the given key(s) can be looked up through #[].
#
# Lookups in this class signal a missing or out-of-bounds key with
# ArgumentError ("Cannot find ...", "Key ... is out of bounds"), so that
# error is treated as "not valid" alongside IndexError instead of
# escaping from a predicate.
#
# @param args [Array] forwarded unchanged to #[].
# @return [Boolean] true when #[] returns normally, false when it raises
#   IndexError or ArgumentError.
def valid?(*args)
  self[*args]
  true
rescue IndexError, ArgumentError
  false
end
|
|
388
|
+
|
|
389
|
+
# Retrieve a slice of the index by specifying first and last members of the slice.
#
# Two Integers slice #to_a by position. Otherwise String ends are expanded
# to the lower bound of `first` and the upper bound of `last`, and the
# slice is taken between those dates.
#
# @param [String, DateTime] first Start of the slice as a string or DateTime.
# @param [String, DateTime] last End of the slice as a string or DateTime.
def slice(first, last)
  if first.is_a?(Integer) && last.is_a?(Integer)
    return DateTimeIndex.new(to_a[first..last], freq: @offset)
  end

  lower = first.is_a?(String) ? Helper.find_date_string_bounds(first)[0] : first
  upper = last.is_a?(String) ? Helper.find_date_string_bounds(last)[1] : last

  slice_between_dates(lower, upper)
end
|
|
403
|
+
|
|
404
|
+
# Return the DateTimeIndex as an Array of DateTime objects.
#
# With an offset the dates come out in the stored (date-sorted) order;
# without one the stored pairs are first put back into the order of their
# recorded positions.
#
# @return [Array<DateTime>] Array of containing DateTimes; empty when the
#   index holds no data.
def to_a
  ordered = @offset ? @data : @data.sort_by(&:last)
  ordered.transpose.first || []
end
|
|
413
|
+
|
|
414
|
+
# Size of index.
#
# @return [Integer] the @periods value recorded by #initialize (the number
#   of elements the index was built from).
def size
  @periods
end
|
|
418
|
+
|
|
419
|
+
# Two indexes are equal when their #to_a arrays are equal; offset and
# frequency are not compared.
# NOTE(review): `other` must respond to #to_a — comparing with an object
# that does not will raise NoMethodError rather than return false.
def ==(other)
  to_a == other.to_a
end
|
|
422
|
+
|
|
423
|
+
# Short human-readable description: class name, number of periods, the
# frequency when one is set, and — unless the index is empty — the first
# and last stored dates.
#
# @return [String]
def inspect
  summary = [@periods]
  summary << "frequency=#{@frequency}" if @frequency
  head = "#<#{self.class}(#{summary.compact.join(', ')})"
  return "#{head}>" if @data.empty?

  "#{head} #{@data.first[0]}...#{@data.last[0]}>"
end
|
|
430
|
+
|
|
431
|
+
# Shift all dates in the index by a positive number in the future. The dates
|
|
432
|
+
# are shifted by the same amount as that specified in the offset.
|
|
433
|
+
#
|
|
434
|
+
# @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Distance by
|
|
435
|
+
# which each date should be shifted. Passing an offset object to #shift
|
|
436
|
+
# will offset each data point by the offset value. Passing a positive
|
|
437
|
+
# integer will offset each data point by the same offset that it was
|
|
438
|
+
# created with.
|
|
439
|
+
# @return [DateTimeIndex] Returns a new, shifted DateTimeIndex object.
|
|
440
|
+
# @example Using the shift method
|
|
441
|
+
# index = DaruLite::DateTimeIndex.date_range(
|
|
442
|
+
# :start => '2012', :periods => 10, :freq => 'YEAR')
|
|
443
|
+
#
|
|
444
|
+
# # Passing an offset to shift
|
|
445
|
+
# index.shift(DaruLite::Offsets::Hour.new(3))
|
|
446
|
+
# #=>#<DateTimeIndex:84038960 offset=nil periods=10 data=[2012-01-01T03:00:00+00:00...2021-01-01T03:00:00+00:00]>
|
|
447
|
+
#
|
|
448
|
+
# # Pass an integer to shift
|
|
449
|
+
# index.shift(4)
|
|
450
|
+
# #=>#<DateTimeIndex:83979630 offset=YEAR periods=10 data=[2016-01-01T00:00:00+00:00...2025-01-01T00:00:00+00:00]>
|
|
451
|
+
def shift(distance)
  # Only Integer distances are range-checked; offset objects go straight
  # to the shared implementation.
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  _shift(distance)
end
|
|
457
|
+
|
|
458
|
+
# Shift all dates in the index to the past. The dates are shifted by the same
|
|
459
|
+
# amount as that specified in the offset.
|
|
460
|
+
#
|
|
461
|
+
# @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Integer or
|
|
462
|
+
# DaruLite::DateOffset. Distance by which each date should be shifted. Passing
|
|
463
|
+
# an offset object to #lag will offset each data point by the offset value.
|
|
464
|
+
# Passing a positive integer will offset each data point by the same offset
|
|
465
|
+
# that it was created with.
|
|
466
|
+
# @return [DateTimeIndex] A new lagged DateTimeIndex object.
|
|
467
|
+
def lag(distance)
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  # Lagging is shifting by the negated distance (or negated offset).
  _shift(-distance)
end
|
|
473
|
+
|
|
474
|
+
# :nocov:
|
|
475
|
+
# Marshal hook: serialises the index as a Hash holding its dates (#to_a)
# under :data and its offset under :freq. The depth argument is unused.
#
# @return [String] the Marshal dump of that Hash.
def _dump(_depth)
  Marshal.dump(data: to_a, freq: @offset)
end
|
|
478
|
+
|
|
479
|
+
# Marshal hook: rebuilds an index from the string produced by #_dump,
# reading the :data and :freq entries of the dumped Hash.
#
# @param data [String] marshalled Hash.
# @return [DaruLite::DateTimeIndex]
def self._load(data)
  h = Marshal.load data

  DaruLite::DateTimeIndex.new(h[:data], freq: h[:freq])
end
|
|
484
|
+
# :nocov:
|
|
485
|
+
|
|
486
|
+
# @!method year
|
|
487
|
+
# @return [Array<Integer>] Array containing year of each index.
|
|
488
|
+
# @!method month
|
|
489
|
+
# @return [Array<Integer>] Array containing month of each index.
|
|
490
|
+
# @!method day
|
|
491
|
+
# @return [Array<Integer>] Array containing day of each index.
|
|
492
|
+
# @!method hour
|
|
493
|
+
# @return [Array<Integer>] Array containing hour of each index.
|
|
494
|
+
# @!method min
|
|
495
|
+
# @return [Array<Integer>] Array containing minutes of each index.
|
|
496
|
+
# @!method sec
|
|
497
|
+
# @return [Array<Integer>] Array containing seconds of each index.
|
|
498
|
+
# Defines one reader per date component; each returns an Array with that
# component of every date in the index, in #to_a order.
%i[year month day hour min sec].each do |component|
  define_method(component) do
    to_a.map { |date| date.send(component) }
  end
end
|
|
505
|
+
|
|
506
|
+
# Check if a date exists in the index. Will be inferred from string in case
# you pass a string. Recommended: specify the full date as a DateTime object.
#
# A String is converted to a single DateTime (the start of the period it
# names) and that exact instant is searched for.
#
# @param date_time [String, DateTime] date to look for; any other type
#   yields false.
# @return [Boolean] always true or false, including when the date lies
#   beyond the last entry.
def include?(date_time)
  return false unless date_time.is_a?(String) || date_time.is_a?(DateTime)

  if date_time.is_a?(String)
    date_precision = Helper.determine_date_precision_of date_time
    date_time = Helper.date_time_from date_time, date_precision
  end

  # bsearch yields nil when every stored date is earlier than date_time;
  # comparing nil with == gives false, keeping the result a strict boolean.
  found, = @data.bsearch { |entry| entry[0] >= date_time }
  found == date_time
end
|
|
519
|
+
|
|
520
|
+
# Return true if the DateTimeIndex is empty.
#
# @return [Boolean] whether the internal @data array has no entries.
def empty?
  @data.empty?
end
|
|
524
|
+
|
|
525
|
+
private
|
|
526
|
+
|
|
527
|
+
# Slices the index by the two ends of a Range key.
#
# Integer ends are sliced by position. Otherwise the slice is refused only
# when BOTH ends are date strings that are out of bounds. Only Strings are
# bounds-checked, because the check parses its key as a date string; ends
# given as DateTime objects go straight to #slice, which accepts them.
#
# @param first [Integer, String, DateTime] first member of the range.
# @param last [Integer, String, DateTime] last member of the range.
# @return [Object] whatever #slice returns.
# @raise [ArgumentError] when both ends are out-of-bounds date strings.
def get_by_range(first, last)
  return slice(first, last) if first.is_a?(Integer) && last.is_a?(Integer)

  both_out = [first, last].all? do |key|
    key.is_a?(String) && Helper.key_out_of_bounds?(key, @data)
  end
  raise ArgumentError, "Keys #{first} and #{last} are out of bounds" if both_out

  slice first, last
end
|
|
535
|
+
|
|
536
|
+
# Returns the part of the index lying between two dates (both inclusive).
#
# @param first [Object] lower date bound.
# @param last [Object] upper date bound.
# @return [Integer, DateTimeIndex] the stored position of the single
#   matching entry when the slice collapses to one element, otherwise a
#   new index covering the slice.
#
# The offsets logic here was left pending a cleaner understanding by the
# original author. Reference:
# https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250
def slice_between_dates(first, last)
  # First stored pair whose date is >= first, and first pair whose date is
  # strictly after last (nil when no such pair exists). bsearch presumes
  # @data is ordered by date.
  start = @data.bsearch { |d| d[0] >= first }
  after_en = @data.bsearch { |d| d[0] > last }

  if @offset
    # The stored position of the entry after the end, minus one, is used
    # directly as an index into @data.
    # NOTE(review): if after_en[1] is 0 this indexes @data[-1], i.e. the
    # last entry; and `start` is nil when `first` is later than every
    # stored date — confirm both cases are intended or unreachable.
    en = after_en ? @data[after_en[1] - 1] : @data.last
    return start[1] if start == en

    # Regenerate the range from the two end dates using the index's offset.
    DateTimeIndex.date_range start: start[0], end: en[0], freq: @offset
  else
    # Without an offset, work with positions inside the date-sorted @data.
    st = @data.index(start)
    en = after_en ? @data.index(after_en) - 1 : Helper.last_date(@data)[1]
    return start[1] if st == en

    DateTimeIndex.new(@data[st..en].transpose[0] || []) # empty slice guard
  end
end
|
|
555
|
+
|
|
556
|
+
# Shared implementation behind #shift and #lag.
#
# @param distance [Integer, Object] an Integer moves the whole range by
#   that many steps of the index's own offset (backwards when not
#   positive); any other object is added to every date individually.
# @return [Object] a new index: regenerated from the moved start date for
#   Integer distances, or rebuilt from the moved dates with an inferred
#   frequency otherwise.
# @raise [IndexError] for an Integer distance on an index without offset.
def _shift(distance)
  unless distance.is_a?(Integer)
    moved = to_a.map { |date| distance + date }
    return DateTimeIndex.new(moved, freq: :infer)
  end

  raise IndexError, 'To lag non-freq date time index pass an offset.' unless @offset

  origin = @data[0][0]
  step = distance.positive? ? @offset : -@offset
  # Apply the step |distance| times to the earliest stored date.
  new_start = distance.abs.times.reduce(origin) { |date, _| step + date }

  DateTimeIndex.date_range(start: new_start, periods: @periods, freq: @offset)
end
|
|
568
|
+
end
|
|
569
|
+
end
|