daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,569 @@
1
+ module DaruLite
2
+ # Private module for storing helper functions for DateTimeIndex.
3
+ # @private
4
+ module DateTimeIndexHelper
5
+ class << self
6
# Frequency alias => offset class that implements it.
OFFSETS_HASH = {
  'S' => DaruLite::Offsets::Second,
  'M' => DaruLite::Offsets::Minute,
  'H' => DaruLite::Offsets::Hour,
  'D' => DaruLite::Offsets::Day,
  'W' => DaruLite::Offsets::Week,
  'MONTH' => DaruLite::Offsets::Month,
  'MB' => DaruLite::Offsets::MonthBegin,
  'ME' => DaruLite::Offsets::MonthEnd,
  'YEAR' => DaruLite::Offsets::Year,
  'YB' => DaruLite::Offsets::YearBegin,
  'YE' => DaruLite::Offsets::YearEnd
}.freeze

# Gap between two consecutive entries (as a fraction of a day, which is what
# subtracting two DateTime objects yields) => offset class for that gap.
TIME_INTERVALS = {
  Rational(1, 1) => DaruLite::Offsets::Day,
  Rational(1, 24) => DaruLite::Offsets::Hour,
  Rational(1, 1440) => DaruLite::Offsets::Minute,
  Rational(1, 86_400) => DaruLite::Offsets::Second
}.freeze

# Alternation of every key of DaruLite::DAYS_OF_WEEK.
DOW_REGEXP = Regexp.new(DaruLite::DAYS_OF_WEEK.keys.join('|'))

# Grammar of a frequency string: an optional integer multiplier followed by
# one alias, where only the 'W' alias may carry a '-<weekday>' suffix.
#
# Anchored with \A and \Z (whole string) rather than ^ and $ (per line), so a
# multi-line string whose first line happens to be a valid frequency is
# rejected. \Z still tolerates a single trailing newline.
FREQUENCY_PATTERN = /\A
  (?<multiplier>[0-9]+)?
  (
    (?<offset>MONTH|YEAR|S|H|MB|ME|M|D|YB|YE) |
    (?<offset>W)(-(?<weekday>#{DOW_REGEXP}))?
  )\Z/x.freeze
34
+
35
+ # Generates a DaruLite::DateOffset object for generic offsets or one of the
36
+ # specialized classes within DaruLite::Offsets depending on the 'frequency'
37
+ # string.
38
def offset_from_frequency(frequency)
  # Offset objects are passed through untouched.
  return frequency if frequency.is_a?(DaruLite::DateOffset)

  # A missing frequency means daily.
  spec = frequency || 'D'

  parsed = FREQUENCY_PATTERN.match(spec)
  raise ArgumentError, "Invalid frequency string #{spec}" unless parsed

  alias_name = parsed[:offset]
  klass = OFFSETS_HASH[alias_name]
  raise ArgumentError, "Cannont interpret offset #{alias_name}" unless klass

  count = (parsed[:multiplier] || 1).to_i
  return klass.new(count) unless alias_name == 'W'

  # Only weekly offsets take an anchor weekday (nil when no suffix was given).
  klass.new(count, weekday: DaruLite::DAYS_OF_WEEK[parsed[:weekday]])
end
57
+
58
# Turns a date-like String into a DateTime; any other value (including nil)
# is returned unchanged.
def coerce_date(date)
  if date.is_a?(String)
    date_time_from(date, determine_date_precision_of(date))
  else
    date
  end
end
63
+
64
# True when generated data may begin at +start+ itself: always for Tick
# offsets, otherwise only when the offset reports +start+ as being on-offset.
def begin_from_offset?(offset, start)
  return true if offset.is_a?(DaruLite::Offsets::Tick)

  offset.respond_to?(:on_offset?) && offset.on_offset?(start)
end
68
+
69
# Builds the list of dates for a range.
#
# Starts at +start+ when begin_from_offset? allows it, otherwise at the first
# date reached by adding +offset+ to +start+. When +periods+ is nil, dates are
# produced up to and including +en+; otherwise exactly +periods+ dates are
# produced and +en+ is ignored.
def generate_data(start, en, offset, periods)
  cursor = begin_from_offset?(offset, start) ? start : offset + start
  dates = []

  if periods.nil?
    until cursor > en
      dates << cursor
      cursor = offset + cursor
    end
  else
    periods.times do
      dates << cursor
      cursor = offset + cursor
    end
  end

  dates
end
89
+
90
# Validates the two ends of a date range.
#
# @raise [ArgumentError] if both ends are equal, if start is later than end,
#   or if the two values report different zones.
def verify_start_and_end(start, en)
  problem =
    if start == en
      'Start and end cannot be the same'
    elsif start > en
      'Start must be lesser than end'
    elsif start.zone != en.zone
      'Only same time zones are allowed'
    end

  raise ArgumentError, problem if problem
end
95
+
96
# Guesses the offset separating consecutive entries of +data+.
#
# Returns nil unless every consecutive pair is separated by the same gap.
# A gap listed in TIME_INTERVALS yields that offset class; any other gap
# yields a Second offset when it is a whole multiple of the Second
# multiplier, and nil otherwise.
def infer_offset(data)
  gaps = data.each_cons(2).map { |earlier, later| later - earlier }
  return nil unless gaps.uniq.count == 1

  gap = gaps.first
  return TIME_INTERVALS[gap].new if TIME_INTERVALS.include?(gap)

  seconds = gap / DaruLite::Offsets::Second.new.multiplier
  return unless seconds.denominator == 1

  DaruLite::Offsets::Second.new(seconds.numerator)
end
106
+
107
# Binary-searches +data+ (pairs of [date, position], sorted by date) for an
# exact match of +date_time+ and returns the stored position.
#
# @raise [ArgumentError] when no pair carries exactly +date_time+.
def find_index_of_date(data, date_time)
  entry = data.bsearch { |pair| pair[0] >= date_time }
  return entry[1] if entry && entry[0] == date_time

  raise(ArgumentError, "Cannot find #{date_time}")
end
113
+
114
# Expands a (possibly partial) date string into the [first, last] DateTime
# pair covering the period it names.
def find_date_string_bounds(date_string)
  precision = determine_date_precision_of(date_string)
  generate_bounds(date_time_from(date_string, precision), precision)
end
119
+
120
# Builds a DateTime from +date_string+ according to +date_precision+.
#
# :year  - every non-digit is stripped and the remainder is used as the year.
# :month - the first four-digit run is the year, the first "-N"/"-NN" run the month.
# other  - the string is handed to DateTime.parse as is.
def date_time_from(date_string, date_precision)
  if date_precision == :year
    DateTime.new(date_string.gsub(/[^0-9]/, '').to_i)
  elsif date_precision == :month
    year = date_string.match(/\d\d\d\d/).to_s.to_i
    month = date_string.match(/-\d?\d/).to_s.delete('-').to_i
    DateTime.new(year, month)
  else
    DateTime.parse date_string
  end
end
133
+
134
# Accepted shape of a date string: YYYY[-M[-D[ H[:M[:S]]]]]. Each optional
# component nests inside the previous one, so the number of non-nil capture
# groups equals the number of components that were supplied.
#
# Anchored with \A and \Z (whole string) instead of ^ and $ (per line):
# with per-line anchors a string such as "2012\n2013" was scanned as two
# separate matches and misreported as :month. \Z still tolerates one
# trailing newline.
DATE_PRECISION_REGEXP = /\A(\d\d\d\d)(-\d{1,2}(-\d{1,2}( \d{1,2}(:\d{1,2}(:\d{1,2})?)?)?)?)?\Z/.freeze

# Precision name indexed by the number of supplied components; index 0
# (no match at all) is deliberately nil.
DATE_PRECISIONS = [nil, :year, :month, :day, :hour, :min, :sec].freeze

# Reports the finest component present in +date_string+.
#
# @param date_string [String] date in the `YYYY-MM-DD HH:MM:SS` format, possibly truncated
# @return [Symbol] one of :year, :month, :day, :hour, :min, :sec
# @raise [ArgumentError] when the string does not follow the expected format
def determine_date_precision_of(date_string)
  components = date_string.scan(DATE_PRECISION_REGEXP).flatten.compact
  precision = DATE_PRECISIONS[components.count]
  raise ArgumentError, "Unacceptable date string #{date_string}" unless precision

  precision
end
142
+
143
# Returns the [lower, upper] DateTime pair spanned by +date_time+ at the
# given precision. The lower bound is always +date_time+ itself; the upper
# bound is the last whole second of the enclosing year / month / day /
# hour / minute. For any other precision both bounds are +date_time+.
#
# The upper bounds are built without an offset argument, so they are in UTC.
#
# @param date_time [DateTime]
# @param date_precision [Symbol] :year, :month, :day, :hour, :min or anything else
# @return [Array(DateTime, DateTime)]
def generate_bounds(date_time, date_precision)
  year = date_time.year
  month = date_time.month
  day = date_time.day

  upper =
    case date_precision
    when :year
      DateTime.new(year, 12, 31, 23, 59, 59)
    when :month
      # Day -1 is the last day of the month. The previous computation,
      # ((date_time >> 1) - 1).day, was only right when date_time fell on the
      # 1st (e.g. Jan 31 produced Jan 28).
      DateTime.new(year, month, -1, 23, 59, 59)
    when :day
      DateTime.new(year, month, day, 23, 59, 59)
    when :hour
      DateTime.new(year, month, day, date_time.hour, 59, 59)
    when :min
      DateTime.new(year, month, day, date_time.hour, date_time.min, 59)
    else # second or when precision is same as offset
      date_time
    end

  [date_time, upper]
end
178
+
179
# When the first element of +data+ is a String, parses every element into a
# DateTime IN PLACE (map!) and returns the same array; otherwise returns
# +data+ untouched.
def possibly_convert_to_date_time(data)
  return data unless data[0].is_a?(String)

  data.map! { |text| DateTime.parse(text) }
end
182
+
183
# Returns the entry of +data+ whose second element is the largest.
def last_date(data)
  data.max_by { |entry| entry[1] }
end
186
+
187
# Checks whether the date string +key+ lies outside the dates held in +data+
# (pairs whose first element is the date).
#
# Only year- and month-precision keys are checked; for any finer precision
# the method returns nil.
#
# FIXME: I'm pretty suspicious about logic here:
# why only year & month? - zverok 2016-05-16
def key_out_of_bounds?(key, data)
  dates = data.transpose.first
  precision = determine_date_precision_of key
  date_time = date_time_from key, precision

  if precision == :year
    year_out_of_bounds?(date_time, dates)
  elsif precision == :month
    year_month_out_of_bounds?(date_time, dates)
  end
end
203
+
204
+ private
205
+
206
# True when the year of +date_time+ is earlier than the year of the first
# date or later than the year of the last date.
def year_out_of_bounds?(date_time, dates)
  !date_time.year.between?(dates.first.year, dates.last.year)
end
209
+
210
# True when the (year, month) of +date_time+ falls before the month of the
# first date or after the month of the last date.
#
# Year and month are compared together as a pair. Comparing them
# independently (year < y && month < m) missed cases such as 2011-12 against
# a first date in 2012-01, or 2012-07 against a last date in 2012-06.
#
# @param date_time [DateTime] first instant of the requested month
# @param dates [Array<DateTime>] index dates in ascending order
# @return [Boolean]
def year_month_out_of_bounds?(date_time, dates)
  key = [date_time.year, date_time.month]
  earliest = [dates.first.year, dates.first.month]
  latest = [dates.last.year, dates.last.month]

  (key <=> earliest).negative? || (key <=> latest).positive?
end
214
+ end
215
+ end
216
+
217
+ class DateTimeIndex < Index
218
+ include Enumerable
219
+ Helper = DateTimeIndexHelper
220
+
221
# Builds an index with an inferred frequency when +source+ is truthy and
# ArrayHelper reports it as an array of ::DateTime; returns nil otherwise.
def self.try_create(source)
  return unless source && ArrayHelper.array_of?(source, ::DateTime)

  new(source, freq: :infer)
end
224
+
225
# Iterates over the dates returned by #to_a, forwarding the given block.
def each(&block)
  dates = to_a
  dates.each(&block)
end
228
+
229
+ attr_reader :frequency, :offset, :periods, :keys
230
+
231
+ # Create a DateTimeIndex with or without a frequency in data. The constructor
232
+ # should be used for creating DateTimeIndex by directly passing in DateTime
233
+ # objects or date-like strings, typically in cases where values with frequency
234
+ # are not needed.
235
+ #
236
+ # @param [Array<String>, Array<DateTime>] data Array of date-like Strings or
237
+ # actual DateTime objects for creating the DateTimeIndex.
238
+ # @param [Hash] opts Hash of options for configuring index.
239
+ # @option opts [Symbol, NilClass, String, DaruLite::DateOffset, DaruLite::Offsets::*] freq
240
+ # Option for specifying the frequency of data, if applicable. If `:infer` is
241
+ # passed to this option, daru will try to infer the frequency of the data
242
+ # by itself.
243
+ #
244
+ # @example Usage of DateTimeIndex constructor
245
+ # index = DaruLite::DateTimeIndex.new(
246
+ # [DateTime.new(2012,4,5), DateTime.new(2012,4,6),
247
+ # DateTime.new(2012,4,7), DateTime.new(2012,4,8)])
248
+ # #=>#<DateTimeIndex:84232240 offset=nil periods=4 data=[2012-04-05T00:00:00+00:00...2012-04-08T00:00:00+00:00]>
249
+ #
250
+ # index = DaruLite::DateTimeIndex.new([
251
+ # DateTime.new(2012,4,5), DateTime.new(2012,4,6), DateTime.new(2012,4,7),
252
+ # DateTime.new(2012,4,8), DateTime.new(2012,4,9), DateTime.new(2012,4,10),
253
+ # DateTime.new(2012,4,11), DateTime.new(2012,4,12)], freq: :infer)
254
+ # #=>#<DateTimeIndex:84198340 offset=D periods=8 data=[2012-04-05T00:00:00+00:00...2012-04-12T00:00:00+00:00]>
255
def initialize(data, opts = { freq: nil })
  super(data)
  # Converts date strings in place; the mutated +data+ is what is used below.
  Helper.possibly_convert_to_date_time data

  requested = opts[:freq]
  @offset =
    case requested
    when :infer then Helper.infer_offset(data)
    when nil then nil
    else Helper.offset_from_frequency(requested)
    end
  @frequency = @offset&.freq_string

  # Pairs of [date, original position], kept sorted by date for bsearch.
  @data = data.each_with_index.to_a.sort_by(&:first)
  @periods = data.size
end
271
+
272
+ # Custom dup method for DateTimeIndex
273
# Returns a new DateTimeIndex holding the same dates and the same offset.
#
# The copy is built from #to_a rather than from @data: @data is kept sorted
# by date, so copying it directly reordered an index that has no offset and
# was created from unsorted dates (the copy then no longer compared equal to
# the original). #to_a also yields [] for an empty index, where
# @data.transpose[0] would be nil.
def dup
  DaruLite::DateTimeIndex.new(to_a, freq: @offset)
end
276
+
277
+ # Create a date range by specifying the start, end, periods and frequency
278
+ # of the data.
279
+ #
280
+ # @param [Hash] opts Options hash to create the date range with
281
+ # @option opts [String, DateTime] :start A DateTime object or date-like
282
+ # string that defines the start of the date range.
283
+ # @option opts [String, DateTime] :end A DateTime object or date-like string
284
+ # that defines the end of the date range.
285
+ # @option opts [String, DaruLite::DateOffset, DaruLite::Offsets::*] :freq ('D') The interval
286
+ # between each date in the index. This can either be a string specifying
287
+ # the frequency (i.e. one of the frequency aliases) or an offset object.
288
+ # @option opts [Integer] :periods The number of periods that should go into
289
+ # this index. Takes precedence over `:end`.
290
+ # @return [DateTimeIndex] DateTimeIndex object of the specified parameters.
291
+ #
292
+ # == Notes
293
+ #
294
+ # If you specify :start and :end options as strings, they can be complete or
295
+ # partial dates and daru will intelligently infer the date from the string
296
+ # directly. However, note that the date-like string must be in the format
297
+ # `YYYY-MM-DD HH:MM:SS`.
298
+ #
299
+ # The string aliases supported by the :freq option are as follows:
300
+ #
301
+ # * 'S' - seconds
302
+ # * 'M' - minutes
303
+ # * 'H' - hours
304
+ # * 'D' - days
305
+ # * 'W' - Week (default) anchored on sunday
306
+ # * 'W-SUN' - Same as 'W'
307
+ # * 'W-MON' - Week anchored on monday
308
+ # * 'W-TUE' - Week anchored on tuesday
309
+ # * 'W-WED' - Week anchored on wednesday
310
+ # * 'W-THU' - Week anchored on thursday
311
+ # * 'W-FRI' - Week anchored on friday
312
+ # * 'W-SAT' - Week anchored on saturday
313
+ # * 'MONTH' - Month
314
+ # * 'YEAR' - One year
315
+ # * 'MB' - month begin
316
+ # * 'ME' - month end
317
+ # * 'YB' - year begin
318
+ # * 'YE' - year end
319
+ #
320
+ # Multiples of these can also be specified. For example '2S' for 2 seconds
321
+ # or '2ME' for two month end offsets.
322
+ #
323
+ # Currently the precision of DateTimeIndex is up to seconds only, though this
324
+ # will improve in the future.
325
+ #
326
+ # @example Creating date ranges
327
+ # DaruLite::DateTimeIndex.date_range(
328
+ # :start => DateTime.new(2014,5,1),
329
+ # :end => DateTime.new(2014,5,2), :freq => '6H')
330
+ # #=>#<DateTimeIndex:83600130 offset=H periods=5 data=[2014-05-01T00:00:00+00:00...2014-05-02T00:00:00+00:00]>
331
+ #
332
+ # DaruLite::DateTimeIndex.date_range(
333
+ # :start => '2012-5-2', :periods => 50, :freq => 'ME')
334
+ # #=> #<DateTimeIndex:83549940 offset=ME periods=50 data=[2012-05-31T00:00:00+00:00...2016-06-30T00:00:00+00:00]>
335
def self.date_range(opts = {})
  first_date = Helper.coerce_date opts[:start]
  last_date = Helper.coerce_date opts[:end]
  # The end is optional (a range may be given by :periods instead).
  Helper.verify_start_and_end(first_date, last_date) unless last_date.nil?

  step = Helper.offset_from_frequency opts[:freq]
  dates = Helper.generate_data first_date, last_date, step, opts[:periods]

  DateTimeIndex.new(dates, freq: step)
end
344
+
345
+ # Retreive a slice or a an individual index number from the index.
346
+ #
347
+ # @param key [String, DateTime] Specify a date partially (as a String) or
348
+ # completely to retrieve.
349
def [](*key)
  # Zero or several arguments are handed to #slice as they are.
  return slice(*key) if key.size != 1

  single = key[0]
  case single
  when Numeric
    single
  when DateTime
    Helper.find_index_of_date(@data, single)
  when Range
    # FIXME: get_by_range is suspiciously close to just #slice,
    # but one of specs fails when replacing it with just slice
    get_by_range(single.first, single.last)
  else
    if Helper.key_out_of_bounds?(single, @data)
      raise ArgumentError, "Key #{single} is out of bounds"
    end

    slice(*Helper.find_date_string_bounds(single))
  end
end
369
+
370
# Looks +args+ up through #[]: a Numeric result is returned as is; any
# other result is mapped element by element through #[] again.
def pos(*args)
  found = self[*args]
  found.is_a?(Numeric) ? found : found.map { |date| self[date] }
end
377
+
378
# Alias-like wrapper: returns whatever #[] returns for the same arguments.
def subset(*args)
  result = self[*args]
  result
end
381
+
382
# Reports whether +args+ can be looked up through #[].
#
# Lookup failures in this file are raised as ArgumentError ("Cannot find
# ...", "Key ... is out of bounds", "Unacceptable date string ..."), so
# ArgumentError is rescued alongside IndexError; rescuing IndexError alone
# let those failures escape instead of answering false.
#
# @return [Boolean] true when the lookup succeeds, false when it raises
#   IndexError or ArgumentError
def valid?(*args)
  self[*args]
  true
rescue IndexError, ArgumentError
  false
end
388
+
389
+ # Retrive a slice of the index by specifying first and last members of the slice.
390
+ #
391
+ # @param [String, DateTime] first Start of the slice as a string or DateTime.
392
+ # @param [String, DateTime] last End of the slice as a string or DateTime.
393
def slice(first, last)
  # Two integers slice by position.
  if first.is_a?(Integer) && last.is_a?(Integer)
    return DateTimeIndex.new(to_a[first..last], freq: @offset)
  end

  # A string bound is widened: the lower bound of +first+, the upper bound of +last+.
  lower = first.is_a?(String) ? Helper.find_date_string_bounds(first)[0] : first
  upper = last.is_a?(String) ? Helper.find_date_string_bounds(last)[1] : last

  slice_between_dates lower, upper
end
403
+
404
+ # Return the DateTimeIndex as an Array of DateTime objects.
405
+ # @return [Array<DateTime>] Array of containing DateTimes.
406
def to_a
  # With an offset the date-sorted order is used; without one, the entries
  # are put back in their original positions.
  ordered = @offset ? @data : @data.sort_by(&:last)
  ordered.transpose.first || []
end
413
+
414
+ # Size of index.
415
# Number of entries, as recorded in @periods at construction time.
def size
  @periods
end
418
+
419
# Two indexes are equal when their #to_a arrays are equal; +other+ only
# needs to respond to #to_a.
def ==(other)
  mine = to_a
  mine == other.to_a
end
422
+
423
# Short description: class name, period count, the frequency when there is
# one, and the first and last date when the index is not empty.
def inspect
  parts = [@periods]
  parts << "frequency=#{@frequency}" if @frequency
  meta = parts.compact.join(', ')

  if @data.empty?
    "#<#{self.class}(#{meta})>"
  else
    "#<#{self.class}(#{meta}) #{@data.first[0]}...#{@data.last[0]}>"
  end
end
430
+
431
+ # Shift all dates in the index by a positive number in the future. The dates
432
+ # are shifted by the same amount as that specified in the offset.
433
+ #
434
+ # @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Distance by
435
+ # which each date should be shifted. Passing an offset object to #shift
436
+ # will offset each data point by the offset value. Passing a positive
437
+ # integer will offset each data point by the same offset that it was
438
+ # created with.
439
+ # @return [DateTimeIndex] Returns a new, shifted DateTimeIndex object.
440
+ # @example Using the shift method
441
+ # index = DaruLite::DateTimeIndex.date_range(
442
+ # :start => '2012', :periods => 10, :freq => 'YEAR')
443
+ #
444
+ # # Passing a offset to shift
445
+ # index.shift(DaruLite::Offsets::Hour.new(3))
446
+ # #=>#<DateTimeIndex:84038960 offset=nil periods=10 data=[2012-01-01T03:00:00+00:00...2021-01-01T03:00:00+00:00]>
447
+ #
448
+ # # Pass an integer to shift
449
+ # index.shift(4)
450
+ # #=>#<DateTimeIndex:83979630 offset=YEAR periods=10 data=[2016-01-01T00:00:00+00:00...2025-01-01T00:00:00+00:00]>
451
def shift(distance)
  # Only integers are sign-checked; offset objects go straight to _shift.
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  _shift(distance)
end
457
+
458
+ # Shift all dates in the index to the past. The dates are shifted by the same
459
+ # amount as that specified in the offset.
460
+ #
461
+ # @param [Integer, DaruLite::DateOffset, DaruLite::Offsets::*] distance Integer or
462
+ # DaruLite::DateOffset. Distance by which each date should be shifted. Passing
463
+ # an offset object to #lag will offset each data point by the offset value.
464
+ # Passing a positive integer will offset each data point by the same offset
465
+ # that it was created with.
466
+ # @return [DateTimeIndex] A new lagged DateTimeIndex object.
467
def lag(distance)
  # Only integers are sign-checked; the negated distance is what gets shifted.
  if distance.is_a?(Integer) && distance.negative?
    raise IndexError, "Distance #{distance} cannot be negative"
  end

  _shift(-distance)
end
473
+
474
+ # :nocov:
475
# Marshal hook: serialises the dates (#to_a) and the offset as a Hash.
def _dump(_depth)
  payload = { data: to_a, freq: @offset }
  Marshal.dump(payload)
end
478
+
479
# Marshal hook: rebuilds an index from the Hash written by #_dump.
# NOTE(review): Marshal.load must only ever see trusted input.
def self._load(data)
  payload = Marshal.load data

  DaruLite::DateTimeIndex.new(payload[:data], freq: payload[:freq])
end
484
+ # :nocov:
485
+
486
+ # @!method year
487
+ # @return [Array<Integer>] Array containing year of each index.
488
+ # @!method month
489
+ # @return [Array<Integer>] Array containing month of each index.
490
+ # @!method day
491
+ # @return [Array<Integer>] Array containing day of each index.
492
+ # @!method hour
493
+ # @return [Array<Integer>] Array containing hour of each index.
494
+ # @!method min
495
+ # @return [Array<Integer>] Array containing minutes of each index.
496
+ # @!method sec
497
+ # @return [Array<Integer>] Array containing seconds of each index.
498
# Defines one reader per date component; each collects that component from
# every date yielded by #each into an Array.
%i[year month day hour min sec].each do |meth|
  define_method(meth) do
    map { |date| date.send(meth) }
  end
end
505
+
506
+ # Check if a date exists in the index. Will be inferred from string in case
507
+ # you pass a string. It is recommended to specify the full date as a DateTime object.
508
def include?(date_time)
  # Anything that is neither a String nor a DateTime is never a member.
  return false unless date_time.is_a?(String) || date_time.is_a?(DateTime)

  target =
    if date_time.is_a?(String)
      precision = Helper.determine_date_precision_of date_time
      Helper.date_time_from date_time, precision
    else
      date_time
    end

  # bsearch yields a [date, position] pair (or nil); only the date is kept.
  candidate, = @data.bsearch { |entry| entry[0] >= target }
  candidate && candidate == target
end
519
+
520
+ # Return true if the DateTimeIndex is empty.
521
# True when the index holds no [date, position] entries.
def empty?
  @data.empty?
end
524
+
525
+ private
526
+
527
# Range lookup used by #[]. Integer bounds slice directly; for other bounds
# an ArgumentError is raised only when BOTH ends are reported out of bounds,
# after which the work is delegated to #slice.
def get_by_range(first, last)
  unless first.is_a?(Integer) && last.is_a?(Integer)
    both_outside = Helper.key_out_of_bounds?(first, @data) &&
                   Helper.key_out_of_bounds?(last, @data)
    raise ArgumentError, "Keys #{first} and #{last} are out of bounds" if both_outside
  end

  slice first, last
end
535
+
536
# Returns the part of the index lying between the dates +first+ and +last+
# (both inclusive). When the span collapses to a single entry, that entry's
# position is returned instead of an index.
#
# Background on the offsets logic:
# https://github.com/v0dro/daru/commit/7e1c34aec9516a9ba33037b4a1daaaaf1de0726a#diff-a95ef410a8e1f4ea3cc48d231bb880faR250
def slice_between_dates(first, last)
  lower = @data.bsearch { |entry| entry[0] >= first }     # first entry at or after +first+
  past_upper = @data.bsearch { |entry| entry[0] > last }  # first entry strictly after +last+

  if @offset
    upper = past_upper ? @data[past_upper[1] - 1] : @data.last
    return lower[1] if lower == upper

    DateTimeIndex.date_range start: lower[0], end: upper[0], freq: @offset
  else
    from = @data.index(lower)
    to = past_upper ? @data.index(past_upper) - 1 : Helper.last_date(@data)[1]
    return lower[1] if from == to

    DateTimeIndex.new(@data[from..to].transpose[0] || []) # empty slice guard
  end
end
555
+
556
# Shared implementation of #shift and #lag.
#
# Integer distance: moves the first date |distance| steps of the index's own
# offset (forwards when positive, backwards otherwise) and regenerates the
# same number of periods from there. Requires the index to have an offset.
# Any other distance: added to every date, with the frequency re-inferred.
def _shift(distance)
  unless distance.is_a?(Integer)
    return DateTimeIndex.new(to_a.map { |e| distance + e }, freq: :infer)
  end

  raise IndexError, 'To lag non-freq date time index pass an offset.' unless @offset

  origin = @data[0][0]
  step = distance.positive? ? @offset : -@offset
  distance.abs.times { origin = step + origin }

  DateTimeIndex.date_range(start: origin, periods: @periods, freq: @offset)
end
568
+ end
569
+ end