TimeSeriesAnalyzer 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/TimeSeriesAnalyzer.rb +307 -3
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 445a8aee4cb146c0e1d35db4e76ab8cc9f9aea6fd99126d0b23efa4e8071e44f
4
- data.tar.gz: a36c59cf65e3897efc8b35d1cf4ff0c9b615fe086aa15d3038b84b5857cde430
3
+ metadata.gz: afe5e55449bead7c4122c26277c7316ba1954904037c0e6bda307f4010243072
4
+ data.tar.gz: f019bf24421e06de735dc309393f3a2ebcc7ba7916171951b8e2d5a7176f401e
5
5
  SHA512:
6
- metadata.gz: 96de7319cbdf8cdac27613e2378bb1d0cdac56ba433970e71b738d5240689059e5ac8d03f8576cdf4747e79f56b6d46e55578cee2e5e5de011c76315cce737ab
7
- data.tar.gz: 5f026244fc2c2ac060ebafd512eacf9e9154ce9f48ee4723657c747a9fb521a3cdf72c289eff3dbfa6218f78796e238eee12c50d18421a76f812c5964a3d65aa
6
+ metadata.gz: cf1832b2d79731f1b813e2978213068473a3c3f2991bff329d404a92e06837f97de0fc31355b1a0ecd96b16109e7e500df4df90944e091d1fc67a21782b3f672
7
+ data.tar.gz: 02acea803ec3c301ac9998c5e771a6b468493904a1a5b7a851dbfaca9795ec3a44e7f70689076f7652f721c345f3c2e2d30b9e03babde733ef66d813cd0e51e7
@@ -1,8 +1,312 @@
1
1
  # frozen_string_literal: true
2
-
2
+ require 'csv'
3
+ require 'rmagick'
4
+ require 'time'
5
+ require 'matrix'
3
6
  require_relative "TimeSeriesAnalyzer/version"
4
7
 
5
8
  module TimeSeriesAnalyzer
6
- class Error < StandardError; end
7
- # Your code goes here...
9
+ #Числовой ряд
10
+ class TimeSeries
11
+ attr_accessor :data, :timestamps
12
+
13
# Builds a series from two parallel arrays.
#
# @param data [Array<Numeric, nil>] observed values (nil marks a gap; the plot skips segments touching one)
# @param timestamps [Array] time stamps matching the values position by position
# @param period [Integer] seasonal period handed to SeasonalComponent;
#   the default -1 makes SeasonalComponent detect the period itself
def initialize(data, timestamps, period = -1)
  @data, @timestamps, @period = data, timestamps, period
end
18
+
19
# Reads a series from a CSV file that has a header row.
#
# The file must provide a `timestamp` column (parsed with Time.parse) and a
# `value` column (converted with String#to_f).
#
# @param file_path [String] path of the CSV file
# @param period [Integer] seasonal period forwarded to the constructor (-1 = detect)
# @return [TimeSeries] series holding the parsed values and timestamps
def self.load_from_csv(file_path, period = -1)
  parsed_rows = CSV.foreach(file_path, headers: true).map do |row|
    [Time.parse(row['timestamp']), row['value'].to_f]
  end

  timestamps = parsed_rows.map(&:first)
  data = parsed_rows.map(&:last)
  new(data, timestamps, period)
end
29
+
30
# Renders this series as a line chart and writes it to an image file.
#
# @param file_name [String] path of the image to write (defaults to 'time_series.png')
# @param title [String] caption drawn at the top of the chart
def plot(file_name = 'time_series.png', title = "")
  # Argument order differs from draw_plot: title comes before the file name there.
  draw_plot(@data, @timestamps, title, file_name)
end
34
+
35
# Returns a new series holding the simple moving average of this one.
#
# The first `window_size - 1` positions have no complete window and are
# filled with nil, so the result stays aligned with the original timestamps.
#
# @param window_size [Integer] number of consecutive values averaged together (must be positive)
# @return [TimeSeries] averaged series sharing this series' timestamps
# @raise [ArgumentError] if window_size is not positive (raised by each_cons)
def moving_average(window_size)
  # fdiv keeps the average fractional even when the data are Integers.
  averages = @data.each_cons(window_size).map { |window| window.sum.fdiv(window_size) }
  # Never pad beyond the data length, even if the window is longer than the series.
  leading_gap = Array.new([window_size - 1, @data.size].min)
  TimeSeries.new(leading_gap + averages, @timestamps)
end
41
+
42
# Returns a new series holding the exponential smoothing of this one.
#
# The first smoothed value is the first observation; every later value is
# `alpha * observation + (1 - alpha) * previous_smoothed_value`.
#
# @param alpha [Numeric] smoothing factor applied to the current observation
# @return [TimeSeries] smoothed series sharing this series' timestamps
def exponential_smoothing(alpha)
  smoothed = @data.drop(1).each_with_object([@data.first]) do |observation, acc|
    acc << alpha * observation + (1 - alpha) * acc.last
  end
  TimeSeries.new(smoothed, @timestamps)
end
50
+
51
# Splits the series into trend, seasonal and residual components.
#
# The residual is `value - trend - seasonal`, computed from the per-sample
# trend (TrendComponent#fit). The returned :trend series is built from
# TrendComponent#accurate_fit instead, so it can contain more points than
# there are timestamps.
#
# @return [Hash{Symbol => TimeSeries}] series under the keys :trend, :seasonal and :residual
def decompose
  trend_model = TrendComponent.new(@data)
  trend_values = trend_model.fit
  seasonal_values = SeasonalComponent.new(@data, @period).fit

  residual_values = @data.zip(trend_values, seasonal_values).map do |observed, trend_part, seasonal_part|
    observed - trend_part - seasonal_part
  end

  {
    trend: TimeSeries.new(trend_model.accurate_fit, @timestamps),
    seasonal: TimeSeries.new(seasonal_values, @timestamps),
    residual: TimeSeries.new(residual_values, @timestamps)
  }
end
62
+
63
# Finds the values lying more than three standard deviations from the mean.
#
# @return [Array<Hash>] one `{ timestamp:, value: }` hash per anomalous point, in series order
def detect_anomalies
  limit = 3 * Math.sqrt(variance(@data))
  center = mean(@data)

  @data.each_with_index.each_with_object([]) do |(value, index), found|
    found << { timestamp: @timestamps[index], value: value } if (value - center).abs > limit
  end
end
71
+
72
# Forecasts the next values of the series.
#
# Only the residuals are extrapolated (via forecast_arima); the trend and the
# seasonal component are held at their last fitted values for every step.
#
# @param steps [Integer] number of future values to produce
# @return [Array<Numeric>] forecast values, one per step
def forecast(steps)
  trend_values = TrendComponent.new(@data).fit
  seasonal_values = SeasonalComponent.new(@data, @period).fit

  # Fitted value per sample; a missing seasonal entry counts as 0.0.
  fitted = trend_values.each_with_index.map do |trend_part, index|
    trend_part + seasonal_values[index].to_f
  end
  residuals = @data.each_with_index.map { |observed, index| observed - fitted[index] }

  predicted_residuals = forecast_arima(residuals, steps)
  final_trend = trend_values.last
  final_seasonal = seasonal_values.last

  1.upto(steps).map do |step|
    final_trend + final_seasonal + predicted_residuals[step - 1]
  end
end
88
+
89
+ private
90
# Extrapolates a residual series with a single multiplicative coefficient.
#
# Despite the name this is not a full ARIMA model: `phi` is the mean ratio
# between consecutive values, and step k of the forecast is `last * phi**k`.
#
# @param data [Array<Numeric>] residuals to extrapolate
# @param steps [Integer] number of future values to produce
# @return [Array<Float>] forecast residuals, one per step
def forecast_arima(data, steps)
  # Pairs whose first value is zero have no defined ratio; including them
  # would turn phi (and the whole forecast) into Infinity or NaN.
  ratios = data.each_cons(2)
               .reject { |previous, _| previous.zero? }
               .map { |previous, current| current / previous.to_f }
  # No usable pair (short or all-zero series): predict zero residuals.
  phi = ratios.empty? ? 0.0 : mean(ratios)

  last_value = data.last.to_f
  (1..steps).map { |step| last_value * (phi**step) }
end
96
+
97
# Renders a series as a line chart and writes it to an image file.
#
# The canvas is 400 px high and grows 50 px in width per timestamp. Values are
# scaled linearly between the smallest and largest non-nil value.
#
# @param data [Array<Numeric, nil>] values to plot; a segment touching a nil is skipped
# @param timestamps [Array<#strftime>] x-axis labels, printed as YYYY-MM-DD
# @param title [String] caption drawn at the top of the image
# @param file_name [String] output path passed to Magick::Image#write
def draw_plot(data, timestamps, title, file_name)
  width = 200 + timestamps.size * 50
  height = 400
  padding = 50
  plot_area_width = width - 2 * padding
  plot_area_height = height - 2 * padding

  present_values = data.compact
  min_value = present_values.min.to_f
  max_value = present_values.max.to_f
  value_range = max_value - min_value
  # A flat (or empty) series has no range; use 1.0 so the scaling below never divides by zero.
  value_range = 1.0 if value_range.zero?

  # The option must be set on the yielded object; a bare `background_color = ...`
  # would only create a block-local variable.
  canvas = Magick::Image.new(width, height) { |options| options.background_color = 'white' }
  draw = Magick::Draw.new

  # Draw title
  draw.annotate(canvas, 0, 0, 0, padding / 2, title) do |options|
    options.font_weight = Magick::BoldWeight
    options.pointsize = 20
    options.gravity = Magick::NorthGravity
  end

  # Draw axes
  draw.line(padding, padding, padding, height - padding)
  draw.line(padding, height - padding, width - padding, height - padding)

  # Set line style for the data curve
  draw.stroke('#506DFF')
  draw.stroke_linecap('round')
  draw.stroke_width(4)
  draw.stroke_linejoin('round')

  # Draw data: one segment per pair of neighbouring points
  last_index = data.size - 1
  baseline_y = height - padding
  data.each_with_index.each_cons(2) do |(value1, index1), (value2, index2)|
    next if value1.nil? || value2.nil?

    x1 = padding + index1 * plot_area_width / last_index
    y1 = baseline_y - (value1 - min_value) * plot_area_height / value_range
    x2 = padding + index2 * plot_area_width / last_index
    y2 = baseline_y - (value2 - min_value) * plot_area_height / value_range

    draw.line(x1, y1, x2, y2)
  end

  # Draw labels on Y axis (largest value first, i.e. top to bottom)
  count_y = 10
  label_interval = value_range / count_y
  (0..count_y).each do |i|
    value = min_value + (count_y - i) * label_interval
    # Offset is relative to the vertical centre because of EastGravity.
    y = i * plot_area_height / count_y - plot_area_height / 2
    draw.annotate(canvas, 0, 0, width - padding + 10, y, sprintf('%.2f', value)) do |options|
      options.gravity = Magick::EastGravity
      options.pointsize = plot_area_height / 33.3
    end
  end

  # Draw labels on X axis
  label_slots = timestamps.size == 1 ? 1 : timestamps.size - 1
  label_pointsize = [10, plot_area_width / 144.4].max
  timestamps.each_with_index do |timestamp, index|
    # Offset is relative to the horizontal centre because of NorthGravity.
    x = index * plot_area_width / label_slots - plot_area_width / 2
    draw.annotate(canvas, 0, 0, x, height - padding / 2, timestamp.strftime('%Y-%m-%d')) do |options|
      options.gravity = Magick::NorthGravity
      options.pointsize = label_pointsize
    end
  end

  draw.draw(canvas)
  canvas.write(file_name)
end
165
+
166
# Polynomial least-squares trend of a series, fitted against the sample
# index (0, 1, 2, ...).
class TrendComponent
  # Upper bound on the polynomial degree tried during degree selection.
  MAX_DEGREE = 8
  # Evaluation points per sample interval produced by #accurate_fit.
  STEPS_PER_SAMPLE = 10

  # @param data [Array<Numeric>] observed values, one per sample
  def initialize(data)
    @data = data
  end

  # Trend evaluated at every sample index.
  #
  # @return [Array<Float>] one trend value per data point ([] for empty data)
  def fit
    return [] if @data.empty?

    polynomial_trend(sample_indices, fitted_coefficients)
  end

  # Trend evaluated on a grid ten times denser than the samples
  # (x = 0.0, 0.1, ..., size - 1), intended for drawing a smooth curve.
  #
  # @return [Array<Float>] `(size - 1) * 10 + 1` trend values ([] for empty data)
  def accurate_fit
    return [] if @data.empty?

    point_count = (@data.size - 1) * STEPS_PER_SAMPLE + 1
    dense_x = Array.new(point_count) { |step| step.to_f / STEPS_PER_SAMPLE }
    polynomial_trend(dense_x, fitted_coefficients)
  end

  private

  # Sample positions used as the x values of the regression.
  def sample_indices
    (0...@data.size).to_a
  end

  # Coefficients (constant term first) of the polynomial chosen for the data.
  def fitted_coefficients
    x = sample_indices
    # A degree-d polynomial has d + 1 coefficients, so d must stay below the
    # number of points; otherwise the normal equations are singular and
    # Matrix#inverse raises.
    max_degree = [MAX_DEGREE, x.size - 1].min
    degree = best_polynomial_degree(x, @data, max_degree)
    polynomial_coefficients(x, @data, degree)
  end

  # Root mean squared error between observed and predicted values.
  def rmse(y_true, y_pred)
    squared_errors = y_true.zip(y_pred).map { |observed, predicted| (observed - predicted)**2 }
    Math.sqrt(squared_errors.sum / y_true.size)
  end

  # Degree in 1..max_degree with the smallest RMSE on the fitted data
  # (0 when max_degree is below 1).
  # NOTE(review): the error is measured on the same points used for fitting,
  # which tends to favour the highest degree - consider a penalised criterion.
  def best_polynomial_degree(x, y, max_degree)
    best_degree = 0
    best_rmse = Float::INFINITY

    (1..max_degree).each do |degree|
      trend = polynomial_trend(x, polynomial_coefficients(x, y, degree))
      current_rmse = rmse(y, trend)
      next unless current_rmse < best_rmse

      best_rmse = current_rmse
      best_degree = degree
    end

    best_degree
  end

  # Least-squares coefficients via the normal equations
  # (X^T X)^-1 X^T y, where column j of X holds x**j.
  def polynomial_coefficients(x, y, degree)
    design = Matrix.build(x.size, degree + 1) { |row, col| x[row]**col }
    target = Matrix.column_vector(y)

    ((design.t * design).inverse * design.t * target).column(0).to_a
  end

  # Evaluates the polynomial (constant term first) at each x value.
  def polynomial_trend(x, coefficients)
    x.map do |x_val|
      coefficients.each_with_index.reduce(0.0) do |acc, (coeff, power)|
        acc + coeff * (x_val**power)
      end
    end
  end
end
257
+
258
# Seasonal component of a series: the mean value observed at each position
# within a repeating period.
class SeasonalComponent
  # @param data [Array<Numeric>] observed values
  # @param period [Integer] length of one season; -1 (default) detects it from
  #   the autocorrelation of the data
  def initialize(data, period = -1)
    @data = data
    @period = period == -1 ? detect_period : period
  end

  # Seasonal value for every sample.
  #
  # @return [Array<Float>] the mean of all samples sharing the same position
  #   in the period; all zeros when the period is 1 (no seasonality)
  def fit
    return @data.map { 0.0 } if @period == 1

    period_means = Array.new(@period) do |offset|
      # The last slice may be shorter than the period; compact drops its gaps.
      mean(@data.each_slice(@period).map { |slice| slice[offset] }.compact)
    end
    @data.each_index.map { |index| period_means[index % @period] }
  end

  private

  # Lag (between 1 and half the series length) with the highest autocorrelation.
  # Returns 1 - treated by #fit as "no seasonality" - when the series is too
  # short or constant, because no autocorrelation can be computed then.
  def detect_period
    max_lag = @data.size / 2
    return 1 if max_lag.zero?

    lowest, highest = @data.minmax
    # A constant series has zero variance, which would make every autocorrelation NaN.
    return 1 if lowest == highest

    autocorrelation = (1..max_lag).map { |lag| calculate_autocorrelation(lag) }
    autocorrelation.index(autocorrelation.max) + 1
  end

  # Autocorrelation of the data at the given lag.
  def calculate_autocorrelation(lag)
    center = mean(@data)
    n = @data.size

    numerator = (0...n - lag).map { |i| (@data[i] - center) * (@data[i + lag] - center) }.sum
    denominator = (0...n).map { |i| (@data[i] - center)**2 }.sum

    numerator / denominator
  end

  # Arithmetic mean of the values in the array.
  def mean(arr)
    arr.sum.to_f / arr.size
  end
end
299
+
300
+
301
# Arithmetic mean of the values in the array.
#
# @param arr [Array<Numeric>] values to average
# @return [Float] the mean (NaN for an empty array)
def mean(arr)
  total = arr.sum
  total.to_f / arr.length
end
305
+
306
# Sample variance (n - 1 denominator) of the values in the array.
#
# @param arr [Array<Numeric>] values to measure
# @return [Float] the variance; 0.0 when fewer than two values are given,
#   since a single point has no spread and n - 1 would be zero
def variance(arr)
  return 0.0 if arr.size < 2

  m = mean(arr)
  arr.sum { |v| (v - m)**2 } / (arr.size - 1)
end
311
+ end
8
312
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: TimeSeriesAnalyzer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - KuryataDanil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-03 00:00:00.000000000 Z
11
+ date: 2024-06-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec