TimeSeriesAnalyzer 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/TimeSeriesAnalyzer.rb +307 -3
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: afe5e55449bead7c4122c26277c7316ba1954904037c0e6bda307f4010243072
|
|
4
|
+
data.tar.gz: f019bf24421e06de735dc309393f3a2ebcc7ba7916171951b8e2d5a7176f401e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cf1832b2d79731f1b813e2978213068473a3c3f2991bff329d404a92e06837f97de0fc31355b1a0ecd96b16109e7e500df4df90944e091d1fc67a21782b3f672
|
|
7
|
+
data.tar.gz: 02acea803ec3c301ac9998c5e771a6b468493904a1a5b7a851dbfaca9795ec3a44e7f70689076f7652f721c345f3c2e2d30b9e03babde733ef66d813cd0e51e7
|
data/lib/TimeSeriesAnalyzer.rb
CHANGED
|
@@ -1,8 +1,312 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
-
|
|
2
|
+
require 'csv'
|
|
3
|
+
require 'rmagick'
|
|
4
|
+
require 'time'
|
|
5
|
+
require 'matrix'
|
|
3
6
|
require_relative "TimeSeriesAnalyzer/version"
|
|
4
7
|
|
|
5
8
|
module TimeSeriesAnalyzer
|
|
6
|
-
|
|
7
|
-
|
|
9
|
+
#Числовой ряд
|
|
10
|
+
class TimeSeries
|
|
11
|
+
attr_accessor :data, :timestamps
|
|
12
|
+
|
|
13
|
+
# Builds a series from parallel collections of values and timestamps.
#
# @param data [Array] observed values, stored as given
# @param timestamps [Array] timestamps, stored as given
# @param period [Integer] seasonal period; the default -1 makes
#   SeasonalComponent detect the period itself
def initialize(data, timestamps, period = -1)
  @data, @timestamps, @period = data, timestamps, period
end
|
|
18
|
+
|
|
19
|
+
# Reads a series from a CSV file that has a header row.
#
# Each row's 'timestamp' column is parsed with Time.parse and its 'value'
# column is converted with to_f.
#
# @param file_path [String] path of the CSV file
# @param period [Integer] seasonal period forwarded to the constructor
# @return [TimeSeries] series built from the file's rows, in file order
def self.load_from_csv(file_path, period = -1)
  parsed_rows = CSV.foreach(file_path, headers: true).map do |row|
    [Time.parse(row['timestamp']), row['value'].to_f]
  end
  # transpose of an empty list yields no columns, hence the [] fallbacks.
  timestamps, data = parsed_rows.transpose
  new(data || [], timestamps || [], period)
end
|
|
29
|
+
|
|
30
|
+
# Renders this series to an image file by delegating to draw_plot.
#
# @param file_name [String] path of the image file to write
# @param title [String] caption passed through to draw_plot
def plot(file_name = 'time_series.png', title = "")
  values = @data
  stamps = @timestamps
  draw_plot(values, stamps, title, file_name)
end
|
|
34
|
+
|
|
35
|
+
# Returns a new series holding the simple moving average of this one.
#
# The first window_size - 1 positions have no complete window and are
# filled with nil so the result stays aligned with the timestamps.
#
# @param window_size [Integer] number of consecutive values per average
# @return [TimeSeries] averaged values paired with the original timestamps
# @raise [ArgumentError] if window_size is smaller than 1
def moving_average(window_size)
  raise ArgumentError, 'window_size must be at least 1' if window_size < 1

  # to_f keeps Integer data from being truncated by integer division.
  averages = @data.each_cons(window_size).map { |window| window.sum.to_f / window_size }
  # Never pad beyond the data length, even when the window is larger than it.
  leading_gap = Array.new([window_size - 1, @data.size].min)
  TimeSeries.new(leading_gap + averages, @timestamps)
end
|
|
41
|
+
|
|
42
|
+
# Returns a new series holding the exponentially smoothed values.
#
# The first smoothed value is the first observation; every later one is
# alpha * current + (1 - alpha) * previous_smoothed.
#
# @param alpha [Numeric] smoothing factor applied to the current value
# @return [TimeSeries] smoothed values paired with the original timestamps
def exponential_smoothing(alpha)
  # An empty series must stay empty instead of becoming [nil].
  return TimeSeries.new([], @timestamps) if @data.empty?

  smoothed = [@data.first]
  @data.drop(1).each do |current|
    smoothed << alpha * current + (1 - alpha) * smoothed.last
  end
  TimeSeries.new(smoothed, @timestamps)
end
|
|
50
|
+
|
|
51
|
+
# Splits the series into trend, seasonal and residual parts.
#
# The residual is data - trend - seasonal, using the trend evaluated at the
# original positions. The returned :trend series instead carries the values
# from TrendComponent#accurate_fit, paired with the original timestamps.
#
# @return [Hash{Symbol => TimeSeries}] keys :trend, :seasonal, :residual
def decompose
  trend_model = TrendComponent.new(@data)
  trend_values = trend_model.fit
  seasonal_values = SeasonalComponent.new(@data, @period).fit

  leftovers = @data.each_with_index.map do |value, index|
    value - trend_values[index] - seasonal_values[index]
  end

  fine_trend = TimeSeries.new(trend_model.accurate_fit, @timestamps)
  {
    trend: fine_trend,
    seasonal: TimeSeries.new(seasonal_values, @timestamps),
    residual: TimeSeries.new(leftovers, @timestamps)
  }
end
|
|
62
|
+
|
|
63
|
+
# Finds values lying more than three standard deviations from the mean.
#
# @return [Array<Hash>] one { timestamp:, value: } hash per anomalous
#   value, in series order
def detect_anomalies
  limit = 3 * Math.sqrt(variance(@data))
  center = mean(@data)

  @data.each_with_index.each_with_object([]) do |(value, index), found|
    next unless (value - center).abs > limit

    found << { timestamp: @timestamps[index], value: value }
  end
end
|
|
71
|
+
|
|
72
|
+
# Forecasts the next values of the series.
#
# Only the residuals are extrapolated (via forecast_arima); every forecast
# step reuses the last fitted trend value and the last seasonal value.
#
# @param steps [Integer] number of future values to produce
# @return [Array<Numeric>] forecast values, nearest step first
def forecast(steps)
  trend_values = TrendComponent.new(@data).fit
  seasonal_values = SeasonalComponent.new(@data, @period).fit

  fitted = trend_values.zip(seasonal_values).map { |t, s| t + s.to_f }
  leftovers = @data.zip(fitted).map { |observed, model| observed - model }
  future_leftovers = forecast_arima(leftovers, steps)

  final_trend = trend_values.last
  final_seasonal = seasonal_values.last
  (0...steps).map do |offset|
    final_trend + final_seasonal + future_leftovers[offset]
  end
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
# Extrapolates a sequence geometrically.
#
# The growth factor is the mean ratio between consecutive values; step k of
# the forecast is last_value * factor**k.
#
# @param data [Array<Numeric>] values to extrapolate (the residuals)
# @param steps [Integer] number of future values to produce
# @return [Array<Numeric>] extrapolated values, nearest step first
def forecast_arima(data, steps)
  # Nothing to extrapolate from: predict zero for every step.
  return Array.new([steps, 0].max, 0.0) if data.empty?

  # Pairs whose predecessor is zero have no defined ratio; including them
  # would turn the factor (and the whole forecast) into Infinity or NaN.
  ratios = data.each_cons(2)
               .reject { |previous, _| previous.zero? }
               .map { |previous, current| current / previous.to_f }
  phi = ratios.empty? ? 0.0 : mean(ratios)

  last_value = data.last
  (1..steps).map { |step| last_value * (phi**step) }
end
|
|
96
|
+
|
|
97
|
+
# Renders a line chart of the values into an image file using RMagick.
#
# @param data [Array<Numeric, nil>] values to plot; a segment is skipped
#   when either of its end points is nil
# @param timestamps [Array<#strftime>] x-axis labels, printed as %Y-%m-%d;
#   their count also determines the image width
# @param title [String] caption drawn at the top of the image
# @param file_name [String] path handed to Magick::Image#write
def draw_plot(data, timestamps, title, file_name)
  width = 200 + timestamps.size * 50
  height = 400
  padding = 50
  plot_area_width = width - 2 * padding
  plot_area_height = height - 2 * padding

  min_value = data.compact.min.to_f
  max_value = data.compact.max.to_f
  value_range = max_value - min_value
  # A flat (or empty) series has zero range; fall back to 1.0 so the
  # vertical scaling below never divides by zero.
  value_range = 1.0 if value_range.zero?

  # The option must be set on the yielded object; a bare
  # `background_color = ...` would only create a block-local variable.
  canvas = Magick::Image.new(width, height) { |options| options.background_color = 'white' }
  draw = Magick::Draw.new

  # Draw title
  draw.annotate(canvas, 0, 0, 0, padding / 2, title) do |options|
    options.font_weight = Magick::BoldWeight
    options.pointsize = 20
    options.gravity = Magick::NorthGravity
  end

  # Draw axes
  draw.line(padding, padding, padding, height - padding)
  draw.line(padding, height - padding, width - padding, height - padding)

  # Stroke style used for the data line
  draw.stroke('#506DFF')
  draw.stroke_linecap('round')
  draw.stroke_width(4)
  draw.stroke_linejoin('round')

  # Draw data as segments between consecutive points
  data.each_with_index.each_cons(2) do |(value1, index1), (value2, index2)|
    next if value1.nil? || value2.nil?

    x1 = padding + index1 * plot_area_width / (data.size - 1)
    y1 = height - padding - (value1 - min_value) * plot_area_height / value_range
    x2 = padding + index2 * plot_area_width / (data.size - 1)
    y2 = height - padding - (value2 - min_value) * plot_area_height / value_range

    draw.line(x1, y1, x2, y2)
  end

  # Draw labels on Y axis, from the largest value down to the smallest
  count_y = 10
  label_interval = value_range / count_y
  (0..count_y).each do |i|
    value = (min_value + (count_y - i) * label_interval)
    y = i * plot_area_height / count_y - plot_area_height / 2
    draw.annotate(canvas, 0, 0, width - padding + 10, y, sprintf('%.2f', value)) do |options|
      options.gravity = Magick::EastGravity
      options.pointsize = plot_area_height / 33.3
    end
  end

  # Draw labels on X axis, one per timestamp
  timestamps.each_with_index do |timestamp, index|
    x = index * plot_area_width / (timestamps.size == 1 ? 1 : timestamps.size - 1) - plot_area_width / 2
    draw.annotate(canvas, 0, 0, x, height - padding / 2, timestamp.strftime('%Y-%m-%d')) do |options|
      options.gravity = Magick::NorthGravity
      options.pointsize = 10 > plot_area_width / 144.4 ? 10 : plot_area_width / 144.4
    end
  end

  draw.draw(canvas)
  canvas.write(file_name)
end
|
|
165
|
+
|
|
166
|
+
# Fits a least-squares polynomial trend to a sequence of values, using the
# positions 0, 1, 2, ... as x coordinates.
class TrendComponent
  # Highest polynomial degree tried when searching for the best fit.
  MAX_DEGREE = 8

  # @param data [Array<Numeric>] observed values, one per position
  def initialize(data)
    @data = data
  end

  # Evaluates the fitted polynomial at every original position.
  #
  # @return [Array<Float>] one trend value per element of the data
  def fit
    return [] if @data.empty?

    polynomial_trend(positions, best_coefficients)
  end

  # Evaluates the fitted polynomial on a grid ten times finer than the
  # original positions (0.0, 0.1, 0.2, ... up to the last position).
  #
  # @return [Array<Float>] data.size * 10 - 9 trend values
  def accurate_fit
    return [] if @data.empty?

    fine_positions = (0...@data.size * 10 - 9).map { |step| step.to_f / 10 }
    polynomial_trend(fine_positions, best_coefficients)
  end

  private

  # Integer x coordinates of the observations.
  def positions
    (0...@data.size).to_a
  end

  # Coefficients of the polynomial whose degree gives the lowest RMSE.
  def best_coefficients
    x = positions
    # A degree-d polynomial needs at least d + 1 points; with fewer, the
    # normal-equation matrix is singular and cannot be inverted.
    max_degree = [MAX_DEGREE, x.size - 1].min
    degree = best_polynomial_degree(x, @data, max_degree)
    polynomial_coefficients(x, @data, degree)
  end

  # Root-mean-square error between observed and predicted values.
  def rmse(y_true, y_pred)
    squared_error = y_true.zip(y_pred).map { |actual, predicted| (actual - predicted)**2 }.sum
    Math.sqrt(squared_error / y_true.size)
  end

  # Degree in 1..max_degree with the lowest RMSE on the data itself
  # (0 when max_degree is below 1).
  def best_polynomial_degree(x, y, max_degree)
    best_degree = 0
    best_rmse = Float::INFINITY

    (1..max_degree).each do |degree|
      trend = polynomial_trend(x, polynomial_coefficients(x, y, degree))
      current_rmse = rmse(y, trend)
      next unless current_rmse < best_rmse

      best_rmse = current_rmse
      best_degree = degree
    end

    best_degree
  end

  # Solves the normal equations (X^T X)^-1 X^T y for the polynomial
  # coefficients, lowest power first.
  def polynomial_coefficients(x, y, degree)
    # Row i holds x[i]**0, x[i]**1, ..., x[i]**degree.
    design = Matrix.build(x.size, degree + 1) { |row, column| x[row]**column }
    targets = Matrix.column_vector(y)

    ((design.t * design).inverse * design.t * targets).column(0).to_a
  end

  # Evaluates the polynomial with the given coefficients at each x.
  def polynomial_trend(x, coefficients)
    x.map do |x_val|
      coefficients.each_with_index.reduce(0.0) do |total, (coeff, power)|
        total + coeff * (x_val**power)
      end
    end
  end
end
|
|
257
|
+
|
|
258
|
+
# Estimates a repeating pattern by averaging the values that share the same
# position within each period.
class SeasonalComponent
  # @param data [Array<Numeric>] observed values
  # @param period [Integer] season length; -1 (the default) means the
  #   period is detected from the data's autocorrelation
  def initialize(data, period = -1)
    @data = data
    @period = period == -1 ? detect_period : period
  end

  # Builds the seasonal value for every position in the data.
  #
  # @return [Array<Float>] per-position seasonal values; all 0.0 when the
  #   period is 1
  def fit
    return Array.new(@data.size, 0.0) if @period == 1

    period_means = Array.new(@period) do |offset|
      # The last slice may be shorter than the period, hence compact.
      mean(@data.each_slice(@period).map { |slice| slice[offset] }.compact)
    end
    @data.each_index.map { |index| period_means[index % @period] }
  end

  private

  # Lag (1..size / 2) with the highest autocorrelation; 1 when the series is
  # too short to compare any lag.
  def detect_period
    max_lag = @data.size / 2
    return 1 if max_lag < 1

    autocorrelation = (1..max_lag).map { |lag| calculate_autocorrelation(lag) }
    autocorrelation.index(autocorrelation.max) + 1
  end

  # Autocorrelation of the data with itself shifted by lag positions.
  def calculate_autocorrelation(lag)
    center = mean(@data)
    n = @data.size

    denominator = @data.map { |value| (value - center)**2 }.sum
    # A constant series has no variation; report 0.0 instead of NaN so that
    # period detection falls back to a period of 1.
    return 0.0 if denominator.zero?

    numerator = (0...n - lag).map { |i| (@data[i] - center) * (@data[i + lag] - center) }.sum
    numerator / denominator
  end

  # Arithmetic mean of the values in arr.
  def mean(arr)
    arr.sum.to_f / arr.size
  end
end
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Arithmetic mean of the values in arr.
#
# @param arr [Array<Numeric>] values to average
# @return [Float] sum of the values divided by their count
def mean(arr)
  total = arr.sum
  total.to_f / arr.length
end
|
|
305
|
+
|
|
306
|
+
# Sample variance of the values in arr (divides by arr.size - 1).
#
# @param arr [Array<Numeric>] values to measure
# @return [Numeric] sum of squared deviations from the mean over size - 1
def variance(arr)
  center = mean(arr)
  squared_deviations = arr.sum { |value| (value - center)**2 }
  squared_deviations / (arr.size - 1)
end
|
|
311
|
+
end
|
|
8
312
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: TimeSeriesAnalyzer
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0
|
|
4
|
+
version: 0.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- KuryataDanil
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2024-06-
|
|
11
|
+
date: 2024-06-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|