yfinrb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.rst +0 -0
- data/CODE_OF_CONDUCT.md +15 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +144 -0
- data/Rakefile +8 -0
- data/lib/yfinrb/analysis.rb +68 -0
- data/lib/yfinrb/financials.rb +302 -0
- data/lib/yfinrb/fundamentals.rb +54 -0
- data/lib/yfinrb/holders.rb +260 -0
- data/lib/yfinrb/multi.rb +238 -0
- data/lib/yfinrb/price_history.rb +2037 -0
- data/lib/yfinrb/quote.rb +342 -0
- data/lib/yfinrb/ticker.rb +381 -0
- data/lib/yfinrb/tickers.rb +52 -0
- data/lib/yfinrb/utils.rb +359 -0
- data/lib/yfinrb/version.rb +5 -0
- data/lib/yfinrb/yf_connection.rb +300 -0
- data/lib/yfinrb/yfinance_exception.rb +16 -0
- data/lib/yfinrb.rb +17 -0
- data/sig/yfinrb.rbs +4 -0
- metadata +124 -0
@@ -0,0 +1,2037 @@
|
|
1
|
+
|
2
|
+
class Yfin
|
3
|
+
module PriceHistory
|
4
|
+
extend ActiveSupport::Concern
|
5
|
+
include ActionView::Helpers::NumberHelper
|
6
|
+
|
7
|
+
PRICE_COLNAMES = ['Open', 'High', 'Low', 'Close', 'Adj Close']
|
8
|
+
BASE_URL = 'https://query2.finance.yahoo.com'
|
9
|
+
|
10
|
+
# attr_accessor :ticker
|
11
|
+
|
12
|
+
    # Ruby's built-in Module#included hook: when PriceHistory is included
    # into a class, wrap that class's #initialize so that
    # #initialize_price_history runs automatically after the original
    # constructor — the host class never has to call it explicitly.
    def self.included(base) # built-in Ruby hook for modules
      base.class_eval do
        # Capture the host's original constructor as an UnboundMethod ...
        original_method = instance_method(:initialize)
        define_method(:initialize) do |*args, &block|
          # ... run it first with the original arguments ...
          original_method.bind(self).call(*args, &block)
          # ... then initialize this module's price-history state.
          initialize_price_history # (your module code here)
        end
      end
    end
|
21
|
+
|
22
|
+
    # Initializes the per-instance price-history state:
    #   @history                    - cached processed price frame / raw chart result (nil until #history runs)
    #   @history_metadata           - "meta" hash from Yahoo's chart response
    #   @history_metadata_formatted - whether @history_metadata has been formatted yet
    #   @reconstruct_start_interval - interval currently being reconstructed, if any
    # Also sets up the Yahoo Finance HTTP connection via yfconn_initialize.
    def initialize_price_history #(ticker)
      # ticker = ticker

      @history = nil
      @history_metadata = nil
      @history_metadata_formatted = false
      @reconstruct_start_interval = nil

      yfconn_initialize
    end
|
32
|
+
|
33
|
+
    # Fetches and post-processes OHLCV price history from Yahoo's v8 chart API.
    #
    # @param period [String] named range (e.g. "1mo", "1y", "max"); used when no explicit start is given
    # @param interval [String] bar size ("1m", "1h", "1d", "1wk", ...)
    # @param start [String, Time, nil] explicit window start (overrides period)
    # @param fin [String, Time, nil] explicit window end ("fin" because `end` is a Ruby keyword)
    # @param prepost [Boolean] include pre/post-market bars
    # @param actions [Boolean] keep Dividends / Stock Splits / Capital Gains columns
    # @param auto_adjust, back_adjust, repair, keepna, rounding — accepted for
    #   API compatibility; their processing is currently commented out below.
    # @param raise_errors [Boolean] raise on failure instead of logging and returning an empty frame
    # @param returns [Boolean] append a close-to-close "Returns" column
    # @return [Polars::DataFrame] the price table (Utils.empty_df on failure)
    def history(period: "1mo", interval: "1d", start: nil, fin: nil, prepost: false,
                actions: true, auto_adjust: true, back_adjust: false, repair: false, keepna: false,
                rounding: false, raise_errors: false, returns: false)
      logger = Rails.logger # Yfin.get_yf_logger
      start_user = start
      end_user = fin || DateTime.now

      # Resolve user inputs into Yahoo query params (period1/period2/interval/...).
      params = _preprocess_params(start, fin, interval, period, prepost, raise_errors)

      # Human-readable copy of the epoch params (debugging aid only).
      params_pretty = params.dup

      ["period1", "period2"].each do |k|
        params_pretty[k] = DateTime.strptime(params[k].to_s, '%s').new_offset(0).to_time.strftime('%Y-%m-%d %H:%M:%S %z') if params_pretty.key?(k)
      end

      data = _get_data(ticker, params, fin, raise_errors)

      # Cache the raw chart result before parsing; _get_stock_data reads the
      # dividend/split/capital-gain events back out of @history.
      @history_metadata = data["chart"]["result"][0]["meta"] rescue {}
      @history = data["chart"]["result"][0]

      intraday = params["interval"][-1] == "m" || params["interval"][-1] == "h"

      # NOTE(review): params['intraday'] is never set by _preprocess_params, so
      # nil is passed as the intraday flag here; the local `intraday` above
      # looks like the intended argument — confirm. The message is overwritten
      # by _did_it_fail's result just below in any case.
      err_msg = _get_err_msg(params['period1'], period, start, params['period2'], fin, params['interval'], params['intraday'])
      # err_msg = _get_err_msg(start, period, start_user, fin, end_user, interval, intraday)

      f = _did_it_fail(data, period, @history_metadata)
      failed = f[:fail]
      err_msg = f[:msg]

      if failed
        if raise_errors
          raise Exception.new("#{ticker}: #{err_msg}")
        else
          logger.error("#{ticker}: #{err_msg}")
        end
        if @reconstruct_start_interval && @reconstruct_start_interval == interval
          @reconstruct_start_interval = nil
        end
        return Utils.empty_df
      end

      # Extract Timestamps + OHLCV columns from the raw chart payload.
      quotes = _parse_quotes(data["chart"]["result"][0], interval)

      quote_type = @history_metadata["instrumentType"]
      # NOTE(review): expect_capital_gains is computed but never used below.
      expect_capital_gains = quote_type == 'MUTUALFUND' || quote_type == 'ETF'
      tz_exchange = @history_metadata["exchangeTimezoneName"]

      # Normalize timestamps to the exchange's timezone and repair known Yahoo
      # quirks (DST shifts, the live bar being returned separately).
      quotes = _set_df_tz(quotes, params["interval"], tz_exchange)
      quotes = _fix_yahoo_dst_issue(quotes, params["interval"])
      quotes = _fix_yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)

      intraday = params["interval"][-1] == "m" || params["interval"][-1] == "h"

      # Drop pre/post-market bars Yahoo returned even though unrequested.
      if !prepost && intraday && @history_metadata.key?("tradingPeriods")
        tps = @history_metadata["tradingPeriods"]
        if !tps.is_a?(Polars::DataFrame)
          @history_metadata = _format_history_metadata(@history_metadata, tradingPeriodsOnly: true)
          tps = @history_metadata["tradingPeriods"]
        end
        quotes = _fix_yahoo_returning_prepost_unrequested(quotes, params["interval"], tps)
      end

      # Merge dividend/split/capital-gain events into the price table.
      df = _get_stock_data(quotes, params, fin)

      if repair
        # Repair pipeline not yet ported:
        # df = _fix_unit_mixups(df, interval, tz_exchange, prepost)
        # df = _fix_bad_stock_split(df, interval, tz_exchange)
        # df = _fix_zeroes(df, interval, tz_exchange, prepost)
        # df = _fix_missing_div_adjust(df, interval, tz_exchange)
        # df = df.sort_index
      end

      if auto_adjust
        # df = _auto_adjust(df)   # not yet ported
      elsif back_adjust
        # df = _back_adjust(df)   # not yet ported
      end

      if rounding
        # df = df.round(data["chart"]["result"][0]["meta"]["priceHint"])   # not yet ported
      end

      df["Volume"] = df["Volume"].fill_nan(0) #.astype(Integer)

      # For daily (and coarser) bars, truncate timestamps to calendar dates.
      unless intraday
        s = Polars::Series.new(df['Timestamps']).to_a
        df['Timestamps'] = (0..s.length-1).to_a.map{|i| Time.at(s[i]).to_date }
      end

      # Cache the fully-processed frame; readers such as #dividends and
      # #splits filter this copy.
      @history = df.dup

      df = df.drop(["Dividends", "Stock Splits", "Capital Gains"], errors: 'ignore') unless actions

      if !keepna
        # NaN/zero-row dropping not yet ported:
        # price_colnames = ['Open', 'High', 'Low', 'Close', 'Adj Close']
        # data_colnames = price_colnames + ['Volume'] + ['Dividends', 'Stock Splits', 'Capital Gains']
        # data_colnames = data_colnames.select { |c| df.columns.include?(c) }
        # mask_nan_or_zero = (df[data_colnames].isnan? | (df[data_colnames] == 0)).all(axis: 1)
        # df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
      end

      @reconstruct_start_interval = nil if @reconstruct_start_interval && @reconstruct_start_interval == interval

      # Optional simple close-to-close returns column (NaN for the first row).
      if returns && df.shape.first > 1
        df['Returns'] = [Float::NAN] + (1..df.length-1).to_a.map {|i| (df['Close'][i]-df['Close'][i-1])/df['Close'][i-1] }
      end

      return df
    end
|
188
|
+
|
189
|
+
|
190
|
+
def history_metadata
|
191
|
+
history(period: "1wk", interval: "1h", prepost: true) if @history_metadata.nil?
|
192
|
+
|
193
|
+
if !@history_metadata_formatted
|
194
|
+
@history_metadata = _format_history_metadata(@history_metadata)
|
195
|
+
@history_metadata_formatted = true
|
196
|
+
end
|
197
|
+
return @history_metadata
|
198
|
+
end
|
199
|
+
|
200
|
+
def exchange
|
201
|
+
return @exchange ||= _get_exchange_metadata["exchangeName"]
|
202
|
+
end
|
203
|
+
|
204
|
+
def timezone
|
205
|
+
return @timezone ||= _get_exchange_metadata["exchangeTimezoneName"]
|
206
|
+
end
|
207
|
+
|
208
|
+
def dividends
|
209
|
+
history(period: "max") if @history.nil?
|
210
|
+
|
211
|
+
if !@history.nil? # && @history['events'].keys.include?("dividends")
|
212
|
+
df = @history.dup.drop('Open','High','Low','Close','Adj Close', 'Volume','Stock Splits','Capital Gains')
|
213
|
+
return df.filter(Polars.col('Dividends')>0.0)
|
214
|
+
# divi = []
|
215
|
+
# @history['events']["dividends"].each_pair {|k,v| divi << { Timestamps: Time.at(k.to_i).utc.to_date, Value: v['amount']} }
|
216
|
+
# return Polars::DataFrame.new( divi )
|
217
|
+
end
|
218
|
+
return Polars::Series.new
|
219
|
+
end
|
220
|
+
|
221
|
+
def capital_gains
|
222
|
+
history(period: "max") if @history.nil?
|
223
|
+
|
224
|
+
if !@history.nil? # && @history['events'].keys.include?("capital gains")
|
225
|
+
# caga = []
|
226
|
+
# @history['events']['capital gains'].each_pair {|k,v| caga << { Timestamps: Time.at(k).utc.to_date, Value: v['amount']} }
|
227
|
+
# capital_gains = @history["Capital Gains"]
|
228
|
+
# return capital_gains[capital_gains != 0]
|
229
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @history = #{@history.inspect}" }
|
230
|
+
df = @history.dup.drop('Open','High','Low','Close','Adj Close', 'Volume','Stock Splits', 'Dividends')
|
231
|
+
return df.filter(Polars.col('Capital Gains')>0.0)
|
232
|
+
end
|
233
|
+
return Polars::Series.new
|
234
|
+
end
|
235
|
+
|
236
|
+
def splits
|
237
|
+
history(period: "max") if @history.nil?
|
238
|
+
|
239
|
+
if !@history.nil? #&& @history['events'].keys.include?("stock splits") # @history.columns.include?("Stock Splits")
|
240
|
+
# stspl = []
|
241
|
+
# @history['events']['stock splits'].each_pair {|k,v| stspl << { Timestamps: Time.at(k.to_i).utc.to_date, Ratio: v['numerator'].to_f/v['denominator'].to_f } }
|
242
|
+
|
243
|
+
# splits = @history["Stock Splits"]
|
244
|
+
# return splits[splits != 0]
|
245
|
+
df = @history.dup.drop('Open','High','Low','Close','Adj Close', 'Volume','Capital Gains','Dividends')
|
246
|
+
return df.filter(Polars.col('Stock Splits')>0.0) #Polars::DataFrame.new(stspl)
|
247
|
+
end
|
248
|
+
return Polars::Series.new
|
249
|
+
end
|
250
|
+
|
251
|
+
def actions
|
252
|
+
history(period: "max") if @history.nil?
|
253
|
+
|
254
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @history = #{@history.inspect}" }
|
255
|
+
if !@history.nil? #&& @history.columns.include?("Dividends") && @history.columns.include?("Stock Splits")
|
256
|
+
# action_columns = ["Dividends", "Stock Splits"]
|
257
|
+
|
258
|
+
# action_columns.append("Capital Gains") if @history.columns.include?("Capital Gains")
|
259
|
+
|
260
|
+
# actions = @history[action_columns]
|
261
|
+
# return actions[actions != 0].dropna(how: 'all').fillna(0)
|
262
|
+
df = @history.dup.drop('Open','High','Low','Close','Adj Close', 'Volume')
|
263
|
+
return df.filter((Polars.col('Stock Splits')>0.0) | (Polars.col('Dividends')>0.0) | (Polars.col('Capital Gains')>0.0)) #Polars::DataFrame.new(stspl)
|
264
|
+
end
|
265
|
+
return Polars::Series.new
|
266
|
+
end
|
267
|
+
|
268
|
+
def currency
|
269
|
+
if @currency.nil?
|
270
|
+
|
271
|
+
md = history_metadata #(proxy=self.proxy)
|
272
|
+
@currency = md["currency"]
|
273
|
+
end
|
274
|
+
return @currency
|
275
|
+
end
|
276
|
+
|
277
|
+
def quote_type
|
278
|
+
if @quote_type.nil?
|
279
|
+
|
280
|
+
md = history_metadata #(proxy=self.proxy)
|
281
|
+
@quote_type = md["instrumentType"]
|
282
|
+
end
|
283
|
+
return @quote_type
|
284
|
+
end
|
285
|
+
|
286
|
+
def last_price
|
287
|
+
return @last_price unless @last_price.nil?
|
288
|
+
|
289
|
+
prices = _get_1y_prices
|
290
|
+
|
291
|
+
if prices.empty?
|
292
|
+
@md ||= _get_exchange_metadata
|
293
|
+
@last_price = md["regularMarketPrice"] if "regularMarketPrice".in?(@md)
|
294
|
+
|
295
|
+
else
|
296
|
+
@last_price = (prices["Close"][-1]).to_f
|
297
|
+
if @last_price.nan?
|
298
|
+
@md ||= _get_exchange_metadata
|
299
|
+
@last_price = md["regularMarketPrice"] if "regularMarketPrice".in?(@md)
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
303
|
+
return @last_price
|
304
|
+
end
|
305
|
+
|
306
|
+
    # Close of the second-to-last bar of the 1wk/1h pre/post price series,
    # memoized. Returns nil when fewer than two rows are available.
    def previous_close
      return @prev_close unless @prev_close.nil?

      prices = _get_1wk_1h_prepost_prices

      fail = prices.empty?
      # NOTE(review): calling .to_f on the grouped DataFrame looks suspect —
      # confirm this is the intended ruby-polars usage (groupby/agg returns a
      # DataFrame, not a scalar).
      prices = fail ? prices : prices[["Close"]].groupby('Timestamps', maintain_order: true).agg([Polars.col("Close")]).to_f

      # Very few symbols have previousClose despite having
      # no trading data, e.g. 'QCSTIX'.
      fail = prices.shape.first < 2
      @prev_close = fail ? nil : (prices["Close"][-2]).to_f

      # if fail
      #   # Fallback to original info[] if available.
      #   info # trigger fetch
      #   k = "previousClose"
      #   @prev_close = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
      # end
      return @prev_close
    end
|
327
|
+
|
328
|
+
    # Previous close for the regular (non-extended) session, memoized.
    #
    # NOTE(review): nothing in the live code ever assigns @reg_prev_close
    # (the assignment is commented out below), so after the price fetches
    # this always returns nil — confirm whether the commented block should
    # be restored.
    def regular_market_previous_close
      return @reg_prev_close unless @reg_prev_close.nil?

      prices = _get_1y_prices
      if prices.shape[0] == 1
        # Tiny % of tickers don't return daily history before last trading day,
        # so backup option is hourly history:
        prices = _get_1wk_1h_reg_prices
        # NOTE(review): Polars::DataFrame has no #index — this pandas-style
        # call would raise if this branch were hit; confirm the intended API.
        prices = prices[["Close"]].groupby(prices.index.date).last
      end

      # if prices.shape[0] < 2
      #   # Very few symbols have regularMarketPreviousClose despite no
      #   # no trading data. E.g. 'QCSTIX'.
      #   # So fallback to original info[] if available.
      #   info # trigger fetch
      #   k = "regularMarketPreviousClose"
      #   @reg_prev_close = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
      # else
      #   @reg_prev_close = float(prices["Close"].iloc[-2])
      # end

      return @reg_prev_close
    end
|
353
|
+
|
354
|
+
def open
|
355
|
+
return @open unless @open.nil?
|
356
|
+
|
357
|
+
prices = _get_1y_prices
|
358
|
+
if prices.empty
|
359
|
+
@open = nil
|
360
|
+
|
361
|
+
else
|
362
|
+
@open = (prices["Open"][-1])
|
363
|
+
@open = nil if @open.nan?
|
364
|
+
end
|
365
|
+
|
366
|
+
return @open
|
367
|
+
end
|
368
|
+
|
369
|
+
def day_high
|
370
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @day_high = #{@day_high}" }
|
371
|
+
return @day_high unless @day_high.nil?
|
372
|
+
|
373
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @day_high = #{@day_high}" }
|
374
|
+
prices = _get_1y_prices
|
375
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} prices = #{prices.inspect}" }
|
376
|
+
# if prices.empty?
|
377
|
+
# @day_high = nil
|
378
|
+
|
379
|
+
# else
|
380
|
+
@day_high = (prices["High"][-1])
|
381
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @day_high = #{@day_high}" }
|
382
|
+
@day_high = nil if @day_high.nan?
|
383
|
+
# end
|
384
|
+
|
385
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @day_high = #{@day_high}" }
|
386
|
+
return @day_high
|
387
|
+
end
|
388
|
+
|
389
|
+
def day_low
|
390
|
+
return @day_low unless @day_low.nil?
|
391
|
+
|
392
|
+
prices = _get_1y_prices
|
393
|
+
if prices.empty?
|
394
|
+
@day_low = nil
|
395
|
+
|
396
|
+
else
|
397
|
+
@day_low = (prices["Low"][-1])
|
398
|
+
@day_low = nil if @day_low.nan?
|
399
|
+
end
|
400
|
+
|
401
|
+
return @day_low
|
402
|
+
end
|
403
|
+
|
404
|
+
def last_volume
|
405
|
+
return @last_volume unless @last_volume.nil?
|
406
|
+
|
407
|
+
prices = _get_1y_prices
|
408
|
+
@last_volume = prices.empty? ? nil : (prices["Volume"][-1])
|
409
|
+
return @last_volume
|
410
|
+
end
|
411
|
+
|
412
|
+
def fifty_day_average
|
413
|
+
return @_50d_day_average unless @_50d_day_average.nil?
|
414
|
+
|
415
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
416
|
+
if prices.empty?
|
417
|
+
@_50d_day_average = nil
|
418
|
+
|
419
|
+
else
|
420
|
+
n = prices.shape.first
|
421
|
+
a = n-50
|
422
|
+
b = n
|
423
|
+
a = 0 if a < 0
|
424
|
+
|
425
|
+
@_50d_day_average = (prices["Close"][a..b].mean)
|
426
|
+
end
|
427
|
+
|
428
|
+
return @_50d_day_average
|
429
|
+
end
|
430
|
+
|
431
|
+
def two_hundred_day_average
|
432
|
+
return @_200d_day_average unless @_200d_day_average.nil?
|
433
|
+
|
434
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
435
|
+
if prices.empty?
|
436
|
+
@_200d_day_average = nil
|
437
|
+
|
438
|
+
else
|
439
|
+
n = prices.shape[0]
|
440
|
+
a = n-200
|
441
|
+
b = n
|
442
|
+
a = 0 if a < 0
|
443
|
+
|
444
|
+
@_200d_day_average = (prices["Close"][a..b].mean)
|
445
|
+
end
|
446
|
+
|
447
|
+
return @_200d_day_average
|
448
|
+
end
|
449
|
+
|
450
|
+
def ten_day_average_volume
|
451
|
+
return @_10d_avg_vol unless @_10d_avg_vol.nil?
|
452
|
+
|
453
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
454
|
+
if prices.empty?
|
455
|
+
@_10d_avg_vol = nil
|
456
|
+
|
457
|
+
else
|
458
|
+
n = prices.shape[0]
|
459
|
+
a = n-10
|
460
|
+
b = n
|
461
|
+
a = 0 if a < 0
|
462
|
+
|
463
|
+
@_10d_avg_vol = (prices["Volume"][a..b].mean)
|
464
|
+
|
465
|
+
end
|
466
|
+
return @_10d_avg_vol
|
467
|
+
end
|
468
|
+
|
469
|
+
def three_month_average_volume
|
470
|
+
return @_3mo_avg_vol unless @_3mo_avg_vol.nil?
|
471
|
+
|
472
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
473
|
+
if prices.empty
|
474
|
+
@_3mo_avg_vol = nil
|
475
|
+
|
476
|
+
else
|
477
|
+
dt1 = prices.index[-1]
|
478
|
+
dt0 = dt1 - 3.months + 1.day
|
479
|
+
@_3mo_avg_vol = (prices[dt0..dt1]["Volume"].mean)
|
480
|
+
end
|
481
|
+
|
482
|
+
return @_3mo_avg_vol
|
483
|
+
end
|
484
|
+
|
485
|
+
def year_high
|
486
|
+
if @year_high.nil?
|
487
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
488
|
+
prices = _get_1y_prices(fullDaysOnly=false) if prices.empty?
|
489
|
+
|
490
|
+
@year_high = (prices["High"].max)
|
491
|
+
end
|
492
|
+
return @year_high
|
493
|
+
end
|
494
|
+
|
495
|
+
def year_low
|
496
|
+
if @year_low.nil?
|
497
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
498
|
+
prices = _get_1y_prices(fullDaysOnly=false) if prices.empty?
|
499
|
+
|
500
|
+
@year_low = (prices["Low"].min)
|
501
|
+
end
|
502
|
+
return @year_low
|
503
|
+
end
|
504
|
+
|
505
|
+
def year_change
|
506
|
+
if @year_change.nil?
|
507
|
+
prices = _get_1y_prices(fullDaysOnly=true)
|
508
|
+
@year_change = (prices["Close"][-1] - prices["Close"][0]) / prices["Close"][0] if prices.shape[0] >= 2
|
509
|
+
end
|
510
|
+
return @year_change
|
511
|
+
end
|
512
|
+
|
513
|
+
def market_cap
|
514
|
+
return @mcap unless @mcap.nil?
|
515
|
+
|
516
|
+
begin
|
517
|
+
# shares = self.shares
|
518
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} shares = #{shares}" }
|
519
|
+
sh = shares
|
520
|
+
lp = last_price
|
521
|
+
@mcap = shares * last_price
|
522
|
+
# @mcap = 'US$' + number_to_human((shares * last_price), precision: 4)
|
523
|
+
rescue Exception => e
|
524
|
+
if "Cannot retrieve share count".in?(e.message) || "failed to decrypt Yahoo".in?(e.message)
|
525
|
+
shares = nil
|
526
|
+
else
|
527
|
+
raise
|
528
|
+
end
|
529
|
+
|
530
|
+
# if shares.nil?
|
531
|
+
# # Very few symbols have marketCap despite no share count.
|
532
|
+
# # E.g. 'BTC-USD'
|
533
|
+
# # So fallback to original info[] if available.
|
534
|
+
# info
|
535
|
+
# k = "marketCap"
|
536
|
+
# @mcap = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
|
537
|
+
|
538
|
+
# else
|
539
|
+
# @mcap = float(shares * self.last_price)
|
540
|
+
# end
|
541
|
+
|
542
|
+
return nil #@mcap
|
543
|
+
end
|
544
|
+
end
|
545
|
+
|
546
|
+
# price_history_methods = [:get_history_metadata, :get_dividends, :get_capital_gains, \
|
547
|
+
# :get_splits, :get_actions]
|
548
|
+
# price_history_methods.each { |meth| alias_method meth.to_s.gsub(/^get_/, '').to_sym, meth }
|
549
|
+
|
550
|
+
|
551
|
+
|
552
|
+
|
553
|
+
|
554
|
+
|
555
|
+
|
556
|
+
private
|
557
|
+
|
558
|
+
    # Builds the Yahoo chart API query params from user inputs.
    #
    # Two modes:
    #   * explicit start (or nil/"max" period): period1/period2 are computed
    #     from start/fin, defaulting start to one week back for 1m bars or
    #     ~99 years back otherwise;
    #   * named period ("1mo", "1y", ...): period1/period2 derived from now
    #     minus the period length.
    #
    # Returns Utils.empty_df (after logging or raising, per raise_errors)
    # when no exchange timezone is available — treated as "delisted".
    def _preprocess_params(start, fin, interval, period, prepost, raise_errors)
      if start || period.nil? || period.downcase == "max"
        # `tz` is the exchange timezone accessor; nil means user dates
        # cannot be localized, so bail out.
        if tz.nil?
          err_msg = "No timezone found, symbol may be delisted"
          # Yfin.shared_DFS[@ticker] = Utils.empty_df
          # Yfin.shared_ERRORS[@ticker] = err_msg
          if raise_errors
            raise Exception.new("#{@ticker}: #{err_msg}")
          else
            Rails.logger.error("#{@ticker}: #{err_msg}")
          end
          return Utils.empty_df
        end

        fin = fin.nil? ? Time.now.to_i : Utils.parse_user_dt(fin, tz)

        if start.nil?
          if interval == "1m"
            # 1-minute bars: Yahoo only serves about a week of data.
            start = (fin - 1.week).to_i
          else
            max_start_datetime = (DateTime.now - (99.years)).to_i
            start = max_start_datetime.to_i
          end
        else
          start = Utils.parse_user_dt(start, tz)
        end

        params = { "period1" => start, "period2" => fin }

      else
        period = period.downcase
        # params = { "range" => period }
        fin = DateTime.now.to_i
        start = (fin - Utils.interval_to_timedelta(period)).to_i
        params = { "period1" => start, "period2" => fin }
      end

      params["interval"] = interval.downcase
      params["includePrePost"] = prepost
      # NOTE(review): 30m requests are rewritten to 15m here — presumably
      # the caller resamples; confirm this is intentional.
      params["interval"] = "15m" if params["interval"] == "30m"
      params["events"] = "div,splits,capitalGains"

      return params
    end
|
608
|
+
|
609
|
+
def _get_data(ticker, params, fin, raise_errors)
|
610
|
+
url = "https://query2.finance.yahoo.com/v8/finance/chart/#{ticker}"
|
611
|
+
# url = "https://query1.finance.yahoo.com/v7/finance/download/#{ticker}" ... Deprecated
|
612
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} url = #{url}" }
|
613
|
+
data = nil
|
614
|
+
# get_fn = @data.method(:get)
|
615
|
+
|
616
|
+
if fin
|
617
|
+
end_dt = DateTime.strptime(fin.to_s, '%s') #.new_offset(0)
|
618
|
+
dt_now = DateTime.now #.new_offset(0)
|
619
|
+
data_delay = Rational(30, 24 * 60)
|
620
|
+
|
621
|
+
# get_fn = @data.method(:cache_get) if end_dt + data_delay <= dt_now
|
622
|
+
end
|
623
|
+
|
624
|
+
begin
|
625
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} url = #{url}, params = #{params.inspect}" }
|
626
|
+
data = get(url, nil, params).parsed_response
|
627
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} data = #{data.inspect}" }
|
628
|
+
|
629
|
+
raise RuntimeError.new(
|
630
|
+
"*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" +
|
631
|
+
"Our engineers are working quickly to resolve the issue. Thank you for your patience."
|
632
|
+
) if data.text.include?("Will be right back") || data.nil?
|
633
|
+
|
634
|
+
data = HashWithIndifferentAccess.new(data)
|
635
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} data = #{data.inspect}" }
|
636
|
+
rescue Exception
|
637
|
+
raise if raise_errors
|
638
|
+
end
|
639
|
+
|
640
|
+
data
|
641
|
+
end
|
642
|
+
|
643
|
+
def _get_err_msg(start, period, start_user, fin, end_user, interval, intraday)
|
644
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} start = #{start}, period = #{period}, start_user = #{start_user}, fin = #{fin}, end_user = #{end_user}, interval = #{interval}, intraday = #{intraday}"}
|
645
|
+
err_msg = "No price data found, symbol may be delisted"
|
646
|
+
|
647
|
+
if start.nil? || period.nil? || period.downcase == "max"
|
648
|
+
err_msg += " (#{interval} "
|
649
|
+
|
650
|
+
if start_user
|
651
|
+
err_msg += "#{start_user}"
|
652
|
+
elsif !intraday
|
653
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} start = #{start}" }
|
654
|
+
err_msg += "#{(Time.at(start).to_date).strftime('%Y-%m-%d')}"
|
655
|
+
else
|
656
|
+
err_msg += "#{Time.at(start).strftime('%Y-%m-%d %H:%M:%S %z')}"
|
657
|
+
end
|
658
|
+
|
659
|
+
err_msg += " -> "
|
660
|
+
|
661
|
+
if end_user
|
662
|
+
err_msg += "#{end_user})"
|
663
|
+
elsif !intraday
|
664
|
+
err_msg += "#{(Time.at(fin).to_date).strftime('%Y-%m-%d')})"
|
665
|
+
else
|
666
|
+
err_msg += "#{Time.at(fin).strftime('%Y-%m-%d %H:%M:%S %z')})"
|
667
|
+
end
|
668
|
+
else
|
669
|
+
err_msg += " (period=#{period})"
|
670
|
+
end
|
671
|
+
err_msg
|
672
|
+
end
|
673
|
+
|
674
|
+
def _did_it_fail(data, period, hist_metadata)
|
675
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} data = #{data.inspect}, period = #{period}, hist_metadata = #{hist_metadata.inspect}" }
|
676
|
+
failed = false
|
677
|
+
|
678
|
+
if data.nil? || !data.is_a?(Hash)
|
679
|
+
failed = true
|
680
|
+
elsif data.is_a?(Hash) && data.key?("status_code")
|
681
|
+
err_msg += "(yahoo status_code = #{data['status_code']})"
|
682
|
+
failed = true
|
683
|
+
elsif data["chart"].nil? || data["chart"]["error"]
|
684
|
+
err_msg = data["chart"]["error"]["description"]
|
685
|
+
failed = true
|
686
|
+
elsif data["chart"].nil? || data["chart"]["result"].nil? || !data["chart"]["result"]
|
687
|
+
failed = true
|
688
|
+
elsif period && !data["chart"]["result"][0].key?("timestamp") && !hist_metadata["validRanges"].include?(period)
|
689
|
+
err_msg = "Period '#{period}' is invalid, must be one of #{hist_metadata['validRanges']}"
|
690
|
+
failed = true
|
691
|
+
end
|
692
|
+
|
693
|
+
{fail: failed, msg: err_msg}
|
694
|
+
end
|
695
|
+
|
696
|
+
def _get_stock_data(quotes, params, fin = nil)
|
697
|
+
df = quotes #.sort_index
|
698
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
699
|
+
ts = Polars::Series.new(df['Timestamps']).to_a
|
700
|
+
|
701
|
+
if quotes.shape.first > 0
|
702
|
+
# startDt = quotes.index[0].floor('D')
|
703
|
+
startDt = quotes['Timestamps'].to_a.map(&:to_date).min
|
704
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} startDt = #{startDt.inspect}" }
|
705
|
+
endDt = fin.present? ? fin : Time.at(DateTime.now.tomorrow).to_i
|
706
|
+
|
707
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} @history[events][dividends] = #{@history['events']["dividends"].inspect}" }
|
708
|
+
# divi = {}
|
709
|
+
# @history['events']["dividends"].select{|k,v|
|
710
|
+
# Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
711
|
+
# divi['date'] = v['amount']} unless @history.try(:[],'events').try(:[],"dividends").nil?
|
712
|
+
d = [0.0] * df.length
|
713
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df.length = #{df.length}" }
|
714
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} ts = #{ts.inspect}" }
|
715
|
+
@history['events']["dividends"].select{|k,v|
|
716
|
+
Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
717
|
+
d[ts.index(Time.at(k.to_i).utc)] = v['amount'].to_f} unless @history.try(:[],'events').try(:[],"dividends").nil?
|
718
|
+
df['Dividends'] = Polars::Series.new(d)
|
719
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
720
|
+
|
721
|
+
# caga = {}
|
722
|
+
# @history['events']["capital gains"].select{|k,v|
|
723
|
+
# Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
724
|
+
# caga['date'] = v['amount']} unless @history.try(:[],'events').try(:[],"capital gains").nil?
|
725
|
+
# capital_gains = capital_gains.loc[startDt:] if capital_gains.shape.first > 0
|
726
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} caga = #{caga.inspect}" }
|
727
|
+
d = [0.0] * df.length
|
728
|
+
@history['events']["capital gains"].select{|k,v|
|
729
|
+
Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
730
|
+
d[ts.index(Time.at(k.to_i).utc)] = v['amount'].to_f} unless @history.try(:[],'events').try(:[],"capital gains").nil?
|
731
|
+
df['Capital Gains'] = Polars::Series.new(d)
|
732
|
+
|
733
|
+
# splits = splits.loc[startDt:] if splits.shape[0] > 0
|
734
|
+
# stspl = {}
|
735
|
+
# @history['events']['stock splits'].select{|k,v|
|
736
|
+
# Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
737
|
+
# stspl['date'] = v['numerator'].to_f/v['denominator'].to_f} unless @history.try(:[],'events').try(:[],"stock splits").nil?
|
738
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} stspl = #{stspl.inspect}" }
|
739
|
+
d = [0.0] * df.length
|
740
|
+
@history['events']["capital gains"].select{|k,v|
|
741
|
+
Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each{|k,v|
|
742
|
+
d[ts.index(Time.at(k.to_i).utc)] = v['numerator'].to_f/v['denominator'].to_f} unless @history.try(:[],'events').try(:[],"capital gains").nil?
|
743
|
+
df['Stock Splits'] = Polars::Series.new(d)
|
744
|
+
end
|
745
|
+
|
746
|
+
# intraday = params["interval"][-1] == "m" || params["interval"][-1] == "h"
|
747
|
+
|
748
|
+
# if !intraday
|
749
|
+
# quotes.index = quotes.index.map { |i| DateTime.strptime(i.to_s, '%s').new_offset(tz).to_time }
|
750
|
+
|
751
|
+
# dividends.index = \
|
752
|
+
# dividends.index.map { |i| DateTime.strptime(i.to_s, '%s').new_offset(tz).to_time } if dividends.shape[0] > 0
|
753
|
+
|
754
|
+
# splits.index = \
|
755
|
+
# splits.index.map { |i| DateTime.strptime(i.to_s, '%s').new_offset(tz).to_time } if splits.shape[0] > 0
|
756
|
+
|
757
|
+
# end
|
758
|
+
|
759
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} quotes = #{quotes.inspect}" }
|
760
|
+
# df = quotes
|
761
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
762
|
+
|
763
|
+
# df = _safe_merge_dfs(df, dividends, interval) if dividends.shape[0] > 0
|
764
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
765
|
+
|
766
|
+
|
767
|
+
# if df.columns.include?("Dividends")
|
768
|
+
# df.loc[df["Dividends"].isna?, "Dividends"] = 0
|
769
|
+
# else
|
770
|
+
# df["Dividends"] = 0.0
|
771
|
+
# end
|
772
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
773
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.filter(Polars.col("Dividends") > 0.0)}" }
|
774
|
+
|
775
|
+
# df = _safe_merge_dfs(df, splits, interval) if splits.shape[0] > 0
|
776
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
777
|
+
|
778
|
+
|
779
|
+
# if df.columns.include?("Stock Splits")
|
780
|
+
# df.loc[df["Stock Splits"].isna?, "Stock Splits"] = 0
|
781
|
+
# else
|
782
|
+
# df["Stock Splits"] = 0.0
|
783
|
+
# end
|
784
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
785
|
+
|
786
|
+
# if expect_capital_gains
|
787
|
+
|
788
|
+
# df = _safe_merge_dfs(df, capital_gains, interval) if capital_gains.shape[0] > 0
|
789
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
790
|
+
|
791
|
+
# if df.columns.include?("Capital Gains")
|
792
|
+
# df.loc[df["Capital Gains"].isna?, "Capital Gains"] = 0
|
793
|
+
# else
|
794
|
+
# df["Capital Gains"] = 0.0
|
795
|
+
# end
|
796
|
+
# end
|
797
|
+
|
798
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
799
|
+
# df = df[~df.index.duplicated(keep: 'first')]
|
800
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} df = #{df.inspect}" }
|
801
|
+
df
|
802
|
+
end
|
803
|
+
|
804
|
+
# Adjusts OHLC prices for dividends/splits using the "Adj Close" column.
#
# Multiplies Open/High/Low by the per-row ratio (Adj Close / Close), then
# replaces the raw price columns with the adjusted ones so the returned
# frame exposes adjusted prices under the standard column names.
#
# @param data [Polars::DataFrame] frame with "Open", "High", "Low",
#   "Close" and "Adj Close" columns
# @return [Polars::DataFrame] new frame with adjusted "Open"/"High"/
#   "Low"/"Close" columns (original "Close" is dropped)
def _auto_adjust(data)
  df = data.dup
  # Per-row adjustment factor; kept as a Series so the multiplications
  # below stay element-wise (the old `.to_a` produced a plain Array,
  # which Polars Series arithmetic does not accept).
  ratio = df["Adj Close"] / df["Close"]
  df["Adj Open"] = df["Open"] * ratio
  df["Adj High"] = df["High"] * ratio
  df["Adj Low"] = df["Low"] * ratio

  # polars-ruby `drop`/`rename` are non-mutating and take no
  # pandas-style `axis:`/`inplace:`/`columns:` keywords; rename keys
  # must be strings to match the string column names.
  df = df.drop(["Open", "High", "Low", "Close"])
  df = df.rename({
    "Adj Open" => "Open", "Adj High" => "High",
    "Adj Low" => "Low", "Adj Close" => "Close"
  })

  df
end
|
823
|
+
|
824
|
+
# Back-adjusts OHL prices to the dividend-adjusted scale while keeping
# the raw "Close" column as the reference price.
#
# Like _auto_adjust/1, but drops "Adj Close" instead of "Close": the
# returned frame has adjusted Open/High/Low alongside the unadjusted
# Close.
#
# @param data [Polars::DataFrame] frame with "Open", "High", "Low",
#   "Close" and "Adj Close" columns
# @return [Polars::DataFrame] new frame with back-adjusted columns
def _back_adjust(data)
  df = data.dup
  # Series (not Array) so the products below are element-wise.
  ratio = df["Adj Close"] / df["Close"]
  df["Adj Open"] = df["Open"] * ratio
  df["Adj High"] = df["High"] * ratio
  df["Adj Low"] = df["Low"] * ratio

  # polars-ruby API: non-mutating drop/rename, string keys, no
  # pandas-style `axis:`/`inplace:`/`columns:` keywords.
  df = df.drop(["Open", "High", "Low", "Adj Close"])
  df = df.rename({
    "Adj Open" => "Open", "Adj High" => "High",
    "Adj Low" => "Low"
  })

  df
end
|
843
|
+
|
844
|
+
# Timezone localization hook for a history frame. Currently a no-op:
# the frame is returned unchanged. The pandas-style tz_localize /
# tz_convert calls from the original port are parked below until an
# equivalent exists for the frame type in use.
#
# @param df [Object] price-history frame
# @param interval [String] bar interval (unused)
# @param tz [String] exchange timezone name (unused)
# @return [Object] `df`, untouched
def _set_df_tz(df, interval, tz)
  # df.index = df.index.tz_localize("UTC") if df.index.tz.nil?
  # df.index = df.index.tz_convert(tz)
  df
end
|
851
|
+
|
852
|
+
# Corrects Yahoo's daylight-saving glitch where daily/weekly bars are
# timestamped 22:00/23:00 of the previous day. Currently a no-op; the
# index-shifting logic from the original port is parked below.
#
# @param df [Object] price-history frame
# @param interval [String] bar interval (unused while disabled)
# @return [Object] `df`, untouched
def _fix_yahoo_dst_issue(df, interval)
  # if interval.in?(["1d", "1w", "1wk"])
  #   f_pre_midnight = (df.index.minute == 0) & (df.index.hour.in?([22, 23]))
  #   dst_error_hours = [0] * df.shape[0]
  #   dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
  #   df.index += dst_error_hours.map { |h| ActiveSupport::Duration.new(hours: h) }
  # end
  df
end
|
861
|
+
|
862
|
+
# Merges the live (in-progress) bar Yahoo sometimes returns as a
# separate trailing row into the preceding completed bar.
#
# Currently effectively disabled: the only live statement reads the row
# count (so `quotes` must still respond to #shape) and the frame is
# returned unchanged. The full merge algorithm — comparing the last two
# timestamps per interval and folding OHLCV of the live row into the
# prior row — exists in yfinance under the same name and is parked here
# pending a port to the Ruby DataFrame API.
#
# @param quotes [Object] frame responding to #shape
# @param interval [String] bar interval (unused while disabled)
# @param tz_exchange [String] exchange timezone (unused while disabled)
# @return [Object] `quotes`, untouched
def _fix_yahoo_returning_live_separate(quotes, interval, tz_exchange)
  _row_count = quotes.shape[0] # retained from original; merge logic parked
  quotes
end
|
919
|
+
|
920
|
+
# Drops pre-/post-market rows that Yahoo sometimes returns even though
# extended hours were not requested, by joining each quote row against
# its day's trading period and discarding rows outside [start, end).
#
# @param quotes [DataFrame] intraday quote rows (timestamp-indexed)
# @param interval [String] bar interval (currently unused in the body)
# @param tradingPeriods [DataFrame] per-day session bounds with "start"
#   and "end" columns
# @return [DataFrame] quotes restricted to regular-session rows
#
# NOTE(review): `.index`, `merge(how: "left")`, boolean-mask indexing
# and `drop(..., axis: 1)` are pandas-style calls; polars-ruby frames do
# not expose an index or these keywords — confirm which DataFrame class
# actually reaches this method.
def _fix_yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods)
  tps_df = tradingPeriods.dup
  # Key both frames by calendar date so each quote row can be matched
  # to that day's trading period.
  tps_df["_date"] = tps_df.index.map(&:to_date)
  quotes["_date"] = quotes.index.map(&:to_date)
  # Preserve the timestamp index across the merge, which would otherwise
  # reset it.
  idx = quotes.index.dup
  quotes = quotes.merge(tps_df, how: "left")
  quotes.index = idx
  # A row is out-of-session if it is at/after the session end or before
  # the session start.
  f_drop = quotes.index >= quotes["end"]
  f_drop = f_drop | (quotes.index < quotes["start"])
  if f_drop.any?
    quotes = quotes[~f_drop]
  end
  # Remove the helper join columns before returning.
  quotes = quotes.drop(["_date", "start", "end"], axis: 1)
  return quotes
end
|
935
|
+
|
936
|
+
# Normalizes the raw "meta" hash from Yahoo's chart API in place:
# converts epoch-second fields to zoned Time objects and reshapes the
# "tradingPeriods" payload into a Polars::DataFrame with pre/regular/
# post session bound columns.
#
# @param md [Hash] raw metadata hash (returned unchanged if not a Hash
#   or empty)
# @param tradingPeriodsOnly [Boolean] when true, skip the
#   firstTradeDate / regularMarketTime / currentTradingPeriod
#   conversions and only process "tradingPeriods"
# @return [Hash] the (mutated) metadata hash
def _format_history_metadata(md, tradingPeriodsOnly = true)
  return md unless md.is_a?(Hash)
  return md if md.length.zero?

  # Exchange timezone used to localize every epoch timestamp below.
  tz = md["exchangeTimezoneName"]

  if !tradingPeriodsOnly
    # Top-level epoch-second fields -> zoned Time.
    ["firstTradeDate", "regularMarketTime"].each do |k|
      if md.key?(k) && !md[k].nil?
        if md[k].is_a?(Integer)
          md[k] = Time.at(md[k]).in_time_zone(tz)
        end
      end
    end

    # Same conversion for the nested currentTradingPeriod sessions,
    # dropping the now-redundant offset/timezone keys.
    if md.key?("currentTradingPeriod")
      ["regular", "pre", "post"].each do |m|
        if md["currentTradingPeriod"].key?(m) && md["currentTradingPeriod"][m]["start"].is_a?(Integer)
          ["start", "end"].each do |t|
            md["currentTradingPeriod"][m][t] = Time.at(md["currentTradingPeriod"][m][t]).utc.in_time_zone(tz)
          end
          md["currentTradingPeriod"][m].delete("gmtoffset")
          md["currentTradingPeriod"][m].delete("timezone")
        end
      end
    end
  end

  if md.key?("tradingPeriods")
    tps = md["tradingPeriods"]
    if tps == {"pre" => [], "post" => []}
      # Ignore — nothing to reshape.
    elsif tps.is_a?(Array) || tps.is_a?(Hash)
      if tps.is_a?(Array)
        # NOTE(review): `_np` is not defined anywhere in this module —
        # this Array branch looks like an unfinished numpy port and
        # would raise NameError if reached; confirm.
        df = Polars::DataFrame.from_records(_np.hstack(tps))
        df = df.drop(["timezone", "gmtoffset"], axis: 1)
        df["start"] = Time.at(df["start"]).in_time_zone(tz)
        df["end"] = Time.at(df['end']).in_time_zone(tz)
      else #if tps.is_a?(Hash)
        # Transpose each session's array-of-row-hashes into a hash of
        # column arrays, then wrap as a DataFrame.
        pre_df = {} ; tps['pre'].flatten.each{|yy| yy.keys.each{|yyk| pre_df[yyk] ||= []; pre_df[yyk] << yy[yyk] }}; pre_df = Polars::DataFrame.new(pre_df)
        post_df = {}; tps['post'].flatten.each{|yy| yy.keys.each{|yyk| post_df[yyk] ||= []; post_df[yyk] << yy[yyk] }}; post_df = Polars::DataFrame.new(post_df)
        regular_df = {}; tps['regular'].flatten.each{|yy| yy.keys.each{|yyk| regular_df[yyk] ||= []; regular_df[yyk] << yy[yyk] }}; regular_df = Polars::DataFrame.new(regular_df)

        # Prefix pre/post bound columns so they survive the joins.
        pre_df = pre_df.rename({"start" => "pre_start", "end" => "pre_end"}).drop(["timezone", "gmtoffset"]) #, axis: 1)
        post_df = post_df.rename({"start" => "post_start", "end" => "post_end"}).drop(["timezone", "gmtoffset"]) #, axis: 1)
        regular_df = regular_df.drop(["timezone", "gmtoffset"]) #, axis: 1)

        cols = ["pre_start", "pre_end", "end", "post_end"]
        # Chain sessions: pre ends where regular starts; regular ends
        # where post starts.
        df = pre_df.join(regular_df, left_on: 'pre_end', right_on: 'start')
        df = df.join(post_df, left_on: 'end', right_on: 'post_start')
        cols.each do |c|
          # NOTE(review): builds an :i64 Series from zoned Time objects
          # — confirm polars-ruby casts these as intended (epoch ints?).
          s = Polars::Series.new(df[c].map{|t| Time.at(t).in_time_zone(tz) }, dtype: :i64)
          df.replace(c, s)
        end

        # Re-derive start/post_start from the joined bounds so every
        # session boundary column is present.
        df = Polars::DataFrame.new({'pre_start' => df['pre_start'], 'pre_end' => df['pre_end'], 'start' => df['pre_end'], 'end' => df['end'], 'post_start' => df['end'], 'post_end' => df['post_end']})
        # df = df[cols]
      end

      md["tradingPeriods"] = df
    end
  end

  return md
end
|
1018
|
+
|
1019
|
+
# Guarded merge of a sub-frame (dividends/splits) into the main price
# frame.
#
# NOTE(review): the actual merge is not implemented — after the guard
# clauses `df_sub` is ignored and `df_main` is returned as-is. The event
# columns are instead attached upstream in the history path.
#
# @param df_main [DataFrame] primary price frame
# @param df_sub [DataFrame] events frame to merge (must be non-empty)
# @param interval [String] bar interval (unused until merge is ported)
# @return [DataFrame] `df_main`
# @raise [ArgumentError] if `df_sub` is empty. (Was a bare `Exception`,
#   which default `rescue` clauses cannot catch — narrowed to a
#   StandardError subclass.)
def _safe_merge_dfs(df_main, df_sub, interval)
  raise ArgumentError, "No data to merge" if df_sub.empty?
  return df_main if df_main.empty?

  df_main
end
|
1030
|
+
|
1031
|
+
|
1032
|
+
# Builds an OHLCV Polars::DataFrame from one chart-API "result" hash.
#
# @param data [Hash] a single element of response["chart"]["result"],
#   with "timestamp" (epoch seconds) and "indicators" keys
# @param interval [String] requested bar interval
# @return [Polars::DataFrame] columns: Timestamps, Open, High, Low,
#   Close, Adj Close, Volume
def _parse_quotes(data, interval)
  timestamps = data["timestamp"]
  # First (only) quote block carries the raw OHLCV arrays.
  ohlc = data["indicators"]["quote"][0]
  volumes = ohlc["volume"]
  opens = ohlc["open"]
  closes = ohlc["close"]
  lows = ohlc["low"]
  highs = ohlc["high"]

  # Fall back to raw closes when Yahoo omits the adjclose block.
  adjclose = closes
  if data["indicators"].key?("adjclose")
    adjclose = data["indicators"]["adjclose"][0]["adjclose"]
  end

  quotes = Polars::DataFrame.new(
    {
      'Timestamps': timestamps.map{|t| Time.at(t) },
      "Open": opens,
      "High": highs,
      "Low": lows,
      "Close": closes,
      "Adj Close": adjclose,
      "Volume": volumes
    }
  )

  # NOTE(review): this 30m branch looks unreachable/broken as written —
  # `logger` and `ticker` are not defined in this module's scope, and
  # `resample('30T')` / DataFrame.new(index:, data:) are pandas calls
  # that polars-ruby does not provide. Confirm before relying on it.
  if interval.downcase == "30m"
    logger.debug("#{ticker}: resampling 30m OHLC from 15m")
    quotes2 = quotes.resample('30T')
    quotes = Polars::DataFrame.new(index: quotes2.last.index, data: {
      'Open' => quotes2['Open'].first,
      'High' => quotes2['High'].max,
      'Low' => quotes2['Low'].min,
      'Close' => quotes2['Close'].last,
      'Adj Close' => quotes2['Adj Close'].last,
      'Volume' => quotes2['Volume'].sum
    })
    begin
      # Dividends/Stock Splits may not be present on the 15m frame.
      # NOTE(review): `rescue Exception` also swallows SignalException /
      # SystemExit — should be a bare `rescue` (StandardError).
      quotes['Dividends'] = quotes2['Dividends'].max
      quotes['Stock Splits'] = quotes2['Stock Splits'].max
    rescue Exception
    end
  end

  return quotes
end
|
1091
|
+
|
1092
|
+
# Stub for the 100x unit-mixup repair (prices returned in cents/pence
# instead of dollars/pounds). Returns the frame untouched.
#
# NOTE(review): a second definition of `_fix_unit_mixups` appears later
# in this file and overrides this one at load time — consolidate.
#
# @param df [Object] price-history frame
# @return [Object] `df`, untouched
def _fix_unit_mixups(df, interval, tz_exchange, prepost)
  df
end
|
1096
|
+
|
1097
|
+
# def _fix_bad_stock_split(df, interval, tz_exchange)
|
1098
|
+
# # TODO: Implement _fix_bad_stock_split
|
1099
|
+
# return df
|
1100
|
+
# end
|
1101
|
+
|
1102
|
+
# def _fix_zeroes(df, interval, tz_exchange, prepost)
|
1103
|
+
# # TODO: Implement _fix_zeroes
|
1104
|
+
# return df
|
1105
|
+
# end
|
1106
|
+
|
1107
|
+
# def _fix_missing_div_adjust(df, interval, tz_exchange)
|
1108
|
+
# # TODO: Implement _fix_missing_div_adjust
|
1109
|
+
# return df
|
1110
|
+
# end
|
1111
|
+
|
1112
|
+
# Price-repair entry point: reconstructs rows marked with `tag` by
# refetching finer-grained history, calibrating it against the good
# rows, and patching the bad values.
#
# Currently a no-op — the frame is returned untouched. The full
# algorithm (interval-to-sub-interval mapping, lookback limits,
# grouping of bad rows, calibration ratios, div-adjust repair) is a
# direct port-in-progress of yfinance's `_reconstruct_intervals_batch`;
# see that implementation for the parked logic.
#
# @param df [Object] price-history frame
# @param interval [String] bar interval (unused while disabled)
# @param prepost [Boolean] include extended hours (unused while disabled)
# @param tag [Numeric] sentinel marking values to repair (default -1)
# @return [Object] `df`, untouched
def _reconstruct_intervals_batch(df, interval, prepost, tag = -1)
  df
end
|
1545
|
+
|
1546
|
+
def _fix_unit_mixups(df, interval, tz_exchange, prepost)
|
1547
|
+
# return df if df.empty?
|
1548
|
+
# df2 = self._fix_unit_switch(df, interval, tz_exchange)
|
1549
|
+
# df3 = self._fix_unit_random_mixups(df2, interval, tz_exchange, prepost)
|
1550
|
+
# return df3
|
1551
|
+
end
|
1552
|
+
|
1553
|
+
def _fix_unit_random_mixups(df, interval, tz_exchange, prepost)
|
1554
|
+
# # Sometimes yahoo returns few prices in cents/pence instead of $/£
|
1555
|
+
# # I.e. 100x bigger
|
1556
|
+
# # 2 ways this manifests:
|
1557
|
+
# # - random 100x errors spread throughout table
|
1558
|
+
# # - a sudden switch between $<->cents at some date
|
1559
|
+
# # This function fixes the first.
|
1560
|
+
|
1561
|
+
# return df if df.empty?
|
1562
|
+
|
1563
|
+
# # Easy to detect and fix, just look for outliers = ~100x local median
|
1564
|
+
# logger = Rails.logger # Utils.get_yf_logger
|
1565
|
+
|
1566
|
+
# if df.shape[0] == 0
|
1567
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1568
|
+
# return df
|
1569
|
+
# end
|
1570
|
+
# if df.shape[0] == 1
|
1571
|
+
# # Need multiple rows to confidently identify outliers
|
1572
|
+
# logger.info("price-repair-100x: Cannot check single-row table for 100x price errors")
|
1573
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1574
|
+
|
1575
|
+
# return df
|
1576
|
+
# end
|
1577
|
+
|
1578
|
+
# df2 = df.copy
|
1579
|
+
|
1580
|
+
# if df2.index.tz.nil?
|
1581
|
+
# df2.index = df2.index.tz_localize(tz_exchange)
|
1582
|
+
# elsif df2.index.tz != tz_exchange
|
1583
|
+
# df2.index = df2.index.tz_convert(tz_exchange)
|
1584
|
+
# end
|
1585
|
+
|
1586
|
+
# # Only import scipy if users actually want function. To avoid
|
1587
|
+
# # adding it to dependencies.
|
1588
|
+
# require 'scipy'
|
1589
|
+
|
1590
|
+
# data_cols = ["High", "Open", "Low", "Close", "Adj Close"] # Order important, separate High from Low
|
1591
|
+
# data_cols = data_cols.select { |c| df2.columns.include?(c) }
|
1592
|
+
# f_zeroes = (df2[data_cols] == 0).any(axis=1).to_numpy
|
1593
|
+
|
1594
|
+
# if f_zeroes.any?
|
1595
|
+
# df2_zeroes = df2[f_zeroes]
|
1596
|
+
# df2 = df2[~f_zeroes]
|
1597
|
+
# df = df[~f_zeroes] # all row slicing must be applied to both df and df2
|
1598
|
+
|
1599
|
+
# else
|
1600
|
+
# df2_zeroes = nil
|
1601
|
+
# end
|
1602
|
+
|
1603
|
+
# if df2.shape[0] <= 1
|
1604
|
+
# logger.info("price-repair-100x: Insufficient good data for detecting 100x price errors")
|
1605
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1606
|
+
|
1607
|
+
# return df
|
1608
|
+
# end
|
1609
|
+
|
1610
|
+
# df2_data = df2[data_cols].to_numpy
|
1611
|
+
# median = scipy.ndimage.median_filter(df2_data, size: [3, 3], mode: "wrap")
|
1612
|
+
# ratio = df2_data / median
|
1613
|
+
# ratio_rounded = (ratio / 20).round * 20 # round ratio to nearest 20
|
1614
|
+
# f = ratio_rounded == 100
|
1615
|
+
# ratio_rcp = 1.0 / ratio
|
1616
|
+
# ratio_rcp_rounded = (ratio_rcp / 20).round * 20 # round ratio to nearest 20
|
1617
|
+
# f_rcp = (ratio_rounded == 100) | (ratio_rcp_rounded == 100)
|
1618
|
+
# f_either = f | f_rcp
|
1619
|
+
|
1620
|
+
# if !f_either.any?
|
1621
|
+
# logger.info("price-repair-100x: No sporadic 100x errors")
|
1622
|
+
|
1623
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1624
|
+
|
1625
|
+
# return df
|
1626
|
+
# end
|
1627
|
+
|
1628
|
+
# # Mark values to send for repair
|
1629
|
+
# tag = -1.0
|
1630
|
+
# data_cols.each_with_index do |c, i|
|
1631
|
+
# fi = f_either[:, i]
|
1632
|
+
# df2.loc[fi, c] = tag
|
1633
|
+
# end
|
1634
|
+
|
1635
|
+
# n_before = (df2_data == tag).sum
|
1636
|
+
# df2 = _reconstruct_intervals_batch(df2, interval, prepost, tag)
|
1637
|
+
# df2_tagged = df2[data_cols].to_numpy == tag
|
1638
|
+
# n_after = (df2[data_cols].to_numpy == tag).sum
|
1639
|
+
|
1640
|
+
# if n_after > 0
|
1641
|
+
# # This second pass will *crudely* "fix" any remaining errors in High/Low
|
1642
|
+
# # simply by ensuring they don't contradict e.g. Low = 100x High.
|
1643
|
+
# f = (df2[data_cols].to_numpy == tag) & f
|
1644
|
+
# f.each_with_index do |fi, i|
|
1645
|
+
# next if !fi.any?
|
1646
|
+
|
1647
|
+
# idx = df2.index[i]
|
1648
|
+
|
1649
|
+
# ['Open', 'Close'].each do |c|
|
1650
|
+
# j = data_cols.index(c)
|
1651
|
+
# df2.loc[idx, c] = df.loc[idx, c] * 0.01 if fi[j]
|
1652
|
+
# end
|
1653
|
+
# end
|
1654
|
+
|
1655
|
+
# c = "High"
|
1656
|
+
# j = data_cols.index(c)
|
1657
|
+
# df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max if fi[j]
|
1658
|
+
|
1659
|
+
# c = "Low"
|
1660
|
+
# j = data_cols.index(c)
|
1661
|
+
# df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min if fi[j]
|
1662
|
+
# end
|
1663
|
+
|
1664
|
+
# f_rcp = (df2[data_cols].to_numpy == tag) & f_rcp
|
1665
|
+
# f_rcp.each_with_index do |fi, i|
|
1666
|
+
# next if !fi.any?
|
1667
|
+
|
1668
|
+
# idx = df2.index[i]
|
1669
|
+
|
1670
|
+
# ['Open', 'Close'].each do |c|
|
1671
|
+
# j = data_cols.index(c)
|
1672
|
+
|
1673
|
+
# df2.loc[idx, c] = df.loc[idx, c] * 100.0 if fi[j]
|
1674
|
+
# end
|
1675
|
+
|
1676
|
+
# c = "High"
|
1677
|
+
# j = data_cols.index(c)
|
1678
|
+
# df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max if fi[j]
|
1679
|
+
|
1680
|
+
# c = "Low"
|
1681
|
+
# j = data_cols.index(c)
|
1682
|
+
# df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min if fi[j]
|
1683
|
+
# end
|
1684
|
+
|
1685
|
+
# df2_tagged = df2[data_cols].to_numpy == tag
|
1686
|
+
# n_after_crude = df2_tagged.sum
|
1687
|
+
|
1688
|
+
# else
|
1689
|
+
# n_after_crude = n_after
|
1690
|
+
# end
|
1691
|
+
|
1692
|
+
# n_fixed = n_before - n_after_crude
|
1693
|
+
# n_fixed_crudely = n_after - n_after_crude
|
1694
|
+
# if n_fixed > 0
|
1695
|
+
# report_msg = "#{ticker}: fixed #{n_fixed}/#{n_before} currency unit mixups "
|
1696
|
+
# report_msg += "(#{n_fixed_crudely} crudely) " if n_fixed_crudely > 0
|
1697
|
+
|
1698
|
+
# report_msg += "in #{interval} price data"
|
1699
|
+
# logger.info('price-repair-100x: ' + report_msg)
|
1700
|
+
# end
|
1701
|
+
|
1702
|
+
# # Restore original values where repair failed
|
1703
|
+
# f_either = df2[data_cols].to_numpy == tag
|
1704
|
+
# f_either.each_with_index do |fj, j|
|
1705
|
+
# if fj.any?
|
1706
|
+
# c = data_cols[j]
|
1707
|
+
# df2.loc[fj, c] = df.loc[fj, c]
|
1708
|
+
# end
|
1709
|
+
# end
|
1710
|
+
# if df2_zeroes
|
1711
|
+
# df2_zeroes["Repaired?"] = false if !df2_zeroes.columns.include?("Repaired?")
|
1712
|
+
|
1713
|
+
# df2 = pd.concat([df2, df2_zeroes]).sort_index
|
1714
|
+
# df2.index = pd.to_datetime(df2.index)
|
1715
|
+
# end
|
1716
|
+
|
1717
|
+
# return df2
|
1718
|
+
return df
|
1719
|
+
end
|
1720
|
+
|
1721
|
+
def _fix_unit_switch(df, interval, tz_exchange)
|
1722
|
+
# Sometimes yahoo returns few prices in cents/pence instead of $/£
|
1723
|
+
# I.e. 100x bigger
|
1724
|
+
# 2 ways this manifests:
|
1725
|
+
# - random 100x errors spread throughout table
|
1726
|
+
# - a sudden switch between $<->cents at some date
|
1727
|
+
# This function fixes the second.
|
1728
|
+
# Eventually yahoo fixes but could take them 2 weeks.
|
1729
|
+
|
1730
|
+
return fix_prices_sudden_change(df, interval, tz_exchange, 100.0)
|
1731
|
+
end
|
1732
|
+
|
1733
|
+
def _fix_zeroes(df, interval, tz_exchange, prepost)
|
1734
|
+
# # Sometimes yahoo returns prices=0 or NaN when trades occurred.
|
1735
|
+
# # But most times when prices=0 or NaN returned is because no trades.
|
1736
|
+
# # Impossible to distinguish, so only attempt repair if few or rare.
|
1737
|
+
|
1738
|
+
# return df if df.empty?
|
1739
|
+
|
1740
|
+
# logger = Rails.logger #utils.get_yf_logger
|
1741
|
+
|
1742
|
+
# if df.shape[0] == 0
|
1743
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1744
|
+
# return df
|
1745
|
+
# end
|
1746
|
+
|
1747
|
+
# intraday = interval[-1] in ["m", 'h']
|
1748
|
+
|
1749
|
+
# df = df.sort_index # important!
|
1750
|
+
# df2 = df.copy
|
1751
|
+
|
1752
|
+
# if df2.index.tz.nil?
|
1753
|
+
# df2.index = df2.index.tz_localize(tz_exchange)
|
1754
|
+
# elsif df2.index.tz != tz_exchange
|
1755
|
+
# df2.index = df2.index.tz_convert(tz_exchange)
|
1756
|
+
# end
|
1757
|
+
|
1758
|
+
# price_cols = ["High", "Open", "Low", "Close", "Adj Close"].select { |c| df2.columns.include?(c) }
|
1759
|
+
# f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna
|
1760
|
+
# df2_reserve = nil
|
1761
|
+
# if intraday
|
1762
|
+
# # Ignore days with >50% intervals containing NaNs
|
1763
|
+
# grp = Polars::Series(f_prices_bad.any(axis=1), name: "nan").groupby(f_prices_bad.index.date)
|
1764
|
+
# nan_pct = grp.sum / grp.count
|
1765
|
+
# dts = nan_pct.index[nan_pct > 0.5]
|
1766
|
+
# f_zero_or_nan_ignore = np.isin(f_prices_bad.index.date, dts)
|
1767
|
+
# df2_reserve = df2[f_zero_or_nan_ignore]
|
1768
|
+
# df2 = df2[~f_zero_or_nan_ignore]
|
1769
|
+
# f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna
|
1770
|
+
# end
|
1771
|
+
|
1772
|
+
# f_high_low_good = (~df2["High"].isna.to_numpy) & (~df2["Low"].isna.to_numpy)
|
1773
|
+
# f_change = df2["High"].to_numpy != df2["Low"].to_numpy
|
1774
|
+
# f_vol_bad = (df2["Volume"] == 0).to_numpy & f_high_low_good & f_change
|
1775
|
+
|
1776
|
+
# # If stock split occurred, then trading must have happened.
|
1777
|
+
# # I should probably rename the function, because prices aren't zero ...
|
1778
|
+
# if df2.columns.include?('Stock Splits')
|
1779
|
+
# f_split = (df2['Stock Splits'] != 0.0).to_numpy
|
1780
|
+
# if f_split.any?
|
1781
|
+
# f_change_expected_but_missing = f_split & ~f_change
|
1782
|
+
|
1783
|
+
# f_prices_bad[f_change_expected_but_missing] = true if f_change_expected_but_missing.any?
|
1784
|
+
# end
|
1785
|
+
# end
|
1786
|
+
|
1787
|
+
# # Check whether worth attempting repair
|
1788
|
+
# f_prices_bad = f_prices_bad.to_numpy
|
1789
|
+
# f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad
|
1790
|
+
# if !f_bad_rows.any?
|
1791
|
+
# logger.info("price-repair-missing: No price=0 errors to repair")
|
1792
|
+
|
1793
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1794
|
+
|
1795
|
+
# return df
|
1796
|
+
# end
|
1797
|
+
# if f_prices_bad.sum == len(price_cols) * len(df2)
|
1798
|
+
# # Need some good data to calibrate
|
1799
|
+
# logger.info("price-repair-missing: No good data for calibration so cannot fix price=0 bad data")
|
1800
|
+
|
1801
|
+
# df["Repaired?"] = false if !df.columns.include?("Repaired?")
|
1802
|
+
|
1803
|
+
# return df
|
1804
|
+
# end
|
1805
|
+
|
1806
|
+
# data_cols = price_cols + ["Volume"]
|
1807
|
+
|
1808
|
+
# # Mark values to send for repair
|
1809
|
+
# tag = -1.0
|
1810
|
+
# price_cols.each_with_index { |c, i| df2.loc[f_prices_bad[:, i], c] = tag }
|
1811
|
+
|
1812
|
+
# df2.loc[f_vol_bad, "Volume"] = tag
|
1813
|
+
# # If volume=0 or NaN for bad prices, then tag volume for repair
|
1814
|
+
# f_vol_zero_or_nan = (df2["Volume"].to_numpy == 0) | (df2["Volume"].isna.to_numpy)
|
1815
|
+
# df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag
|
1816
|
+
# # If volume=0 or NaN but price moved in interval, then tag volume for repair
|
1817
|
+
# df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag
|
1818
|
+
|
1819
|
+
# df2_tagged = df2[data_cols].to_numpy == tag
|
1820
|
+
# n_before = df2_tagged.sum
|
1821
|
+
# dts_tagged = df2.index[df2_tagged.any(axis=1)]
|
1822
|
+
# df2 = _reconstruct_intervals_batch(df2, interval, prepost, tag)
|
1823
|
+
# df2_tagged = df2[data_cols].to_numpy == tag
|
1824
|
+
# n_after = df2_tagged.sum
|
1825
|
+
# dts_not_repaired = df2.index[df2_tagged.any(axis=1)]
|
1826
|
+
# n_fixed = n_before - n_after
|
1827
|
+
# if n_fixed > 0
|
1828
|
+
# msg = "#{ticker}: fixed #{n_fixed}/#{n_before} value=0 errors in #{interval} price data"
|
1829
|
+
# if n_fixed < 4
|
1830
|
+
# dts_repaired = (dts_tagged - dts_not_repaired).to_list.sort
|
1831
|
+
# msg += ": #{dts_repaired}"
|
1832
|
+
# end
|
1833
|
+
# logger.info('price-repair-missing: ' + msg)
|
1834
|
+
# end
|
1835
|
+
|
1836
|
+
# if df2_reserve
|
1837
|
+
# df2_reserve["Repaired?"] = false if !df2_reserve.columns.include?("Repaired?")
|
1838
|
+
|
1839
|
+
# df2 = pd.concat([df2, df2_reserve]).sort_index
|
1840
|
+
# end
|
1841
|
+
|
1842
|
+
# # Restore original values where repair failed (i.e. remove tag values)
|
1843
|
+
# f = df2[data_cols].to_numpy == tag
|
1844
|
+
# f.each_with_index do |fj, j|
|
1845
|
+
# if fj.any?
|
1846
|
+
# c = data_cols[j]
|
1847
|
+
# df2.loc[fj, c] = df.loc[fj, c]
|
1848
|
+
# end
|
1849
|
+
# end
|
1850
|
+
|
1851
|
+
# return df2
|
1852
|
+
end
|
1853
|
+
|
1854
|
+
def _fix_missing_div_adjust(df, interval, tz_exchange)
|
1855
|
+
# # Sometimes, if a dividend occurred today, then yahoo has not adjusted historic data.
|
1856
|
+
# # Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
|
1857
|
+
# # E.g. if fetching historic prices before todays dividend, then cannot fix.
|
1858
|
+
|
1859
|
+
# logger = Rails.logger # utils.get_yf_logger
|
1860
|
+
|
1861
|
+
# return df if df.nil? || df.empty?
|
1862
|
+
|
1863
|
+
# interday = interval in ['1d', '1wk', '1mo', '3mo']
|
1864
|
+
|
1865
|
+
# return df if !interday
|
1866
|
+
|
1867
|
+
# df = df.sort_index
|
1868
|
+
|
1869
|
+
# f_div = (df["Dividends"] != 0.0).to_numpy
|
1870
|
+
# if !f_div.any?
|
1871
|
+
# logger.debug('div-adjust-repair: No dividends to check')
|
1872
|
+
# return df
|
1873
|
+
# end
|
1874
|
+
|
1875
|
+
# df2 = df.copy
|
1876
|
+
# if df2.index.tz.nil?
|
1877
|
+
# df2.index = df2.index.tz_localize(tz_exchange)
|
1878
|
+
# elsif df2.index.tz != tz_exchange
|
1879
|
+
# df2.index = df2.index.tz_convert(tz_exchange)
|
1880
|
+
# end
|
1881
|
+
|
1882
|
+
# div_indices = np.where(f_div)[0]
|
1883
|
+
# last_div_idx = div_indices[-1]
|
1884
|
+
# if last_div_idx == 0
|
1885
|
+
# # Not enough data to recalculate the div-adjustment,
|
1886
|
+
# # because need close day before
|
1887
|
+
# logger.debug('div-adjust-repair: Insufficient data to recalculate div-adjustment')
|
1888
|
+
# return df
|
1889
|
+
# end
|
1890
|
+
|
1891
|
+
# # To determine if yahoo messed up, analyse price data between today's dividend and
|
1892
|
+
# # the previous dividend
|
1893
|
+
# if div_indices.length == 1
|
1894
|
+
# # No other divs in data
|
1895
|
+
# prev_idx = 0
|
1896
|
+
# prev_dt = nil
|
1897
|
+
# else
|
1898
|
+
# prev_idx = div_indices[-2]
|
1899
|
+
# prev_dt = df2.index[prev_idx]
|
1900
|
+
# end
|
1901
|
+
# f_no_adj = (df2['Close'] == df2['Adj Close']).to_numpy[prev_idx:last_div_idx]
|
1902
|
+
# threshold_pct = 0.5
|
1903
|
+
# yahoo_failed = (np.sum(f_no_adj) / len(f_no_adj)) > threshold_pct
|
1904
|
+
|
1905
|
+
# # Fix yahoo
|
1906
|
+
# if yahoo_failed
|
1907
|
+
# last_div_dt = df2.index[last_div_idx]
|
1908
|
+
# last_div_row = df2.loc[last_div_dt]
|
1909
|
+
# close_day_before = df2['Close'].iloc[last_div_idx - 1]
|
1910
|
+
# adj = 1.0 - df2['Dividends'].iloc[last_div_idx] / close_day_before
|
1911
|
+
# div = last_div_row['Dividends']
|
1912
|
+
# msg = "Correcting missing div-adjustment preceding div = #{div} @ #{last_div_dt.date} (prev_dt=#{prev_dt})"
|
1913
|
+
# logger.debug('div-adjust-repair: ' + msg)
|
1914
|
+
|
1915
|
+
# if interval == '1d'
|
1916
|
+
# # exclusive
|
1917
|
+
# df2.loc[:last_div_dt - _datetime.timedelta(seconds=1), 'Adj Close'] *= adj
|
1918
|
+
# else
|
1919
|
+
# # inclusive
|
1920
|
+
# df2.loc[:last_div_dt, 'Adj Close'] *= adj
|
1921
|
+
# end
|
1922
|
+
# end
|
1923
|
+
|
1924
|
+
# return df2
|
1925
|
+
return df
|
1926
|
+
end
|
1927
|
+
|
1928
|
+
def _fix_bad_stock_split(df, interval, tz_exchange)
|
1929
|
+
# # Repair idea is to look for BIG daily price changes that closely match the
|
1930
|
+
# # most recent stock split ratio. This indicates yahoo failed to apply a new
|
1931
|
+
# # stock split to old price data.
|
1932
|
+
# #
|
1933
|
+
# # There is a slight complication, because yahoo does another stupid thing.
|
1934
|
+
# # Sometimes the old data is adjusted twice. So cannot simply assume
|
1935
|
+
# # which direction to reverse adjustment - have to analyse prices and detect.
|
1936
|
+
# # Not difficult.
|
1937
|
+
|
1938
|
+
# return df if df.empty?
|
1939
|
+
|
1940
|
+
# logger = Rails.logger # utils.get_yf_logger
|
1941
|
+
|
1942
|
+
# interday = interval.in?(['1d', '1wk', '1mo', '3mo'])
|
1943
|
+
|
1944
|
+
# return df if !interday
|
1945
|
+
|
1946
|
+
# # Find the most recent stock split
|
1947
|
+
# df = df.sort_index(ascending: false)
|
1948
|
+
# split_f = df['Stock Splits'].to_numpy != 0
|
1949
|
+
# if !split_f.any?
|
1950
|
+
# logger.debug('price-repair-split: No splits in data')
|
1951
|
+
# return df
|
1952
|
+
# end
|
1953
|
+
# most_recent_split_day = df.index[split_f].max
|
1954
|
+
# split = df.loc[most_recent_split_day, 'Stock Splits']
|
1955
|
+
# if most_recent_split_day == df.index[0]
|
1956
|
+
# logger.info("price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair")
|
1957
|
+
# return df
|
1958
|
+
# end
|
1959
|
+
|
1960
|
+
# # logger.debug("price-repair-split: Most recent split = #{split:.4f} @ #{most_recent_split_day.date}")
|
1961
|
+
|
1962
|
+
# return _fix_prices_sudden_change(df, interval, tz_exchange, split, correct_volume: true)
|
1963
|
+
return df
|
1964
|
+
end
|
1965
|
+
|
1966
|
+
def _get_1y_prices( fullDaysOnly=false)
|
1967
|
+
if @prices_1y.nil?
|
1968
|
+
@prices_1y = history(period: "380d", auto_adjust: false, keepna: true) #, proxy: self.proxy)
|
1969
|
+
@md = get_history_metadata #(proxy=self.proxy)
|
1970
|
+
begin
|
1971
|
+
ctp = @md["currentTradingPeriod"]
|
1972
|
+
# Rails.logger.info { "#{__FILE__}:#{__LINE__} ctp = #{ctp.inspect}" }
|
1973
|
+
@today_open = Time.at(ctp["regular"]["start"]).in_time_zone(tz)
|
1974
|
+
@today_close = Time.at(ctp["regular"]["end"]).in_time_zone(tz)
|
1975
|
+
@today_midnight = @today_close.midnight
|
1976
|
+
rescue Exception => e
|
1977
|
+
@today_open = nil
|
1978
|
+
@today_close = nil
|
1979
|
+
@today_midnight = nil
|
1980
|
+
raise
|
1981
|
+
end
|
1982
|
+
end
|
1983
|
+
|
1984
|
+
return @prices_1y unless @prices_1y.nil? || @prices_1y.empty?
|
1985
|
+
|
1986
|
+
dnow = DateTime.now.utc.to_date
|
1987
|
+
d1 = dnow
|
1988
|
+
d0 = (d1 + datetime.timedelta(days=1)) - 1.year
|
1989
|
+
if fullDaysOnly && @_exchange_open_now
|
1990
|
+
# Exclude today
|
1991
|
+
d1 -= 1.day
|
1992
|
+
end
|
1993
|
+
return @prices_1y[str(d0)..str(d1)]
|
1994
|
+
end
|
1995
|
+
|
1996
|
+
def _get_1wk_1h_prepost_prices
|
1997
|
+
return @prices_1wk_1h_prepost ||= history(period: "1wk", interval: "1h", auto_adjust: false, prepost: true)
|
1998
|
+
end
|
1999
|
+
|
2000
|
+
def _get_1wk_1h_reg_prices
|
2001
|
+
return @prices_1wk_1h_reg ||= history(period: "1wk", interval: "1h", auto_adjust: false, prepost: false)
|
2002
|
+
end
|
2003
|
+
|
2004
|
+
def _get_exchange_metadata
|
2005
|
+
if @md.nil?
|
2006
|
+
|
2007
|
+
_get_1y_prices
|
2008
|
+
@md = get_history_metadata #(proxy=self.proxy)
|
2009
|
+
end
|
2010
|
+
return @md
|
2011
|
+
end
|
2012
|
+
|
2013
|
+
def _exchange_open_now
|
2014
|
+
t = DateTime.now
|
2015
|
+
_get_exchange_metadata
|
2016
|
+
|
2017
|
+
# if self._today_open is nil and self._today_close.nil?
|
2018
|
+
# r = false
|
2019
|
+
# else:
|
2020
|
+
# r = self._today_open <= t and t < self._today_close
|
2021
|
+
|
2022
|
+
# if self._today_midnight.nil?
|
2023
|
+
# r = false
|
2024
|
+
# elsif self._today_midnight.date > t.tz_convert(self.timezone).date:
|
2025
|
+
# r = false
|
2026
|
+
# else:
|
2027
|
+
# r = t < self._today_midnight
|
2028
|
+
|
2029
|
+
last_day_cutoff = @get_1y_prices[-1] + 1.days
|
2030
|
+
last_day_cutoff += 20.minutes
|
2031
|
+
r = t < last_day_cutoff
|
2032
|
+
|
2033
|
+
# print("_exchange_open_now returning", r)
|
2034
|
+
# return r
|
2035
|
+
end
|
2036
|
+
end
|
2037
|
+
end
|