yf_as_dataframe 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +13 -0
- data/CHANGELOG.rst +0 -0
- data/CODE_OF_CONDUCT.md +15 -0
- data/Gemfile +10 -0
- data/LICENSE.txt +21 -0
- data/README.md +299 -0
- data/Rakefile +8 -0
- data/chart.png +0 -0
- data/lib/yf_as_dataframe/analysis.rb +68 -0
- data/lib/yf_as_dataframe/financials.rb +304 -0
- data/lib/yf_as_dataframe/fundamentals.rb +53 -0
- data/lib/yf_as_dataframe/holders.rb +253 -0
- data/lib/yf_as_dataframe/multi.rb +238 -0
- data/lib/yf_as_dataframe/price_history.rb +2045 -0
- data/lib/yf_as_dataframe/price_technical.rb +579 -0
- data/lib/yf_as_dataframe/quote.rb +343 -0
- data/lib/yf_as_dataframe/ticker.rb +380 -0
- data/lib/yf_as_dataframe/tickers.rb +50 -0
- data/lib/yf_as_dataframe/utils.rb +354 -0
- data/lib/yf_as_dataframe/version.rb +3 -0
- data/lib/yf_as_dataframe/yf_connection.rb +304 -0
- data/lib/yf_as_dataframe/yfinance_exception.rb +15 -0
- data/lib/yf_as_dataframe.rb +24 -0
- metadata +139 -0
data/lib/yf_as_dataframe/price_history.rb
@@ -0,0 +1,2045 @@

require 'polars'
require 'polars-df'

class YfAsDataframe
  module PriceHistory
    extend ActiveSupport::Concern
    include ActionView::Helpers::NumberHelper

    PRICE_COLNAMES = ['Open', 'High', 'Low', 'Close', 'Adj Close']
    BASE_URL = 'https://query2.finance.yahoo.com'

    # attr_accessor :ticker

    def self.included(base) # built-in Ruby hook for modules
      base.class_eval do
        original_method = instance_method(:initialize)
        define_method(:initialize) do |*args, &block|
          original_method.bind(self).call(*args, &block)
          initialize_price_history
        end
      end
    end

    def initialize_price_history
      @history = nil
      @history_metadata = nil
      @history_metadata_formatted = false
      @reconstruct_start_interval = nil

      yfconn_initialize
    end
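
    # The `included` hook above wraps the host class's #initialize so module
    # state is set up automatically. A minimal, self-contained sketch of the
    # same pattern (names here are illustrative, not part of this gem):
    #
    #   module Traceable
    #     def self.included(base)
    #       base.class_eval do
    #         original = instance_method(:initialize)
    #         define_method(:initialize) do |*args, &block|
    #           original.bind(self).call(*args, &block) # run the class's own initialize
    #           @trace = []                             # then the module's setup
    #         end
    #       end
    #     end
    #   end
    #
    #   class Widget
    #     include Traceable
    #     def initialize(name)
    #       @name = name
    #     end
    #   end
    #
    #   Widget.new("w").instance_variable_get(:@trace) #=> []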

    def history(period: "1mo", interval: "1d", start: nil, fin: nil, prepost: false,
                actions: true, auto_adjust: true, back_adjust: false, repair: false, keepna: false,
                rounding: false, raise_errors: false, returns: false)
      logger = Rails.logger # Yfin.get_yf_logger
      start_user = start
      end_user = fin || DateTime.now

      params = _preprocess_params(start, fin, interval, period, prepost, raise_errors)

      params_pretty = params.dup

      ["period1", "period2"].each do |k|
        params_pretty[k] = DateTime.strptime(params[k].to_s, '%s').new_offset(0).to_time.strftime('%Y-%m-%d %H:%M:%S %z') if params_pretty.key?(k)
      end

      data = _get_data(ticker, params, fin, raise_errors)

      @history_metadata = data["chart"]["result"][0]["meta"] rescue {}
      @history = data["chart"]["result"][0]

      intraday = params["interval"][-1] == "m" || params["interval"][-1] == "h"

      err_msg = _get_err_msg(params['period1'], period, start, params['period2'], fin, params['interval'], intraday)
      # err_msg = _get_err_msg(start, period, start_user, fin, end_user, interval, intraday)

      f = _did_it_fail(data, period, @history_metadata)
      failed = f[:fail]
      err_msg = f[:msg] || err_msg

      if failed
        if raise_errors
          raise Exception.new("#{ticker}: #{err_msg}")
        else
          logger.error("#{ticker}: #{err_msg}")
        end
        if @reconstruct_start_interval && @reconstruct_start_interval == interval
          @reconstruct_start_interval = nil
        end
        return YfAsDataframe::Utils.empty_df
      end

      quotes = _parse_quotes(data["chart"]["result"][0], interval)

      # if fin && !quotes.empty?
      #   endDt = fin.to_datetime.to_i # DateTime.strptime(fin.to_s, '%s').new_offset(0)
      #   if quotes.index[quotes.shape[0] - 1] >= endDt
      #     quotes = quotes[0..quotes.shape[0] - 2]
      #   end
      # end

      quote_type = @history_metadata["instrumentType"]
      expect_capital_gains = quote_type == 'MUTUALFUND' || quote_type == 'ETF'
      tz_exchange = @history_metadata["exchangeTimezoneName"]

      quotes = _set_df_tz(quotes, params["interval"], tz_exchange)
      quotes = _fix_yahoo_dst_issue(quotes, params["interval"])
      quotes = _fix_yahoo_returning_live_separate(quotes, params["interval"], tz_exchange)

      if !prepost && intraday && @history_metadata.key?("tradingPeriods")
        tps = @history_metadata["tradingPeriods"]
        if !tps.is_a?(Polars::DataFrame)
          @history_metadata = _format_history_metadata(@history_metadata, tradingPeriodsOnly: true)
          tps = @history_metadata["tradingPeriods"]
        end
        quotes = _fix_yahoo_returning_prepost_unrequested(quotes, params["interval"], tps)
      end

      df = _get_stock_data(quotes, params, fin)

      if repair
        # df = _fix_unit_mixups(df, interval, tz_exchange, prepost)
        # df = _fix_bad_stock_split(df, interval, tz_exchange)
        # df = _fix_zeroes(df, interval, tz_exchange, prepost)
        # df = _fix_missing_div_adjust(df, interval, tz_exchange)
        # df = df.sort_index
      end

      if auto_adjust
        # df = _auto_adjust(df)
      elsif back_adjust
        # df = _back_adjust(df)
      end

      if rounding
        # df = df.round(data["chart"]["result"][0]["meta"]["priceHint"])
      end

      df["Volume"] = df["Volume"].fill_nan(0) # .astype(Integer)

      # df.index.name = intraday ? "Datetime" : "Date"
      unless intraday
        s = Polars::Series.new(df['Timestamps']).to_a
        df['Timestamps'] = (0..s.length - 1).to_a.map { |i| Time.at(s[i]).to_date }
      end

      @history = df.dup

      df = df.drop(["Dividends", "Stock Splits", "Capital Gains"], errors: 'ignore') unless actions

      if !keepna
        # price_colnames = ['Open', 'High', 'Low', 'Close', 'Adj Close']
        # data_colnames = price_colnames + ['Volume'] + ['Dividends', 'Stock Splits', 'Capital Gains']
        # data_colnames = data_colnames.select { |c| df.columns.include?(c) }
        # mask_nan_or_zero = (df[data_colnames].isnan? | (df[data_colnames] == 0)).all(axis: 1)
        # df = df.drop(mask_nan_or_zero.index[mask_nan_or_zero])
      end

      # logger.debug("#{ticker}: yfinance returning OHLC: #{df.index[0]} -> #{df.index[-1]}")

      @reconstruct_start_interval = nil if @reconstruct_start_interval && @reconstruct_start_interval == interval

      if returns && df.shape.first > 1
        df['Returns'] = [Float::NAN] + (1..df.length - 1).to_a.map { |i| (df['Close'][i] - df['Close'][i - 1]) / df['Close'][i - 1] }
      end

      return df
    end
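
    # Example call (a sketch, assuming a Ticker class that includes this module
    # and defines #ticker; the exact column set depends on the data returned):
    #
    #   t  = YfAsDataframe::Ticker.new("MSFT")
    #   df = t.history(period: "3mo", interval: "1d", returns: true)
    #   df.columns
    #   #=> ["Timestamps", "Open", "High", "Low", "Close", "Adj Close",
    #   #    "Volume", "Dividends", "Capital Gains", "Stock Splits", "Returns"]
    #   df.filter(Polars.col("Returns") > 0.02) # days that gained more than 2%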

    def history_metadata
      history(period: "1wk", interval: "1h", prepost: true) if @history_metadata.nil?

      if !@history_metadata_formatted
        @history_metadata = _format_history_metadata(@history_metadata)
        @history_metadata_formatted = true
      end
      return @history_metadata
    end

    def exchange
      return @exchange ||= _get_exchange_metadata["exchangeName"]
    end

    def timezone
      return @timezone ||= _get_exchange_metadata["exchangeTimezoneName"]
    end

    def dividends
      history(period: "max") if @history.nil?

      if !@history.nil? # && @history['events'].keys.include?("dividends")
        df = @history.dup.drop('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Stock Splits', 'Capital Gains')
        return df.filter(Polars.col('Dividends') > 0.0)
      end
      return Polars::Series.new
    end

    def capital_gains
      history(period: "max") if @history.nil?

      if !@history.nil? # && @history['events'].keys.include?("capital gains")
        df = @history.dup.drop('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Stock Splits', 'Dividends')
        return df.filter(Polars.col('Capital Gains') > 0.0)
      end
      return Polars::Series.new
    end

    def splits
      history(period: "max") if @history.nil?

      if !@history.nil? # && @history['events'].keys.include?("stock splits")
        df = @history.dup.drop('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Capital Gains', 'Dividends')
        return df.filter(Polars.col('Stock Splits') > 0.0)
      end
      return Polars::Series.new
    end

    def actions
      history(period: "max") if @history.nil?

      if !@history.nil? # && @history.columns.include?("Dividends") && @history.columns.include?("Stock Splits")
        df = @history.dup.drop('Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume')
        return df.filter((Polars.col('Stock Splits') > 0.0) | (Polars.col('Dividends') > 0.0) | (Polars.col('Capital Gains') > 0.0))
      end
      return Polars::Series.new
    end
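
    # All four accessors above reduce the cached @history frame the same way:
    # drop the price columns, then keep rows where the relevant event column is
    # non-zero. Equivalent direct Polars usage (sketch, `t` as in the earlier
    # example):
    #
    #   events = t.actions
    #   divs   = events.filter(Polars.col("Dividends") > 0.0)
    #   splits = events.filter(Polars.col("Stock Splits") > 0.0)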

    def currency
      if @currency.nil?
        md = history_metadata # (proxy=self.proxy)
        @currency = md["currency"]
      end
      return @currency
    end

    def quote_type
      if @quote_type.nil?
        md = history_metadata # (proxy=self.proxy)
        @quote_type = md["instrumentType"]
      end
      return @quote_type
    end

    def last_price
      return @last_price unless @last_price.nil?

      prices = _get_1y_prices

      if prices.empty?
        @md ||= _get_exchange_metadata
        @last_price = @md["regularMarketPrice"] if "regularMarketPrice".in?(@md)
      else
        @last_price = (prices["Close"][-1]).to_f
        if @last_price.nan?
          @md ||= _get_exchange_metadata
          @last_price = @md["regularMarketPrice"] if "regularMarketPrice".in?(@md)
        end
      end

      return @last_price
    end

    def previous_close
      return @prev_close unless @prev_close.nil?

      prices = _get_1wk_1h_prepost_prices

      fail = prices.empty?
      prices = fail ? prices : prices[["Close"]].groupby('Timestamps', maintain_order: true).agg([Polars.col("Close")]).to_f

      # Very few symbols have previousClose despite no trading data, e.g. 'QCSTIX'.
      fail = prices.shape.first < 2
      @prev_close = fail ? nil : (prices["Close"][-2]).to_f

      # if fail
      #   # Fallback to original info[] if available.
      #   info # trigger fetch
      #   k = "previousClose"
      #   @prev_close = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
      # end
      return @prev_close
    end

    def regular_market_previous_close
      return @reg_prev_close unless @reg_prev_close.nil?

      prices = _get_1y_prices
      if prices.shape[0] == 1
        # A tiny % of tickers don't return daily history before the last trading day,
        # so the backup option is hourly history:
        prices = _get_1wk_1h_reg_prices
        prices = prices[["Close"]].groupby(prices.index.date).last
      end

      # if prices.shape[0] < 2
      #   # Very few symbols have regularMarketPreviousClose despite no trading
      #   # data, e.g. 'QCSTIX'. So fall back to original info[] if available.
      #   info # trigger fetch
      #   k = "regularMarketPreviousClose"
      #   @reg_prev_close = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
      # else
      #   @reg_prev_close = float(prices["Close"].iloc[-2])
      # end

      return @reg_prev_close
    end

    def open
      return @open unless @open.nil?

      prices = _get_1y_prices
      if prices.empty?
        @open = nil
      else
        @open = (prices["Open"][-1])
        @open = nil if @open.nan?
      end

      return @open
    end

    def day_high
      return @day_high unless @day_high.nil?

      prices = _get_1y_prices
      if prices.empty?
        @day_high = nil
      else
        @day_high = (prices["High"][-1])
        @day_high = nil if @day_high.nan?
      end

      return @day_high
    end

    def day_low
      return @day_low unless @day_low.nil?

      prices = _get_1y_prices
      if prices.empty?
        @day_low = nil
      else
        @day_low = (prices["Low"][-1])
        @day_low = nil if @day_low.nan?
      end

      return @day_low
    end

    def last_volume
      return @last_volume unless @last_volume.nil?

      prices = _get_1y_prices
      @last_volume = prices.empty? ? nil : (prices["Volume"][-1])
      return @last_volume
    end

    def fifty_day_average
      return @_50d_day_average unless @_50d_day_average.nil?

      prices = _get_1y_prices(fullDaysOnly = true)
      if prices.empty?
        @_50d_day_average = nil
      else
        n = prices.shape.first
        a = n - 50
        b = n
        a = 0 if a < 0

        @_50d_day_average = (prices["Close"][a..b].mean)
      end

      return @_50d_day_average
    end

    def two_hundred_day_average
      return @_200d_day_average unless @_200d_day_average.nil?

      prices = _get_1y_prices(fullDaysOnly = true)
      if prices.empty?
        @_200d_day_average = nil
      else
        n = prices.shape[0]
        a = n - 200
        b = n
        a = 0 if a < 0

        @_200d_day_average = (prices["Close"][a..b].mean)
      end

      return @_200d_day_average
    end

    def ten_day_average_volume
      return @_10d_avg_vol unless @_10d_avg_vol.nil?

      prices = _get_1y_prices(fullDaysOnly = true)
      if prices.empty?
        @_10d_avg_vol = nil
      else
        n = prices.shape[0]
        a = n - 10
        b = n
        a = 0 if a < 0

        @_10d_avg_vol = (prices["Volume"][a..b].mean)
      end
      return @_10d_avg_vol
    end

    def three_month_average_volume
      return @_3mo_avg_vol unless @_3mo_avg_vol.nil?

      prices = _get_1y_prices(fullDaysOnly = true)
      if prices.empty?
        @_3mo_avg_vol = nil
      else
        dt1 = prices.index[-1]
        dt0 = dt1 - 3.months + 1.day
        @_3mo_avg_vol = (prices[dt0..dt1]["Volume"].mean)
      end

      return @_3mo_avg_vol
    end
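
    # The rolling statistics above all share one shape: average the trailing N
    # rows of a column. A deduplicating helper could look like this (sketch,
    # not part of the released API):
    #
    #   def trailing_mean(prices, column, n)
    #     rows = prices.shape.first
    #     return nil if rows.zero?
    #     a = [rows - n, 0].max
    #     prices[column][a..rows - 1].mean
    #   end
    #
    #   # fifty_day_average      == trailing_mean(prices, "Close", 50)
    #   # ten_day_average_volume == trailing_mean(prices, "Volume", 10)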

    def year_high
      if @year_high.nil?
        prices = _get_1y_prices(fullDaysOnly = true)
        prices = _get_1y_prices(fullDaysOnly = false) if prices.empty?

        @year_high = (prices["High"].max)
      end
      return @year_high
    end

    def year_low
      if @year_low.nil?
        prices = _get_1y_prices(fullDaysOnly = true)
        prices = _get_1y_prices(fullDaysOnly = false) if prices.empty?

        @year_low = (prices["Low"].min)
      end
      return @year_low
    end

    def year_change
      if @year_change.nil?
        prices = _get_1y_prices(fullDaysOnly = true)
        @year_change = (prices["Close"][-1] - prices["Close"][0]) / prices["Close"][0] if prices.shape[0] >= 2
      end
      return @year_change
    end

    def market_cap
      return @mcap unless @mcap.nil?

      begin
        sh = shares
        lp = last_price
        @mcap = sh * lp
        # @mcap = 'US$' + number_to_human((shares * last_price), precision: 4)
      rescue Exception => e
        if "Cannot retrieve share count".in?(e.message) || "failed to decrypt Yahoo".in?(e.message)
          shares = nil
        else
          raise
        end

        # if shares.nil?
        #   # Very few symbols have marketCap despite no share count, e.g. 'BTC-USD',
        #   # so fall back to original info[] if available.
        #   info
        #   k = "marketCap"
        #   @mcap = _quote._retired_info[k] if !_quote._retired_info.nil? && k.in?(_quote._retired_info)
        # else
        #   @mcap = float(shares * self.last_price)
        # end

        return nil # @mcap
      end
    end

    # price_history_methods = [:get_history_metadata, :get_dividends, :get_capital_gains, \
    #                          :get_splits, :get_actions]
    # price_history_methods.each { |meth| alias_method meth.to_s.gsub(/^get_/, '').to_sym, meth }

    private

    def _preprocess_params(start, fin, interval, period, prepost, raise_errors)
      if start || period.nil? || period.downcase == "max"
        if tz.nil?
          err_msg = "No timezone found, symbol may be delisted"
          # Yfin.shared_DFS[@ticker] = YfAsDataframe::Utils.empty_df
          # Yfin.shared_ERRORS[@ticker] = err_msg
          if raise_errors
            raise Exception.new("#{@ticker}: #{err_msg}")
          else
            Rails.logger.error("#{@ticker}: #{err_msg}")
          end
          return YfAsDataframe::Utils.empty_df
        end

        fin = fin.nil? ? Time.now.to_i : YfAsDataframe::Utils.parse_user_dt(fin, tz)

        if start.nil?
          if interval == "1m"
            start = (fin - 1.week).to_i
          else
            max_start_datetime = (DateTime.now - 99.years).to_i
            start = max_start_datetime.to_i
          end
        else
          start = YfAsDataframe::Utils.parse_user_dt(start, tz)
        end

        params = { "period1" => start, "period2" => fin }
      else
        period = period.downcase
        # params = { "range" => period }
        fin = DateTime.now.to_i
        start = (fin - YfAsDataframe::Utils.interval_to_timedelta(period)).to_i
        params = { "period1" => start, "period2" => fin }
      end

      params["interval"] = interval.downcase
      params["includePrePost"] = prepost
      params["interval"] = "15m" if params["interval"] == "30m"
      params["events"] = "div,splits,capitalGains"

      return params
    end
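
    # Worked example (sketch): period: "1mo", interval: "1d" evaluated at
    # 2024-05-01 00:00:00 UTC yields roughly the following, assuming
    # interval_to_timedelta("1mo") is 30 days:
    #
    #   {
    #     "period1"        => 1711929600, # 2024-04-01 00:00:00 UTC
    #     "period2"        => 1714521600, # 2024-05-01 00:00:00 UTC
    #     "interval"       => "1d",
    #     "includePrePost" => false,
    #     "events"         => "div,splits,capitalGains"
    #   }
    #
    # Note the 30m rewrite above: a 30m request goes out as 15m and is
    # resampled back to 30m in _parse_quotes.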

    def _get_data(ticker, params, fin, raise_errors)
      url = "https://query2.finance.yahoo.com/v8/finance/chart/#{CGI.escape ticker}"
      # url = "https://query1.finance.yahoo.com/v7/finance/download/#{ticker}" ... Deprecated
      data = nil
      # get_fn = @data.method(:get)

      if fin
        end_dt = DateTime.strptime(fin.to_s, '%s') # .new_offset(0)
        dt_now = DateTime.now # .new_offset(0)
        data_delay = Rational(30, 24 * 60) # 30 minutes, as a fraction of a day

        # get_fn = @data.method(:cache_get) if end_dt + data_delay <= dt_now
      end

      begin
        data = get(url, nil, params).parsed_response

        # Check for nil before touching the response body:
        raise RuntimeError.new(
          "*** YAHOO! FINANCE IS CURRENTLY DOWN! ***\n" +
          "Our engineers are working quickly to resolve the issue. Thank you for your patience."
        ) if data.nil? || (data.respond_to?(:text) && data.text.include?("Will be right back"))

        data = HashWithIndifferentAccess.new(data)
      rescue Exception
        raise if raise_errors
      end

      data
    end
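
    # The resulting request is a single GET against Yahoo's v8 chart API, e.g.
    # (URL split for readability):
    #
    #   https://query2.finance.yahoo.com/v8/finance/chart/MSFT
    #     ?period1=1711929600&period2=1714521600&interval=1d
    #     &includePrePost=false&events=div%2Csplits%2CcapitalGains
    #
    # The payload of interest lives under chart.result[0]: "meta" (exchange,
    # timezone, validRanges), "timestamp" (epoch seconds), and
    # "indicators.quote[0]" (open/high/low/close/volume arrays).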

    def _get_err_msg(start, period, start_user, fin, end_user, interval, intraday)
      err_msg = "No price data found, symbol may be delisted"

      if start.nil? || period.nil? || period.downcase == "max"
        err_msg += " (#{interval} "

        if start_user
          err_msg += "#{start_user}"
        elsif !intraday
          err_msg += "#{(Time.at(start).to_date).strftime('%Y-%m-%d')}"
        else
          err_msg += "#{Time.at(start).strftime('%Y-%m-%d %H:%M:%S %z')}"
        end

        err_msg += " -> "

        if end_user
          err_msg += "#{end_user})"
        elsif !intraday
          err_msg += "#{(Time.at(fin).to_date).strftime('%Y-%m-%d')})"
        else
          err_msg += "#{Time.at(fin).strftime('%Y-%m-%d %H:%M:%S %z')})"
        end
      else
        err_msg += " (period=#{period})"
      end
      err_msg
    end

    def _did_it_fail(data, period, hist_metadata)
      failed = false
      err_msg = nil

      if data.nil? || !data.is_a?(Hash)
        failed = true
      elsif data.key?("status_code")
        err_msg = "(yahoo status_code = #{data['status_code']})"
        failed = true
      elsif data["chart"].nil? || data["chart"]["error"]
        err_msg = data["chart"]["error"]["description"] if data["chart"]
        failed = true
      elsif data["chart"]["result"].nil? || !data["chart"]["result"]
        failed = true
      elsif period && !data["chart"]["result"][0].key?("timestamp") && !hist_metadata["validRanges"].include?(period)
        err_msg = "Period '#{period}' is invalid, must be one of #{hist_metadata['validRanges']}"
        failed = true
      end

      { fail: failed, msg: err_msg }
    end

    def _get_stock_data(quotes, params, fin = nil)
      df = quotes # .sort_index
      ts = Polars::Series.new(df['Timestamps']).to_a

      if quotes.shape.first > 0
        # startDt = quotes.index[0].floor('D')
        startDt = quotes['Timestamps'].to_a.map(&:to_date).min
        endDt = fin.present? ? fin.to_date : DateTime.now.tomorrow.to_date

        d = [0.0] * df.length
        @history['events']["dividends"].select { |k, v|
          Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each { |k, v|
          d[ts.index(Time.at(k.to_i).utc)] = v['amount'].to_f } unless @history.try(:[], 'events').try(:[], "dividends").nil?
        df['Dividends'] = Polars::Series.new(d)

        d = [0.0] * df.length
        @history['events']["capital gains"].select { |k, v|
          Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each { |k, v|
          d[ts.index(Time.at(k.to_i).utc)] = v['amount'].to_f } unless @history.try(:[], 'events').try(:[], "capital gains").nil?
        df['Capital Gains'] = Polars::Series.new(d)

        d = [0.0] * df.length
        @history['events']["stock splits"].select { |k, v|
          Time.at(k.to_i).utc.to_date >= startDt && Time.at(k.to_i).utc.to_date <= endDt }.each { |k, v|
          d[ts.index(Time.at(k.to_i).utc)] = v['numerator'].to_f / v['denominator'].to_f } unless @history.try(:[], 'events').try(:[], "stock splits").nil?
        df['Stock Splits'] = Polars::Series.new(d)
      end

      # pandas-era merge/fill logic from the original yfinance port:
      # df = _safe_merge_dfs(df, dividends, interval) if dividends.shape[0] > 0
      # if df.columns.include?("Dividends")
      #   df.loc[df["Dividends"].isna?, "Dividends"] = 0
      # else
      #   df["Dividends"] = 0.0
      # end
      # df = _safe_merge_dfs(df, splits, interval) if splits.shape[0] > 0
      # if df.columns.include?("Stock Splits")
      #   df.loc[df["Stock Splits"].isna?, "Stock Splits"] = 0
      # else
      #   df["Stock Splits"] = 0.0
      # end
      # if expect_capital_gains
      #   df = _safe_merge_dfs(df, capital_gains, interval) if capital_gains.shape[0] > 0
      #   if df.columns.include?("Capital Gains")
      #     df.loc[df["Capital Gains"].isna?, "Capital Gains"] = 0
      #   else
      #     df["Capital Gains"] = 0.0
      #   end
      # end
      # df = df[~df.index.duplicated(keep: 'first')]

      df
    end
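
    # Shape of the dividend events consumed above (sketch; Yahoo keys events by
    # the epoch second of the bar they fall on):
    #
    #   @history["events"]["dividends"] ==
    #     { "1706189400" => { "amount" => 0.75, "date" => 1706189400 } }
    #
    # Each loop converts the key to a Time, locates it in ts, and writes the
    # amount (or split ratio) into that row of the working array d.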

    def _auto_adjust(data)
      col_order = data.columns
      df = data.dup
      ratio = (df["Adj Close"] / df["Close"]).to_a
      df["Adj Open"] = df["Open"] * ratio
      df["Adj High"] = df["High"] * ratio
      df["Adj Low"] = df["Low"] * ratio

      df.drop(
        ["Open", "High", "Low", "Close"],
        axis: 1, inplace: true)

      df.rename(columns: {
        "Adj Open": "Open", "Adj High": "High",
        "Adj Low": "Low", "Adj Close": "Close"
      }, inplace: true)

      return df
    end

    def _back_adjust(data)
      col_order = data.columns
      df = data.dup
      ratio = df["Adj Close"] / df["Close"]
      df["Adj Open"] = df["Open"] * ratio
      df["Adj High"] = df["High"] * ratio
      df["Adj Low"] = df["Low"] * ratio

      df.drop(
        ["Open", "High", "Low", "Adj Close"],
        axis: 1, inplace: true)

      df.rename(columns: {
        "Adj Open": "Open", "Adj High": "High",
        "Adj Low": "Low"
      }, inplace: true)

      return df
    end
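
    # Both adjusters above still carry pandas-style calls (axis:, inplace:,
    # rename(columns:)) from the yfinance port and are only reachable from
    # commented-out call sites. A Polars-idiomatic sketch of _auto_adjust:
    #
    #   def auto_adjust_polars(df)
    #     ratio = Polars.col("Adj Close") / Polars.col("Close")
    #     df.with_columns(
    #       [
    #         (Polars.col("Open") * ratio).alias("Open"),
    #         (Polars.col("High") * ratio).alias("High"),
    #         (Polars.col("Low") * ratio).alias("Low"),
    #         Polars.col("Adj Close").alias("Close")
    #       ]
    #     ).drop("Adj Close")
    #   end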

    def _set_df_tz(df, interval, tz)
      # df.index = df.index.tz_localize("UTC") if df.index.tz.nil?
      # df.index = df.index.tz_convert(tz)
      return df
    end

    def _fix_yahoo_dst_issue(df, interval)
      # if interval.in?(["1d", "1w", "1wk"])
      #   f_pre_midnight = (df.index.minute == 0) & (df.index.hour.in?([22, 23]))
      #   dst_error_hours = [0] * df.shape[0]
      #   dst_error_hours[f_pre_midnight] = 24 - df.index[f_pre_midnight].hour
      #   df.index += dst_error_hours.map { |h| ActiveSupport::Duration.new(hours: h) }
      # end
      return df
    end

    def _fix_yahoo_returning_live_separate(quotes, interval, tz_exchange)
      n = quotes.shape[0]
      # if n > 1
      #   dt1 = quotes['Timestamps'][n - 1]
      #   dt2 = quotes['Timestamps'][n - 2]
      #   if quotes['Timestamps'].tz.nil?
      #     dt1 = dt1.tz_localize("UTC")
      #     dt2 = dt2.tz_localize("UTC")
      #   end
      #   dt1 = dt1.tz_convert(tz_exchange)
      #   dt2 = dt2.tz_convert(tz_exchange)
      #
      #   if interval == "1d"
      #     quotes = quotes.drop(quotes.index[n - 2]) if dt1.to_date == dt2.to_date
      #   else
      #     if interval == "1wk"
      #       last_rows_same_interval = dt1.year == dt2.year && dt1.cweek == dt2.cweek
      #     elsif interval == "1mo"
      #       last_rows_same_interval = dt1.month == dt2.month
      #     elsif interval == "3mo"
      #       last_rows_same_interval = dt1.year == dt2.year && dt1.quarter == dt2.quarter
      #     else
      #       last_rows_same_interval = (dt1 - dt2) < ActiveSupport::Duration.parse(interval)
      #     end
      #
      #     if last_rows_same_interval
      #       idx1 = quotes.index[n - 1]
      #       idx2 = quotes.index[n - 2]
      #
      #       return quotes if idx1 == idx2
      #
      #       quotes.loc[idx2, "Open"] = quotes["Open"].iloc[n - 1] if quotes.loc[idx2, "Open"].nan?
      #
      #       if !quotes["High"].iloc[n - 1].nan?
      #         quotes.loc[idx2, "High"] = [quotes["High"].iloc[n - 1], quotes["High"].iloc[n - 2]].max
      #         if quotes.columns.include?("Adj High")
      #           quotes.loc[idx2, "Adj High"] = [quotes["Adj High"].iloc[n - 1], quotes["Adj High"].iloc[n - 2]].max
      #         end
      #       end
      #       if !quotes["Low"].iloc[n - 1].nan?
      #         quotes.loc[idx2, "Low"] = [quotes["Low"].iloc[n - 1], quotes["Low"].iloc[n - 2]].min
      #         if quotes.columns.include?("Adj Low")
      #           quotes.loc[idx2, "Adj Low"] = [quotes["Adj Low"].iloc[n - 1], quotes["Adj Low"].iloc[n - 2]].min
      #         end
      #       end
      #       quotes.loc[idx2, "Close"] = quotes["Close"].iloc[n - 1]
      #       if quotes.columns.include?("Adj Close")
      #         quotes.loc[idx2, "Adj Close"] = quotes["Adj Close"].iloc[n - 1]
      #       end
      #       quotes.loc[idx2, "Volume"] += quotes["Volume"].iloc[n - 1]
      #       quotes = quotes.drop(quotes.index[n - 1])
      #     end
      #   end
      # end
      return quotes
    end

    def _fix_yahoo_returning_prepost_unrequested(quotes, interval, tradingPeriods)
      tps_df = tradingPeriods.dup
      tps_df["_date"] = tps_df.index.map(&:to_date)
      quotes["_date"] = quotes.index.map(&:to_date)
      idx = quotes.index.dup
      quotes = quotes.merge(tps_df, how: "left")
      quotes.index = idx
      f_drop = quotes.index >= quotes["end"]
      f_drop = f_drop | (quotes.index < quotes["start"])
      if f_drop.any?
        quotes = quotes[~f_drop]
      end
      quotes = quotes.drop(["_date", "start", "end"], axis: 1)
      return quotes
    end
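
    # A Polars-flavoured sketch of the same trim, assuming both frames carry a
    # _date column and tps_df holds per-day session "start"/"end" times:
    #
    #   quotes
    #     .join(tps_df, on: "_date", how: "left")
    #     .filter(
    #       (Polars.col("Timestamps") >= Polars.col("start")) &
    #       (Polars.col("Timestamps") < Polars.col("end"))
    #     )
    #     .drop(["_date", "start", "end"])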

    def _format_history_metadata(md, tradingPeriodsOnly: true)
      return md unless md.is_a?(Hash)
      return md if md.length.zero?

      tz = md["exchangeTimezoneName"]

      if !tradingPeriodsOnly
        ["firstTradeDate", "regularMarketTime"].each do |k|
          if md.key?(k) && !md[k].nil? && md[k].is_a?(Integer)
            md[k] = Time.at(md[k]).in_time_zone(tz)
          end
        end

        if md.key?("currentTradingPeriod")
          ["regular", "pre", "post"].each do |m|
            if md["currentTradingPeriod"].key?(m) && md["currentTradingPeriod"][m]["start"].is_a?(Integer)
              ["start", "end"].each do |t|
                md["currentTradingPeriod"][m][t] = Time.at(md["currentTradingPeriod"][m][t]).utc.in_time_zone(tz)
              end
              md["currentTradingPeriod"][m].delete("gmtoffset")
              md["currentTradingPeriod"][m].delete("timezone")
            end
          end
        end
      end

      if md.key?("tradingPeriods")
        tps = md["tradingPeriods"]
        if tps == { "pre" => [], "post" => [] }
          # Ignore
        elsif tps.is_a?(Array) || tps.is_a?(Hash)
          if tps.is_a?(Array)
            # Collect the flat array-of-hashes form into columns
            h = {}
            tps.flatten.each { |row| row.each { |k, v| (h[k] ||= []) << v } }
            df = Polars::DataFrame.new(h)
            df = df.drop(["timezone", "gmtoffset"])
            df["start"] = Time.at(df["start"]).in_time_zone(tz)
            df["end"] = Time.at(df['end']).in_time_zone(tz)
          else # tps.is_a?(Hash)
            pre_df = {}; tps['pre'].flatten.each { |yy| yy.each { |k, v| (pre_df[k] ||= []) << v } }; pre_df = Polars::DataFrame.new(pre_df)
            post_df = {}; tps['post'].flatten.each { |yy| yy.each { |k, v| (post_df[k] ||= []) << v } }; post_df = Polars::DataFrame.new(post_df)
            regular_df = {}; tps['regular'].flatten.each { |yy| yy.each { |k, v| (regular_df[k] ||= []) << v } }; regular_df = Polars::DataFrame.new(regular_df)

            pre_df = pre_df.rename({ "start" => "pre_start", "end" => "pre_end" }).drop(["timezone", "gmtoffset"])
            post_df = post_df.rename({ "start" => "post_start", "end" => "post_end" }).drop(["timezone", "gmtoffset"])
            regular_df = regular_df.drop(["timezone", "gmtoffset"])

            cols = ["pre_start", "pre_end", "end", "post_end"]
            df = pre_df.join(regular_df, left_on: 'pre_end', right_on: 'start')
            df = df.join(post_df, left_on: 'end', right_on: 'post_start')
            cols.each do |c|
              s = Polars::Series.new(df[c].map { |t| Time.at(t).in_time_zone(tz) }, dtype: :i64)
              df.replace(c, s)
            end

            df = Polars::DataFrame.new({ 'pre_start' => df['pre_start'], 'pre_end' => df['pre_end'], 'start' => df['pre_end'], 'end' => df['end'], 'post_start' => df['end'], 'post_end' => df['post_end'] })
            # df = df[cols]
          end

          # df.index = _pd.to_datetime(df["start"].dt.date)
          # df.index = df.index.tz_localize(tz)
          # df.index.name = "Date"

          md["tradingPeriods"] = df
        end
      end

      return md
    end

    def _safe_merge_dfs(df_main, df_sub, interval)
      raise Exception.new("No data to merge") if df_sub.empty?
      return df_main if df_main.empty?

      df = df_main
      return df
    end

    def _parse_quotes(data, interval)
      timestamps = data["timestamp"]
      ohlc = data["indicators"]["quote"][0]
      volumes = ohlc["volume"]
      opens = ohlc["open"]
      closes = ohlc["close"]
      lows = ohlc["low"]
      highs = ohlc["high"]

      adjclose = closes
      if data["indicators"].key?("adjclose")
        adjclose = data["indicators"]["adjclose"][0]["adjclose"]
      end

      quotes = Polars::DataFrame.new(
        {
          'Timestamps': timestamps.map { |t| Time.at(t) },
          "Open": opens,
          "High": highs,
          "Low": lows,
          "Close": closes,
          "Adj Close": adjclose,
          "Volume": volumes
        }
      )

      # quotes.index = _pd.to_datetime(timestamps, unit: "s")
      # quotes.sort_index!(inplace: true)

      if interval.downcase == "30m"
        Rails.logger.debug("#{ticker}: resampling 30m OHLC from 15m")
        quotes2 = quotes.resample('30T')
        quotes = Polars::DataFrame.new(index: quotes2.last.index, data: {
          'Open' => quotes2['Open'].first,
          'High' => quotes2['High'].max,
          'Low' => quotes2['Low'].min,
          'Close' => quotes2['Close'].last,
          'Adj Close' => quotes2['Adj Close'].last,
          'Volume' => quotes2['Volume'].sum
        })
        begin
          quotes['Dividends'] = quotes2['Dividends'].max
          quotes['Stock Splits'] = quotes2['Stock Splits'].max
        rescue Exception
        end
      end

      return quotes
    end
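
    # Worked example (sketch) of the input this method expects:
    #
    #   result = {
    #     "timestamp"  => [1714570200, 1714656600],
    #     "indicators" => {
    #       "quote"    => [{ "open"   => [396.0, 398.1],
    #                        "high"   => [399.0, 401.9],
    #                        "low"    => [394.9, 397.2],
    #                        "close"  => [398.5, 401.0],
    #                        "volume" => [21_000_000, 19_500_000] }],
    #       "adjclose" => [{ "adjclose" => [397.7, 400.2] }]
    #     }
    #   }
    #
    #   _parse_quotes(result, "1d")
    #   # => two-row DataFrame with Timestamps, Open, High, Low, Close,
    #   #    Adj Close and Volume columns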

    def _fix_unit_mixups(df, interval, tz_exchange, prepost)
      # TODO: Implement _fix_unit_mixups
      return df
    end

    # def _fix_bad_stock_split(df, interval, tz_exchange)
    #   # TODO: Implement _fix_bad_stock_split
    #   return df
    # end

    # def _fix_zeroes(df, interval, tz_exchange, prepost)
    #   # TODO: Implement _fix_zeroes
    #   return df
    # end

    # def _fix_missing_div_adjust(df, interval, tz_exchange)
    #   # TODO: Implement _fix_missing_div_adjust
    #   return df
    # end

    def _reconstruct_intervals_batch(df, interval, prepost, tag = -1)
      # # Reconstruct values in df using finer-grained price data. Delimiter marks what to reconstruct
      # logger = Rails.logger # YfAsDataframe::Utils.get_yf_logger
      #
      # # raise Exception.new("'df' must be a Polars DataFrame not", type(df)) unless df.is_a?(Polars::DataFrame)
      # return df if interval == "1m"
      #
      # if interval[1:].in?(['d', 'wk', 'mo'])
      #   # Interday data always includes pre & post
      #   prepost = true
      #   intraday = false
      # else
      #   intraday = true
      # end
      #
      # price_cols = df.columns.select { |c| PRICE_COLNAMES.include?(c) }
      # data_cols = price_cols + ["Volume"]
      #
      # # If interval is weekly then can construct with daily. But if smaller intervals then
      # # restricted to recent times:
      # intervals = ["1wk", "1d", "1h", "30m", "15m", "5m", "2m", "1m"]
      # itds = intervals.map { |i| [i, YfAsDataframe::Utils.interval_to_timedelta(interval)] }.to_h
      # nexts = intervals.each_cons(2).to_h
      # min_lookbacks = { "1wk" => nil, "1d" => nil, "1h" => 730.days }
      # ["30m", "15m", "5m", "2m"].each { |i| min_lookbacks[i] = 60.days }
      # min_lookbacks["1m"] = 30.days
      #
      # if interval.in?(nexts)
      #   sub_interval = nexts[interval]
      #   td_range = itds[interval]
      # else
      #   logger.warning("Have not implemented price repair for '#{interval}' interval. Contact developers") unless df.columns.include?("Repaired?")
      #   return df
      # end
      #
      # # Limit max reconstruction depth to 2:
      # @reconstruct_start_interval = interval if @reconstruct_start_interval.nil?
      # if interval != @reconstruct_start_interval && interval != nexts[@reconstruct_start_interval]
      #   logger.debug("#{ticker}: Price repair has hit max depth of 2 ('%s'->'%s'->'%s')", @reconstruct_start_interval, nexts[@reconstruct_start_interval], interval)
      #   return df
      # end
      #
      # df = df.sort_index
      #
      # f_repair = df[data_cols].to_numpy == tag
      # f_repair_rows = f_repair.any(axis=1)
      #
      # # Ignore old intervals for which yahoo won't return finer data:
      # m = min_lookbacks[sub_interval]
      #
      # if m.nil?
      #   min_dt = nil
      # else
      #   m -= _datetime.timedelta(days=1) # allow space for 1-day padding
      #   min_dt = DateTime.now.utc - m
      #   min_dt = min_dt.tz_convert(df.index.tz).ceil("D")
      # end
      #
      # logger.debug("min_dt=#{min_dt} interval=#{interval} sub_interval=#{sub_interval}")
      #
      # if min_dt.nil?
      #   f_recent = df.index >= min_dt
      #   f_repair_rows = f_repair_rows & f_recent
      #   unless f_repair_rows.any?
      #     logger.info("Data too old to repair") unless df.columns.include?("Repaired?")
      #     return df
      #   end
      # end
      #
      # dts_to_repair = df.index[f_repair_rows]
      #
      # if dts_to_repair.length == 0
      #   logger.info("Nothing needs repairing (dts_to_repair[] empty)") unless df.columns.include?("Repaired?")
      #   return df
      # end
      #
      # df_v2 = df.copy
      # df_v2["Repaired?"] = false unless df_v2.columns.include?("Repaired?")
      # f_good = ~(df[price_cols].isna.any(axis=1))
      # f_good = f_good && (df[price_cols].to_numpy != tag).all(axis=1)
      # df_good = df[f_good]
      #
      # # Group nearby NaN-intervals together to reduce number of yahoo fetches
      # dts_groups = [[dts_to_repair[0]]]
      # # Note on setting max size: have to allow space for adding good data
      # if sub_interval == "1mo"
      #   grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
      # elsif sub_interval == "1wk"
      #   grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
      # elsif sub_interval == "1d"
      #   grp_max_size = _dateutil.relativedelta.relativedelta(years=2)
      # elsif sub_interval == "1h"
      #   grp_max_size = _dateutil.relativedelta.relativedelta(years=1)
      # elsif sub_interval == "1m"
      #   grp_max_size = _datetime.timedelta(days=5) # allow 2 days for buffer below
      # else
      #   grp_max_size = _datetime.timedelta(days=30)
      # end
      #
      # logger.debug("grp_max_size = #{grp_max_size}")
      #
      # (1..dts_to_repair.length).each do |i|
      #   dt = dts_to_repair[i]
      #   if dt.date < dts_groups[-1][0].date + grp_max_size
      #     dts_groups[-1].append(dt)
      #   else
      #     dts_groups.append([dt])
      #   end
      # end
      #
      # logger.debug("Repair groups:")
      # dts_groups.each { |g| logger.debug("- #{g[0]} -> #{g[-1]}") }
      #
      # # Add some good data to each group, so can calibrate prices later:
      # (0..dts_groups.length).each do |i|
      #   g = dts_groups[i]
      #   g0 = g[0]
      #   i0 = df_good.index.get_indexer([g0], method="nearest")[0]
      #   if i0 > 0
      #     if (min_dt.nil? || df_good.index[i0 - 1] >= min_dt) && \
      #        ((!intraday) || df_good.index[i0 - 1].date == g0.date)
      #       i0 -= 1
      #     end
      #   end
      #   gl = g[-1]
      #   il = df_good.index.get_indexer([gl], method="nearest")[0]
      #   if il < len(df_good) - 1
      #     il += 1 if (!intraday) || df_good.index[il + 1].date == gl.date
      #   end
      #   good_dts = df_good.index[i0:il + 1]
      #   dts_groups[i] += good_dts.to_list
      #   dts_groups[i].sort
      # end
      #
      # n_fixed = 0
      # dts_groups.each do |g|
      #   df_block = df[df.index.isin(g)]
      #   logger.debug("df_block:\n" + str(df_block))
      #
      #   start_dt = g[0]
      #   start_d = start_dt.date
      #
      #   reject = false
      #   if sub_interval == "1h" && (DateTime::now - start_d) > 729.days
      #     reject = true
      #   elsif sub_interval.in?(["30m", "15m"]) && (DateTime::now - start_d) > 59.days
      #     reject = true
      #   end
      #
      #   if reject
      #     # Don't bother requesting more price data, yahoo will reject
      #     msg = "Cannot reconstruct #{interval} block starting"
      #     msg += intraday ? " #{start_dt}" : " #{start_d}"
      #     msg += ", too old, yahoo will reject request for finer-grain data"
      #     logger.info(msg)
      #     next
      #   end
      #
      #   td_1d = _datetime.timedelta(days=1)
      #   end_dt = g[-1]
      #   end_d = end_dt.date + td_1d
      #
      #   if interval == "1wk"
      #     fetch_start = start_d - td_range # need previous week too
      #     fetch_end = g[-1].date + td_range
      #   elsif interval == "1d"
      #     fetch_start = start_d
      #     fetch_end = g[-1].date + td_range
      #   else
      #     fetch_start = g[0]
      #     fetch_end = g[-1] + td_range
      #   end
      #
      #   # The first and last day returned by yahoo can be slightly wrong, so add buffer:
      #   fetch_start -= td_1d
      #   fetch_end += td_1d
      #   if intraday
      #     fetch_start = fetch_start.date
      #     fetch_end = fetch_end.date + td_1d
      #   end
      #
      #   fetch_start = max(min_dt.date, fetch_start) if min_dt.nil?
      #   logger.debug("Fetching #{sub_interval} prepost=#{prepost} #{fetch_start}->#{fetch_end}")
      #
      #   df_fine = self.history(start: fetch_start, fin: fetch_end, interval: sub_interval, auto_adjust: false, actions: true, prepost: prepost, repair: true, keepna: true)
      #   if df_fine.nil? || df_fine.empty?
      #     msg = "Cannot reconstruct #{interval} block starting"
      #     msg += intraday ? " #{start_dt}" : " #{start_d}"
      #     msg += ", too old, yahoo is rejecting request for finer-grain data"
      #     logger.debug(msg)
      #     next
      #   end
      #
      #   # Discard the buffer
      #   df_fine = df_fine.loc[g[0]: g[-1] + itds[sub_interval] - 1.milliseconds].copy
      #
      #   if df_fine.empty?
      #     msg = "Cannot reconstruct #{interval} block range"
      #     msg += (intraday ? " #{start_dt}->#{end_dt}" : " #{start_d}->#{end_d}")
      #     msg += ", yahoo not returning finer-grain data within range"
      #     logger.debug(msg)
      #     next
      #   end
      #
      #   df_fine["ctr"] = 0
      #   if interval == "1wk"
      #     weekdays = ["MON", "TUE", "WED", "THU", "FRI", "SAT", "SUN"]
      #     week_end_day = weekdays[(df_block.index[0].weekday + 7 - 1) % 7]
      #     df_fine["Week Start"] = df_fine.index.tz_localize(nil).to_period("W-" + week_end_day).start_time
      #     grp_col = "Week Start"
      #   elsif interval == "1d"
      #     df_fine["Day Start"] = pd.to_datetime(df_fine.index.date)
      #     grp_col = "Day Start"
      #   else
      #     df_fine.loc[df_fine.index.isin(df_block.index), "ctr"] = 1
      #     df_fine["intervalID"] = df_fine["ctr"].cumsum
      #     df_fine = df_fine.drop("ctr", axis=1)
      #     grp_col = "intervalID"
      #   end
      #   df_fine = df_fine[~df_fine[price_cols + ['Dividends']].isna.all(axis=1)]
      #
      #   df_fine_grp = df_fine.groupby(grp_col)
      #   df_new = df_fine_grp.agg(
      #     Open: ["Open", "first"],
      #     Close: ["Close", "last"],
      #     AdjClose: ["Adj Close", "last"],
      #     Low: ["Low", "min"],
      #     High: ["High", "max"],
      #     Dividends: ["Dividends", "sum"],
      #     Volume: ["Volume", "sum"]
      #   ).rename(columns: { "AdjClose": "Adj Close" })
      #
      #   if grp_col.in?(["Week Start", "Day Start"])
      #     df_new.index = df_new.index.tz_localize(df_fine.index.tz)
      #   else
      #     df_fine["diff"] = df_fine["intervalID"].diff
      #     new_index = np.append([df_fine.index[0]], df_fine.index[df_fine["intervalID"].diff > 0])
      #     df_new.index = new_index
      #   end
      #   logger.debug('df_new:' + '\n' + str(df_new))
      #   # df_new = df_fine
      #
      #   # Calibrate!
      #   common_index = np.intersect1d(df_block.index, df_new.index)
      #   if common_index.length == 0
      #     # Can't calibrate so don't attempt repair
      #     logger.info("Can't calibrate #{interval} block starting #{start_d} so aborting repair")
      #     next
      #   end
      #
      #   # First, attempt to calibrate the 'Adj Close' column. OK if cannot.
      #   # Only necessary for 1d interval, because the 1h data is not div-adjusted.
      #   if interval == '1d'
      #     df_new_calib = df_new[df_new.index.isin(common_index)]
      #     df_block_calib = df_block[df_block.index.isin(common_index)]
      #     f_tag = df_block_calib['Adj Close'] == tag
      #
      #     if f_tag.any?
      #       div_adjusts = df_block_calib['Adj Close'] / df_block_calib['Close']
      #       # The loop below assumes each 1d repair is isolated, i.e. surrounded by
      #       # good data. Which is the case most of the time.
      #       # But in case we are repairing a chunk of bad 1d data, back/forward-fill the
      #       # good div-adjustments - not perfect, but a good backup.
      #       div_adjusts[f_tag] = np.nan
      #       div_adjusts = div_adjusts.ffill.bfill
      #
      #       (0..np.where(f_tag)[0].length).each do |idx|
      #         dt = df_new_calib.index[idx]
      #         n = len(div_adjusts)
      #
      #         if df_new.loc[dt, "Dividends"] != 0
|
1393
|
+
# if idx < n - 1
|
1394
|
+
# # Easy, take div-adjustment from next-day
|
1395
|
+
# div_adjusts.iloc[idx] = div_adjusts.iloc[idx + 1]
|
1396
|
+
# else
|
1397
|
+
# # Take previous-day div-adjustment and reverse todays adjustment
|
1398
|
+
# div_adj = 1.0 - df_new_calib["Dividends"].iloc[idx] / df_new_calib['Close'].iloc[idx - 1]
|
1399
|
+
# div_adjusts.iloc[idx] = div_adjusts.iloc[idx - 1] / div_adj
|
1400
|
+
# end
|
1401
|
+
|
1402
|
+
# else
|
1403
|
+
# if idx > 0
|
1404
|
+
# # Easy, take div-adjustment from previous-day
|
1405
|
+
# div_adjusts.iloc[idx] = div_adjusts.iloc[idx - 1]
|
1406
|
+
# else
|
1407
|
+
# # Must take next-day div-adjustment
|
1408
|
+
# div_adjusts.iloc[idx] = div_adjusts.iloc[idx + 1]
|
1409
|
+
# if df_new_calib["Dividends"].iloc[idx + 1] != 0
|
1410
|
+
# div_adjusts.iloc[idx] *= 1.0 - df_new_calib["Dividends"].iloc[idx + 1] / \
|
1411
|
+
# df_new_calib['Close'].iloc[idx]
|
1412
|
+
# end
|
1413
|
+
# end
|
1414
|
+
# end
|
1415
|
+
# end
|
1416
|
+
|
1417
|
+
# f_close_bad = df_block_calib['Close'] == tag
|
1418
|
+
# div_adjusts = div_adjusts.reindex(df_block.index, fill_value=np.nan).ffill.bfill
|
1419
|
+
# df_new['Adj Close'] = df_block['Close'] * div_adjusts
|
1420
|
+
|
1421
|
+
# if f_close_bad.any?
|
1422
|
+
# f_close_bad_new = f_close_bad.reindex(df_new.index, fill_value=false)
|
1423
|
+
# div_adjusts_new = div_adjusts.reindex(df_new.index, fill_value=np.nan).ffill.bfill
|
1424
|
+
# div_adjusts_new_np = f_close_bad_new.to_numpy
|
1425
|
+
# df_new.loc[div_adjusts_new_np, 'Adj Close'] = df_new['Close'][div_adjusts_new_np] * div_adjusts_new[div_adjusts_new_np]
|
1426
|
+
# end
|
1427
|
+
# end
|
1428
|
+
|
1429
|
+
# # Check whether 'df_fine' has different split-adjustment.
|
1430
|
+
# # If different, then adjust to match 'df'
|
1431
|
+
# calib_cols = ['Open', 'Close']
|
1432
|
+
# df_new_calib = df_new[df_new.index.isin(common_index)][calib_cols].to_numpy
|
1433
|
+
# df_block_calib = df_block[df_block.index.isin(common_index)][calib_cols].to_numpy
|
1434
|
+
# calib_filter = (df_block_calib != tag)
|
1435
|
+
|
1436
|
+
# if !calib_filter.any?
|
1437
|
+
# # Can't calibrate so don't attempt repair
|
1438
|
+
# logger.info("Can't calibrate #{interval} block starting #{start_d} so aborting repair")
|
1439
|
+
# next
|
1440
|
+
# end
|
1441
|
+
|
1442
|
+
# # Avoid divide-by-zero warnings:
|
1443
|
+
# (0..calib_cols.length).each do |j|
|
1444
|
+
# f = ~calib_filter[:, j]
|
1445
|
+
# if f.any?
|
1446
|
+
# df_block_calib[f, j] = 1
|
1447
|
+
# df_new_calib[f, j] = 1
|
1448
|
+
# end
|
1449
|
+
# end
|
1450
|
+
|
1451
|
+
# ratios = df_block_calib[calib_filter] / df_new_calib[calib_filter]
|
1452
|
+
# weights = df_fine_grp.size
|
1453
|
+
# weights.index = df_new.index
|
1454
|
+
# weights = weights[weights.index.isin(common_index)].to_numpy.astype(float)
|
1455
|
+
# weights = weights[:, None] # transpose
|
1456
|
+
# weights = np.tile(weights, len(calib_cols)) # 1D -> 2D
|
1457
|
+
# weights = weights[calib_filter] # flatten
|
1458
|
+
# not1 = ~np.isclose(ratios, 1.0, rtol=0.00001)
|
1459
|
+
|
1460
|
+
# if np.sum(not1) == len(calib_cols)
|
1461
|
+
# # Only 1 calibration row in df_new is different to df_block so ignore
|
1462
|
+
# ratio = 1.0
|
1463
|
+
# else
|
1464
|
+
# ratio = np.average(ratios, weights=weights)
|
1465
|
+
# end
|
1466
|
+
|
1467
|
+
# logger.debug("Price calibration ratio (raw) = #{ratio:6f}")
|
1468
|
+
# ratio_rcp = round(1.0 / ratio, 1)
|
1469
|
+
# ratio = round(ratio, 1)
|
1470
|
+
# if ratio == 1 && ratio_rcp == 1
|
1471
|
+
# # Good!
|
1472
|
+
# next
|
1473
|
+
|
1474
|
+
# else
|
1475
|
+
# if ratio > 1
|
1476
|
+
# # data has different split-adjustment than fine-grained data
|
1477
|
+
# # Adjust fine-grained to match
|
1478
|
+
# df_new[price_cols] *= ratio
|
1479
|
+
# df_new["Volume"] /= ratio
|
1480
|
+
# elsif ratio_rcp > 1
|
1481
|
+
# # data has different split-adjustment than fine-grained data
|
1482
|
+
# # Adjust fine-grained to match
|
1483
|
+
# df_new[price_cols] *= 1.0 / ratio_rcp
|
1484
|
+
# df_new["Volume"] *= ratio_rcp
|
1485
|
+
# end
|
1486
|
+
# end
|
1487
|
+
|
1488
|
+
# # Repair!
|
1489
|
+
# bad_dts = df_block.index[(df_block[price_cols + ["Volume"]] == tag).to_numpy.any(axis=1)]
|
1490
|
+
|
1491
|
+
# no_fine_data_dts = []
|
1492
|
+
# bad_dts.each do |idx|
|
1493
|
+
# if !df_new.index.include?(idx)
|
1494
|
+
# # yahoo didn't return finer-grain data for this interval,
|
1495
|
+
# # so probably no trading happened.
|
1496
|
+
# no_fine_data_dts.append(idx)
|
1497
|
+
# end
|
1498
|
+
# end
|
1499
|
+
|
1500
|
+
# unless no_fine_data_dts.length == 0
|
1501
|
+
# logger.debug("yahoo didn't return finer-grain data for these intervals: " + str(no_fine_data_dts))
|
1502
|
+
# end
|
1503
|
+
|
1504
|
+
# bad_dts.each do |idx|
|
1505
|
+
|
1506
|
+
# # yahoo didn't return finer-grain data for this interval,
|
1507
|
+
# # so probably no trading happened.
|
1508
|
+
# next if !df_new.index.include?(idx)
|
1509
|
+
|
1510
|
+
# df_new_row = df_new.loc[idx]
|
1511
|
+
|
1512
|
+
# if interval == "1wk"
|
1513
|
+
# df_last_week = df_new.iloc[df_new.index.get_loc(idx) - 1]
|
1514
|
+
# df_fine = df_fine.loc[idx:]
|
1515
|
+
# end
|
1516
|
+
|
1517
|
+
# df_bad_row = df.loc[idx]
|
1518
|
+
# bad_fields = df_bad_row.index[df_bad_row == tag].to_numpy
|
1519
|
+
|
1520
|
+
# df_v2.loc[idx, "High"] = df_new_row["High"] if bad_fields.include?("High")
|
1521
|
+
|
1522
|
+
# df_v2.loc[idx, "Low"] = df_new_row["Low"] if bad_fields.include?("Low")
|
1523
|
+
|
1524
|
+
# if bad_fields.include?("Open")
|
1525
|
+
# if interval == "1wk" && idx != df_fine.index[0]
|
1526
|
+
# # Exchange closed Monday. In this case, yahoo sets Open to last week close
|
1527
|
+
# df_v2.loc[idx, "Open"] = df_last_week["Close"]
|
1528
|
+
# df_v2.loc[idx, "Low"] = [df_v2.loc[idx, "Open"], df_v2.loc[idx, "Low"]].min
|
1529
|
+
# else
|
1530
|
+
# df_v2.loc[idx, "Open"] = df_new_row["Open"]
|
1531
|
+
# end
|
1532
|
+
# end
|
1533
|
+
|
1534
|
+
# if bad_fields.include?("Close")
|
1535
|
+
# df_v2.loc[idx, "Close"] = df_new_row["Close"]
|
1536
|
+
# # Assume 'Adj Close' also corrupted, easier than detecting whether true
|
1537
|
+
# df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
|
1538
|
+
# elsif bad_fields.include?("Adj Close")
|
1539
|
+
# df_v2.loc[idx, "Adj Close"] = df_new_row["Adj Close"]
|
1540
|
+
# end
|
1541
|
+
# if bad_fields.include?("Volume")
|
1542
|
+
# df_v2.loc[idx, "Volume"] = df_new_row["Volume"]
|
1543
|
+
# end
|
1544
|
+
# df_v2.loc[idx, "Repaired?"] = true
|
1545
|
+
# n_fixed += 1
|
1546
|
+
# end
|
1547
|
+
# end
|
1548
|
+
# end
|
1549
|
+
# return df_v2
|
1550
|
+
# end
|
1551
|
+
return df
|
1552
|
+
end
|
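
The disabled block above mirrors yfinance's interval-reconstruction repair: bad coarse rows are grouped, finer-grained bars are fetched for each group, re-aggregated up to the coarse interval (Open = first, High = max, Low = min, Close = last, Dividends/Volume = sum), calibrated against neighbouring good rows, and only then written over the tagged values. A minimal sketch of just the aggregation step, in plain Ruby with illustrative bars and no dataframe dependency:

    fine_bars = [
      { open: 10.0, high: 10.4, low: 9.9,  close: 10.2, volume: 1_000 },
      { open: 10.2, high: 10.6, low: 10.1, close: 10.5, volume: 1_500 },
      { open: 10.5, high: 10.5, low: 10.0, close: 10.1, volume: 800 }
    ]

    coarse_bar = {
      open:   fine_bars.first[:open],              # first fine bar opens the interval
      high:   fine_bars.map { |b| b[:high] }.max,  # highest high across the group
      low:    fine_bars.map { |b| b[:low] }.min,   # lowest low across the group
      close:  fine_bars.last[:close],              # last fine bar closes the interval
      volume: fine_bars.sum { |b| b[:volume] }     # volume accumulates
    }

    p coarse_bar  # => {:open=>10.0, :high=>10.6, :low=>9.9, :close=>10.1, :volume=>3300}
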
+
+    def _fix_unit_mixups(df, interval, tz_exchange, prepost)
+      # return df if df.empty?
+      # df2 = self._fix_unit_switch(df, interval, tz_exchange)
+      # df3 = self._fix_unit_random_mixups(df2, interval, tz_exchange, prepost)
+      # return df3
+      return df
+    end
+
+    def _fix_unit_random_mixups(df, interval, tz_exchange, prepost)
+      # # Sometimes yahoo returns few prices in cents/pence instead of $/£
+      # # I.e. 100x bigger
+      # # 2 ways this manifests:
+      # # - random 100x errors spread throughout table
+      # # - a sudden switch between $<->cents at some date
+      # # This function fixes the first.
+
+      # return df if df.empty?
+
+      # # Easy to detect and fix, just look for outliers = ~100x local median
+      # logger = Rails.logger # YfAsDataframe::Utils.get_yf_logger
+
+      # if df.shape[0] == 0
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+      # return df
+      # end
+      # if df.shape[0] == 1
+      # # Need multiple rows to confidently identify outliers
+      # logger.info("price-repair-100x: Cannot check single-row table for 100x price errors")
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+
+      # return df
+      # end
+
+      # df2 = df.copy
+
+      # if df2.index.tz.nil?
+      # df2.index = df2.index.tz_localize(tz_exchange)
+      # elsif df2.index.tz != tz_exchange
+      # df2.index = df2.index.tz_convert(tz_exchange)
+      # end
+
+      # # Only import scipy if users actually want function. To avoid
+      # # adding it to dependencies.
+      # require 'scipy'
+
+      # data_cols = ["High", "Open", "Low", "Close", "Adj Close"] # Order important, separate High from Low
+      # data_cols = data_cols.select { |c| df2.columns.include?(c) }
+      # f_zeroes = (df2[data_cols] == 0).any(axis=1).to_numpy
+
+      # if f_zeroes.any?
+      # df2_zeroes = df2[f_zeroes]
+      # df2 = df2[~f_zeroes]
+      # df = df[~f_zeroes] # all row slicing must be applied to both df and df2
+
+      # else
+      # df2_zeroes = nil
+      # end
+
+      # if df2.shape[0] <= 1
+      # logger.info("price-repair-100x: Insufficient good data for detecting 100x price errors")
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+
+      # return df
+      # end
+
+      # df2_data = df2[data_cols].to_numpy
+      # median = scipy.ndimage.median_filter(df2_data, size: [3, 3], mode: "wrap")
+      # ratio = df2_data / median
+      # ratio_rounded = (ratio / 20).round * 20 # round ratio to nearest 20
+      # f = ratio_rounded == 100
+      # ratio_rcp = 1.0 / ratio
+      # ratio_rcp_rounded = (ratio_rcp / 20).round * 20 # round ratio to nearest 20
+      # f_rcp = (ratio_rounded == 100) | (ratio_rcp_rounded == 100)
+      # f_either = f | f_rcp
+
+      # if !f_either.any?
+      # logger.info("price-repair-100x: No sporadic 100x errors")
+
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+
+      # return df
+      # end
+
+      # # Mark values to send for repair
+      # tag = -1.0
+      # data_cols.each_with_index do |c, i|
+      # fi = f_either[:, i]
+      # df2.loc[fi, c] = tag
+      # end
+
+      # n_before = (df2_data == tag).sum
+      # df2 = _reconstruct_intervals_batch(df2, interval, prepost, tag)
+      # df2_tagged = df2[data_cols].to_numpy == tag
+      # n_after = (df2[data_cols].to_numpy == tag).sum
+
+      # if n_after > 0
+      # # This second pass will *crudely* "fix" any remaining errors in High/Low
+      # # simply by ensuring they don't contradict e.g. Low = 100x High.
+      # f = (df2[data_cols].to_numpy == tag) & f
+      # f.each_with_index do |fi, i|
+      # next if !fi.any?
+
+      # idx = df2.index[i]
+
+      # ['Open', 'Close'].each do |c|
+      # j = data_cols.index(c)
+      # df2.loc[idx, c] = df.loc[idx, c] * 0.01 if fi[j]
+      # end
+      # end
+
+      # c = "High"
+      # j = data_cols.index(c)
+      # df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max if fi[j]
+
+      # c = "Low"
+      # j = data_cols.index(c)
+      # df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min if fi[j]
+      # end
+
+      # f_rcp = (df2[data_cols].to_numpy == tag) & f_rcp
+      # f_rcp.each_with_index do |fi, i|
+      # next if !fi.any?
+
+      # idx = df2.index[i]
+
+      # ['Open', 'Close'].each do |c|
+      # j = data_cols.index(c)
+
+      # df2.loc[idx, c] = df.loc[idx, c] * 100.0 if fi[j]
+      # end
+
+      # c = "High"
+      # j = data_cols.index(c)
+      # df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].max if fi[j]
+
+      # c = "Low"
+      # j = data_cols.index(c)
+      # df2.loc[idx, c] = df2.loc[idx, ["Open", "Close"]].min if fi[j]
+      # end
+
+      # df2_tagged = df2[data_cols].to_numpy == tag
+      # n_after_crude = df2_tagged.sum
+
+      # else
+      # n_after_crude = n_after
+      # end
+
+      # n_fixed = n_before - n_after_crude
+      # n_fixed_crudely = n_after - n_after_crude
+      # if n_fixed > 0
+      # report_msg = "#{ticker}: fixed #{n_fixed}/#{n_before} currency unit mixups "
+      # report_msg += "(#{n_fixed_crudely} crudely) " if n_fixed_crudely > 0
+
+      # report_msg += "in #{interval} price data"
+      # logger.info('price-repair-100x: ' + report_msg)
+      # end
+
+      # # Restore original values where repair failed
+      # f_either = df2[data_cols].to_numpy == tag
+      # f_either.each_with_index do |fj, j|
+      # if fj.any?
+      # c = data_cols[j]
+      # df2.loc[fj, c] = df.loc[fj, c]
+      # end
+      # end
+      # if df2_zeroes
+      # df2_zeroes["Repaired?"] = false if !df2_zeroes.columns.include?("Repaired?")
+
+      # df2 = pd.concat([df2, df2_zeroes]).sort_index
+      # df2.index = pd.to_datetime(df2.index)
+      # end
+
+      # return df2
+      return df
+    end
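
When enabled, the repair above flags values roughly 100x their local median (a cents-vs-dollars mixup) and sends them through the same tag-and-reconstruct pipeline. The disabled implementation uses a scipy-style 3x3 median filter; the sketch below swaps in a simple windowed median but keeps the round-to-nearest-20 ratio test. Plain Ruby, illustrative prices only:

    closes = [1.01, 1.02, 103.0, 1.04, 1.03]  # third value slipped into pence

    def local_median(arr, i, window = 1)
      lo = [i - window, 0].max
      hi = [i + window, arr.size - 1].min
      vals = arr[lo..hi].sort
      vals[vals.size / 2]
    end

    repaired = closes.each_with_index.map do |price, i|
      ratio = price / local_median(closes, i)
      if (ratio / 20.0).round * 20 == 100              # ~100x too big
        price / 100.0
      elsif ((1.0 / ratio) / 20.0).round * 20 == 100   # ~100x too small
        price * 100.0
      else
        price
      end
    end

    p repaired  # => [1.01, 1.02, 1.03, 1.04, 1.03]
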
+
+    def _fix_unit_switch(df, interval, tz_exchange)
+      # Sometimes yahoo returns few prices in cents/pence instead of $/£
+      # I.e. 100x bigger
+      # 2 ways this manifests:
+      # - random 100x errors spread throughout table
+      # - a sudden switch between $<->cents at some date
+      # This function fixes the second.
+      # Eventually yahoo fixes but could take them 2 weeks.
+
+      return fix_prices_sudden_change(df, interval, tz_exchange, 100.0)
+    end
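
The method itself delegates to the gem's sudden-change fixer with a factor of 100.0. A minimal sketch of the case it targets, with illustrative data: locate the one point where consecutive closes jump by ~100x, then rescale the earlier segment so the whole series is in one currency unit.

    closes = [105.0, 106.0, 104.0, 1.05, 1.06]  # switched from pence to pounds mid-series

    switch_at = (1...closes.size).find do |i|
      ratio = closes[i - 1] / closes[i]
      (ratio / 100.0).round == 1  # ~100x drop between adjacent bars
    end

    if switch_at
      repaired = closes.each_with_index.map do |price, i|
        i < switch_at ? price / 100.0 : price  # rescale everything before the switch
      end
      p repaired  # => [1.05, 1.06, 1.04, 1.05, 1.06]
    end
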
+
+    def _fix_zeroes(df, interval, tz_exchange, prepost)
+      # # Sometimes yahoo returns prices=0 or NaN when trades occurred.
+      # # But most times when prices=0 or NaN returned is because no trades.
+      # # Impossible to distinguish, so only attempt repair if few or rare.
+
+      # return df if df.empty?
+
+      # logger = Rails.logger # utils.get_yf_logger
+
+      # if df.shape[0] == 0
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+      # return df
+      # end
+
+      # intraday = interval[-1] in ["m", 'h']
+
+      # df = df.sort_index # important!
+      # df2 = df.copy
+
+      # if df2.index.tz.nil?
+      # df2.index = df2.index.tz_localize(tz_exchange)
+      # elsif df2.index.tz != tz_exchange
+      # df2.index = df2.index.tz_convert(tz_exchange)
+      # end
+
+      # price_cols = ["High", "Open", "Low", "Close", "Adj Close"].select { |c| df2.columns.include?(c) }
+      # f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna
+      # df2_reserve = nil
+      # if intraday
+      # # Ignore days with >50% intervals containing NaNs
+      # grp = Polars::Series(f_prices_bad.any(axis=1), name: "nan").groupby(f_prices_bad.index.date)
+      # nan_pct = grp.sum / grp.count
+      # dts = nan_pct.index[nan_pct > 0.5]
+      # f_zero_or_nan_ignore = np.isin(f_prices_bad.index.date, dts)
+      # df2_reserve = df2[f_zero_or_nan_ignore]
+      # df2 = df2[~f_zero_or_nan_ignore]
+      # f_prices_bad = (df2[price_cols] == 0.0) | df2[price_cols].isna
+      # end
+
+      # f_high_low_good = (~df2["High"].isna.to_numpy) & (~df2["Low"].isna.to_numpy)
+      # f_change = df2["High"].to_numpy != df2["Low"].to_numpy
+      # f_vol_bad = (df2["Volume"] == 0).to_numpy & f_high_low_good & f_change
+
+      # # If stock split occurred, then trading must have happened.
+      # # I should probably rename the function, because prices aren't zero ...
+      # if df2.columns.include?('Stock Splits')
+      # f_split = (df2['Stock Splits'] != 0.0).to_numpy
+      # if f_split.any?
+      # f_change_expected_but_missing = f_split & ~f_change
+
+      # f_prices_bad[f_change_expected_but_missing] = true if f_change_expected_but_missing.any?
+      # end
+      # end
+
+      # # Check whether worth attempting repair
+      # f_prices_bad = f_prices_bad.to_numpy
+      # f_bad_rows = f_prices_bad.any(axis=1) | f_vol_bad
+      # if !f_bad_rows.any?
+      # logger.info("price-repair-missing: No price=0 errors to repair")
+
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+
+      # return df
+      # end
+      # if f_prices_bad.sum == len(price_cols) * len(df2)
+      # # Need some good data to calibrate
+      # logger.info("price-repair-missing: No good data for calibration so cannot fix price=0 bad data")
+
+      # df["Repaired?"] = false if !df.columns.include?("Repaired?")
+
+      # return df
+      # end
+
+      # data_cols = price_cols + ["Volume"]
+
+      # # Mark values to send for repair
+      # tag = -1.0
+      # price_cols.each_with_index { |c, i| df2.loc[f_prices_bad[:, i], c] = tag }
+
+      # df2.loc[f_vol_bad, "Volume"] = tag
+      # # If volume=0 or NaN for bad prices, then tag volume for repair
+      # f_vol_zero_or_nan = (df2["Volume"].to_numpy == 0) | (df2["Volume"].isna.to_numpy)
+      # df2.loc[f_prices_bad.any(axis=1) & f_vol_zero_or_nan, "Volume"] = tag
+      # # If volume=0 or NaN but price moved in interval, then tag volume for repair
+      # df2.loc[f_change & f_vol_zero_or_nan, "Volume"] = tag
+
+      # df2_tagged = df2[data_cols].to_numpy == tag
+      # n_before = df2_tagged.sum
+      # dts_tagged = df2.index[df2_tagged.any(axis=1)]
+      # df2 = _reconstruct_intervals_batch(df2, interval, prepost, tag)
+      # df2_tagged = df2[data_cols].to_numpy == tag
+      # n_after = df2_tagged.sum
+      # dts_not_repaired = df2.index[df2_tagged.any(axis=1)]
+      # n_fixed = n_before - n_after
+      # if n_fixed > 0
+      # msg = "#{ticker}: fixed #{n_fixed}/#{n_before} value=0 errors in #{interval} price data"
+      # if n_fixed < 4
+      # dts_repaired = (dts_tagged - dts_not_repaired).to_list.sort
+      # msg += ": #{dts_repaired}"
+      # end
+      # logger.info('price-repair-missing: ' + msg)
+      # end
+
+      # if df2_reserve
+      # df2_reserve["Repaired?"] = false if !df2_reserve.columns.include?("Repaired?")
+
+      # df2 = pd.concat([df2, df2_reserve]).sort_index
+      # end
+
+      # # Restore original values where repair failed (i.e. remove tag values)
+      # f = df2[data_cols].to_numpy == tag
+      # f.each_with_index do |fj, j|
+      # if fj.any?
+      # c = data_cols[j]
+      # df2.loc[fj, c] = df.loc[fj, c]
+      # end
+      # end
+
+      # return df2
+      return df
+    end
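
The disabled body above only repairs a zero/NaN price when the rest of the bar suggests trading actually happened (High != Low, nonzero volume, or a split that day); candidates are overwritten with a -1.0 sentinel, reconstructed from finer data, and restored untouched if reconstruction fails. A minimal sketch of just the tagging decision, in plain Ruby with illustrative rows:

    TAG = -1.0
    rows = [
      { high: 10.2, low: 10.0, close: 10.1, volume: 900 },
      { high: 10.3, low: 10.1, close: 0.0,  volume: 1_200 },  # suspicious: traded, yet close=0
      { high: 0.0,  low: 0.0,  close: 0.0,  volume: 0 }       # plausible: no trading at all
    ]

    tagged = rows.map do |r|
      traded = r[:high] != r[:low] || r[:volume] > 0
      if r[:close].zero? && traded
        r.merge(close: TAG)  # send this value for repair
      else
        r                    # leave untouched
      end
    end

    p tagged.map { |r| r[:close] }  # => [10.1, -1.0, 0.0]
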
+
+    def _fix_missing_div_adjust(df, interval, tz_exchange)
+      # # Sometimes, if a dividend occurred today, then yahoo has not adjusted historic data.
+      # # Easy to detect and correct BUT ONLY IF the data 'df' includes today's dividend.
+      # # E.g. if fetching historic prices before todays dividend, then cannot fix.
+
+      # logger = Rails.logger # utils.get_yf_logger
+
+      # return df if df.nil? || df.empty?
+
+      # interday = interval in ['1d', '1wk', '1mo', '3mo']
+
+      # return df if !interday
+
+      # df = df.sort_index
+
+      # f_div = (df["Dividends"] != 0.0).to_numpy
+      # if !f_div.any?
+      # logger.debug('div-adjust-repair: No dividends to check')
+      # return df
+      # end
+
+      # df2 = df.copy
+      # if df2.index.tz.nil?
+      # df2.index = df2.index.tz_localize(tz_exchange)
+      # elsif df2.index.tz != tz_exchange
+      # df2.index = df2.index.tz_convert(tz_exchange)
+      # end
+
+      # div_indices = np.where(f_div)[0]
+      # last_div_idx = div_indices[-1]
+      # if last_div_idx == 0
+      # # Not enough data to recalculate the div-adjustment,
+      # # because need close day before
+      # logger.debug('div-adjust-repair: Insufficient data to recalculate div-adjustment')
+      # return df
+      # end
+
+      # # To determine if yahoo messed up, analyse price data between today's dividend and
+      # # the previous dividend
+      # if div_indices.length == 1
+      # # No other divs in data
+      # prev_idx = 0
+      # prev_dt = nil
+      # else
+      # prev_idx = div_indices[-2]
+      # prev_dt = df2.index[prev_idx]
+      # end
+      # f_no_adj = (df2['Close'] == df2['Adj Close']).to_numpy[prev_idx:last_div_idx]
+      # threshold_pct = 0.5
+      # yahoo_failed = (np.sum(f_no_adj) / len(f_no_adj)) > threshold_pct
+
+      # # Fix yahoo
+      # if yahoo_failed
+      # last_div_dt = df2.index[last_div_idx]
+      # last_div_row = df2.loc[last_div_dt]
+      # close_day_before = df2['Close'].iloc[last_div_idx - 1]
+      # adj = 1.0 - df2['Dividends'].iloc[last_div_idx] / close_day_before
+      # div = last_div_row['Dividends']
+      # msg = "Correcting missing div-adjustment preceding div = #{div} @ #{last_div_dt.date} (prev_dt=#{prev_dt})"
+      # logger.debug('div-adjust-repair: ' + msg)
+
+      # if interval == '1d'
+      # # exclusive
+      # df2.loc[:last_div_dt - _datetime.timedelta(seconds=1), 'Adj Close'] *= adj
+      # else
+      # # inclusive
+      # df2.loc[:last_div_dt, 'Adj Close'] *= adj
+      # end
+      # end
+
+      # return df2
+      return df
+    end
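
The adjustment factor used above is adj = 1 - dividend / close_of_day_before, applied to every 'Adj Close' prior to the ex-dividend row. A small worked example with illustrative numbers:

    closes = [100.0, 102.0, 101.0]   # closes for day 0, 1, 2
    dividend = 2.0                   # dividend paid on day 2
    close_day_before = closes[1]

    adj = 1.0 - dividend / close_day_before   # => ~0.98039

    adj_closes = closes.each_with_index.map do |c, i|
      i < 2 ? (c * adj).round(2) : c  # scale only rows before the dividend day
    end

    p adj_closes  # => [98.04, 100.0, 101.0]
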
+
+    def _fix_bad_stock_split(df, interval, tz_exchange)
+      # # Repair idea is to look for BIG daily price changes that closely match the
+      # # most recent stock split ratio. This indicates yahoo failed to apply a new
+      # # stock split to old price data.
+      # #
+      # # There is a slight complication, because yahoo does another stupid thing.
+      # # Sometimes the old data is adjusted twice. So cannot simply assume
+      # # which direction to reverse adjustment - have to analyse prices and detect.
+      # # Not difficult.
+
+      # return df if df.empty?
+
+      # logger = Rails.logger # utils.get_yf_logger
+
+      # interday = interval.in?(['1d', '1wk', '1mo', '3mo'])
+
+      # return df if !interday
+
+      # # Find the most recent stock split
+      # df = df.sort_index(ascending: false)
+      # split_f = df['Stock Splits'].to_numpy != 0
+      # if !split_f.any?
+      # logger.debug('price-repair-split: No splits in data')
+      # return df
+      # end
+      # most_recent_split_day = df.index[split_f].max
+      # split = df.loc[most_recent_split_day, 'Stock Splits']
+      # if most_recent_split_day == df.index[0]
+      # logger.info("price-repair-split: Need 1+ day of price data after split to determine true price. Won't repair")
+      # return df
+      # end
+
+      # # logger.debug("price-repair-split: Most recent split = #{split:.4f} @ #{most_recent_split_day.date}")
+
+      # return _fix_prices_sudden_change(df, interval, tz_exchange, split, correct_volume: true)
+      return df
+    end
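
A minimal sketch of the detection this method relies on: a day-over-day close ratio near the most recent split ratio marks the boundary where yahoo stopped applying the split, and everything older gets the missing adjustment applied. Illustrative prices for a 4:1 split:

    split_ratio = 4.0
    closes = [480.0, 488.0, 122.0, 123.0]  # older rows were never split-adjusted

    boundary = (1...closes.size).find do |i|
      ratio = closes[i - 1] / closes[i]
      (ratio - split_ratio).abs / split_ratio < 0.05  # within 5% of the split ratio
    end

    if boundary
      repaired = closes.each_with_index.map do |c, i|
        i < boundary ? c / split_ratio : c  # apply the missing adjustment to old rows
      end
      p repaired  # => [120.0, 122.0, 122.0, 123.0]
    end
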
+
+    def _get_1y_prices(fullDaysOnly = false)
+      if @prices_1y.nil?
+        @prices_1y = history(period: "380d", auto_adjust: false, keepna: true) #, proxy: self.proxy)
+        @md = get_history_metadata #(proxy=self.proxy)
+        begin
+          ctp = @md["currentTradingPeriod"]
+          # Rails.logger.info { "#{__FILE__}:#{__LINE__} ctp = #{ctp.inspect}" }
+          @today_open = Time.at(ctp["regular"]["start"]).in_time_zone(tz)
+          @today_close = Time.at(ctp["regular"]["end"]).in_time_zone(tz)
+          @today_midnight = @today_close.midnight
+        rescue Exception => e
+          @today_open = nil
+          @today_close = nil
+          @today_midnight = nil
+          raise
+        end
+      end
+
+      return @prices_1y unless @prices_1y.nil? || @prices_1y.empty?
+
+      dnow = DateTime.now.utc.to_date
+      d1 = dnow
+      d0 = (d1 + 1.day) - 1.year
+      if fullDaysOnly && _exchange_open_now
+        # Exclude today
+        d1 -= 1.day
+      end
+      return @prices_1y[d0.to_s..d1.to_s]
+    end
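
The slice target above is a trailing-year window: 380 calendar days are fetched for headroom, and the window start is (today + 1 day) - 1 year. A small sketch of that date arithmetic with ActiveSupport (already a dependency of this code), pinned to a fixed date so the output is reproducible:

    require 'active_support/all'

    d1 = Date.new(2024, 3, 15)   # stand-in for "today"
    d0 = (d1 + 1.day) - 1.year   # inclusive start of the trailing year

    puts d0  # => 2023-03-16
    puts d1  # => 2024-03-15
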
+
+    def _get_1wk_1h_prepost_prices
+      return @prices_1wk_1h_prepost ||= history(period: "1wk", interval: "1h", auto_adjust: false, prepost: true)
+    end
+
+    def _get_1wk_1h_reg_prices
+      return @prices_1wk_1h_reg ||= history(period: "1wk", interval: "1h", auto_adjust: false, prepost: false)
+    end
+
+    def _get_exchange_metadata
+      if @md.nil?
+        _get_1y_prices
+        @md = get_history_metadata #(proxy=self.proxy)
+      end
+      return @md
+    end
+
+    def _exchange_open_now
+      t = DateTime.now
+      _get_exchange_metadata
+
+      # if self._today_open is nil and self._today_close.nil?
+      # r = false
+      # else:
+      # r = self._today_open <= t and t < self._today_close
+
+      # if self._today_midnight.nil?
+      # r = false
+      # elsif self._today_midnight.date > t.tz_convert(self.timezone).date:
+      # r = false
+      # else:
+      # r = t < self._today_midnight
+
+      last_day_cutoff = _get_1y_prices[-1] + 1.day
+      last_day_cutoff += 20.minutes
+      r = t < last_day_cutoff
+
+      # print("_exchange_open_now returning", r)
+      # return r
+    end
+  end
+end
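
_exchange_open_now approximates "is the exchange open?" by checking whether the current time is still within roughly one day plus a 20-minute buffer of the last fetched bar, rather than consulting an exchange calendar. A minimal sketch of that cutoff test with illustrative timestamps:

    require 'time'

    last_bar_time = Time.parse('2024-03-15 16:00:00 -0400')  # timestamp of last fetched bar
    now           = Time.parse('2024-03-16 09:45:00 -0400')

    cutoff = last_bar_time + (24 * 60 * 60) + (20 * 60)  # one day plus 20 minutes, in seconds
    puts now < cutoff  # => true: still within the window where the exchange may be open
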