yf_as_dataframe 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +99 -0
- data/MINIMAL_INTEGRATION.md +227 -0
- data/README.md +65 -0
- data/lib/yf_as_dataframe/curl_impersonate_integration.rb +110 -0
- data/lib/yf_as_dataframe/financials.rb +3 -2
- data/lib/yf_as_dataframe/holders.rb +4 -2
- data/lib/yf_as_dataframe/multi.rb +2 -1
- data/lib/yf_as_dataframe/price_history.rb +46 -16
- data/lib/yf_as_dataframe/price_technical.rb +0 -1
- data/lib/yf_as_dataframe/quote.rb +4 -3
- data/lib/yf_as_dataframe/ticker.rb +7 -4
- data/lib/yf_as_dataframe/utils.rb +59 -16
- data/lib/yf_as_dataframe/version.rb +1 -1
- data/lib/yf_as_dataframe/yf_connection.rb +295 -49
- data/lib/yf_as_dataframe/yf_connection_minimal_patch.rb +97 -0
- data/lib/yf_as_dataframe/yfinance_exception.rb +3 -1
- data/lib/yf_as_dataframe.rb +2 -0
- data/quick_test.rb +143 -0
- data/test_minimal_integration.rb +121 -0
- metadata +53 -5
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'active_support'
|
2
|
+
require 'active_support/concern'
|
1
3
|
# require 'requests'
|
2
4
|
# require 'requests_cache'
|
3
5
|
require 'thread'
|
@@ -5,17 +7,78 @@ require 'date'
|
|
5
7
|
require 'nokogiri'
|
6
8
|
require 'zache'
|
7
9
|
require 'httparty'
|
10
|
+
require 'uri'
|
11
|
+
require 'json'
|
8
12
|
|
9
13
|
class YfAsDataframe
|
10
14
|
module YfConnection
|
11
|
-
extend ActiveSupport::Concern
|
15
|
+
extend ::ActiveSupport::Concern
|
12
16
|
# extend HTTParty
|
13
17
|
|
14
18
|
# """
|
15
19
|
# Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations.
|
16
20
|
# """
|
21
|
+
@@user_agent_headers_selection = [
|
22
|
+
# Chrome - Desktop
|
23
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Windows
|
24
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Mac
|
25
|
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36", # Linux
|
26
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Windows
|
27
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Mac
|
28
|
+
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36", # Linux
|
29
|
+
|
30
|
+
# Chrome - Mobile
|
31
|
+
"Mozilla/5.0 (Linux; Android 15; SM-S931B Build/AP3A.240905.015.A2; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/127.0.6533.103 Mobile Safari/537.36", # Samsung S25
|
32
|
+
"Mozilla/5.0 (Linux; Android 15; Pixel 8 Pro Build/AP4A.250105.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/132.0.6834.163 Mobile Safari/537.36", # Pixel 8 Pro
|
33
|
+
"Mozilla/5.0 (Linux; Android 14; Pixel 9 Pro Build/AD1A.240418.003; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.54 Mobile Safari/537.36", # Pixel 9 Pro
|
34
|
+
"Mozilla/5.0 (Linux; Android 14; SM-S928B/DS) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.230 Mobile Safari/537.36", # Samsung S24 Ultra
|
35
|
+
|
36
|
+
# Firefox - Desktop
|
37
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0", # Windows
|
38
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:135.0) Gecko/20100101 Firefox/135.0", # Mac
|
39
|
+
"Mozilla/5.0 (X11; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0", # Linux
|
40
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0", # Windows
|
41
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:136.0) Gecko/20100101 Firefox/136.0", # Mac
|
42
|
+
"Mozilla/5.0 (X11; Linux x86_64; rv:136.0) Gecko/20100101 Firefox/136.0", # Linux
|
43
|
+
|
44
|
+
# Firefox - Mobile
|
45
|
+
"Mozilla/5.0 (Android 15; Mobile; SM-G556B/DS; rv:130.0) Gecko/130.0 Firefox/130.0", # Samsung Xcover7
|
46
|
+
"Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 7 Pro
|
47
|
+
"Mozilla/5.0 (Linux; Android 13; Pixel 6 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Mobile Safari/537.36", # Pixel 6 Pro
|
48
|
+
"Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36", # Generic Android
|
49
|
+
|
50
|
+
# Safari - Desktop
|
51
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15", # Mac
|
52
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 13_6_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.10 Safari/605.1.15", # Mac
|
53
|
+
|
54
|
+
# Safari - Mobile
|
55
|
+
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", # iPhone
|
56
|
+
"Mozilla/5.0 (iPad; CPU OS 17_7_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Mobile/15E148 Safari/604.1", # iPad
|
57
|
+
|
58
|
+
# Edge - Desktop
|
59
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 Edg/134.0.0.0", # Windows
|
60
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/131.0.2903.86", # Windows
|
61
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0", # Windows
|
62
|
+
|
63
|
+
# Edge - Mobile
|
64
|
+
"Mozilla/5.0 (Linux; Android 10; OnePlus HD1913) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Mobile Safari/537.36 EdgA/134.0.0.0", # Android
|
65
|
+
|
66
|
+
# Opera - Desktop
|
67
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36 OPR/117.0.0.0", # Windows
|
68
|
+
|
69
|
+
# Opera - Mobile
|
70
|
+
"Mozilla/5.0 (Linux; Android 10; Huawei VOG-L29) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.2.4027.0 Mobile Safari/537.36 OPR/76.2.4027.0" # Android
|
71
|
+
]
|
72
|
+
|
73
|
+
# Add more of the headers a browser would typically send. Yahoo appears to have recently introduced client fingerprinting; we do not emulate a browser fingerprint yet, but this gets us closer.
|
17
74
|
# Default browser-like headers merged into outgoing Yahoo requests.
# The User-Agent is sampled once, when this assignment is evaluated, so the
# same value is reused for every request made through this hash.
@@user_agent_headers = {
  "User-Agent" => @@user_agent_headers_selection.sample,
  "Accept" => "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
  "Accept-Language" => "en-US,en;q=0.9",
  "Accept-Encoding" => "gzip, deflate, br",
  # Must be a well-formed URL; the previous value had a doubled colon ("https:://").
  "Referer" => "https://finance.yahoo.com/",
  "Cache-Control" => "max-age=0",
  "Connection" => "keep-alive"
}
|
20
83
|
@@proxy = nil
|
21
84
|
|
@@ -35,18 +98,50 @@ class YfAsDataframe
|
|
35
98
|
@@cookie = nil
|
36
99
|
@@cookie_strategy = 'basic'
|
37
100
|
@@cookie_lock = ::Mutex.new()
|
101
|
+
|
102
|
+
# Add session tracking
|
103
|
+
@@session_created_at = Time.now
|
104
|
+
@@session_refresh_interval = 3600 # 1 hour
|
105
|
+
@@request_count = 0
|
106
|
+
@@last_request_time = nil
|
107
|
+
|
108
|
+
# Circuit breaker state
|
109
|
+
@@circuit_breaker_state = :closed # :closed, :open, :half_open
|
110
|
+
@@failure_count = 0
|
111
|
+
@@last_failure_time = nil
|
112
|
+
@@circuit_breaker_threshold = 3
|
113
|
+
@@circuit_breaker_timeout = 60 # seconds
|
114
|
+
@@circuit_breaker_base_timeout = 60 # seconds
|
38
115
|
end
|
39
116
|
|
40
117
|
|
41
118
|
def get(url, headers=nil, params=nil)
|
42
|
-
#
|
43
|
-
|
119
|
+
# Check circuit breaker first
|
120
|
+
unless circuit_breaker_allow_request?
|
121
|
+
raise RuntimeError.new("Circuit breaker is open - too many recent failures. Please try again later.")
|
122
|
+
end
|
123
|
+
|
124
|
+
# Add request throttling to be respectful of rate limits
|
125
|
+
throttle_request
|
126
|
+
|
127
|
+
# Track session usage
|
128
|
+
track_session_usage
|
129
|
+
|
130
|
+
# Refresh session if needed
|
131
|
+
refresh_session_if_needed
|
132
|
+
|
133
|
+
# Only fetch crumb for /v7/finance/download endpoint
|
134
|
+
crumb_needed = url.include?('/v7/finance/download')
|
44
135
|
|
45
136
|
headers ||= {}
|
46
137
|
params ||= {}
|
47
|
-
params.merge!(crumb: @@crumb) unless @@crumb.nil?
|
48
|
-
|
49
|
-
|
138
|
+
# params.merge!(crumb: @@crumb) unless @@crumb.nil? # Commented out: crumb not needed for most endpoints
|
139
|
+
if crumb_needed
|
140
|
+
crumb = get_crumb_scrape_quote_page(params[:symbol] || params['symbol'])
|
141
|
+
params.merge!(crumb: crumb) unless crumb.nil?
|
142
|
+
end
|
143
|
+
cookie, _, strategy = _get_cookie_and_crumb(crumb_needed)
|
144
|
+
crumbs = {} # crumb logic handled above if needed
|
50
145
|
|
51
146
|
request_args = {
|
52
147
|
url: url,
|
@@ -59,17 +154,24 @@ class YfAsDataframe
|
|
59
154
|
|
60
155
|
cookie_hash = ::HTTParty::CookieHash.new
|
61
156
|
cookie_hash.add_cookies(@@cookie)
|
62
|
-
options = { headers: headers.dup.merge(@@user_agent_headers).merge({ 'cookie' => cookie_hash.to_cookie_string
|
157
|
+
options = { headers: headers.dup.merge(@@user_agent_headers).merge({ 'cookie' => cookie_hash.to_cookie_string })} #, debug_output: STDOUT }
|
63
158
|
|
64
159
|
u = (request_args[:url]).dup.to_s
|
65
|
-
joiner = (
|
66
|
-
u += (joiner +
|
67
|
-
|
68
|
-
# Rails.logger.info { "#{__FILE__}:#{__LINE__} u=#{u}, options = #{options.inspect}" }
|
69
|
-
response = ::HTTParty.get(u, options)
|
70
|
-
# Rails.logger.info { "#{__FILE__}:#{__LINE__} response=#{response.inspect}" }
|
160
|
+
joiner = (request_args[:url].include?('?') ? '&' : '?')
|
161
|
+
u += (joiner + URI.encode_www_form(request_args[:params])) unless request_args[:params].empty?
|
71
162
|
|
72
|
-
|
163
|
+
begin
|
164
|
+
response = ::HTTParty.get(u, options)
|
165
|
+
if response_failure?(response)
|
166
|
+
circuit_breaker_record_failure
|
167
|
+
raise RuntimeError.new("Yahoo Finance request failed: #{response.code} - #{response.body}")
|
168
|
+
end
|
169
|
+
circuit_breaker_record_success
|
170
|
+
return response
|
171
|
+
rescue => e
|
172
|
+
circuit_breaker_record_failure
|
173
|
+
raise e
|
174
|
+
end
|
73
175
|
end
|
74
176
|
|
75
177
|
alias_method :cache_get, :get
|
@@ -114,33 +216,17 @@ class YfAsDataframe
|
|
114
216
|
end
|
115
217
|
end
|
116
218
|
|
117
|
-
def _get_cookie_and_crumb()
|
219
|
+
# Resolves the session cookie (and, when requested, the crumb) while holding
# the cookie lock.
#
# @param crumb_needed [Boolean] when true the cookie/crumb pair is obtained via
#   __get_cookie_and_crumb_basic; otherwise only _get_cookie_basic is called
#   and the crumb is nil
# @return [Array] [cookie, crumb, strategy], where strategy is the current
#   value of @@cookie_strategy
def _get_cookie_and_crumb(crumb_needed=false)
  @@cookie_lock.synchronize do
    fetched = crumb_needed ? __get_cookie_and_crumb_basic() : [_get_cookie_basic(), nil]
    session_cookie, session_crumb = fetched
    [session_cookie, session_crumb, @@cookie_strategy]
  end
end
|
146
232
|
|
@@ -170,18 +256,58 @@ class YfAsDataframe
|
|
170
256
|
|
171
257
|
# Returns the Yahoo crumb for the "basic" cookie strategy, fetching it when it
# is not already cached in @@crumb.
#
# Up to three attempts are made against the getcrumb endpoint. Every retry
# drops the cached cookie, resets the curl-impersonate executable cache (when
# that integration is loaded) and sleeps with exponential backoff. A response
# is only stored in @@crumb after it passes crumb_valid?, so an error page is
# never exposed through the class variable, even temporarily.
#
# @return [String, nil] the crumb, or nil when no cookie could be obtained
# @raise [RuntimeError] when no valid crumb was obtained after three attempts
def _get_crumb_basic()
  return @@crumb unless @@crumb.nil?

  # Retry logic similar to yfinance: try up to 3 times
  3.times do |attempt|
    begin
      # Clear cookie on retry (except first attempt) to get a fresh session
      if attempt > 0
        @@cookie = nil
        # Clear curl-impersonate executables cache to force re-selection.
        # Guarded because this file is required before the integration file.
        if defined?(CurlImpersonateIntegration)
          CurlImpersonateIntegration.instance_variable_set(:@available_executables, nil)
        end
        warn "[yf_as_dataframe] Retrying crumb fetch (attempt #{attempt + 1}/3)"
        # Exponential backoff: 2s before the 2nd attempt, 4s before the 3rd
        sleep(2 ** attempt)
      end

      return nil if (cookie = _get_cookie_basic()).nil?

      cookie_hash = ::HTTParty::CookieHash.new
      cookie_hash.add_cookies(cookie)
      options = {headers: @@user_agent_headers.dup.merge(
        { 'cookie' => cookie_hash.to_cookie_string }
      )}

      crumb_response = ::HTTParty.get('https://query1.finance.yahoo.com/v1/test/getcrumb', options)
      candidate = crumb_response.parsed_response

      # Validate crumb: must be short, no spaces, not an error message
      if crumb_valid?(candidate)
        warn "[yf_as_dataframe] Successfully fetched valid crumb on attempt #{attempt + 1}"
        @@crumb = candidate
        return @@crumb
      else
        warn "[yf_as_dataframe] Invalid crumb received on attempt #{attempt + 1}: '#{candidate.inspect}'"
        @@crumb = nil
      end
    rescue => e
      warn "[yf_as_dataframe] Error fetching crumb on attempt #{attempt + 1}: #{e.message}"
      @@crumb = nil
    end
  end

  # All attempts failed
  warn "[yf_as_dataframe] Failed to fetch valid crumb after 3 attempts"
  raise "Could not fetch a valid Yahoo Finance crumb after 3 attempts"
end
|
183
302
|
|
184
|
-
|
303
|
+
# Decides whether a value returned by the getcrumb endpoint looks like a crumb.
#
# A usable crumb is a String of 8 to 20 characters that contains no whitespace
# and is not an HTML or rate-limit error body. Non-String values (nil, or a
# Hash/Array produced when the response body was parsed as JSON) are rejected
# instead of raising NoMethodError.
#
# @param crumb [Object] candidate crumb
# @return [Boolean] true when the value can be used as a crumb
def crumb_valid?(crumb)
  return false unless crumb.is_a?(String)
  return false if crumb.include?('<html>')
  return false if crumb.include?('Too Many Requests')
  # Also rejects whitespace-only strings.
  return false if crumb.match?(/\s/)

  crumb.length.between?(8, 20)
end
|
186
312
|
|
187
313
|
def _get_cookie_csrf()
|
@@ -251,7 +377,8 @@ class YfAsDataframe
|
|
251
377
|
# puts 'reusing crumb'
|
252
378
|
return @@crumb unless @@crumb.nil?
|
253
379
|
# This cookie stored in session
|
254
|
-
|
380
|
+
cookie_csrf = _get_cookie_csrf()
|
381
|
+
return nil if cookie_csrf.nil? || (cookie_csrf.respond_to?(:empty?) && cookie_csrf.empty?)
|
255
382
|
|
256
383
|
get_args = {
|
257
384
|
url: 'https://query2.finance.yahoo.com/v1/test/getcrumb',
|
@@ -264,7 +391,7 @@ class YfAsDataframe
|
|
264
391
|
@@crumb = r.text
|
265
392
|
|
266
393
|
# puts "Didn't receive crumb"
|
267
|
-
return nil if @@crumb.nil? || '<html>'
|
394
|
+
return nil if @@crumb.nil? || @@crumb.include?('<html>') || @@crumb.length.zero?
|
268
395
|
return @@crumb
|
269
396
|
end
|
270
397
|
|
@@ -299,5 +426,124 @@ class YfAsDataframe
|
|
299
426
|
@@zache.put(:basic, nil, lifetime: 1) unless @@zache.exists?(:basic, dirty: false)
|
300
427
|
return @@zache.expired?(:basic) ? nil : @@zache.get(:basic)
|
301
428
|
end
|
429
|
+
|
430
|
+
# Pauses for a random interval between 0.1 and 0.5 seconds before a request
# goes out, to be respectful of rate limits (similar to yfinance's approach).
def throttle_request
  pause_seconds = rand(0.1..0.5)
  sleep(pause_seconds)
end
|
435
|
+
|
436
|
+
# Records one more request against the current session: increments the
# request counter and stamps the time of the latest request.
def track_session_usage
  @@request_count = @@request_count + 1
  @@last_request_time = Time.now
end
|
440
|
+
|
441
|
+
# Refreshes the session when session_needs_refresh? reports it is due,
# logging the session age and request count first.
def refresh_session_if_needed
  if session_needs_refresh?
    warn "[yf_as_dataframe] Refreshing session (age: #{session_age} seconds, requests: #{@@request_count})"
    refresh_session
  end
end
|
447
|
+
|
448
|
+
# Reports whether the current session should be discarded and rebuilt.
#
# A refresh is due when the session is older than @@session_refresh_interval,
# when more than 100 requests have been made with it, or when no cookie is
# held at all.
#
# A missing crumb is deliberately NOT a refresh trigger: the crumb is only
# fetched for the /v7/finance/download endpoint, so @@crumb is normally nil.
# Treating that as stale made every request wipe the cookie and start a new
# session.
#
# @return [Boolean]
def session_needs_refresh?
  return true if session_age > @@session_refresh_interval
  return true if @@request_count > 100 # Refresh after 100 requests
  return true if @@cookie.nil?

  false
end
|
454
|
+
|
455
|
+
# Seconds elapsed since the current session was created.
#
# @return [Float]
def session_age
  started_at = @@session_created_at
  Time.now - started_at
end
|
458
|
+
|
459
|
+
# Starts a new session: forgets the cached cookie and crumb, restarts the
# session clock, zeroes the request counter and logs the refresh.
def refresh_session
  @@cookie = @@crumb = nil
  @@session_created_at = Time.now
  @@request_count = 0
  warn "[yf_as_dataframe] Session refreshed"
end
|
466
|
+
|
467
|
+
# Circuit breaker methods
|
468
|
+
# Decides whether the circuit breaker lets a request through.
#
# Closed and half-open breakers always allow the request. An open breaker
# allows it only once more than @@circuit_breaker_timeout seconds have passed
# since the last failure, moving to half-open as it does so.
#
# @return [Boolean, nil] nil only for a state other than the three known ones
def circuit_breaker_allow_request?
  state = @@circuit_breaker_state
  return true if state == :closed || state == :half_open
  return nil unless state == :open

  seconds_since_failure = Time.now - @@last_failure_time
  return false unless seconds_since_failure > @@circuit_breaker_timeout

  @@circuit_breaker_state = :half_open
  warn "[yf_as_dataframe] Circuit breaker transitioning to half-open"
  true
end
|
484
|
+
|
485
|
+
# Registers a failed request and opens the breaker once the failure count
# reaches @@circuit_breaker_threshold (unless it is already open).
#
# The open timeout doubles for each failure beyond the threshold, starting
# from @@circuit_breaker_base_timeout.
def circuit_breaker_record_failure
  @@failure_count += 1
  @@last_failure_time = Time.now

  return if @@failure_count < @@circuit_breaker_threshold
  return if @@circuit_breaker_state == :open

  @@circuit_breaker_state = :open
  failures_over_threshold = @@failure_count - @@circuit_breaker_threshold
  @@circuit_breaker_timeout = @@circuit_breaker_base_timeout * (2 ** failures_over_threshold)
  warn "[yf_as_dataframe] Circuit breaker opened after #{@@failure_count} failures (timeout: #{@@circuit_breaker_timeout}s)"
end
|
496
|
+
|
497
|
+
# Registers a successful request.
#
# In the half-open state the breaker is closed again and the event is logged.
# In both the half-open and closed states the failure count is cleared and the
# timeout is restored to its base value. An open breaker is left untouched.
def circuit_breaker_record_success
  state = @@circuit_breaker_state
  return unless state == :half_open || state == :closed

  recovering = (state == :half_open)
  @@circuit_breaker_state = :closed if recovering
  @@failure_count = 0
  restored_timeout = (@@circuit_breaker_timeout = @@circuit_breaker_base_timeout)
  return restored_timeout unless recovering

  warn "[yf_as_dataframe] Circuit breaker closed after successful request"
end
|
509
|
+
|
510
|
+
# Classifies an HTTP response as a failure.
#
# A response fails when it is nil, when its status code is 400 or above, or
# when its body contains one of the known error-page markers.
#
# @param response [#code, #body, nil]
# @return [Boolean]
def response_failure?(response)
  return true if response.nil? || response.code >= 400

  body_text = response.body.to_s
  failure_markers = ["Too Many Requests", "Will be right back", "<html>"]
  failure_markers.any? { |marker| body_text.include?(marker) }
end
|
518
|
+
|
519
|
+
# Snapshot of the circuit breaker's current state.
#
# @return [Hash] with keys :state, :failure_count, :last_failure_time,
#   :timeout and :threshold
def circuit_breaker_status
  snapshot = {}
  snapshot[:state] = @@circuit_breaker_state
  snapshot[:failure_count] = @@failure_count
  snapshot[:last_failure_time] = @@last_failure_time
  snapshot[:timeout] = @@circuit_breaker_timeout
  snapshot[:threshold] = @@circuit_breaker_threshold
  snapshot
end
|
528
|
+
|
529
|
+
# For /v7/finance/download, scrape crumb from quote page
|
530
|
+
# Scrapes the crumb embedded in the Yahoo Finance quote page for a symbol.
#
# The page is fetched with the default browser-like headers, the
# `root.App.main = {...};` JSON blob is extracted and the crumb is read from
# context.dispatcher.stores.CrumbStore.crumb.
#
# The symbol is percent-encoded before being placed in the URL, so symbols
# such as "^GSPC" do not produce an invalid URI.
# NOTE(review): form encoding would turn a space into "+"; ticker symbols are
# not expected to contain spaces.
#
# @param symbol [String, nil] ticker symbol
# @return [String, nil] the crumb, or nil when the symbol is nil, the blob is
#   not found, or the blob cannot be parsed
def get_crumb_scrape_quote_page(symbol)
  return nil if symbol.nil?

  url = "https://finance.yahoo.com/quote/#{URI.encode_www_form_component(symbol.to_s)}"
  response = ::HTTParty.get(url, headers: @@user_agent_headers)

  # Look for root.App.main = { ... };  (to_s guards against a nil body)
  m = response.body.to_s.match(/root\.App\.main\s*=\s*(\{.*?\});/m)
  return nil unless m

  begin
    data = JSON.parse(m[1])
    crumb = data.dig('context', 'dispatcher', 'stores', 'CrumbStore', 'crumb')
    warn "[yf_as_dataframe] Scraped crumb from quote page: #{crumb.inspect}"
    crumb
  rescue => e
    warn "[yf_as_dataframe] Failed to parse crumb from quote page: #{e.message}"
    nil
  end
end
|
302
548
|
end
|
303
549
|
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# Minimal patch to make curl-impersonate the default behavior
|
2
|
+
# This file should be required after the main YfConnection class
|
3
|
+
|
4
|
+
require_relative 'curl_impersonate_integration'
|
5
|
+
|
6
|
+
class YfAsDataframe
  module YfConnection
    # Store original methods
    alias_method :get_original, :get
    alias_method :get_raw_json_original, :get_raw_json

    # Performs a GET request, trying curl-impersonate first when it is enabled
    # and otherwise (or on any failure) delegating to the original HTTParty
    # based implementation.
    #
    # Diagnostics go to stderr via `warn` and only when $VERBOSE is set;
    # nothing is written to stdout. The caller's params hash is copied rather
    # than modified in place.
    #
    # NOTE: the HTTParty fallback runs regardless of
    # CurlImpersonateIntegration.curl_impersonate_fallback. The earlier code
    # forced the fallback even when that flag was off, and this is preserved.
    #
    # @param url [String] request URL
    # @param headers [Hash, nil] extra request headers
    # @param params [Hash, nil] query parameters
    # @return [Object] the curl-impersonate response when it succeeds,
    #   otherwise whatever get_original returns
    def get(url, headers=nil, params=nil)
      # Try curl-impersonate first if enabled
      if CurlImpersonateIntegration.curl_impersonate_enabled
        begin
          # Prepare headers and params as in original method
          headers ||= {}
          params = params ? params.dup : {}
          params[:crumb] = @@crumb unless @@crumb.nil?
          cookie, crumb, _strategy = _get_cookie_and_crumb()
          crumbs = crumb.nil? ? {} : { 'crumb' => crumb }

          # Prepare headers for curl-impersonate
          curl_headers = headers.dup.merge(@@user_agent_headers)

          # Add cookie if available
          if cookie
            cookie_hash = ::HTTParty::CookieHash.new
            cookie_hash.add_cookies(cookie)
            curl_headers['Cookie'] = cookie_hash.to_cookie_string
          end

          # Add crumb if available
          # NOTE(review): this sends the crumb as a request header as well as
          # a query parameter; confirm the header is actually wanted.
          curl_headers['crumb'] = crumb if crumb

          # Make curl-impersonate request
          response = CurlImpersonateIntegration.make_request(
            url,
            headers: curl_headers,
            params: params.merge(crumbs),
            timeout: CurlImpersonateIntegration.curl_impersonate_timeout
          )

          return response if response && response.success?

          warn "curl-impersonate returned nil or failed" if $VERBOSE
        rescue => e
          # Log error but continue to fallback
          warn "curl-impersonate request failed: #{e.message}" if $VERBOSE
        end
      end

      # Fallback to original HTTParty method
      get_original(url, headers, params)
    end

    # get_raw_json uses get, so it automatically gets curl-impersonate behavior
    # No need to override it separately

    # Class-level configuration methods
    class << self
      # Turns the curl-impersonate request path on or off.
      def enable_curl_impersonate(enabled: true)
        CurlImpersonateIntegration.curl_impersonate_enabled = enabled
      end

      # Sets the curl_impersonate_fallback flag on the integration module.
      def enable_curl_impersonate_fallback(enabled: true)
        CurlImpersonateIntegration.curl_impersonate_fallback = enabled
      end

      # Sets the timeout passed to curl-impersonate requests.
      def set_curl_impersonate_timeout(timeout)
        CurlImpersonateIntegration.curl_impersonate_timeout = timeout
      end

      # Returns CurlImpersonateIntegration.available_executables.
      def get_available_curl_impersonate_executables
        CurlImpersonateIntegration.available_executables
      end
    end
  end
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
1
3
|
class YfAsDataframe
|
2
4
|
class YfinanceException < StandardError
|
3
5
|
attr_reader :msg
|
@@ -9,7 +11,7 @@ class YfAsDataframe
|
|
9
11
|
# Raised when a requested Yahoo API feature has not been implemented.
#
# NOTE: NotImplementedError descends from ScriptError, not StandardError, so a
# bare `rescue => e` does not catch this exception.
class YFNotImplementedError < NotImplementedError
  # @param str [String] name of the item whose fetching is not implemented
  def initialize(str)
    @msg = "Have not implemented fetching \"#{str}\" from Yahoo API"
    Logger.new(STDOUT).warn { @msg }
    # Pass the text to the exception so #message carries it instead of
    # falling back to the class name.
    super(@msg)
  end
end
|
15
17
|
end
|
data/lib/yf_as_dataframe.rb
CHANGED
@@ -7,6 +7,8 @@ require_relative 'yf_as_dataframe/version'
|
|
7
7
|
require_relative 'yf_as_dataframe/utils'
|
8
8
|
require_relative 'yf_as_dataframe/yfinance_exception'
|
9
9
|
require_relative 'yf_as_dataframe/yf_connection'
|
10
|
+
require_relative 'yf_as_dataframe/curl_impersonate_integration'
|
11
|
+
require_relative 'yf_as_dataframe/yf_connection_minimal_patch'
|
10
12
|
require_relative 'yf_as_dataframe/price_technical'
|
11
13
|
require_relative 'yf_as_dataframe/price_history'
|
12
14
|
require_relative 'yf_as_dataframe/quote'
|