yf_as_dataframe 0.3.1 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +99 -0
- data/MINIMAL_INTEGRATION.md +227 -0
- data/README.md +58 -1
- data/lib/yf_as_dataframe/curl_impersonate_integration.rb +139 -0
- data/lib/yf_as_dataframe/financials.rb +3 -2
- data/lib/yf_as_dataframe/holders.rb +5 -2
- data/lib/yf_as_dataframe/multi.rb +9 -8
- data/lib/yf_as_dataframe/price_history.rb +51 -21
- data/lib/yf_as_dataframe/price_technical.rb +0 -1
- data/lib/yf_as_dataframe/quote.rb +4 -3
- data/lib/yf_as_dataframe/ticker.rb +12 -9
- data/lib/yf_as_dataframe/utils.rb +68 -21
- data/lib/yf_as_dataframe/version.rb +1 -1
- data/lib/yf_as_dataframe/yf_connection.rb +235 -48
- data/lib/yf_as_dataframe/yf_connection_minimal_patch.rb +129 -0
- data/lib/yf_as_dataframe/yfinance_exception.rb +3 -1
- data/lib/yf_as_dataframe.rb +2 -0
- data/smoke_test.rb +64 -0
- metadata +52 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef9b7d363312088694d19ce68cd538579f9b3f2b93f57cf5e111f3247cd87b00
|
4
|
+
data.tar.gz: 5bab2fa2aa65c4b6496025aa13f86c15d5df49deeeff2f6b60db07818d6fe1c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0019c0c2f4293f3c1b3684bff81f2e0a78eb614c505031aaa77c4ba541c88def5189ed8d96310dc6b7217f3651f37f7ea9dbb64c05009a2286d12856027368b2'
|
7
|
+
data.tar.gz: 39b3d46681bbc24409315878a8722e563266553e9b81592051404ef53df6aef397c0248ab1fc724ea6edfc5f6d647e2fb89de6bf675b975aeeb592ae7244f66c
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
yf_as_dataframe (0.4.0)
|
5
|
+
activesupport
|
6
|
+
httparty
|
7
|
+
nokogiri
|
8
|
+
polars-df (~> 0.12.0)
|
9
|
+
tulirb
|
10
|
+
tzinfo
|
11
|
+
tzinfo-data
|
12
|
+
zache
|
13
|
+
|
14
|
+
GEM
|
15
|
+
remote: https://rubygems.org/
|
16
|
+
specs:
|
17
|
+
activesupport (7.2.2.1)
|
18
|
+
base64
|
19
|
+
benchmark (>= 0.3)
|
20
|
+
bigdecimal
|
21
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
22
|
+
connection_pool (>= 2.2.5)
|
23
|
+
drb
|
24
|
+
i18n (>= 1.6, < 2)
|
25
|
+
logger (>= 1.4.2)
|
26
|
+
minitest (>= 5.1)
|
27
|
+
securerandom (>= 0.3)
|
28
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
29
|
+
ast (2.4.3)
|
30
|
+
base64 (0.3.0)
|
31
|
+
benchmark (0.4.1)
|
32
|
+
bigdecimal (3.2.2)
|
33
|
+
concurrent-ruby (1.3.5)
|
34
|
+
connection_pool (2.5.3)
|
35
|
+
csv (3.3.5)
|
36
|
+
drb (2.2.3)
|
37
|
+
httparty (0.23.1)
|
38
|
+
csv
|
39
|
+
mini_mime (>= 1.0.0)
|
40
|
+
multi_xml (>= 0.5.2)
|
41
|
+
i18n (1.14.7)
|
42
|
+
concurrent-ruby (~> 1.0)
|
43
|
+
json (2.12.2)
|
44
|
+
language_server-protocol (3.17.0.5)
|
45
|
+
lint_roller (1.1.0)
|
46
|
+
logger (1.7.0)
|
47
|
+
mini_mime (1.1.5)
|
48
|
+
minitest (5.25.5)
|
49
|
+
multi_xml (0.7.1)
|
50
|
+
bigdecimal (~> 3.1)
|
51
|
+
nokogiri (1.18.8-arm64-darwin)
|
52
|
+
racc (~> 1.4)
|
53
|
+
parallel (1.27.0)
|
54
|
+
parser (3.3.8.0)
|
55
|
+
ast (~> 2.4.1)
|
56
|
+
racc
|
57
|
+
polars-df (0.12.0-arm64-darwin)
|
58
|
+
bigdecimal
|
59
|
+
prism (1.4.0)
|
60
|
+
racc (1.8.1)
|
61
|
+
rainbow (3.1.1)
|
62
|
+
rake (13.3.0)
|
63
|
+
regexp_parser (2.10.0)
|
64
|
+
rubocop (1.77.0)
|
65
|
+
json (~> 2.3)
|
66
|
+
language_server-protocol (~> 3.17.0.2)
|
67
|
+
lint_roller (~> 1.1.0)
|
68
|
+
parallel (~> 1.10)
|
69
|
+
parser (>= 3.3.0.2)
|
70
|
+
rainbow (>= 2.2.2, < 4.0)
|
71
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
72
|
+
rubocop-ast (>= 1.45.1, < 2.0)
|
73
|
+
ruby-progressbar (~> 1.7)
|
74
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
75
|
+
rubocop-ast (1.45.1)
|
76
|
+
parser (>= 3.3.7.2)
|
77
|
+
prism (~> 1.4)
|
78
|
+
ruby-progressbar (1.13.0)
|
79
|
+
securerandom (0.4.1)
|
80
|
+
tulirb (1.0.0)
|
81
|
+
tzinfo (2.0.6)
|
82
|
+
concurrent-ruby (~> 1.0)
|
83
|
+
tzinfo-data (1.2025.2)
|
84
|
+
tzinfo (>= 1.0.0)
|
85
|
+
unicode-display_width (3.1.4)
|
86
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
87
|
+
unicode-emoji (4.0.4)
|
88
|
+
zache (0.15.0)
|
89
|
+
|
90
|
+
PLATFORMS
|
91
|
+
arm64-darwin-23
|
92
|
+
|
93
|
+
DEPENDENCIES
|
94
|
+
rake (~> 13.0)
|
95
|
+
rubocop (~> 1.21)
|
96
|
+
yf_as_dataframe!
|
97
|
+
|
98
|
+
BUNDLED WITH
|
99
|
+
2.6.9
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# Minimal Curl-Impersonate Integration
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
|
5
|
+
This is a minimal integration that makes curl-impersonate the **default behavior** for all Yahoo Finance requests. No changes to your existing code are required - curl-impersonate is used automatically to bypass TLS fingerprinting.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
### 1. Install curl-impersonate
|
10
|
+
|
11
|
+
```bash
|
12
|
+
# macOS
|
13
|
+
brew tap shakacode/brew
|
14
|
+
brew install curl-impersonate
|
15
|
+
|
16
|
+
# Verify installation
|
17
|
+
ls -la /usr/local/bin/curl_*
|
18
|
+
```
|
19
|
+
|
20
|
+
### 2. Custom Installation Directory (Optional)
|
21
|
+
|
22
|
+
If you have curl-impersonate installed in a different directory, you can set the `CURL_IMPERSONATE_DIR` environment variable:
|
23
|
+
|
24
|
+
```bash
|
25
|
+
# Set custom directory
|
26
|
+
export CURL_IMPERSONATE_DIR="/opt/curl-impersonate/bin"
|
27
|
+
|
28
|
+
# Or set it for a single command
|
29
|
+
CURL_IMPERSONATE_DIR="/opt/curl-impersonate/bin" ruby your_script.rb
|
30
|
+
```
|
31
|
+
|
32
|
+
The default directory is `/usr/local/bin` if the environment variable is not set.
|
33
|
+
|
34
|
+
### 3. Add Integration Files
|
35
|
+
|
36
|
+
Copy these two files to your project's `lib/yf_as_dataframe/` directory:
|
37
|
+
|
38
|
+
1. `lib/yf_as_dataframe/curl_impersonate_integration.rb`
|
39
|
+
2. `lib/yf_as_dataframe/yf_connection_minimal_patch.rb`
|
40
|
+
|
41
|
+
### 4. Enable Integration
|
42
|
+
|
43
|
+
Add these two lines to your code **before** making any Yahoo Finance requests:
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
require 'yf_as_dataframe/curl_impersonate_integration'
|
47
|
+
require 'yf_as_dataframe/yf_connection_minimal_patch'
|
48
|
+
```
|
49
|
+
|
50
|
+
## Usage
|
51
|
+
|
52
|
+
### Default Behavior (Recommended)
|
53
|
+
|
54
|
+
Your existing code works exactly as before, but now uses curl-impersonate automatically:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
require 'yf_as_dataframe'
|
58
|
+
require 'yf_as_dataframe/curl_impersonate_integration'
|
59
|
+
require 'yf_as_dataframe/yf_connection_minimal_patch'
|
60
|
+
|
61
|
+
# Your existing code - no changes needed!
|
62
|
+
msft = YfAsDataframe::Ticker.new("MSFT")
|
63
|
+
hist = msft.history(period: "1mo") # Uses curl-impersonate automatically
|
64
|
+
puts "Retrieved #{hist.length} data points"
|
65
|
+
```
|
66
|
+
|
67
|
+
### Configuration (Optional)
|
68
|
+
|
69
|
+
You can configure the behavior if needed:
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
# Disable curl-impersonate (use HTTParty only)
|
73
|
+
YfAsDataframe::YfConnection.enable_curl_impersonate(false)
|
74
|
+
|
75
|
+
# Disable fallback (fail if curl-impersonate fails)
|
76
|
+
YfAsDataframe::YfConnection.enable_curl_impersonate_fallback(false)
|
77
|
+
|
78
|
+
# Set timeout
|
79
|
+
YfAsDataframe::YfConnection.set_curl_impersonate_timeout(45)
|
80
|
+
|
81
|
+
# Check available executables
|
82
|
+
executables = YfAsDataframe::YfConnection.get_available_curl_impersonate_executables
|
83
|
+
puts "Available: #{executables.length} executables"
|
84
|
+
|
85
|
+
# Check which directory is being used
|
86
|
+
puts "Using directory: #{YfAsDataframe::CurlImpersonateIntegration.executable_directory}"
|
87
|
+
```
|
88
|
+
|
89
|
+
## How It Works
|
90
|
+
|
91
|
+
1. **Automatic Detection**: Dynamically finds curl-impersonate executables in the configured directory
|
92
|
+
2. **Default Behavior**: Uses curl-impersonate for all requests by default
|
93
|
+
3. **Seamless Fallback**: Falls back to HTTParty if curl-impersonate fails
|
94
|
+
4. **Zero Interface Changes**: All existing method signatures remain the same
|
95
|
+
|
96
|
+
## Key Features
|
97
|
+
|
98
|
+
### ✅ **Zero Code Changes**
|
99
|
+
- Your existing code works exactly as before
|
100
|
+
- No new method names to learn
|
101
|
+
- No changes to method signatures
|
102
|
+
|
103
|
+
### ✅ **Automatic Browser Rotation**
|
104
|
+
- Randomly selects from available curl-impersonate executables
|
105
|
+
- Supports Chrome, Firefox, Edge, and Safari configurations
|
106
|
+
- Automatically adapts to new browser versions
|
107
|
+
|
108
|
+
### ✅ **Robust Fallback**
|
109
|
+
- Falls back to HTTParty if curl-impersonate fails
|
110
|
+
- Configurable fallback behavior
|
111
|
+
- Maintains compatibility with existing code
|
112
|
+
|
113
|
+
### ✅ **Dynamic Discovery**
|
114
|
+
- Automatically finds curl-impersonate executables
|
115
|
+
- Configurable directory via environment variable
|
116
|
+
- Works with any curl-impersonate installation
|
117
|
+
|
118
|
+
### ✅ **Environment Variable Support**
|
119
|
+
- Set `CURL_IMPERSONATE_DIR` to customize installation directory
|
120
|
+
- Defaults to `/usr/local/bin` if not set
|
121
|
+
- Supports both persistent and per-command configuration
|
122
|
+
|
123
|
+
## Example
|
124
|
+
|
125
|
+
```ruby
|
126
|
+
require 'yf_as_dataframe'
|
127
|
+
require 'yf_as_dataframe/curl_impersonate_integration'
|
128
|
+
require 'yf_as_dataframe/yf_connection_minimal_patch'
|
129
|
+
|
130
|
+
# Check what's available
|
131
|
+
executables = YfAsDataframe::YfConnection.get_available_curl_impersonate_executables
|
132
|
+
puts "Found #{executables.length} curl-impersonate executables"
|
133
|
+
|
134
|
+
# Check which directory is being used
|
135
|
+
puts "Using directory: #{YfAsDataframe::CurlImpersonateIntegration.executable_directory}"
|
136
|
+
|
137
|
+
# Use as normal - curl-impersonate is used automatically
|
138
|
+
msft = YfAsDataframe::Ticker.new("MSFT")
|
139
|
+
|
140
|
+
begin
|
141
|
+
# These all use curl-impersonate automatically
|
142
|
+
hist = msft.history(period: "1mo")
|
143
|
+
info = msft.info
|
144
|
+
actions = msft.actions
|
145
|
+
|
146
|
+
puts "✅ All requests successful using curl-impersonate"
|
147
|
+
puts "History: #{hist.length} data points"
|
148
|
+
puts "Company: #{info['longName']}"
|
149
|
+
puts "Actions: #{actions.length} items"
|
150
|
+
rescue => e
|
151
|
+
puts "❌ Error: #{e.message}"
|
152
|
+
end
|
153
|
+
```
|
154
|
+
|
155
|
+
## Troubleshooting
|
156
|
+
|
157
|
+
### "No curl-impersonate executables found"
|
158
|
+
```bash
|
159
|
+
# Check if executables exist in default location
|
160
|
+
ls -la /usr/local/bin/curl_*
|
161
|
+
|
162
|
+
# Check if executables exist in custom location
|
163
|
+
ls -la $CURL_IMPERSONATE_DIR/curl_*
|
164
|
+
|
165
|
+
# If not found, reinstall curl-impersonate
|
166
|
+
brew reinstall curl-impersonate
|
167
|
+
```
|
168
|
+
|
169
|
+
### Permission errors
|
170
|
+
```bash
|
171
|
+
sudo chmod +x /usr/local/bin/curl_*
|
172
|
+
# or
|
173
|
+
sudo chmod +x $CURL_IMPERSONATE_DIR/curl_*
|
174
|
+
```
|
175
|
+
|
176
|
+
### Still getting blocked
|
177
|
+
```ruby
|
178
|
+
# Try disabling fallback to see if curl-impersonate is working
|
179
|
+
YfAsDataframe::YfConnection.enable_curl_impersonate_fallback(false)
|
180
|
+
|
181
|
+
# Check available executables
|
182
|
+
executables = YfAsDataframe::YfConnection.get_available_curl_impersonate_executables
|
183
|
+
puts executables.map { |e| "#{e[:browser]} #{e[:executable]}" }
|
184
|
+
|
185
|
+
# Check which directory is being used
|
186
|
+
puts "Directory: #{YfAsDataframe::CurlImpersonateIntegration.executable_directory}"
|
187
|
+
```
|
188
|
+
|
189
|
+
## Configuration Options
|
190
|
+
|
191
|
+
| Option | Default | Description |
|
192
|
+
|--------|---------|-------------|
|
193
|
+
| `curl_impersonate_enabled` | `true` | Use curl-impersonate for requests |
|
194
|
+
| `curl_impersonate_fallback` | `true` | Fall back to HTTParty if curl-impersonate fails |
|
195
|
+
| `curl_impersonate_timeout` | `30` | Timeout in seconds for curl-impersonate requests |
|
196
|
+
| `CURL_IMPERSONATE_DIR` | `/usr/local/bin` | Directory containing curl-impersonate executables |
|
197
|
+
|
198
|
+
## Benefits
|
199
|
+
|
200
|
+
1. **Immediate Solution**: Bypasses TLS fingerprinting immediately
|
201
|
+
2. **Zero Learning Curve**: No new APIs or methods to learn
|
202
|
+
3. **Future-Proof**: Automatically adapts to new curl-impersonate versions
|
203
|
+
4. **Robust**: Multiple fallback strategies ensure reliability
|
204
|
+
5. **Minimal**: Only two small files to add
|
205
|
+
6. **Flexible**: Configurable installation directory via environment variable
|
206
|
+
|
207
|
+
## Comparison with Previous Approach
|
208
|
+
|
209
|
+
| Aspect | Previous Approach | Minimal Approach |
|
210
|
+
|--------|------------------|------------------|
|
211
|
+
| Interface Changes | New method names | No changes |
|
212
|
+
| Learning Curve | High | Zero |
|
213
|
+
| Integration | Complex | Simple |
|
214
|
+
| Default Behavior | HTTParty | curl-impersonate |
|
215
|
+
| Configuration | Required | Optional |
|
216
|
+
| Files to Add | 3 files | 2 files |
|
217
|
+
| Directory Config | Hardcoded | Environment variable |
|
218
|
+
|
219
|
+
## Next Steps
|
220
|
+
|
221
|
+
1. **Install curl-impersonate** following the instructions above
|
222
|
+
2. **Set CURL_IMPERSONATE_DIR** if using a custom installation directory
|
223
|
+
3. **Add the two integration files** to your project
|
224
|
+
4. **Add the require statements** to your code
|
225
|
+
5. **Test with your existing code** - it should work immediately
|
226
|
+
|
227
|
+
That's it! Your existing Yahoo Finance scraping code will now automatically use curl-impersonate to bypass TLS fingerprinting.
|
data/README.md
CHANGED
@@ -12,7 +12,7 @@
|
|
12
12
|
Yahoo, Inc.**
|
13
13
|
|
14
14
|
yf_as_dataframe is **not** affiliated, endorsed, or vetted by Yahoo, Inc. It is
|
15
|
-
an open-source tool that uses Yahoo's publicly available APIs, and is
|
15
|
+
an open-source tool that uses Yahoo's publicly available APIs, and is **only**
|
16
16
|
intended for research and educational purposes.
|
17
17
|
|
18
18
|
**You should refer to Yahoo!'s terms of use**
|
@@ -197,6 +197,63 @@ YfAsDataframe.zlema(df, column: 'Adj Close', window: 5)
|
|
197
197
|
|
198
198
|
---
|
199
199
|
|
200
|
+
## TLS Fingerprinting Protection
|
201
|
+
|
202
|
+
**New in v0.4.0**: This gem now includes built-in support for [curl-impersonate](https://github.com/lwthiker/curl-impersonate). The curl-impersonate integration is **enabled by default** in v0.4.0+, so existing code automatically uses curl-impersonate to bypass TLS fingerprinting.
|
203
|
+
|
204
|
+
### Installation Requirements
|
205
|
+
|
206
|
+
To use the TLS fingerprinting protection, you need to install curl-impersonate:
|
207
|
+
|
208
|
+
```bash
|
209
|
+
# macOS
|
210
|
+
brew tap shakacode/brew
|
211
|
+
brew install curl-impersonate
|
212
|
+
|
213
|
+
# Verify installation
|
214
|
+
ls -la /usr/local/bin/curl_*
|
215
|
+
```
|
216
|
+
|
217
|
+
### Custom Installation Directory
|
218
|
+
|
219
|
+
The gem looks for the curl-impersonate binaries in the directory named by the `CURL_IMPERSONATE_DIR` environment variable;
|
220
|
+
if it is not assigned, the default location of the binaries is `/usr/local/bin`.
|
221
|
+
The code will randomly select one of the binaries (expected to be named "curl_chrome*", "curl_ff*", "curl_edge*", etc.) for its communications with the servers.
|
222
|
+
|
223
|
+
```bash
|
224
|
+
# Set custom directory
|
225
|
+
export CURL_IMPERSONATE_DIR="/opt/curl-impersonate/bin"
|
226
|
+
|
227
|
+
# Or set it for a single command
|
228
|
+
CURL_IMPERSONATE_DIR="/opt/curl-impersonate/bin" ruby your_script.rb
|
229
|
+
```
|
230
|
+
|
231
|
+
### Configuration (Optional)
|
232
|
+
|
233
|
+
You can configure the curl-impersonate behavior if needed:
|
234
|
+
|
235
|
+
```ruby
|
236
|
+
# Disable curl-impersonate (use HTTParty only)
|
237
|
+
YfAsDataframe::YfConnection.enable_curl_impersonate(false)
|
238
|
+
|
239
|
+
# Disable fallback (fail if curl-impersonate fails)
|
240
|
+
YfAsDataframe::YfConnection.enable_curl_impersonate_fallback(false)
|
241
|
+
|
242
|
+
# Set timeout
|
243
|
+
YfAsDataframe::YfConnection.set_curl_impersonate_timeout(45)
|
244
|
+
|
245
|
+
# Check available executables
|
246
|
+
executables = YfAsDataframe::YfConnection.get_available_curl_impersonate_executables
|
247
|
+
puts "Available: #{executables.length} executables"
|
248
|
+
|
249
|
+
# Check which directory is being used
|
250
|
+
puts "Using directory: #{YfAsDataframe::CurlImpersonateIntegration.executable_directory}"
|
251
|
+
```
|
252
|
+
|
253
|
+
For more detailed information, see [MINIMAL_INTEGRATION.md](MINIMAL_INTEGRATION.md).
|
254
|
+
|
255
|
+
---
|
256
|
+
|
200
257
|
## Graphing
|
201
258
|
|
202
259
|
To graph any of the series using [Vega](https://github.com/ankane/vega-ruby), per the information [here](https://github.com/ankane/vega-ruby#exporting-charts-experimental), you will need to run
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'json'
require 'open3'
require 'ostruct'
require 'timeout'
require 'uri'
|
5
|
+
|
6
|
+
class YfAsDataframe
  # Issues HTTP GET requests by spawning a curl-impersonate binary (any
  # executable named "curl_*" in the configured directory) and wraps the
  # output in an HTTParty-like response object.
  #
  # All settings are class-instance variables on this module, exposed through
  # the accessors declared below.
  module CurlImpersonateIntegration
    @curl_impersonate_enabled = true
    @curl_impersonate_fallback = true
    @curl_impersonate_timeout = 30          # seconds; passed to curl as --max-time
    @curl_impersonate_connect_timeout = 10  # seconds; passed to curl as --connect-timeout
    @curl_impersonate_retries = 2           # passed to curl as --retry
    @curl_impersonate_retry_delay = 1       # seconds; passed to curl as --retry-delay
    # NOTE(review): this value is stored and exposed but make_request does not
    # read it; the watchdog below fires at (timeout + 10) seconds instead.
    @curl_impersonate_process_timeout = 60
    # When true, diagnostic lines (including the full command line and its
    # headers) are written to stderr. Off by default so the library stays quiet.
    @curl_impersonate_debug = false

    class << self
      attr_accessor :curl_impersonate_enabled, :curl_impersonate_fallback,
                    :curl_impersonate_timeout, :curl_impersonate_connect_timeout,
                    :curl_impersonate_retries, :curl_impersonate_retry_delay,
                    :curl_impersonate_process_timeout, :curl_impersonate_debug
    end

    # Directory searched for curl-impersonate executables.
    #
    # @return [String] ENV['CURL_IMPERSONATE_DIR'] when set, else '/usr/local/bin'
    def self.executable_directory
      ENV['CURL_IMPERSONATE_DIR'] || '/usr/local/bin'
    end

    # Lists the runnable "curl_*" files in executable_directory.
    #
    # The result is memoized on first call (an empty result is memoized too).
    # Entries that are not regular, executable files are skipped, since they
    # could never be spawned successfully.
    #
    # @return [Array<Hash>] hashes with :path, :executable (basename) and
    #   :browser (:chrome, :firefox, :edge, :safari or :unknown)
    def self.available_executables
      @available_executables ||= begin
        pattern = File.join(executable_directory, 'curl_*')
        runnable = Dir.glob(pattern).select { |path| File.file?(path) && File.executable?(path) }
        runnable.map do |path|
          name = File.basename(path)
          { path: path, executable: name, browser: browser_for(name) }
        end
      end
    end

    # Picks one of the available executables at random.
    #
    # @return [Hash, nil] an entry from available_executables, or nil when none exist
    def self.get_random_executable
      available_executables.sample
    end

    # Performs a GET through a randomly chosen curl-impersonate executable.
    #
    # @param url [String] target URL; may already contain a query string
    # @param headers [Hash] request headers, each sent as "-H key: value"
    # @param params [Hash] query parameters; form-encoded and appended to url
    # @param timeout [Numeric, nil] seconds for --max-time (defaults to curl_impersonate_timeout)
    # @param retries [Integer, nil] value for --retry (defaults to curl_impersonate_retries)
    # @return [OpenStruct, nil] object responding to body, code (always 200),
    #   parsed_response and success? (always true); nil when no executable is
    #   available, curl exits non-zero, or spawning raises
    def self.make_request(url, headers: {}, params: {}, timeout: nil, retries: nil)
      executable_info = get_random_executable
      return nil unless executable_info

      timeout ||= @curl_impersonate_timeout
      retries ||= @curl_impersonate_retries

      cmd = build_command(executable_info[:path], url, headers, params, timeout, retries)
      debug_log("command: #{cmd.join(' ')}")
      debug_log("timeout: #{timeout} seconds")

      stdout_str, stderr_str, status = run_with_watchdog(cmd, timeout)
      debug_log("stdout: #{stdout_str[0..200]}...") unless stdout_str.empty?
      debug_log("stderr: #{stderr_str}") unless stderr_str.empty?
      debug_log("status: #{status.exitstatus}")

      unless status.success?
        debug_log("failed with error: curl failed with code #{status.exitstatus}: #{stderr_str}")
        return nil
      end

      response = OpenStruct.new(
        body: stdout_str,
        code: 200, # --fail makes curl exit non-zero on HTTP errors, so success is reported as 200
        parsed_response: parse_json_if_possible(stdout_str)
      )
      response.define_singleton_method(:success?) { true }
      response
    rescue StandardError => e
      # Best effort: callers treat nil as "curl-impersonate unavailable".
      debug_log("exception: #{e.message}")
      nil
    end

    # Parses the body as JSON, returning the raw body when it is not valid JSON.
    # (Kept publicly callable: the former bare `private` never applied to
    # `def self.` methods.)
    #
    # @param response_body [String]
    # @return [Object, String] parsed JSON value, or response_body unchanged
    def self.parse_json_if_possible(response_body)
      JSON.parse(response_body)
    rescue JSON::ParserError
      response_body
    end

    # Maps an executable basename to a browser family symbol.
    def self.browser_for(name)
      case name
      when /\Acurl_chrome/ then :chrome
      when /\Acurl_ff/ then :firefox
      when /\Acurl_edge/ then :edge
      when /\Acurl_safari/ then :safari
      else :unknown
      end
    end

    # Builds the argv array for curl; the URL (with encoded params) goes last.
    def self.build_command(path, url, headers, params, timeout, retries)
      cmd = [
        path,
        '--max-time', timeout.to_s,
        '--connect-timeout', @curl_impersonate_connect_timeout.to_s,
        '--retry', retries.to_s,
        '--retry-delay', @curl_impersonate_retry_delay.to_s,
        '--retry-max-time', (timeout * 2).to_s,
        '--fail',
        '--silent',
        '--show-error'
      ]
      (headers || {}).each { |key, value| cmd.push('-H', "#{key}: #{value}") }

      unless params.nil? || params.empty?
        # Form-encode so values containing '&', '/', spaces etc. survive intact.
        separator = url.include?('?') ? '&' : '?'
        url = "#{url}#{separator}#{URI.encode_www_form(params)}"
      end
      cmd << url
    end

    # Spawns cmd, collects its output, and kills it if it outlives
    # (timeout + 10) seconds.
    #
    # @return [Array(String, String, Process::Status)] stdout, stderr, exit status
    def self.run_with_watchdog(cmd, timeout)
      Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
        stdin.close
        finished = false
        watchdog = Thread.new do
          sleep(timeout + 10)
          unless finished
            debug_log("Killing curl-impersonate PID #{wait_thr.pid} after timeout")
            terminate(wait_thr)
          end
        end

        begin
          # Drain stderr concurrently so a chatty child cannot block on a full
          # stderr pipe while we are still reading stdout.
          stderr_reader = Thread.new { stderr.read }
          out = stdout.read
          err = stderr_reader.value
          status = wait_thr.value
          finished = true
          [out, err, status]
        ensure
          watchdog.kill
        end
      end
    end

    # Sends TERM, waits one second, then KILL if the process is still alive.
    def self.terminate(wait_thr)
      pid = wait_thr.pid
      signal_quietly('TERM', pid)
      sleep(1)
      signal_quietly('KILL', pid) if wait_thr.alive?
    end

    # Sends a signal, ignoring the case where the process is already gone.
    def self.signal_quietly(signal, pid)
      Process.kill(signal, pid)
    rescue StandardError
      nil
    end

    # Writes a diagnostic line to stderr when curl_impersonate_debug is on.
    def self.debug_log(message)
      warn("DEBUG: curl-impersonate #{message}") if @curl_impersonate_debug
    end

    private_class_method :browser_for, :build_command, :run_with_watchdog,
                         :terminate, :signal_quietly, :debug_log
  end
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'polars-df'
|
2
|
+
require 'logger'
|
2
3
|
|
3
4
|
class YfAsDataframe
|
4
5
|
module Financials
|
@@ -111,7 +112,7 @@ class YfAsDataframe
|
|
111
112
|
ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/#{symbol}?symbol=#{symbol}"
|
112
113
|
url = ts_url_base + "&type=" + ts_keys.map { |k| "#{timescale}#{k}" }.join(",")
|
113
114
|
start_dt = DateTime.new(2016, 12, 31)
|
114
|
-
end_dt =
|
115
|
+
end_dt = Time.now.tomorrow.midnight
|
115
116
|
url += "&period1=#{start_dt.to_i}&period2=#{end_dt.to_i}"
|
116
117
|
|
117
118
|
json_str = get(url).parsed_response
|
@@ -160,7 +161,7 @@ class YfAsDataframe
|
|
160
161
|
statement = _create_financials_table(nam, timescale)
|
161
162
|
return statement unless statement.nil?
|
162
163
|
rescue Yfin::YfinDataException => e
|
163
|
-
|
164
|
+
Logger.new(STDOUT).error {"#{@symbol}: Failed to create #{nam} financials table for reason: #{e}"}
|
164
165
|
end
|
165
166
|
Polars::DataFrame.new()
|
166
167
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'open-uri'
|
3
|
+
|
1
4
|
class YfAsDataframe
|
2
5
|
module Holders
|
3
6
|
extend ActiveSupport::Concern
|
@@ -97,7 +100,7 @@ class YfAsDataframe
|
|
97
100
|
result = get_raw_json(QUOTE_SUMMARY_URL + "/#{symbol}", user_agent_headers=user_agent_headers, params=params_dict)
|
98
101
|
# Rails.logger.info { "#{__FILE__}:#{__LINE__} result = #{result.inspect}" }
|
99
102
|
rescue Exception => e
|
100
|
-
|
103
|
+
Logger.new(STDOUT).error("ERROR: #{e.message}")
|
101
104
|
return nil
|
102
105
|
end
|
103
106
|
return result
|
@@ -133,7 +136,7 @@ class YfAsDataframe
|
|
133
136
|
|
134
137
|
def _parse_result(result)
|
135
138
|
data = result.parsed_response['quoteSummary']['result'].first #.dig('quoteSummary', 'result', 0)
|
136
|
-
|
139
|
+
Logger.new(STDOUT).info { "#{__FILE__}:#{__LINE__} data = #{data.inspect}" }
|
137
140
|
_parse_institution_ownership(data['institutionOwnership'])
|
138
141
|
_parse_fund_ownership(data['fundOwnership'])
|
139
142
|
_parse_major_holders_breakdown(data['majorHoldersBreakdown'])
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'polars-df'
|
2
|
+
require 'logger'
|
2
3
|
|
3
4
|
class YfAsDataframe
|
4
5
|
class Multi
|
@@ -59,7 +60,7 @@ class YfAsDataframe
|
|
59
60
|
# session: None or Session
|
60
61
|
# Optional. Pass your own session object to be used for all requests
|
61
62
|
# """
|
62
|
-
logger =
|
63
|
+
logger = Logger.new(STDOUT)
|
63
64
|
|
64
65
|
if show_errors
|
65
66
|
YfAsDataframe::Utils.print_once("yfinance: download(show_errors=#{show_errors}) argument is deprecated and will be removed in future version. Do this instead: logging.getLogger('yfinance').setLevel(logging.ERROR)")
|
@@ -118,7 +119,7 @@ class YfAsDataframe
|
|
118
119
|
@shared::_PROGRESS_BAR.completed if progress
|
119
120
|
|
120
121
|
unless @shared::_ERRORS.empty?
|
121
|
-
logger.error("\n#{@shared::_ERRORS.length} Failed download#{@shared::_ERRORS.length > 1 ? 's' : ''}:")
|
122
|
+
# logger.error("\n#{@shared::_ERRORS.length} Failed download#{@shared::_ERRORS.length > 1 ? 's' : ''}:")
|
122
123
|
|
123
124
|
errors = {}
|
124
125
|
@shared::_ERRORS.each do |ticker, err|
|
@@ -126,9 +127,9 @@ class YfAsDataframe
|
|
126
127
|
errors[err] ||= []
|
127
128
|
errors[err] << ticker
|
128
129
|
end
|
129
|
-
errors.each do |err, tickers|
|
130
|
-
|
131
|
-
end
|
130
|
+
# errors.each do |err, tickers|
|
131
|
+
# logger.error("#{tickers.join(', ')}: #{err}")
|
132
|
+
# end
|
132
133
|
|
133
134
|
tbs = {}
|
134
135
|
@shared::_TRACEBACKS.each do |ticker, tb|
|
@@ -136,9 +137,9 @@ class YfAsDataframe
|
|
136
137
|
tbs[tb] ||= []
|
137
138
|
tbs[tb] << ticker
|
138
139
|
end
|
139
|
-
tbs.each do |tb, tickers|
|
140
|
-
|
141
|
-
end
|
140
|
+
# tbs.each do |tb, tickers|
|
141
|
+
# logger.debug("#{tickers.join(', ')}: #{tb}")
|
142
|
+
# end
|
142
143
|
end
|
143
144
|
|
144
145
|
if ignore_tz
|