sqa 0.0.24 → 0.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.goose/memory/development.txt +3 -0
- data/.semver +6 -0
- data/ARCHITECTURE.md +648 -0
- data/CHANGELOG.md +82 -0
- data/CLAUDE.md +653 -0
- data/COMMITS.md +196 -0
- data/DATAFRAME_ARCHITECTURE_REVIEW.md +421 -0
- data/NEXT-STEPS.md +154 -0
- data/README.md +812 -262
- data/TASKS.md +358 -0
- data/TEST_RESULTS.md +140 -0
- data/TODO.md +42 -0
- data/_notes.txt +25 -0
- data/bin/sqa-console +11 -0
- data/data/talk_talk.json +103284 -0
- data/develop_summary.md +313 -0
- data/docs/advanced/backtesting.md +206 -0
- data/docs/advanced/ensemble.md +68 -0
- data/docs/advanced/fpop.md +153 -0
- data/docs/advanced/index.md +112 -0
- data/docs/advanced/multi-timeframe.md +67 -0
- data/docs/advanced/pattern-matcher.md +75 -0
- data/docs/advanced/portfolio-optimizer.md +79 -0
- data/docs/advanced/portfolio.md +166 -0
- data/docs/advanced/risk-management.md +210 -0
- data/docs/advanced/strategy-generator.md +158 -0
- data/docs/advanced/streaming.md +209 -0
- data/docs/ai_and_ml.md +80 -0
- data/docs/api/dataframe.md +1115 -0
- data/docs/api/index.md +126 -0
- data/docs/assets/css/custom.css +88 -0
- data/docs/assets/js/mathjax.js +18 -0
- data/docs/concepts/index.md +68 -0
- data/docs/contributing/index.md +60 -0
- data/docs/data-sources/index.md +66 -0
- data/docs/data_frame.md +317 -97
- data/docs/factors_that_impact_price.md +26 -0
- data/docs/finviz.md +11 -0
- data/docs/fx_pro_bit.md +25 -0
- data/docs/genetic_programming.md +104 -0
- data/docs/getting-started/index.md +123 -0
- data/docs/getting-started/installation.md +229 -0
- data/docs/getting-started/quick-start.md +244 -0
- data/docs/i_gotta_an_idea.md +22 -0
- data/docs/index.md +163 -0
- data/docs/indicators/index.md +97 -0
- data/docs/indicators.md +110 -24
- data/docs/options.md +8 -0
- data/docs/strategies/bollinger-bands.md +146 -0
- data/docs/strategies/consensus.md +64 -0
- data/docs/strategies/custom.md +310 -0
- data/docs/strategies/ema.md +53 -0
- data/docs/strategies/index.md +92 -0
- data/docs/strategies/kbs.md +164 -0
- data/docs/strategies/macd.md +96 -0
- data/docs/strategies/market-profile.md +54 -0
- data/docs/strategies/mean-reversion.md +58 -0
- data/docs/strategies/rsi.md +95 -0
- data/docs/strategies/sma.md +55 -0
- data/docs/strategies/stochastic.md +63 -0
- data/docs/strategies/volume-breakout.md +54 -0
- data/docs/tags.md +7 -0
- data/docs/true_strength_index.md +46 -0
- data/docs/weighted_moving_average.md +48 -0
- data/examples/README.md +354 -0
- data/examples/advanced_features_example.rb +350 -0
- data/examples/fpop_analysis_example.rb +191 -0
- data/examples/genetic_programming_example.rb +148 -0
- data/examples/kbs_strategy_example.rb +208 -0
- data/examples/pattern_context_example.rb +300 -0
- data/examples/rails_app/Gemfile +34 -0
- data/examples/rails_app/README.md +416 -0
- data/examples/rails_app/app/assets/javascripts/application.js +107 -0
- data/examples/rails_app/app/assets/stylesheets/application.css +659 -0
- data/examples/rails_app/app/controllers/analysis_controller.rb +11 -0
- data/examples/rails_app/app/controllers/api/v1/stocks_controller.rb +227 -0
- data/examples/rails_app/app/controllers/application_controller.rb +22 -0
- data/examples/rails_app/app/controllers/backtest_controller.rb +11 -0
- data/examples/rails_app/app/controllers/dashboard_controller.rb +21 -0
- data/examples/rails_app/app/controllers/portfolio_controller.rb +7 -0
- data/examples/rails_app/app/views/analysis/show.html.erb +209 -0
- data/examples/rails_app/app/views/backtest/show.html.erb +171 -0
- data/examples/rails_app/app/views/dashboard/index.html.erb +118 -0
- data/examples/rails_app/app/views/dashboard/show.html.erb +408 -0
- data/examples/rails_app/app/views/errors/show.html.erb +17 -0
- data/examples/rails_app/app/views/layouts/application.html.erb +60 -0
- data/examples/rails_app/app/views/portfolio/index.html.erb +33 -0
- data/examples/rails_app/bin/rails +6 -0
- data/examples/rails_app/config/application.rb +45 -0
- data/examples/rails_app/config/boot.rb +5 -0
- data/examples/rails_app/config/database.yml +18 -0
- data/examples/rails_app/config/environment.rb +11 -0
- data/examples/rails_app/config/routes.rb +26 -0
- data/examples/rails_app/config.ru +8 -0
- data/examples/realtime_stream_example.rb +274 -0
- data/examples/sinatra_app/Gemfile +22 -0
- data/examples/sinatra_app/QUICKSTART.md +159 -0
- data/examples/sinatra_app/README.md +461 -0
- data/examples/sinatra_app/app.rb +344 -0
- data/examples/sinatra_app/config.ru +5 -0
- data/examples/sinatra_app/public/css/style.css +659 -0
- data/examples/sinatra_app/public/js/app.js +107 -0
- data/examples/sinatra_app/views/analyze.erb +306 -0
- data/examples/sinatra_app/views/backtest.erb +325 -0
- data/examples/sinatra_app/views/dashboard.erb +419 -0
- data/examples/sinatra_app/views/error.erb +58 -0
- data/examples/sinatra_app/views/index.erb +118 -0
- data/examples/sinatra_app/views/layout.erb +61 -0
- data/examples/sinatra_app/views/portfolio.erb +43 -0
- data/examples/strategy_generator_example.rb +346 -0
- data/hsa_portfolio.csv +11 -0
- data/justfile +0 -0
- data/lib/api/alpha_vantage_api.rb +462 -0
- data/lib/sqa/backtest.rb +329 -0
- data/lib/sqa/data_frame/alpha_vantage.rb +43 -65
- data/lib/sqa/data_frame/data.rb +92 -0
- data/lib/sqa/data_frame/yahoo_finance.rb +35 -43
- data/lib/sqa/data_frame.rb +148 -243
- data/lib/sqa/ensemble.rb +359 -0
- data/lib/sqa/fpop.rb +199 -0
- data/lib/sqa/gp.rb +259 -0
- data/lib/sqa/indicator.rb +5 -8
- data/lib/sqa/init.rb +15 -8
- data/lib/sqa/market_regime.rb +240 -0
- data/lib/sqa/multi_timeframe.rb +379 -0
- data/lib/sqa/pattern_matcher.rb +497 -0
- data/lib/sqa/portfolio.rb +260 -6
- data/lib/sqa/portfolio_optimizer.rb +377 -0
- data/lib/sqa/risk_manager.rb +442 -0
- data/lib/sqa/seasonal_analyzer.rb +209 -0
- data/lib/sqa/sector_analyzer.rb +300 -0
- data/lib/sqa/stock.rb +67 -125
- data/lib/sqa/strategy/bollinger_bands.rb +42 -0
- data/lib/sqa/strategy/consensus.rb +5 -2
- data/lib/sqa/strategy/kbs_strategy.rb +470 -0
- data/lib/sqa/strategy/macd.rb +46 -0
- data/lib/sqa/strategy/mp.rb +1 -1
- data/lib/sqa/strategy/stochastic.rb +60 -0
- data/lib/sqa/strategy/volume_breakout.rb +57 -0
- data/lib/sqa/strategy.rb +5 -0
- data/lib/sqa/strategy_generator.rb +947 -0
- data/lib/sqa/stream.rb +361 -0
- data/lib/sqa/version.rb +1 -7
- data/lib/sqa.rb +23 -16
- data/main.just +81 -0
- data/mkdocs.yml +288 -0
- data/trace.log +0 -0
- metadata +261 -51
- data/bin/sqa +0 -6
- data/lib/patches/dry-cli.rb +0 -228
- data/lib/sqa/activity.rb +0 -10
- data/lib/sqa/cli.rb +0 -62
- data/lib/sqa/commands/analysis.rb +0 -309
- data/lib/sqa/commands/base.rb +0 -139
- data/lib/sqa/commands/web.rb +0 -199
- data/lib/sqa/commands.rb +0 -22
- data/lib/sqa/constants.rb +0 -23
- data/lib/sqa/indicator/average_true_range.rb +0 -33
- data/lib/sqa/indicator/bollinger_bands.rb +0 -28
- data/lib/sqa/indicator/candlestick_pattern_recognizer.rb +0 -60
- data/lib/sqa/indicator/donchian_channel.rb +0 -29
- data/lib/sqa/indicator/double_top_bottom_pattern.rb +0 -34
- data/lib/sqa/indicator/elliott_wave_theory.rb +0 -57
- data/lib/sqa/indicator/exponential_moving_average.rb +0 -25
- data/lib/sqa/indicator/exponential_moving_average_trend.rb +0 -36
- data/lib/sqa/indicator/fibonacci_retracement.rb +0 -23
- data/lib/sqa/indicator/head_and_shoulders_pattern.rb +0 -26
- data/lib/sqa/indicator/market_profile.rb +0 -32
- data/lib/sqa/indicator/mean_reversion.rb +0 -37
- data/lib/sqa/indicator/momentum.rb +0 -28
- data/lib/sqa/indicator/moving_average_convergence_divergence.rb +0 -29
- data/lib/sqa/indicator/peaks_and_valleys.rb +0 -29
- data/lib/sqa/indicator/predict_next_value.rb +0 -202
- data/lib/sqa/indicator/relative_strength_index.rb +0 -47
- data/lib/sqa/indicator/simple_moving_average.rb +0 -24
- data/lib/sqa/indicator/simple_moving_average_trend.rb +0 -32
- data/lib/sqa/indicator/stochastic_oscillator.rb +0 -68
- data/lib/sqa/indicator/true_range.rb +0 -39
- data/lib/sqa/trade.rb +0 -26
data/lib/sqa/data_frame.rb
CHANGED
|
@@ -2,204 +2,148 @@
|
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
4
|
require 'forwardable'
|
|
5
|
+
require 'csv'
|
|
6
|
+
require 'polars'
|
|
5
7
|
|
|
8
|
+
|
|
9
|
+
require_relative 'data_frame/data'
|
|
6
10
|
require_relative 'data_frame/yahoo_finance'
|
|
7
11
|
require_relative 'data_frame/alpha_vantage'
|
|
8
12
|
|
|
9
13
|
class SQA::DataFrame
|
|
10
|
-
class Data < Hashie::Mash
|
|
11
|
-
# SNELL: Are all of these needed?
|
|
12
|
-
include Hashie::Extensions::Mash::KeepOriginalKeys
|
|
13
|
-
# include Hashie::Extensions::Mash::PermissiveRespondTo
|
|
14
|
-
include Hashie::Extensions::Mash::SafeAssignment
|
|
15
|
-
include Hashie::Extensions::Mash::SymbolizeKeys
|
|
16
|
-
# include Hashie::Extensions::Mash::DefineAccessors
|
|
17
|
-
end
|
|
18
|
-
|
|
19
14
|
extend Forwardable
|
|
20
15
|
|
|
21
|
-
# @data is of class Data
|
|
22
16
|
attr_accessor :data
|
|
23
17
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
# mapping is a Hash { old_key => new_key }
|
|
27
|
-
# transformers is also a Hash { key => Proc}
|
|
28
|
-
def initialize(
|
|
29
|
-
raw_data= {}, # Array of Hashes or hash of array or hash
|
|
30
|
-
mapping: {}, # { old_key => new_key }
|
|
31
|
-
transformers: {} # { key => Proc }
|
|
32
|
-
)
|
|
18
|
+
def initialize(raw_data = nil, mapping: {}, transformers: {})
|
|
19
|
+
@data = Polars::DataFrame.new(raw_data || [])
|
|
33
20
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
else
|
|
42
|
-
raise BadParameterError, "Expecting Hash or Array of Hashes got: #{aofh_or_hofa.class}"
|
|
43
|
-
end
|
|
21
|
+
debug_me{[
|
|
22
|
+
:raw_data,
|
|
23
|
+
:mapping,
|
|
24
|
+
:transformers,
|
|
25
|
+
'@data'
|
|
26
|
+
]}
|
|
44
27
|
|
|
45
|
-
|
|
28
|
+
# IMPORTANT: Rename columns FIRST, then apply transformers
|
|
29
|
+
# Transformers expect renamed column names
|
|
30
|
+
rename_columns!(mapping) unless mapping.empty?
|
|
31
|
+
apply_transformers!(transformers) unless transformers.empty?
|
|
46
32
|
end
|
|
47
33
|
|
|
48
34
|
|
|
49
|
-
def
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
35
|
+
def apply_transformers!(transformers)
|
|
36
|
+
transformers.each do |col, transformer|
|
|
37
|
+
col_name = col.to_s
|
|
38
|
+
@data = @data.with_column(
|
|
39
|
+
@data[col_name].apply(&transformer).alias(col_name)
|
|
40
|
+
)
|
|
41
|
+
end
|
|
53
42
|
end
|
|
54
43
|
|
|
55
44
|
|
|
56
|
-
def
|
|
57
|
-
|
|
45
|
+
def rename_columns!(mapping)
|
|
46
|
+
# Normalize mapping keys to strings for consistent lookup
|
|
47
|
+
# mapping can have string or symbol keys, columns are always strings
|
|
48
|
+
string_mapping = mapping.transform_keys(&:to_s)
|
|
58
49
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
50
|
+
rename_mapping = @data.columns.each_with_index.map do |col, _|
|
|
51
|
+
# Try exact match first, then lowercase match
|
|
52
|
+
new_name = string_mapping[col] || string_mapping[col.downcase] || col
|
|
53
|
+
# Polars requires both keys and values to be strings
|
|
54
|
+
[col, new_name.to_s]
|
|
55
|
+
end.to_h
|
|
63
56
|
|
|
64
|
-
|
|
57
|
+
@data = @data.rename(rename_mapping)
|
|
65
58
|
end
|
|
66
59
|
|
|
67
60
|
|
|
68
|
-
def
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
hofa,
|
|
72
|
-
adapter_mapping: mapping
|
|
73
|
-
) unless mapping.empty?
|
|
61
|
+
def append!(other_df)
|
|
62
|
+
self_row_count = @data.shape[0]
|
|
63
|
+
other_row_count = other_df.data.shape[0]
|
|
74
64
|
|
|
75
|
-
@data =
|
|
76
|
-
|
|
65
|
+
@data = if self_row_count == 0
|
|
66
|
+
other_df.data
|
|
67
|
+
else
|
|
68
|
+
@data.vstack(other_df.data)
|
|
69
|
+
end
|
|
77
70
|
|
|
71
|
+
post_append_row_count = @data.shape[0]
|
|
72
|
+
expected_row_count = self_row_count + other_row_count
|
|
73
|
+
return if post_append_row_count == expected_row_count
|
|
78
74
|
|
|
75
|
+
raise "Append Error: expected #{expected_row_count}, got #{post_append_row_count} "
|
|
79
76
|
|
|
80
|
-
def to_csv(path_to_file)
|
|
81
|
-
CSV.open(path_to_file, 'w') do |csv|
|
|
82
|
-
csv << keys
|
|
83
|
-
size.times do |x|
|
|
84
|
-
csv << row(x)
|
|
85
|
-
end
|
|
86
|
-
end
|
|
87
77
|
end
|
|
78
|
+
alias concat! append!
|
|
88
79
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
NotImplemented.raise
|
|
80
|
+
def columns
|
|
81
|
+
@data.columns
|
|
92
82
|
end
|
|
93
83
|
|
|
94
84
|
|
|
95
|
-
def
|
|
96
|
-
|
|
85
|
+
def keys
|
|
86
|
+
@data.columns
|
|
97
87
|
end
|
|
88
|
+
alias vectors keys
|
|
98
89
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
alias_method :to_h, :to_hofa
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
# The number of data rows
|
|
105
|
-
def size
|
|
106
|
-
data[@data.keys[0]].size
|
|
90
|
+
def to_h
|
|
91
|
+
@data.columns.map { |col| [col.to_sym, @data[col].to_a] }.to_h
|
|
107
92
|
end
|
|
108
|
-
alias_method :nrows, :size
|
|
109
|
-
alias_method :length, :size
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def_delegator :@data, :keys
|
|
113
|
-
alias_method :vectors, :keys
|
|
114
|
-
alias_method :columns, :keys
|
|
115
93
|
|
|
116
94
|
|
|
117
|
-
def
|
|
118
|
-
|
|
95
|
+
def to_csv(path_to_file)
|
|
96
|
+
@data.write_csv(path_to_file)
|
|
119
97
|
end
|
|
120
98
|
|
|
121
99
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def_delegator :@data, :[]=, :[]=
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
# same as values.transpose
|
|
128
|
-
# TODO: do benchmark to see if the transpose method if faster
|
|
129
|
-
def rows
|
|
130
|
-
result = []
|
|
131
|
-
size.times do |x|
|
|
132
|
-
entry = row(x)
|
|
133
|
-
result << entry
|
|
134
|
-
end
|
|
135
|
-
result
|
|
100
|
+
def size
|
|
101
|
+
@data.height
|
|
136
102
|
end
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def row(x)
|
|
141
|
-
if x.is_a?(Integer)
|
|
142
|
-
raise BadParameterError if x < 0 || x >= size
|
|
143
|
-
|
|
144
|
-
elsif x.is_a?(Hash)
|
|
145
|
-
raise BadParameterError, "x is #{x}" if x.size > 1
|
|
146
|
-
key = x.keys[0]
|
|
147
|
-
x = @data[key].index(x[key])
|
|
148
|
-
raise BadParameterError, 'Not Found #{x}' if x.nil?
|
|
149
|
-
return keys.zip(row(x)).to_h
|
|
150
|
-
|
|
151
|
-
else
|
|
152
|
-
raise BadParameterError, "Unknown x.class: #{x.class}"
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
entry = []
|
|
156
|
-
|
|
157
|
-
keys.each do |key|
|
|
158
|
-
entry << @data[key][x]
|
|
159
|
-
end
|
|
103
|
+
alias nrows size
|
|
104
|
+
alias length size
|
|
160
105
|
|
|
161
|
-
|
|
106
|
+
def ncols
|
|
107
|
+
@data.width
|
|
162
108
|
end
|
|
163
109
|
|
|
164
110
|
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
111
|
+
# FPL Analysis - Calculate Future Period Loss/Profit
|
|
112
|
+
#
|
|
113
|
+
# @param column [String, Symbol] Column name containing prices (default: "adj_close_price")
|
|
114
|
+
# @param fpop [Integer] Future Period of Performance (days to look ahead)
|
|
115
|
+
# @return [Array<Array<Float, Float>>] Array of [min_delta, max_delta] pairs
|
|
116
|
+
#
|
|
117
|
+
# @example
|
|
118
|
+
# stock = SQA::Stock.new(ticker: 'AAPL')
|
|
119
|
+
# fpl_data = stock.df.fpl(fpop: 10)
|
|
120
|
+
#
|
|
121
|
+
def fpl(column: 'adj_close_price', fpop: 14)
|
|
122
|
+
prices = @data[column.to_s].to_a
|
|
123
|
+
SQA::FPOP.fpl(prices, fpop: fpop)
|
|
171
124
|
end
|
|
172
|
-
alias_method :concat!, :append!
|
|
173
125
|
|
|
174
126
|
|
|
175
|
-
#
|
|
176
|
-
# based on the mapping hash where
|
|
177
|
-
# { old_key => new_key }
|
|
127
|
+
# FPL Analysis with risk metrics and classification
|
|
178
128
|
#
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
129
|
+
# @param column [String, Symbol] Column name containing prices (default: "adj_close_price")
|
|
130
|
+
# @param fpop [Integer] Future Period of Performance
|
|
131
|
+
# @return [Array<Hash>] Array of analysis hashes
|
|
132
|
+
#
|
|
133
|
+
# @example
|
|
134
|
+
# analysis = stock.df.fpl_analysis(fpop: 10)
|
|
135
|
+
# analysis.first[:direction] # => :UP, :DOWN, :UNCERTAIN, or :FLAT
|
|
136
|
+
# analysis.first[:magnitude] # => Average expected movement percentage
|
|
137
|
+
# analysis.first[:risk] # => Volatility range
|
|
138
|
+
#
|
|
139
|
+
def fpl_analysis(column: 'adj_close_price', fpop: 14)
|
|
140
|
+
prices = @data[column.to_s].to_a
|
|
141
|
+
SQA::FPOP.fpl_analysis(prices, fpop: fpop)
|
|
186
142
|
end
|
|
187
|
-
alias_method :rename_vectors, :rename
|
|
188
143
|
|
|
189
144
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
# the value is a proc
|
|
193
|
-
#
|
|
194
|
-
# For Example:
|
|
195
|
-
# {
|
|
196
|
-
# price: -> (v) {v.to_f.round(3)}
|
|
197
|
-
# }
|
|
198
|
-
#
|
|
199
|
-
def coerce_vectors!(transformers)
|
|
200
|
-
transformers.each_pair do |key, transformer|
|
|
201
|
-
@data[key].map!{|v| transformer.call(v)}
|
|
202
|
-
end
|
|
145
|
+
def self.is_date?(value)
|
|
146
|
+
value.is_a?(String) && !/\d{4}-\d{2}-\d{2}/.match(value).nil?
|
|
203
147
|
end
|
|
204
148
|
|
|
205
149
|
|
|
@@ -219,135 +163,96 @@ class SQA::DataFrame
|
|
|
219
163
|
@data.respond_to?(method_name) || super
|
|
220
164
|
end
|
|
221
165
|
|
|
222
|
-
#################################################
|
|
223
166
|
class << self
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
base_df.concat!(other_df)
|
|
227
|
-
end
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
# TODO: The Data class has its own load which also supports
|
|
231
|
-
# YAML by default. Maybe this method should
|
|
232
|
-
# make use of @data = Data.load(source)
|
|
167
|
+
# Load a DataFrame from a file source
|
|
168
|
+
# This is the primary method for loading persisted DataFrames
|
|
233
169
|
#
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
df
|
|
170
|
+
# @param source [String, Pathname] Path to CSV file
|
|
171
|
+
# @param transformers [Hash] Column transformations to apply (usually not needed for cached data)
|
|
172
|
+
# @param mapping [Hash] Column name mappings (usually not needed for cached data)
|
|
173
|
+
# @return [SQA::DataFrame] Loaded DataFrame
|
|
174
|
+
#
|
|
175
|
+
# Note: For cached CSV files, transformers and mapping should typically be empty
|
|
176
|
+
# since transformations were already applied when the data was first fetched.
|
|
177
|
+
# We only apply them if the CSV has old-format column names that need migration.
|
|
178
|
+
def load(source:, transformers: {}, mapping: {})
|
|
179
|
+
df = Polars.read_csv(source.to_s)
|
|
180
|
+
|
|
181
|
+
# Auto-detect if CSV needs migration (has old column names like "open" instead of "open_price")
|
|
182
|
+
# Only apply mapping if explicitly provided (for migration scenarios)
|
|
183
|
+
new(df, mapping: mapping, transformers: transformers)
|
|
250
184
|
end
|
|
251
185
|
|
|
252
|
-
|
|
253
186
|
def from_aofh(aofh, mapping: {}, transformers: {})
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
187
|
+
aoh_sanitized = aofh.map { |entry| entry.transform_keys(&:to_s) }
|
|
188
|
+
columns = aoh_sanitized.first.keys
|
|
189
|
+
data = aoh_sanitized.map(&:values)
|
|
190
|
+
df = Polars::DataFrame.new(
|
|
191
|
+
data,
|
|
192
|
+
columns: columns
|
|
258
193
|
)
|
|
194
|
+
new(df)
|
|
259
195
|
end
|
|
260
196
|
|
|
261
197
|
|
|
262
198
|
def from_csv_file(source, mapping: {}, transformers: {})
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
199
|
+
debug_me do
|
|
200
|
+
%i[
|
|
201
|
+
source
|
|
202
|
+
mapping
|
|
203
|
+
transformers
|
|
204
|
+
]
|
|
267
205
|
end
|
|
268
206
|
|
|
269
|
-
|
|
207
|
+
df = Polars.read_csv(source)
|
|
208
|
+
new(df, mapping: mapping, transformers: transformers)
|
|
270
209
|
end
|
|
271
210
|
|
|
272
211
|
|
|
273
212
|
def from_json_file(source, mapping: {}, transformers: {})
|
|
274
|
-
aofh = JSON.parse(
|
|
275
|
-
|
|
213
|
+
aofh = JSON.parse(File.read(source)).map { |entry| entry.transform_keys(&:to_s) }
|
|
276
214
|
from_aofh(aofh, mapping: mapping, transformers: transformers)
|
|
277
215
|
end
|
|
278
216
|
|
|
279
217
|
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
hofa = {}
|
|
284
|
-
keys = aofh.first.keys
|
|
285
|
-
|
|
286
|
-
keys.each do |key|
|
|
287
|
-
hofa[key] = []
|
|
288
|
-
end
|
|
289
|
-
|
|
290
|
-
aofh.each do |entry|
|
|
291
|
-
keys.each do |key|
|
|
292
|
-
hofa[key] << entry[key]
|
|
293
|
-
end
|
|
294
|
-
end
|
|
295
|
-
|
|
296
|
-
# SMELL: This might not be necessary
|
|
297
|
-
normalize_keys(hofa, adapter_mapping: mapping)
|
|
298
|
-
end
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
def normalize_keys(hofa, adapter_mapping: {})
|
|
302
|
-
hofa = rename(adapter_mapping, hofa)
|
|
303
|
-
mapping = generate_mapping(hofa.keys)
|
|
304
|
-
|
|
305
|
-
rename(mapping, hofa)
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
def rename(mapping, hofa)
|
|
310
|
-
mapping.each_pair do |old_key, new_key|
|
|
311
|
-
hofa[new_key] = hofa.delete(old_key) if hofa.has_key?(old_key)
|
|
218
|
+
def generate_mapping(keys)
|
|
219
|
+
keys.each_with_object({}) do |key, hash|
|
|
220
|
+
hash[key.to_s] = underscore_key(key.to_s)
|
|
312
221
|
end
|
|
313
|
-
|
|
314
|
-
hofa
|
|
315
222
|
end
|
|
316
223
|
|
|
317
224
|
|
|
318
|
-
def
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
225
|
+
def underscore_key(key)
|
|
226
|
+
key.to_s
|
|
227
|
+
.gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
|
|
228
|
+
.gsub(/([a-z\d])([A-Z])/, '\1_\2')
|
|
229
|
+
.gsub(/[^a-zA-Z0-9]/, ' ')
|
|
230
|
+
.squeeze(' ')
|
|
231
|
+
.strip
|
|
232
|
+
.tr(' ', '_')
|
|
233
|
+
.downcase
|
|
234
|
+
.to_sym
|
|
326
235
|
end
|
|
327
236
|
|
|
237
|
+
alias sanitize_key underscore_key
|
|
328
238
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
gsub(/([a-z\d])([A-Z])/,'\1_\2').
|
|
334
|
-
tr("-", "_").
|
|
335
|
-
downcase.to_sym
|
|
239
|
+
def normalize_keys(hash, adapter_mapping: {})
|
|
240
|
+
hash = rename(hash, adapter_mapping) unless adapter_mapping.empty?
|
|
241
|
+
mapping = generate_mapping(hash.keys)
|
|
242
|
+
rename(hash, mapping)
|
|
336
243
|
end
|
|
337
244
|
|
|
338
245
|
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
key.tr('.():/','').gsub(/^\d+.?\s/, "").tr(' ','_')
|
|
246
|
+
def rename(hash, mapping)
|
|
247
|
+
mapping.each { |old_key, new_key| hash[new_key] = hash.delete(old_key) if hash.key?(old_key) }
|
|
248
|
+
hash
|
|
343
249
|
end
|
|
344
250
|
|
|
345
251
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
252
|
+
def aofh_to_hofa(aofh, mapping: {}, transformers: {})
|
|
253
|
+
hofa = Hash.new { |h, k| h[k.downcase] = [] }
|
|
254
|
+
aofh.each { |entry| entry.each { |key, value| hofa[key.to_s.downcase] << value } }
|
|
255
|
+
hofa
|
|
350
256
|
end
|
|
351
257
|
end
|
|
352
258
|
end
|
|
353
|
-
|