sqa 0.0.24 → 0.0.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203)
  1. checksums.yaml +4 -4
  2. data/.goose/memory/development.txt +3 -0
  3. data/.semver +6 -0
  4. data/ARCHITECTURE.md +648 -0
  5. data/CHANGELOG.md +95 -0
  6. data/CLAUDE.md +674 -0
  7. data/COMMITS.md +196 -0
  8. data/DATAFRAME_ARCHITECTURE_REVIEW.md +421 -0
  9. data/NEXT-STEPS.md +154 -0
  10. data/README.md +839 -265
  11. data/TASKS.md +358 -0
  12. data/TEST_RESULTS.md +140 -0
  13. data/TODO.md +42 -0
  14. data/_notes.txt +25 -0
  15. data/bin/sqa-console +11 -0
  16. data/data/talk_talk.json +103284 -0
  17. data/develop_summary.md +313 -0
  18. data/docs/advanced/backtesting.md +206 -0
  19. data/docs/advanced/ensemble.md +68 -0
  20. data/docs/advanced/fpop.md +153 -0
  21. data/docs/advanced/index.md +112 -0
  22. data/docs/advanced/multi-timeframe.md +67 -0
  23. data/docs/advanced/pattern-matcher.md +75 -0
  24. data/docs/advanced/portfolio-optimizer.md +79 -0
  25. data/docs/advanced/portfolio.md +166 -0
  26. data/docs/advanced/risk-management.md +210 -0
  27. data/docs/advanced/strategy-generator.md +158 -0
  28. data/docs/advanced/streaming.md +209 -0
  29. data/docs/ai_and_ml.md +80 -0
  30. data/docs/api/dataframe.md +1114 -0
  31. data/docs/api/index.md +126 -0
  32. data/docs/assets/css/custom.css +88 -0
  33. data/docs/assets/images/sqa.jpg +0 -0
  34. data/docs/assets/js/mathjax.js +18 -0
  35. data/docs/concepts/index.md +60 -0
  36. data/docs/contributing/index.md +60 -0
  37. data/docs/data-sources/index.md +66 -0
  38. data/docs/data_frame.md +316 -97
  39. data/docs/factors_that_impact_price.md +26 -0
  40. data/docs/finviz.md +11 -0
  41. data/docs/fx_pro_bit.md +25 -0
  42. data/docs/genetic_programming.md +104 -0
  43. data/docs/getting-started/index.md +107 -0
  44. data/docs/getting-started/installation.md +229 -0
  45. data/docs/getting-started/quick-start.md +244 -0
  46. data/docs/i_gotta_an_idea.md +22 -0
  47. data/docs/index.md +161 -0
  48. data/docs/indicators/index.md +97 -0
  49. data/docs/indicators.md +110 -24
  50. data/docs/options.md +8 -0
  51. data/docs/strategies/bollinger-bands.md +146 -0
  52. data/docs/strategies/consensus.md +64 -0
  53. data/docs/strategies/custom.md +310 -0
  54. data/docs/strategies/ema.md +53 -0
  55. data/docs/strategies/index.md +92 -0
  56. data/docs/strategies/kbs.md +164 -0
  57. data/docs/strategies/macd.md +96 -0
  58. data/docs/strategies/market-profile.md +54 -0
  59. data/docs/strategies/mean-reversion.md +58 -0
  60. data/docs/strategies/rsi.md +95 -0
  61. data/docs/strategies/sma.md +55 -0
  62. data/docs/strategies/stochastic.md +63 -0
  63. data/docs/strategies/volume-breakout.md +54 -0
  64. data/docs/tags.md +7 -0
  65. data/examples/README.md +354 -0
  66. data/examples/advanced_features_example.rb +350 -0
  67. data/examples/fpop_analysis_example.rb +191 -0
  68. data/examples/genetic_programming_example.rb +148 -0
  69. data/examples/kbs_strategy_example.rb +208 -0
  70. data/examples/pattern_context_example.rb +300 -0
  71. data/examples/rails_app/Gemfile +34 -0
  72. data/examples/rails_app/README.md +416 -0
  73. data/examples/rails_app/app/assets/javascripts/application.js +107 -0
  74. data/examples/rails_app/app/assets/stylesheets/application.css +659 -0
  75. data/examples/rails_app/app/controllers/analysis_controller.rb +11 -0
  76. data/examples/rails_app/app/controllers/api/v1/stocks_controller.rb +227 -0
  77. data/examples/rails_app/app/controllers/application_controller.rb +22 -0
  78. data/examples/rails_app/app/controllers/backtest_controller.rb +11 -0
  79. data/examples/rails_app/app/controllers/dashboard_controller.rb +21 -0
  80. data/examples/rails_app/app/controllers/portfolio_controller.rb +7 -0
  81. data/examples/rails_app/app/views/analysis/show.html.erb +209 -0
  82. data/examples/rails_app/app/views/backtest/show.html.erb +171 -0
  83. data/examples/rails_app/app/views/dashboard/index.html.erb +118 -0
  84. data/examples/rails_app/app/views/dashboard/show.html.erb +408 -0
  85. data/examples/rails_app/app/views/errors/show.html.erb +17 -0
  86. data/examples/rails_app/app/views/layouts/application.html.erb +60 -0
  87. data/examples/rails_app/app/views/portfolio/index.html.erb +33 -0
  88. data/examples/rails_app/bin/rails +6 -0
  89. data/examples/rails_app/config/application.rb +45 -0
  90. data/examples/rails_app/config/boot.rb +5 -0
  91. data/examples/rails_app/config/database.yml +18 -0
  92. data/examples/rails_app/config/environment.rb +11 -0
  93. data/examples/rails_app/config/routes.rb +26 -0
  94. data/examples/rails_app/config.ru +8 -0
  95. data/examples/realtime_stream_example.rb +274 -0
  96. data/examples/sinatra_app/Gemfile +42 -0
  97. data/examples/sinatra_app/Gemfile.lock +268 -0
  98. data/examples/sinatra_app/QUICKSTART.md +169 -0
  99. data/examples/sinatra_app/README.md +471 -0
  100. data/examples/sinatra_app/RUNNING_WITHOUT_TALIB.md +90 -0
  101. data/examples/sinatra_app/TROUBLESHOOTING.md +95 -0
  102. data/examples/sinatra_app/app.rb +404 -0
  103. data/examples/sinatra_app/config.ru +5 -0
  104. data/examples/sinatra_app/public/css/style.css +723 -0
  105. data/examples/sinatra_app/public/debug_macd.html +82 -0
  106. data/examples/sinatra_app/public/js/app.js +107 -0
  107. data/examples/sinatra_app/start.sh +53 -0
  108. data/examples/sinatra_app/views/analyze.erb +306 -0
  109. data/examples/sinatra_app/views/backtest.erb +325 -0
  110. data/examples/sinatra_app/views/dashboard.erb +831 -0
  111. data/examples/sinatra_app/views/error.erb +58 -0
  112. data/examples/sinatra_app/views/index.erb +118 -0
  113. data/examples/sinatra_app/views/layout.erb +61 -0
  114. data/examples/sinatra_app/views/portfolio.erb +43 -0
  115. data/examples/strategy_generator_example.rb +346 -0
  116. data/hsa_portfolio.csv +11 -0
  117. data/justfile +0 -0
  118. data/lib/api/alpha_vantage_api.rb +462 -0
  119. data/lib/sqa/backtest.rb +329 -0
  120. data/lib/sqa/data_frame/alpha_vantage.rb +51 -63
  121. data/lib/sqa/data_frame/data.rb +92 -0
  122. data/lib/sqa/data_frame/yahoo_finance.rb +35 -43
  123. data/lib/sqa/data_frame.rb +154 -243
  124. data/lib/sqa/ensemble.rb +359 -0
  125. data/lib/sqa/fpop.rb +199 -0
  126. data/lib/sqa/gp.rb +259 -0
  127. data/lib/sqa/indicator.rb +16 -6
  128. data/lib/sqa/init.rb +15 -8
  129. data/lib/sqa/market_regime.rb +240 -0
  130. data/lib/sqa/multi_timeframe.rb +379 -0
  131. data/lib/sqa/pattern_matcher.rb +497 -0
  132. data/lib/sqa/portfolio.rb +260 -6
  133. data/lib/sqa/portfolio_optimizer.rb +377 -0
  134. data/lib/sqa/risk_manager.rb +442 -0
  135. data/lib/sqa/seasonal_analyzer.rb +209 -0
  136. data/lib/sqa/sector_analyzer.rb +300 -0
  137. data/lib/sqa/stock.rb +131 -127
  138. data/lib/sqa/strategy/bollinger_bands.rb +42 -0
  139. data/lib/sqa/strategy/consensus.rb +5 -2
  140. data/lib/sqa/strategy/kbs_strategy.rb +470 -0
  141. data/lib/sqa/strategy/macd.rb +46 -0
  142. data/lib/sqa/strategy/mp.rb +1 -1
  143. data/lib/sqa/strategy/stochastic.rb +60 -0
  144. data/lib/sqa/strategy/volume_breakout.rb +57 -0
  145. data/lib/sqa/strategy.rb +5 -0
  146. data/lib/sqa/strategy_generator.rb +947 -0
  147. data/lib/sqa/stream.rb +361 -0
  148. data/lib/sqa/ticker.rb +9 -2
  149. data/lib/sqa/version.rb +1 -7
  150. data/lib/sqa.rb +35 -20
  151. data/main.just +81 -0
  152. data/mkdocs.yml +252 -0
  153. data/trace.log +0 -0
  154. metadata +265 -69
  155. data/bin/sqa +0 -6
  156. data/docs/alpha_vantage_technical_indicators.md +0 -62
  157. data/docs/average_true_range.md +0 -9
  158. data/docs/bollinger_bands.md +0 -15
  159. data/docs/candlestick_pattern_recognizer.md +0 -4
  160. data/docs/donchian_channel.md +0 -5
  161. data/docs/double_top_bottom_pattern.md +0 -3
  162. data/docs/exponential_moving_average.md +0 -19
  163. data/docs/fibonacci_retracement.md +0 -30
  164. data/docs/head_and_shoulders_pattern.md +0 -3
  165. data/docs/market_profile.md +0 -4
  166. data/docs/momentum.md +0 -19
  167. data/docs/moving_average_convergence_divergence.md +0 -23
  168. data/docs/peaks_and_valleys.md +0 -11
  169. data/docs/relative_strength_index.md +0 -6
  170. data/docs/simple_moving_average.md +0 -8
  171. data/docs/stochastic_oscillator.md +0 -4
  172. data/docs/ta_lib.md +0 -160
  173. data/docs/true_range.md +0 -12
  174. data/lib/patches/dry-cli.rb +0 -228
  175. data/lib/sqa/activity.rb +0 -10
  176. data/lib/sqa/cli.rb +0 -62
  177. data/lib/sqa/commands/analysis.rb +0 -309
  178. data/lib/sqa/commands/base.rb +0 -139
  179. data/lib/sqa/commands/web.rb +0 -199
  180. data/lib/sqa/commands.rb +0 -22
  181. data/lib/sqa/constants.rb +0 -23
  182. data/lib/sqa/indicator/average_true_range.rb +0 -33
  183. data/lib/sqa/indicator/bollinger_bands.rb +0 -28
  184. data/lib/sqa/indicator/candlestick_pattern_recognizer.rb +0 -60
  185. data/lib/sqa/indicator/donchian_channel.rb +0 -29
  186. data/lib/sqa/indicator/double_top_bottom_pattern.rb +0 -34
  187. data/lib/sqa/indicator/elliott_wave_theory.rb +0 -57
  188. data/lib/sqa/indicator/exponential_moving_average.rb +0 -25
  189. data/lib/sqa/indicator/exponential_moving_average_trend.rb +0 -36
  190. data/lib/sqa/indicator/fibonacci_retracement.rb +0 -23
  191. data/lib/sqa/indicator/head_and_shoulders_pattern.rb +0 -26
  192. data/lib/sqa/indicator/market_profile.rb +0 -32
  193. data/lib/sqa/indicator/mean_reversion.rb +0 -37
  194. data/lib/sqa/indicator/momentum.rb +0 -28
  195. data/lib/sqa/indicator/moving_average_convergence_divergence.rb +0 -29
  196. data/lib/sqa/indicator/peaks_and_valleys.rb +0 -29
  197. data/lib/sqa/indicator/predict_next_value.rb +0 -202
  198. data/lib/sqa/indicator/relative_strength_index.rb +0 -47
  199. data/lib/sqa/indicator/simple_moving_average.rb +0 -24
  200. data/lib/sqa/indicator/simple_moving_average_trend.rb +0 -32
  201. data/lib/sqa/indicator/stochastic_oscillator.rb +0 -68
  202. data/lib/sqa/indicator/true_range.rb +0 -39
  203. data/lib/sqa/trade.rb +0 -26
@@ -2,204 +2,162 @@
2
2
  # frozen_string_literal: true
3
3
 
4
4
  require 'forwardable'
5
+ require 'csv'
6
+ require 'polars'
5
7
 
8
+
9
+ require_relative 'data_frame/data'
6
10
  require_relative 'data_frame/yahoo_finance'
7
11
  require_relative 'data_frame/alpha_vantage'
8
12
 
9
13
  class SQA::DataFrame
10
- class Data < Hashie::Mash
11
- # SNELL: Are all of these needed?
12
- include Hashie::Extensions::Mash::KeepOriginalKeys
13
- # include Hashie::Extensions::Mash::PermissiveRespondTo
14
- include Hashie::Extensions::Mash::SafeAssignment
15
- include Hashie::Extensions::Mash::SymbolizeKeys
16
- # include Hashie::Extensions::Mash::DefineAccessors
17
- end
18
-
19
14
  extend Forwardable
20
15
 
21
- # @data is of class Data
22
16
  attr_accessor :data
23
17
 
24
- # Expects a Hash of Arrays (hofa)
25
- # mapping: and transformers: are optional
26
- # mapping is a Hash { old_key => new_key }
27
- # transformers is also a Hash { key => Proc}
28
- def initialize(
29
- raw_data= {}, # Array of Hashes or hash of array or hash
30
- mapping: {}, # { old_key => new_key }
31
- transformers: {} # { key => Proc }
32
- )
18
+ def initialize(raw_data = nil, mapping: {}, transformers: {})
19
+ @data = Polars::DataFrame.new(raw_data || [])
33
20
 
34
- if raw_data.is_a? Hash
35
- initialize_hofa(raw_data, mapping: mapping)
21
+ # IMPORTANT: Rename columns FIRST, then apply transformers
22
+ # Transformers expect renamed column names
23
+ rename_columns!(mapping) unless mapping.empty?
24
+ apply_transformers!(transformers) unless transformers.empty?
25
+ end
36
26
 
37
- elsif raw_data.is_a?(Array) &&
38
- raw_data.first.is_a?(Hash)
39
- initialize_aofh(raw_data, mapping: mapping)
40
27
 
41
- else
42
- raise BadParameterError, "Expecting Hash or Array of Hashes got: #{aofh_or_hofa.class}"
28
+ def apply_transformers!(transformers)
29
+ transformers.each do |col, transformer|
30
+ col_name = col.to_s
31
+ @data = @data.with_column(
32
+ @data[col_name].apply(&transformer).alias(col_name)
33
+ )
43
34
  end
44
-
45
- coerce_vectors!(transformers) if good_data? && !(transformers.nil? || transformers.empty?)
46
35
  end
47
36
 
48
37
 
49
- def good_data?
50
- return false if @data.empty? || @data.values.all?{|v| v.nil? || v.empty?}
38
+ def rename_columns!(mapping)
39
+ # Normalize mapping keys to strings for consistent lookup
40
+ # mapping can have string or symbol keys, columns are always strings
41
+ string_mapping = mapping.transform_keys(&:to_s)
51
42
 
52
- true
53
- end
43
+ rename_mapping = @data.columns.each_with_index.map do |col, _|
44
+ # Try exact match first, then lowercase match
45
+ new_name = string_mapping[col] || string_mapping[col.downcase] || col
46
+ # Polars requires both keys and values to be strings
47
+ [col, new_name.to_s]
48
+ end.to_h
54
49
 
50
+ @data = @data.rename(rename_mapping)
51
+ end
55
52
 
56
- def initialize_aofh(aofh, mapping:)
57
- klass = self.class
58
53
 
59
- hofa = klass.aofh_to_hofa(
60
- aofh,
61
- mapping: mapping
62
- )
54
+ def append!(other_df)
55
+ self_row_count = @data.shape[0]
56
+ other_row_count = other_df.data.shape[0]
63
57
 
64
- initialize_hofa(hofa, mapping: mapping)
65
- end
58
+ @data = if self_row_count == 0
59
+ other_df.data
60
+ else
61
+ @data.vstack(other_df.data)
62
+ end
66
63
 
64
+ post_append_row_count = @data.shape[0]
65
+ expected_row_count = self_row_count + other_row_count
66
+ return if post_append_row_count == expected_row_count
67
67
 
68
- def initialize_hofa(hofa, mapping:)
69
- klass = self.class
70
- hofa = klass.normalize_keys(
71
- hofa,
72
- adapter_mapping: mapping
73
- ) unless mapping.empty?
68
+ raise "Append Error: expected #{expected_row_count}, got #{post_append_row_count} "
74
69
 
75
- @data = Data.new(hofa)
76
70
  end
71
+ alias concat! append!
77
72
 
73
+ # Concatenate another DataFrame, remove duplicates, and sort
74
+ # This is the preferred method for updating CSV data to prevent duplicates
75
+ #
76
+ # @param other_df [SQA::DataFrame] DataFrame to append
77
+ # @param sort_column [String] Column to use for deduplication and sorting (default: "timestamp")
78
+ # @param descending [Boolean] Sort order - false for ascending (oldest first, TA-Lib compatible), true for descending
79
+ def concat_and_deduplicate!(other_df, sort_column: "timestamp", descending: false)
80
+ # Concatenate the dataframes
81
+ @data = if @data.shape[0] == 0
82
+ other_df.data
83
+ else
84
+ @data.vstack(other_df.data)
85
+ end
78
86
 
87
+ # Remove duplicates based on sort_column, keeping first occurrence
88
+ @data = @data.unique(subset: [sort_column], keep: "first")
79
89
 
80
- def to_csv(path_to_file)
81
- CSV.open(path_to_file, 'w') do |csv|
82
- csv << keys
83
- size.times do |x|
84
- csv << row(x)
85
- end
86
- end
90
+ # Sort by the specified column (Polars uses 'reverse' for descending)
91
+ @data = @data.sort(sort_column, reverse: descending)
87
92
  end
88
93
 
89
-
90
- def to_json(path_to_file)
91
- NotImplemented.raise
94
+ def columns
95
+ @data.columns
92
96
  end
93
97
 
94
98
 
95
- def to_aofh
96
- NotImplemented.raise
99
+ def keys
100
+ @data.columns
97
101
  end
102
+ alias vectors keys
98
103
 
99
-
100
- def_delegator :@data, :to_h, :to_hofa
101
- alias_method :to_h, :to_hofa
102
-
103
-
104
- # The number of data rows
105
- def size
106
- data[@data.keys[0]].size
104
+ def to_h
105
+ @data.columns.map { |col| [col.to_sym, @data[col].to_a] }.to_h
107
106
  end
108
- alias_method :nrows, :size
109
- alias_method :length, :size
110
-
111
107
 
112
- def_delegator :@data, :keys
113
- alias_method :vectors, :keys
114
- alias_method :columns, :keys
115
108
 
116
-
117
- def ncols
118
- keys.size
109
+ def to_csv(path_to_file)
110
+ @data.write_csv(path_to_file)
119
111
  end
120
112
 
121
113
 
122
- def_delegator :@data, :values, :values
123
- def_delegator :@data, :[], :[]
124
- def_delegator :@data, :[]=, :[]=
125
-
126
-
127
- # same as values.transpose
128
- # TODO: do benchmark to see if the transpose method if faster
129
- def rows
130
- result = []
131
- size.times do |x|
132
- entry = row(x)
133
- result << entry
134
- end
135
- result
114
+ def size
115
+ @data.height
136
116
  end
137
- alias_method :to_a, :rows
138
-
139
-
140
- def row(x)
141
- if x.is_a?(Integer)
142
- raise BadParameterError if x < 0 || x >= size
117
+ alias nrows size
118
+ alias length size
143
119
 
144
- elsif x.is_a?(Hash)
145
- raise BadParameterError, "x is #{x}" if x.size > 1
146
- key = x.keys[0]
147
- x = @data[key].index(x[key])
148
- raise BadParameterError, 'Not Found #{x}' if x.nil?
149
- return keys.zip(row(x)).to_h
150
-
151
- else
152
- raise BadParameterError, "Unknown x.class: #{x.class}"
153
- end
154
-
155
- entry = []
156
-
157
- keys.each do |key|
158
- entry << @data[key][x]
159
- end
160
-
161
- entry
120
+ def ncols
121
+ @data.width
162
122
  end
163
123
 
164
124
 
165
- def append!(new_df)
166
- raise(BadParameterError, "Key mismatch") if keys != new_df.keys
167
-
168
- keys.each do |key|
169
- @data[key] += new_df[key]
170
- end
125
+ # FPL Analysis - Calculate Future Period Loss/Profit
126
+ #
127
+ # @param column [String, Symbol] Column name containing prices (default: "adj_close_price")
128
+ # @param fpop [Integer] Future Period of Performance (days to look ahead)
129
+ # @return [Array<Array<Float, Float>>] Array of [min_delta, max_delta] pairs
130
+ #
131
+ # @example
132
+ # stock = SQA::Stock.new(ticker: 'AAPL')
133
+ # fpl_data = stock.df.fpl(fpop: 10)
134
+ #
135
+ def fpl(column: 'adj_close_price', fpop: 14)
136
+ prices = @data[column.to_s].to_a
137
+ SQA::FPOP.fpl(prices, fpop: fpop)
171
138
  end
172
- alias_method :concat!, :append!
173
139
 
174
140
 
175
- # Creates a new instance with new keys
176
- # based on the mapping hash where
177
- # { old_key => new_key }
141
+ # FPL Analysis with risk metrics and classification
178
142
  #
179
- def rename(mapping)
180
- SQA::DataFrame.new(
181
- self.class.rename(
182
- mapping,
183
- @data.to_h
184
- )
185
- )
143
+ # @param column [String, Symbol] Column name containing prices (default: "adj_close_price")
144
+ # @param fpop [Integer] Future Period of Performance
145
+ # @return [Array<Hash>] Array of analysis hashes
146
+ #
147
+ # @example
148
+ # analysis = stock.df.fpl_analysis(fpop: 10)
149
+ # analysis.first[:direction] # => :UP, :DOWN, :UNCERTAIN, or :FLAT
150
+ # analysis.first[:magnitude] # => Average expected movement percentage
151
+ # analysis.first[:risk] # => Volatility range
152
+ #
153
+ def fpl_analysis(column: 'adj_close_price', fpop: 14)
154
+ prices = @data[column.to_s].to_a
155
+ SQA::FPOP.fpl_analysis(prices, fpop: fpop)
186
156
  end
187
- alias_method :rename_vectors, :rename
188
157
 
189
158
 
190
- # Map the values of the vectors into different objects
191
- # types is a Hash where the key is the vector name and
192
- # the value is a proc
193
- #
194
- # For Example:
195
- # {
196
- # price: -> (v) {v.to_f.round(3)}
197
- # }
198
- #
199
- def coerce_vectors!(transformers)
200
- transformers.each_pair do |key, transformer|
201
- @data[key].map!{|v| transformer.call(v)}
202
- end
159
+ def self.is_date?(value)
160
+ value.is_a?(String) && !/\d{4}-\d{2}-\d{2}/.match(value).nil?
203
161
  end
204
162
 
205
163
 
@@ -219,135 +177,88 @@ class SQA::DataFrame
219
177
  @data.respond_to?(method_name) || super
220
178
  end
221
179
 
222
- #################################################
223
180
  class << self
224
-
225
- def concat(base_df, other_df)
226
- base_df.concat!(other_df)
227
- end
228
-
229
-
230
- # TODO: The Data class has its own load which also supports
231
- # YAML by default. Maybe this method should
232
- # make use of @data = Data.load(source)
181
+ # Load a DataFrame from a file source
182
+ # This is the primary method for loading persisted DataFrames
233
183
  #
234
- def load(source:, mapping: {}, transformers:{})
235
- file_type = source.extname[1..].downcase.to_sym
236
-
237
- df = if :csv == file_type
238
- from_csv_file(source, mapping: mapping, transformers: transformers)
239
- elsif :json == file_type
240
- from_json_file(source, mapping: mapping, transformers: transformers)
241
- else
242
- raise BadParameterError, "unsupported file type: #{file_type}"
243
- end
244
-
245
- unless transformers.empty?
246
- df.coerce_vectors!(transformers)
247
- end
248
-
249
- df
184
+ # @param source [String, Pathname] Path to CSV file
185
+ # @param transformers [Hash] Column transformations to apply (usually not needed for cached data)
186
+ # @param mapping [Hash] Column name mappings (usually not needed for cached data)
187
+ # @return [SQA::DataFrame] Loaded DataFrame
188
+ #
189
+ # Note: For cached CSV files, transformers and mapping should typically be empty
190
+ # since transformations were already applied when the data was first fetched.
191
+ # We only apply them if the CSV has old-format column names that need migration.
192
+ def load(source:, transformers: {}, mapping: {})
193
+ df = Polars.read_csv(source.to_s)
194
+
195
+ # Auto-detect if CSV needs migration (has old column names like "open" instead of "open_price")
196
+ # Only apply mapping if explicitly provided (for migration scenarios)
197
+ new(df, mapping: mapping, transformers: transformers)
250
198
  end
251
199
 
252
-
253
200
  def from_aofh(aofh, mapping: {}, transformers: {})
254
- new(
255
- aofh,
256
- mapping: mapping,
257
- transformers: transformers
201
+ aoh_sanitized = aofh.map { |entry| entry.transform_keys(&:to_s) }
202
+ columns = aoh_sanitized.first.keys
203
+ data = aoh_sanitized.map(&:values)
204
+ df = Polars::DataFrame.new(
205
+ data,
206
+ columns: columns
258
207
  )
208
+ new(df)
259
209
  end
260
210
 
261
211
 
262
212
  def from_csv_file(source, mapping: {}, transformers: {})
263
- aofh = []
264
-
265
- CSV.foreach(source, headers: true) do |row|
266
- aofh << row.to_h
267
- end
268
-
269
- from_aofh(aofh, mapping: mapping, transformers: transformers)
213
+ df = Polars.read_csv(source)
214
+ new(df, mapping: mapping, transformers: transformers)
270
215
  end
271
216
 
272
217
 
273
218
  def from_json_file(source, mapping: {}, transformers: {})
274
- aofh = JSON.parse(source.read)
275
-
219
+ aofh = JSON.parse(File.read(source)).map { |entry| entry.transform_keys(&:to_s) }
276
220
  from_aofh(aofh, mapping: mapping, transformers: transformers)
277
221
  end
278
222
 
279
223
 
280
- # aofh -- Array of Hashes
281
- # hofa -- Hash of Arrays
282
- def aofh_to_hofa(aofh, mapping: {}, transformers: {})
283
- hofa = {}
284
- keys = aofh.first.keys
285
-
286
- keys.each do |key|
287
- hofa[key] = []
288
- end
289
-
290
- aofh.each do |entry|
291
- keys.each do |key|
292
- hofa[key] << entry[key]
293
- end
224
+ def generate_mapping(keys)
225
+ keys.each_with_object({}) do |key, hash|
226
+ hash[key.to_s] = underscore_key(key.to_s)
294
227
  end
295
-
296
- # SMELL: This might not be necessary
297
- normalize_keys(hofa, adapter_mapping: mapping)
298
- end
299
-
300
-
301
- def normalize_keys(hofa, adapter_mapping: {})
302
- hofa = rename(adapter_mapping, hofa)
303
- mapping = generate_mapping(hofa.keys)
304
-
305
- rename(mapping, hofa)
306
228
  end
307
229
 
308
230
 
309
- def rename(mapping, hofa)
310
- mapping.each_pair do |old_key, new_key|
311
- hofa[new_key] = hofa.delete(old_key) if hofa.has_key?(old_key)
312
- end
313
-
314
- hofa
315
- end
316
-
317
-
318
- def generate_mapping(keys)
319
- mapping = {}
320
-
321
- keys.each do |key|
322
- mapping[key] = underscore_key(sanitize_key(key)) unless key.is_a?(Symbol)
323
- end
324
-
325
- mapping
231
+ def underscore_key(key)
232
+ key.to_s
233
+ .gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2')
234
+ .gsub(/([a-z\d])([A-Z])/, '\1_\2')
235
+ .gsub(/[^a-zA-Z0-9]/, ' ')
236
+ .squeeze(' ')
237
+ .strip
238
+ .tr(' ', '_')
239
+ .downcase
240
+ .to_sym
326
241
  end
327
242
 
243
+ alias sanitize_key underscore_key
328
244
 
329
- # returns a snake_case Symbol
330
- def underscore_key(key)
331
- key.to_s.gsub(/::/, '/').
332
- gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
333
- gsub(/([a-z\d])([A-Z])/,'\1_\2').
334
- tr("-", "_").
335
- downcase.to_sym
245
+ def normalize_keys(hash, adapter_mapping: {})
246
+ hash = rename(hash, adapter_mapping) unless adapter_mapping.empty?
247
+ mapping = generate_mapping(hash.keys)
248
+ rename(hash, mapping)
336
249
  end
337
250
 
338
251
 
339
- # removes punctuation and specal characters,
340
- # replaces space with underscore.
341
- def sanitize_key(key)
342
- key.tr('.():/','').gsub(/^\d+.?\s/, "").tr(' ','_')
252
+ def rename(hash, mapping)
253
+ mapping.each { |old_key, new_key| hash[new_key] = hash.delete(old_key) if hash.key?(old_key) }
254
+ hash
343
255
  end
344
256
 
345
257
 
346
- # returns true if key is in a date format
347
- # like 2023-06-03
348
- def is_date?(key)
349
- !/(\d{4}-\d{2}-\d{2})/.match(key.to_s).nil?
258
+ def aofh_to_hofa(aofh, mapping: {}, transformers: {})
259
+ hofa = Hash.new { |h, k| h[k.downcase] = [] }
260
+ aofh.each { |entry| entry.each { |key, value| hofa[key.to_s.downcase] << value } }
261
+ hofa
350
262
  end
351
263
  end
352
264
  end
353
-