sqa 0.0.14 → 0.0.17
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/checksums/sqa-0.0.15.gem.sha512 +1 -0
- data/checksums/sqa-0.0.17.gem.sha512 +1 -0
- data/docs/alpha_vantage_technical_indicators.md +62 -0
- data/lib/sqa/cli.rb +0 -1
- data/lib/sqa/config.rb +7 -6
- data/lib/sqa/data_frame/alpha_vantage.rb +24 -71
- data/lib/sqa/data_frame/yahoo_finance.rb +4 -48
- data/lib/sqa/data_frame.rb +282 -32
- data/lib/sqa/errors.rb +27 -3
- data/lib/sqa/init.rb +51 -0
- data/lib/sqa/stock.rb +102 -45
- data/lib/sqa/strategy.rb +1 -1
- data/lib/sqa/version.rb +1 -4
- data/lib/sqa/web.rb +1 -1
- data/lib/sqa.rb +33 -54
- metadata +25 -71
- data/checksums/sqa-0.0.14.gem.sha512 +0 -1
- data/lib/patches/daru/category.rb +0 -19
- data/lib/patches/daru/data_frame.rb +0 -19
- data/lib/patches/daru/plotting/svg-graph/category.rb +0 -55
- data/lib/patches/daru/plotting/svg-graph/dataframe.rb +0 -105
- data/lib/patches/daru/plotting/svg-graph/vector.rb +0 -102
- data/lib/patches/daru/plotting/svg-graph.rb +0 -7
- data/lib/patches/daru/vector.rb +0 -19
- data/lib/patches/daru.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 911775606b7e0fa046261c5a9bc4d3be21ca9caf38c37011141f1393ac6e5063
+  data.tar.gz: d70ae996a39dbe7c386750286cb4cc437681d584945b31104e91b06c30b1600f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0074688f69947d20d5aae7c81b090a91fb4f06862084b2349b248ecdf2e17376f47d5a1acffbfb7b2700865cf25f5ff5ba054514d00ed88750f46224b4c9360e'
+  data.tar.gz: 38d1ebf511e9dfa2b87084ed77093e2fc03ccf6eeb6a00e42d1060acca82e135d0b4c0d36a25d9f9bc4a2dd9adf91749a6640aab14b304e33f24fee0c3eb7e15
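The SHA256/SHA512 entries above are digests of the two archives packed inside the .gem file (metadata.gz and data.tar.gz). A hedged sketch of recomputing the SHA256 pair from a local copy — the gem path is assumed, and Gem::Package::TarReader is rubygems' own tar walker, so treat the exact calls as an assumption:

```ruby
require 'digest'
require 'rubygems/package'

# A .gem file is a tar archive; metadata.gz and data.tar.gz are the
# members that checksums.yaml describes.
File.open('sqa-0.0.17.gem', 'rb') do |gem_file|        # assumed local path
  Gem::Package::TarReader.new(gem_file) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end
```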
data/README.md
CHANGED
@@ -1,3 +1,7 @@
+**Replacing Daru** with Hashie::Mash
+
+This is branch hashie_df
+
 # SQA - Simple Qualitative Analysis
 
 This is a very simplistic set of tools for running technical analysis on a stock portfolio. Simplistic means it is not reliable nor intended for any kind of financial use. Think of it as a training tool. I do. Its helping me understand why I need professional help from people who know what they are doing.
data/checksums/sqa-0.0.15.gem.sha512 ADDED
@@ -0,0 +1 @@
+d4f3ab1bf26de034f0f044a5cab9d86e61221fc2d160056c1c24c166586e7ce72d90095ebea01965f5cabf989c4116e7409f8fc6749cabe7d5a13e34f87f4b96
data/checksums/sqa-0.0.17.gem.sha512 ADDED
@@ -0,0 +1 @@
+2ee94a54d6ac3d13685dc9b91a2bae0fe75feab6148e1aa9a9d4096961b9b7b577b7ce9d1264f0cce260640515ddd86d5fd5fd2b66f49175844c903581ff6fd9
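Each of the two files above holds one line: the SHA512 of the corresponding .gem archive. A minimal verification sketch, assuming both the gem and its checksum file sit in the current directory:

```ruby
require 'digest'

expected = File.read('sqa-0.0.17.gem.sha512').strip
actual   = Digest::SHA512.hexdigest(File.binread('sqa-0.0.17.gem'))

puts(actual == expected ? 'checksum OK' : 'checksum MISMATCH')
```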
data/docs/alpha_vantage_technical_indicators.md ADDED
@@ -0,0 +1,62 @@
+# Alpha Vantage
+## Technical Indicators
+
+The following technical indicators are available from Alpha Vantage
+using a free API key.
+
+| Acronym      | Indicator Description                       |
+|--------------|---------------------------------------------|
+| AD           | Accumulation/Distribution                   |
+| ADOSC        | Accumulation/Distribution Oscillator        |
+| ADX          | Average Directional Index                   |
+| ADXR         | Average Directional Movement Rating         |
+| APO          | Absolute Price Oscillator                   |
+| AROON        | Aroon Indicator                             |
+| AROONOSC     | Aroon Oscillator                            |
+| ATR          | Average True Range                          |
+| BBANDS       | Bollinger Bands                             |
+| BOP          | Balance of Power                            |
+| CCI          | Commodity Channel Index                     |
+| CMO          | Chande Momentum Oscillator                  |
+| DEMA         | Double Exponential Moving Average           |
+| DX           | Directional Movement Index                  |
+| EMA          | Exponential Moving Average                  |
+| HT_DCPERIOD  | Hilbert Transform - Dominant Cycle Period   |
+| HT_DCPHASE   | Hilbert Transform - Dominant Cycle Phase    |
+| HT_PHASOR    | Hilbert Transform - Phasor Components       |
+| HT_SINE      | Hilbert Transform - SineWave                |
+| HT_TRENDLINE | Hilbert Transform - Instantaneous Trendline |
+| HT_TRENDMODE | Hilbert Transform - Trend vs Cycle Mode     |
+| KAMA         | Kaufman Adaptive Moving Average             |
+| MACD         | Moving Average Convergence Divergence       |
+| MACDEXT      | MACD with controllable MA type              |
+| MAMA         | MESA Adaptive Moving Average                |
+| MFI          | Money Flow Index                            |
+| MIDPOINT     | MidPoint over period                        |
+| MIDPRICE     | Midpoint Price over period                  |
+| MINUS_DI     | Minus Directional Indicator                 |
+| MINUS_DM     | Minus Directional Movement                  |
+| MOM          | Momentum                                    |
+| NATR         | Normalized Average True Range               |
+| OBV          | On Balance Volume                           |
+| PLUS_DI      | Plus Directional Indicator                  |
+| PLUS_DM      | Plus Directional Movement                   |
+| PPO          | Percentage Price Oscillator                 |
+| ROC          | Rate of Change                              |
+| ROCR         | Rate of Change Ratio                        |
+| RSI          | Relative Strength Index                     |
+| SAR          | Parabolic SAR                               |
+| SMA          | Simple Moving Average                       |
+| STOCH        | Stochastic Oscillator                       |
+| STOCHF       | Stochastic Fast                             |
+| STOCHRSI     | Stochastic Relative Strength Index          |
+| T3           | Triple Exponential Moving Average (T3)      |
+| TEMA         | Triple Exponential Moving Average           |
+| TRANGE       | True Range                                  |
+| TRIMA        | Triangular Moving Average                   |
+| TRIX         | 1-day Rate of Change of a Triple Smooth EMA |
+| ULTOSC       | Ultimate Oscillator                         |
+| VWAP         | Volume Weighted Average Price               |
+| WILLR        | Williams' %R                                |
+| WMA          | Weighted Moving Average                     |
+
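The gem itself only downloads TIME_SERIES_DAILY (see alpha_vantage.rb below); every indicator in this table comes from the same /query endpoint by swapping the function parameter. A sketch for RSI following the public Alpha Vantage docs — the ticker and the AV_API_KEY env var are placeholders:

```ruby
require 'faraday'
require 'json'

conn = Faraday.new(url: 'https://www.alphavantage.co')

response = conn.get('/query', {
  function:    'RSI',
  symbol:      'AAPL',            # placeholder ticker
  interval:    'daily',
  time_period: 14,
  series_type: 'close',
  apikey:      ENV['AV_API_KEY']  # free key from alphavantage.co
})

rsi = JSON.parse(response.body)['Technical Analysis: RSI']
```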
data/lib/sqa/cli.rb
CHANGED
data/lib/sqa/config.rb
CHANGED
@@ -6,17 +6,18 @@
 # config file ..... overrides envar
 # command line parameters ...... overrides config file
 
-require 'hashie'
-require 'yaml'
-require 'json'
-require 'toml-rb'
-
 
 module SQA
   class Config < Hashie::Dash
     include Hashie::Extensions::Dash::PropertyTranslation
     include Hashie::Extensions::Coercion
-
+
+    # FIXME: Getting undefined error PredefinedValues
+    # I'm thinking that Ruby is dropping it from the ObjectSpace
+    # Looks like it is only used for the log level. Should
+    # able to work around that.
+    #
+    # include Hashie::Extensions::Dash::PredefinedValues
 
     property :config_file #,a String filepath for the current config overriden by cli options
     property :dump_config # a String filepath into which to dump the current config
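For readers who have not used Hashie::Dash: it is a Hash subclass with declared, method-accessible properties, and the PropertyTranslation extension lets a property transform its incoming value. A toy illustration of the pattern — this is not the gem's actual property set:

```ruby
require 'hashie'

class DemoConfig < Hashie::Dash
  include Hashie::Extensions::Dash::PropertyTranslation

  property :data_dir,  default: '~/sqa_data'
  property :log_level, default: :info, transform_with: ->(v) { v.to_sym }
end

config = DemoConfig.new(log_level: 'debug')
config.log_level   #=> :debug
config.data_dir    #=> "~/sqa_data"
```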
data/lib/sqa/data_frame/alpha_vantage.rb CHANGED
@@ -4,12 +4,9 @@
 # Using the Alpha Vantage JSON interface
 #
 
-require 'faraday'
-require 'json'
 
-class SQA::DataFrame < Daru::DataFrame
+class SQA::DataFrame
   class AlphaVantage
-    API_KEY = Nenv.av_api_key
     CONNECTION = Faraday.new(url: 'https://www.alphavantage.co')
     HEADERS = YahooFinance::HEADERS
 
@@ -26,47 +23,16 @@ class SQA::DataFrame < Daru::DataFrame
       "volume" => HEADERS[6]
     }
 
+    TRANSFORMERS = {
+      HEADERS[1] => -> (v) { v.to_f.round(3) },
+      HEADERS[2] => -> (v) { v.to_f.round(3) },
+      HEADERS[3] => -> (v) { v.to_f.round(3) },
+      HEADERS[4] => -> (v) { v.to_f.round(3) },
+      HEADERS[5] => -> (v) { v.to_f.round(3) },
+      HEADERS[6] => -> (v) { v.to_i }
+    }
 
     ################################################################
-    # Load a Dataframe from a csv file
-    def self.load(ticker, type="csv")
-      filepath = SQA.data_dir + "#{ticker}.#{type}"
-
-      if filepath.exist?
-        df = normalize_vector_names SQA::DataFrame.load(ticker, type)
-      else
-        df = recent(ticker, full: true)
-        df.send("to_#{type}",filepath)
-      end
-
-      df
-    end
-
-
-    # Normalize the vector (aka column) names as
-    # symbols using the standard names set by
-    # Yahoo Finance ... since it was the first one
-    # not because its anything special.
-    #
-    def self.normalize_vector_names(df)
-      headers = df.vectors.to_a
-
-      # convert vector names to symbols
-      # when they are strings. They become stings
-      # when the data frame is saved to a CSV file
-      # and then loaded back in.
-
-      if headers.first == HEADERS.first.to_s
-        a_hash = {}
-        HEADERS.each {|k| a_hash[k.to_s] = k}
-        df.rename_vectors(a_hash) # renames from String to Symbol
-      else
-        df.rename_vectors(HEADER_MAPPING)
-      end
-
-      df
-    end
-
 
     # Get recent data from JSON API
     #
@@ -84,7 +50,8 @@ class SQA::DataFrame < Daru::DataFrame
     # and adding that to the data frame as if it were
     # adjusted.
     #
-    def self.recent(ticker, full: false)
+    def self.recent(ticker, full: false, from_date: nil)
+
       # NOTE: Using the CSV format because the JSON format has
       #       really silly key values. The column names for the
       #       CSV format are much better.
@@ -92,7 +59,7 @@ class SQA::DataFrame < Daru::DataFrame
         "/query?" +
         "function=TIME_SERIES_DAILY&" +
         "symbol=#{ticker.upcase}&" +
-        "apikey=#{
+        "apikey=#{SQA.av.key}&" +
         "datatype=csv&" +
         "outputsize=#{full ? 'full' : 'compact'}"
       ).to_hash
@@ -102,18 +69,19 @@ class SQA::DataFrame < Daru::DataFrame
       end
 
       raw = response[:body].split
-
       headers = raw.shift.split(',')
+
       headers[0] = 'date' # website returns "timestamp" but that
                           # has an unintended side-effect when
                           # the names are normalized.
+                          # SMELL: IS THIS STILL TRUE?
 
       close_inx     = headers.size - 2
      adj_close_inx = close_inx + 1
 
      headers.insert(adj_close_inx, 'adjusted_close')
 
-
+      aofh = raw.map do |e|
        e2 = e.split(',')
        e2[1..-2] = e2[1..-2].map(&:to_f) # converting open, high, low, close
        e2[-1] = e2[-1].to_i # converting volumn
@@ -121,35 +89,20 @@ class SQA::DataFrame < Daru::DataFrame
        headers.zip(e2).to_h
      end
 
-
-
-
-
+      if from_date
+        aofh.reject!{|e| Date.parse(e['date']) < from_date}
+      end
 
-
-      #
-      # base_df is ascending on timestamp
-      # update_df is descending on timestamp
-      #
-      # base_df content came from CSV file downloaded
-      # from Yahoo Finance.
-      #
-      # update_df came from scraping the webpage
-      # at Yahoo Finance for the recent history.
-      #
-      # Returns a combined DataFrame.
-      #
-      def self.append(base_df, updates_df)
-        last_timestamp = Date.parse base_df.timestamp.last
-        filtered_df = updates_df.filter_rows { |row| Date.parse(row[:timestamp]) > last_timestamp }
+      return nil if aofh.empty?
 
-
+      # ensure tha the data frame is
+      # always sorted oldest to newest.
 
-
-
+      if aofh.first['date'] > aofh.last['date']
+        aofh.reverse!
       end
 
-
+      SQA::DataFrame.from_aofh(aofh, mapping: HEADER_MAPPING, transformers: TRANSFORMERS)
     end
   end
 end
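TRANSFORMERS is nothing more than column-name => lambda; recent() passes it along with HEADER_MAPPING to from_aofh, and coerce_vectors (in data_frame.rb below) is what applies each lambda across its column. A standalone sketch of that coercion step with made-up keys and values:

```ruby
transformers = {
  close:  ->(v) { v.to_f.round(3) },
  volume: ->(v) { v.to_i }
}

# Column values as they come out of the CSV body: all strings.
hofa = { close: ['178.85001', '181.99'], volume: ['52472900', '61235400'] }

transformers.each_pair do |key, fn|
  hofa[key].map! { |v| fn.call(v) }
end

hofa #=> {:close=>[178.85, 181.99], :volume=>[52472900, 61235400]}
```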
data/lib/sqa/data_frame/yahoo_finance.rb CHANGED
@@ -1,10 +1,8 @@
 # lib/sqa/data_frame/yahoo_finance.rb
 # frozen_string_literal: true
 
-require 'faraday'
-require 'nokogiri'
 
-class SQA::DataFrame < Daru::DataFrame
+class SQA::DataFrame
   class YahooFinance
     CONNECTION = Faraday.new(url: 'https://finance.yahoo.com')
     HEADERS = [
@@ -32,21 +30,6 @@ class SQA::DataFrame < Daru::DataFrame
     }
 
     ################################################################
-    def self.load(filename, options={}, &block)
-      df = SQA::DataFrame.load(filename, options={}, &block)
-
-      headers = df.vectors
-
-      if headers.first == HEADERS.first.to_s
-        a_hash = {}
-        HEADERS.each {|k| a_hash[k.to_s] = k}
-        df.rename_vectors(a_hash)
-      else
-        df.rename_vectors(HEADER_MAPPING)
-      end
-
-      df
-    end
 
 
     # Scrape the Yahoo Finance website to get recent
@@ -64,7 +47,7 @@ class SQA::DataFrame < Daru::DataFrame
 
       rows = table.css('tbody tr')
 
-
+      aofh = []
 
       rows.each do |row|
        cols = row.css('td').map{|c| c.children[0].text}
@@ -82,37 +65,10 @@ class SQA::DataFrame < Daru::DataFrame
        cols[0] = Date.parse(cols[0]).to_s
        cols[6] = cols[6].tr(',','').to_i
        (1..5).each {|x| cols[x] = cols[x].to_f}
-
-      end
-
-      Daru::DataFrame.new(data)
-    end
-
-
-    # Append update_df rows to the base_df
-    #
-    # base_df is ascending on timestamp
-    # update_df is descending on timestamp
-    #
-    # base_df content came from CSV file downloaded
-    # from Yahoo Finance.
-    #
-    # update_df came from scraping the webpage
-    # at Yahoo Finance for the recent history.
-    #
-    # Returns a combined DataFrame.
-    #
-    def self.append(base_df, updates_df)
-      last_timestamp = Date.parse base_df.timestamp.last
-      filtered_df = updates_df.filter_rows { |row| Date.parse(row[:timestamp]) > last_timestamp }
-
-      last_inx = filtered_df.size - 1
-
-      (0..last_inx).each do |x|
-        base_df.add_row filtered_df.row[last_inx-x]
+        aofh << HEADERS.zip(cols).to_h
       end
 
-
+      aofh
     end
   end
 end
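With the Daru-era load/append code gone, the scraper's whole job is to emit an array of hashes (aofh), one hash per table row. The zip step at the heart of the loop, shown with assumed header names and one made-up row:

```ruby
headers = %i[timestamp open high low close adj_close volume]  # assumed names
cols    = ['2023-08-04', 182.10, 187.38, 181.92, 185.99, 185.99, 115_799_700]

headers.zip(cols).to_h
#=> {:timestamp=>"2023-08-04", :open=>182.1, :high=>187.38, :low=>181.92,
#    :close=>185.99, :adj_close=>185.99, :volume=>115799700}
```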
data/lib/sqa/data_frame.rb
CHANGED
@@ -1,52 +1,302 @@
 # lib/sqa/data_frame.rb
 # frozen_string_literal: true
 
-
-require_relative 'data_frame/alpha_vantage'
+require 'forwardable'
 
-
+require_relative 'data_frame/yahoo_finance'
+require_relative 'data_frame/alpha_vantage'
 
-
-
-
-
-
+class SQA::DataFrame
+  class Data < Hashie::Mash
+    # SNELL: Are all of these needed?
+    include Hashie::Extensions::Mash::KeepOriginalKeys
+    # include Hashie::Extensions::Mash::PermissiveRespondTo
+    include Hashie::Extensions::Mash::SafeAssignment
+    include Hashie::Extensions::Mash::SymbolizeKeys
+    # include Hashie::Extensions::Mash::DefineAccessors
+  end
+
+  extend Forwardable
 
-
+  # @data is of class Data
+  attr_accessor :data
+
+  # Expects a Hash of Arrays (hofa)
+  def initialize(a_hash={})
+    @data = Data.new(a_hash)
+  end
 
-  writer << vectors.to_a if options[:headers]
 
-
-
-
-
-
-
+  def to_csv(path_to_file)
+    CSV.open(path_to_file, 'w') do |csv|
+      csv << keys
+      size.times do |x|
+        csv << row(x)
+      end
     end
+  end
+
 
-
+  def to_json(path_to_file)
+    NotImplemented.raise
   end
-end
 
 
+  def to_aofh
+    NotImplemented.raise
+  end
 
 
-
+  def_delegator :@data, :to_h, :to_hofa
+  alias_method :to_h, :to_hofa
+
+
+  # The number of data rows
+  def size
+    data[@data.keys[0]].size
+  end
+  alias_method :nrows, :size
+  alias_method :length, :size
+
+
+  def_delegator :@data, :keys
+  alias_method :vectors, :keys
+  alias_method :columns, :keys
+
+
+  def ncols
+    keys.size
+  end
+
+
+  def_delegator :@data, :values, :values
+  def_delegator :@data, :[], :[]
+  def_delegator :@data, :[]=, :[]=
+
+
+  def rows
+    result = []
+    (0..size - 1).each do |x|
+      entry = row(x)
+      result << entry
+    end
+    result
+  end
+  alias_method :to_a, :rows
+
+
+  def row(x)
+    if x.is_a?(Integer)
+      raise BadParameterError if x < 0 || x >= size
+
+    elsif x.is_a?(Hash)
+      raise BadParameterError, "x is #{x}" if x.size > 1
+      key = x.keys[0]
+      x = @data[key].index(x[key])
+      raise BadParameterError, 'Not Found #{x}' if x.nil?
+      return keys.zip(row(x)).to_h
 
-  #################################################
-  def self.load(ticker, type="csv", options={}, &block)
-    source = SQA.data_dir + "#{ticker}.#{type}"
-
-    if :csv == type
-      from_csv(source, options={}, &block)
-    elsif :json == type
-      from_json(source, options={}, &block)
-    elsif %i[txt dat].include?(type)
-      from_plaintext(source, options={}, &block)
-    elsif :xls == type
-      from_excel(source, options={}, &block)
     else
-      raise
+      raise BadParameterError, "Unknown x.class: #{x.class}"
+    end
+
+    entry = []
+
+    keys.each do |key|
+      entry << @data[key][x]
+    end
+
+    entry
+  end
+
+
+  def append(new_df)
+    raise(BadParameterError, "Key mismatch") if keys != new_df.keys
+
+    keys.each do |key|
+      @data[key] += new_df[key]
+    end
+  end
+  alias_method :concat, :append
+
+
+  # Creates a new instance with new keys
+  # based on the mapping hash where
+  #   { old_key => new_key }
+  #
+  def rename(mapping)
+    SQA::DataFrame.new(
+      self.class.rename(
+        mapping,
+        @data.to_h
+      )
+    )
+  end
+  alias_method :rename_vectors, :rename
+
+
+  # Map the values of the vectors into different objects
+  # types is a Hash where the key is the vector name and
+  # the value is a proc
+  #
+  # For Example:
+  #   {
+  #     price: -> (v) {v.to_f.round(3)}
+  #   }
+  #
+  def coerce_vectors(transformers)
+    transformers.each_pair do |key, transformer|
+      @data[key].map!{|v| transformer.call(v)}
+    end
+  end
+
+
+  def method_missing(method_name, *args, &block)
+    if @data.respond_to?(method_name)
+      self.class.send(:define_method, method_name) do |*method_args, &method_block|
+        @data.send(method_name, *method_args, &method_block)
+      end
+      send(method_name, *args, &block)
+    else
+      super
+    end
+  end
+
+
+  def respond_to_missing?(method_name, include_private = false)
+    @data.respond_to?(method_name) || super
+  end
+
+  #################################################
+  class << self
+
+    def append(base_df, other_df)
+      base_df.append(other_df)
+    end
+
+
+    # TODO: The Data class has its own load which also supports
+    #       YAML by default. Maybe this method should
+    #       make use of @data = Data.load(source)
+    #
+    def load(source:, mapping: {}, transformers:{})
+      file_type = source.extname[1..].downcase.to_sym
+
+      df = if :csv == file_type
+             from_csv_file(source, mapping: mapping, transformers: transformers)
+           elsif :json == file_type
+             from_json_file(source, mapping: mapping, transformers: transformers)
+           else
+             raise BadParameterError, "unsupported file type: #{file_type}"
+           end
+
+      unless transformers.empty?
+        df.coerce_vectors(transformers)
+      end
+
+      df
+    end
+
+
+    def from_aofh(aofh, mapping: {}, transformers: {})
+      new(
+        aofh_to_hofa(
+          aofh,
+          mapping: mapping,
+          transformers: transformers
+        )
+      )
+    end
+
+
+    def from_csv_file(source, mapping: {}, transformers: {})
+      aofh = []
+
+      CSV.foreach(source, headers: true) do |row|
+        aofh << row.to_h
+      end
+
+      from_aofh(aofh, mapping: mapping, transformers: transformers)
+    end
+
+
+    def from_json_file(source, mapping: {}, transformers: {})
+      aofh = JSON.parse(source.read)
+
+      from_aofh(aofh, mapping: mapping, transformers: transformers)
+    end
+
+
+    # aofh -- Array of Hashes
+    # hofa -- Hash of Arrays
+    def aofh_to_hofa(aofh, mapping: {}, transformers: {})
+      hofa = {}
+      keys = aofh.first.keys
+
+      keys.each do |key|
+        hofa[key] = []
+      end
+
+      aofh.each do |entry|
+        keys.each do |key|
+          hofa[key] << entry[key]
+        end
+      end
+
+      # SMELL: This might be necessary
+      normalize_keys(hofa, adapter_mapping: mapping)
+    end
+
+
+    def normalize_keys(hofa, adapter_mapping: {})
+      hofa = rename(adapter_mapping, hofa)
+      mapping = generate_mapping(hofa.keys)
+      rename(mapping, hofa)
+    end
+
+
+    def rename(mapping, hofa)
+      mapping.each_pair do |old_key, new_key|
+        hofa[new_key] = hofa.delete(old_key)
+      end
+
+      hofa
+    end
+
+
+    def generate_mapping(keys)
+      mapping = {}
+
+      keys.each do |key|
+        mapping[key] = underscore_key(sanitize_key(key)) unless key.is_a?(Symbol)
+      end
+
+      mapping
+    end
+
+
+    # returns a snake_case Symbol
+    def underscore_key(key)
+      key.to_s.gsub(/::/, '/').
+        gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+        gsub(/([a-z\d])([A-Z])/,'\1_\2').
+        tr("-", "_").
+        downcase.to_sym
+    end
+
+
+    # removes punctuation and specal characters,
+    # replaces space with underscore.
+    def sanitize_key(key)
+      key.tr('.():/','').gsub(/^\d+.?\s/, "").tr(' ','_')
+    end
+
+
+    # returns true if key is in a date format
+    # like 2023-06-03
+    def is_date?(key)
+      !/(\d{4}-\d{2}-\d{2})/.match(key.to_s).nil?
     end
   end
 end
+
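Taken together the class is a small column store: @data maps each column name to an Array, and everything else is delegation and bookkeeping. A usage sketch against the code above, with toy quotes rather than a real download — note that from_aofh snake_cases string keys via normalize_keys, and that coerce_vectors is called explicitly here because only load applies transformers automatically:

```ruby
aofh = [
  { 'Date' => '2023-08-01', 'Close Price' => '195.605' },
  { 'Date' => '2023-08-02', 'Close Price' => '192.580' }
]

df = SQA::DataFrame.from_aofh(aofh)
df.keys   #=> [:date, :close_price]
df.size   #=> 2

df.coerce_vectors(close_price: ->(v) { v.to_f })
df.row(0)                   #=> ["2023-08-01", 195.605]
df.row(date: '2023-08-02')  #=> {:date=>"2023-08-02", :close_price=>192.58}
```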