sqa 0.0.14 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -0
- data/checksums/sqa-0.0.15.gem.sha512 +1 -0
- data/checksums/sqa-0.0.17.gem.sha512 +1 -0
- data/docs/alpha_vantage_technical_indicators.md +62 -0
- data/lib/sqa/cli.rb +0 -1
- data/lib/sqa/config.rb +7 -6
- data/lib/sqa/data_frame/alpha_vantage.rb +24 -71
- data/lib/sqa/data_frame/yahoo_finance.rb +4 -48
- data/lib/sqa/data_frame.rb +282 -32
- data/lib/sqa/errors.rb +27 -3
- data/lib/sqa/init.rb +51 -0
- data/lib/sqa/stock.rb +102 -45
- data/lib/sqa/strategy.rb +1 -1
- data/lib/sqa/version.rb +1 -4
- data/lib/sqa/web.rb +1 -1
- data/lib/sqa.rb +33 -54
- metadata +25 -71
- data/checksums/sqa-0.0.14.gem.sha512 +0 -1
- data/lib/patches/daru/category.rb +0 -19
- data/lib/patches/daru/data_frame.rb +0 -19
- data/lib/patches/daru/plotting/svg-graph/category.rb +0 -55
- data/lib/patches/daru/plotting/svg-graph/dataframe.rb +0 -105
- data/lib/patches/daru/plotting/svg-graph/vector.rb +0 -102
- data/lib/patches/daru/plotting/svg-graph.rb +0 -7
- data/lib/patches/daru/vector.rb +0 -19
- data/lib/patches/daru.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 911775606b7e0fa046261c5a9bc4d3be21ca9caf38c37011141f1393ac6e5063
+  data.tar.gz: d70ae996a39dbe7c386750286cb4cc437681d584945b31104e91b06c30b1600f
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: '0074688f69947d20d5aae7c81b090a91fb4f06862084b2349b248ecdf2e17376f47d5a1acffbfb7b2700865cf25f5ff5ba054514d00ed88750f46224b4c9360e'
+  data.tar.gz: 38d1ebf511e9dfa2b87084ed77093e2fc03ccf6eeb6a00e42d1060acca82e135d0b4c0d36a25d9f9bc4a2dd9adf91749a6640aab14b304e33f24fee0c3eb7e15
data/README.md
CHANGED
@@ -1,3 +1,7 @@
+**Replacing Daru** with Hashie::Mash
+
+This is branch hashie_df
+
 # SQA - Simple Qualitative Analysis
 
 This is a very simplistic set of tools for running technical analysis on a stock portfolio.  Simplistic means it is not reliable nor intended for any kind of financial use.  Think of it as a training tool.  I do.  Its helping me understand why I need professional help from people who know what they are doing.
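Since the README now flags the Daru-to-Hashie::Mash migration, here is a minimal sketch (with made-up column data, not taken from the gem) of the storage style the rest of this diff builds on:

```ruby
require 'hashie'

# Column-oriented storage: a Hash of Arrays wrapped in a
# Hashie::Mash, giving method-style access without Daru.
data = Hashie::Mash.new(
  timestamp: ['2023-06-01', '2023-06-02'],
  adj_close: [180.09, 180.95]
)

data.adj_close        #=> [180.09, 180.95]
data.keys             #=> ["timestamp", "adj_close"]
data.adj_close.last   #=> 180.95
```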
data/checksums/sqa-0.0.15.gem.sha512
ADDED
@@ -0,0 +1 @@
+d4f3ab1bf26de034f0f044a5cab9d86e61221fc2d160056c1c24c166586e7ce72d90095ebea01965f5cabf989c4116e7409f8fc6749cabe7d5a13e34f87f4b96

data/checksums/sqa-0.0.17.gem.sha512
ADDED
@@ -0,0 +1 @@
+2ee94a54d6ac3d13685dc9b91a2bae0fe75feab6148e1aa9a9d4096961b9b7b577b7ce9d1264f0cce260640515ddd86d5fd5fd2b66f49175844c903581ff6fd9
data/docs/alpha_vantage_technical_indicators.md
ADDED
@@ -0,0 +1,62 @@
+# Alpha Vantage
+## Technical Indicators
+
+The following technical indicators are available from Alpha Vantage
+using a free API key.
+
+| Acronym | Indicator Description |
+|-----------|----------------------------------------------------------|
+| AD | Accumulation/Distribution |
+| ADOSC | Accumulation/Distribution Oscillator |
+| ADX | Average Directional Index |
+| ADXR | Average Directional Movement Rating |
+| APO | Absolute Price Oscillator |
+| AROON | Aroon Indicator |
+| AROONOSC | Aroon Oscillator |
+| ATR | Average True Range |
+| BBANDS | Bollinger Bands |
+| BOP | Balance of Power |
+| CCI | Commodity Channel Index |
+| CMO | Chande Momentum Oscillator |
+| DEMA | Double Exponential Moving Average |
+| DX | Directional Movement Index |
+| EMA | Exponential Moving Average |
+| HT_DCPERIOD | Hilbert Transform - Dominant Cycle Period |
+| HT_DCPHASE | Hilbert Transform - Dominant Cycle Phase |
+| HT_PHASOR | Hilbert Transform - Phasor Components |
+| HT_SINE | Hilbert Transform - SineWave |
+| HT_TRENDLINE | Hilbert Transform - Instantaneous Trendline |
+| HT_TRENDMODE | Hilbert Transform - Trend vs Cycle Mode |
+| KAMA | Kaufman Adaptive Moving Average |
+| MACD | Moving Average Convergence Divergence |
+| MACDEXT | MACD with controllable MA type |
+| MAMA | MESA Adaptive Moving Average |
+| MFI | Money Flow Index |
+| MIDPOINT | MidPoint over period |
+| MIDPRICE | Midpoint Price over period |
+| MINUS_DI | Minus Directional Indicator |
+| MINUS_DM | Minus Directional Movement |
+| MOM | Momentum |
+| NATR | Normalized Average True Range |
+| OBV | On Balance Volume |
+| PLUS_DI | Plus Directional Indicator |
+| PLUS_DM | Plus Directional Movement |
+| PPO | Percentage Price Oscillator |
+| ROC | Rate of Change |
+| ROCR | Rate of Change Ratio |
+| RSI | Relative Strength Index |
+| SAR | Parabolic SAR |
+| SMA | Simple Moving Average |
+| STOCH | Stochastic Oscillator |
+| STOCHF | Stochastic Fast |
+| STOCHRSI | Stochastic Relative Strength Index |
+| T3 | Triple Exponential Moving Average (T3) |
+| TEMA | Triple Exponential Moving Average |
+| TRANGE | True Range |
+| TRIMA | Triangular Moving Average |
+| TRIX | 1-day Rate of Change of a Triple Smooth EMA |
+| ULTOSC | Ultimate Oscillator |
+| VWAP | Volume Weighted Average Price |
+| WILLR | Williams' %R |
+| WMA | Weighted Moving Average |
+
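These indicator functions are plain query parameters on the Alpha Vantage REST endpoint. As a hedged illustration (parameter names follow Alpha Vantage's public documentation; the `AV_API_KEY` environment variable is an assumption, not this gem's configuration), fetching one of them looks roughly like:

```ruby
require 'faraday'
require 'json'

# Fetch a Simple Moving Average (SMA) series for a ticker,
# assuming a free Alpha Vantage key in AV_API_KEY.
conn     = Faraday.new(url: 'https://www.alphavantage.co')
response = conn.get('/query', {
  function:    'SMA',
  symbol:      'AAPL',
  interval:    'daily',
  time_period: 20,
  series_type: 'close',
  apikey:      ENV['AV_API_KEY']
})

# The SMA values arrive keyed by date under this response key.
sma = JSON.parse(response.body)['Technical Analysis: SMA']
```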
data/lib/sqa/cli.rb
CHANGED
data/lib/sqa/config.rb
CHANGED
@@ -6,17 +6,18 @@
 # config file ..... overrides envar
 # command line parameters ...... overrides config file
 
-require 'hashie'
-require 'yaml'
-require 'json'
-require 'toml-rb'
-
 
 module SQA
   class Config < Hashie::Dash
     include Hashie::Extensions::Dash::PropertyTranslation
     include Hashie::Extensions::Coercion
-
+
+    # FIXME: Getting undefined error PredefinedValues
+    #        I'm thinking that Ruby is dropping it from the ObjectSpace
+    #        Looks like it is only used for the log level.  Should
+    #        able to work around that.
+    #
+    # include Hashie::Extensions::Dash::PredefinedValues
 
     property :config_file #,a String filepath for the current config overriden by cli options
     property :dump_config # a String filepath into which to dump the current config
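For readers unfamiliar with Hashie::Dash, a small stand-in sketch of the property, translation, and coercion behavior the class above relies on (the property names here are illustrative, not SQA's actual schema):

```ruby
require 'hashie'

# A small stand-in for SQA::Config showing Dash behavior:
# declared properties, defaults, and value transformation.
class DemoConfig < Hashie::Dash
  include Hashie::Extensions::Dash::PropertyTranslation
  include Hashie::Extensions::Coercion

  property :data_dir,  default: '~/sqa_data'
  property :log_level, default: :info, transform_with: ->(v) { v.to_sym }

  coerce_key :data_dir, String
end

config = DemoConfig.new(log_level: 'debug')
config.log_level  #=> :debug
config.data_dir   #=> "~/sqa_data"
```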
data/lib/sqa/data_frame/alpha_vantage.rb
CHANGED
@@ -4,12 +4,9 @@
 # Using the Alpha Vantage JSON interface
 #
 
-require 'faraday'
-require 'json'
 
-class SQA::DataFrame < Daru::DataFrame
+class SQA::DataFrame
   class AlphaVantage
-    API_KEY = Nenv.av_api_key
     CONNECTION = Faraday.new(url: 'https://www.alphavantage.co')
     HEADERS = YahooFinance::HEADERS
 
@@ -26,47 +23,16 @@ class SQA::DataFrame < Daru::DataFrame
      "volume" => HEADERS[6]
    }
 
+    TRANSFORMERS = {
+      HEADERS[1] => -> (v) { v.to_f.round(3) },
+      HEADERS[2] => -> (v) { v.to_f.round(3) },
+      HEADERS[3] => -> (v) { v.to_f.round(3) },
+      HEADERS[4] => -> (v) { v.to_f.round(3) },
+      HEADERS[5] => -> (v) { v.to_f.round(3) },
+      HEADERS[6] => -> (v) { v.to_i }
+    }
 
     ################################################################
-    # Load a Dataframe from a csv file
-    def self.load(ticker, type="csv")
-      filepath = SQA.data_dir + "#{ticker}.#{type}"
-
-      if filepath.exist?
-        df = normalize_vector_names SQA::DataFrame.load(ticker, type)
-      else
-        df = recent(ticker, full: true)
-        df.send("to_#{type}",filepath)
-      end
-
-      df
-    end
-
-
-    # Normalize the vector (aka column) names as
-    # symbols using the standard names set by
-    # Yahoo Finance ... since it was the first one
-    # not because its anything special.
-    #
-    def self.normalize_vector_names(df)
-      headers = df.vectors.to_a
-
-      # convert vector names to symbols
-      # when they are strings. They become stings
-      # when the data frame is saved to a CSV file
-      # and then loaded back in.
-
-      if headers.first == HEADERS.first.to_s
-        a_hash = {}
-        HEADERS.each {|k| a_hash[k.to_s] = k}
-        df.rename_vectors(a_hash) # renames from String to Symbol
-      else
-        df.rename_vectors(HEADER_MAPPING)
-      end
-
-      df
-    end
-
 
     # Get recent data from JSON API
     #
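The new TRANSFORMERS table pairs each column name with a lambda; `coerce_vectors` (added in data/lib/sqa/data_frame.rb below) applies them in place. A tiny standalone sketch of the same idea, with illustrative column names and values:

```ruby
# Each vector name maps to a proc that coerces its raw string values.
transformers = {
  close:  -> (v) { v.to_f.round(3) },
  volume: -> (v) { v.to_i }
}

data = { close: ['182.0157', '183.96'], volume: ['61996900', '68901800'] }

# Apply every transformer to its column, mutating in place.
transformers.each_pair do |key, transformer|
  data[key].map! { |v| transformer.call(v) }
end

data  #=> {close: [182.016, 183.96], volume: [61996900, 68901800]}
```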
@@ -84,7 +50,8 @@ class SQA::DataFrame < Daru::DataFrame
     # and adding that to the data frame as if it were
     # adjusted.
     #
-    def self.recent(ticker, full: false)
+    def self.recent(ticker, full: false, from_date: nil)
+
       # NOTE: Using the CSV format because the JSON format has
       #       really silly key values.  The column names for the
       #       CSV format are much better.
@@ -92,7 +59,7 @@ class SQA::DataFrame < Daru::DataFrame
             "/query?" +
             "function=TIME_SERIES_DAILY&" +
             "symbol=#{ticker.upcase}&" +
-            "apikey=#{API_KEY}&" +
+            "apikey=#{SQA.av.key}&" +
             "datatype=csv&" +
             "outputsize=#{full ? 'full' : 'compact'}"
           ).to_hash
@@ -102,18 +69,19 @@ class SQA::DataFrame < Daru::DataFrame
       end
 
       raw = response[:body].split
-
       headers = raw.shift.split(',')
+
       headers[0] = 'date' # website returns "timestamp" but that
                           # has an unintended side-effect when
                           # the names are normalized.
+                          # SMELL: IS THIS STILL TRUE?
 
       close_inx     = headers.size - 2
       adj_close_inx = close_inx + 1
 
       headers.insert(adj_close_inx, 'adjusted_close')
 
-
+      aofh = raw.map do |e|
         e2 = e.split(',')
         e2[1..-2] = e2[1..-2].map(&:to_f) # converting open, high, low, close
         e2[-1]    = e2[-1].to_i # converting volumn
@@ -121,35 +89,20 @@ class SQA::DataFrame < Daru::DataFrame
         headers.zip(e2).to_h
       end
 
-
-
-
-
+      if from_date
+        aofh.reject!{|e| Date.parse(e['date']) < from_date}
+      end
 
-
-      #
-      # base_df is ascending on timestamp
-      # update_df is descending on timestamp
-      #
-      # base_df content came from CSV file downloaded
-      # from Yahoo Finance.
-      #
-      # update_df came from scraping the webpage
-      # at Yahoo Finance for the recent history.
-      #
-      # Returns a combined DataFrame.
-      #
-      def self.append(base_df, updates_df)
-        last_timestamp = Date.parse base_df.timestamp.last
-        filtered_df    = updates_df.filter_rows { |row| Date.parse(row[:timestamp]) > last_timestamp }
+      return nil if aofh.empty?
 
-
+      # ensure tha the data frame is
+      # always sorted oldest to newest.
 
-
-
+      if aofh.first['date'] > aofh.last['date']
+        aofh.reverse!
       end
 
-
+      SQA::DataFrame.from_aofh(aofh, mapping: HEADER_MAPPING, transformers: TRANSFORMERS)
     end
   end
 end
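Taken together, these hunks change `recent` to return an SQA::DataFrame built from the parsed CSV rows, with an optional `from_date` cutoff. A hedged usage sketch (the ticker and date are examples; this assumes the gem is initialized so that `SQA.av.key` resolves to a valid Alpha Vantage key):

```ruby
require 'sqa'

# Compact pull (roughly the most recent 100 trading days),
# keeping only rows on or after the cutoff date.
# Returns nil when the filter leaves nothing.
df = SQA::DataFrame::AlphaVantage.recent(
  'aapl',
  from_date: Date.new(2023, 6, 1)
)

df.keys unless df.nil?  # the normalized column symbols
```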
data/lib/sqa/data_frame/yahoo_finance.rb
CHANGED
@@ -1,10 +1,8 @@
 # lib/sqa/data_frame/yahoo_finance.rb
 # frozen_string_literal: true
 
-require 'faraday'
-require 'nokogiri'
 
-class SQA::DataFrame < Daru::DataFrame
+class SQA::DataFrame
   class YahooFinance
     CONNECTION = Faraday.new(url: 'https://finance.yahoo.com')
     HEADERS = [
@@ -32,21 +30,6 @@ class SQA::DataFrame < Daru::DataFrame
     }
 
     ################################################################
-    def self.load(filename, options={}, &block)
-      df = SQA::DataFrame.load(filename, options={}, &block)
-
-      headers = df.vectors
-
-      if headers.first == HEADERS.first.to_s
-        a_hash = {}
-        HEADERS.each {|k| a_hash[k.to_s] = k}
-        df.rename_vectors(a_hash)
-      else
-        df.rename_vectors(HEADER_MAPPING)
-      end
-
-      df
-    end
 
 
     # Scrape the Yahoo Finance website to get recent
@@ -64,7 +47,7 @@ class SQA::DataFrame < Daru::DataFrame
 
       rows = table.css('tbody tr')
 
-
+      aofh = []
 
       rows.each do |row|
         cols = row.css('td').map{|c| c.children[0].text}
@@ -82,37 +65,10 @@ class SQA::DataFrame < Daru::DataFrame
         cols[0] = Date.parse(cols[0]).to_s
         cols[6] = cols[6].tr(',','').to_i
         (1..5).each {|x| cols[x] = cols[x].to_f}
-
-      end
-
-      Daru::DataFrame.new(data)
-    end
-
-
-    # Append update_df rows to the base_df
-    #
-    # base_df is ascending on timestamp
-    # update_df is descending on timestamp
-    #
-    # base_df content came from CSV file downloaded
-    # from Yahoo Finance.
-    #
-    # update_df came from scraping the webpage
-    # at Yahoo Finance for the recent history.
-    #
-    # Returns a combined DataFrame.
-    #
-    def self.append(base_df, updates_df)
-      last_timestamp = Date.parse base_df.timestamp.last
-      filtered_df    = updates_df.filter_rows { |row| Date.parse(row[:timestamp]) > last_timestamp }
-
-      last_inx = filtered_df.size - 1
-
-      (0..last_inx).each do |x|
-        base_df.add_row filtered_df.row[last_inx-x]
+        aofh << HEADERS.zip(cols).to_h
       end
 
-
+      aofh
     end
   end
 end
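The scraper now collects each table row into a plain hash via `HEADERS.zip(cols)` and returns an array of hashes (aofh) instead of constructing a Daru::DataFrame. A small sketch of the zip step, with illustrative header symbols and values (the real HEADERS constant lives in SQA::DataFrame::YahooFinance and is truncated in this extract):

```ruby
# Stand-ins for the scraped header symbols and one converted row.
headers = %i[timestamp open high low close adj_close volume]
cols    = ['2023-06-02', 181.03, 182.23, 180.63, 182.01, 182.01, 61_996_900]

headers.zip(cols).to_h
#=> {timestamp: "2023-06-02", open: 181.03, high: 182.23,
#    low: 180.63, close: 182.01, adj_close: 182.01, volume: 61996900}
```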
data/lib/sqa/data_frame.rb
CHANGED
@@ -1,52 +1,302 @@
 # lib/sqa/data_frame.rb
 # frozen_string_literal: true
 
-
-require_relative 'data_frame/alpha_vantage'
+require 'forwardable'
 
-
+require_relative 'data_frame/yahoo_finance'
+require_relative 'data_frame/alpha_vantage'
 
-
-
-
-
-
+class SQA::DataFrame
+  class Data < Hashie::Mash
+    # SNELL: Are all of these needed?
+    include Hashie::Extensions::Mash::KeepOriginalKeys
+    # include Hashie::Extensions::Mash::PermissiveRespondTo
+    include Hashie::Extensions::Mash::SafeAssignment
+    include Hashie::Extensions::Mash::SymbolizeKeys
+    # include Hashie::Extensions::Mash::DefineAccessors
+  end
+
+  extend Forwardable
 
-
+  # @data is of class Data
+  attr_accessor :data
+
+  # Expects a Hash of Arrays (hofa)
+  def initialize(a_hash={})
+    @data = Data.new(a_hash)
+  end
 
-    writer << vectors.to_a if options[:headers]
 
-
-
-
-
-
+  def to_csv(path_to_file)
+    CSV.open(path_to_file, 'w') do |csv|
+      csv << keys
+      size.times do |x|
+        csv << row(x)
+      end
     end
+  end
+
 
-
+  def to_json(path_to_file)
+    NotImplemented.raise
   end
-end
 
 
+  def to_aofh
+    NotImplemented.raise
+  end
 
 
-
+  def_delegator :@data, :to_h, :to_hofa
+  alias_method :to_h, :to_hofa
+
+
+  # The number of data rows
+  def size
+    data[@data.keys[0]].size
+  end
+  alias_method :nrows,  :size
+  alias_method :length, :size
+
+
+  def_delegator :@data, :keys
+  alias_method :vectors, :keys
+  alias_method :columns, :keys
+
+
+  def ncols
+    keys.size
+  end
+
+
+  def_delegator :@data, :values, :values
+  def_delegator :@data, :[], :[]
+  def_delegator :@data, :[]=, :[]=
+
+
+  def rows
+    result = []
+    (0..size - 1).each do |x|
+      entry = row(x)
+      result << entry
+    end
+    result
+  end
+  alias_method :to_a, :rows
+
+
+  def row(x)
+    if x.is_a?(Integer)
+      raise BadParameterError if x < 0 || x >= size
+
+    elsif x.is_a?(Hash)
+      raise BadParameterError, "x is #{x}" if x.size > 1
+      key = x.keys[0]
+      x   = @data[key].index(x[key])
+      raise BadParameterError, 'Not Found #{x}' if x.nil?
+      return keys.zip(row(x)).to_h
 
-  #################################################
-  def self.load(ticker, type="csv", options={}, &block)
-    source = SQA.data_dir + "#{ticker}.#{type}"
-
-    if :csv == type
-      from_csv(source, options={}, &block)
-    elsif :json == type
-      from_json(source, options={}, &block)
-    elsif %i[txt dat].include?(type)
-      from_plaintext(source, options={}, &block)
-    elsif :xls == type
-      from_excel(source, options={}, &block)
     else
-      raise
+      raise BadParameterError, "Unknown x.class: #{x.class}"
+    end
+
+    entry = []
+
+    keys.each do |key|
+      entry << @data[key][x]
+    end
+
+    entry
+  end
+
+
+  def append(new_df)
+    raise(BadParameterError, "Key mismatch") if keys != new_df.keys
+
+    keys.each do |key|
+      @data[key] += new_df[key]
+    end
+  end
+  alias_method :concat, :append
+
+
+  # Creates a new instance with new keys
+  # based on the mapping hash where
+  #   { old_key => new_key }
+  #
+  def rename(mapping)
+    SQA::DataFrame.new(
+      self.class.rename(
+        mapping,
+        @data.to_h
+      )
+    )
+  end
+  alias_method :rename_vectors, :rename
+
+
+  # Map the values of the vectors into different objects
+  # types is a Hash where the key is the vector name and
+  # the value is a proc
+  #
+  # For Example:
+  #   {
+  #     price: -> (v) {v.to_f.round(3)}
+  #   }
+  #
+  def coerce_vectors(transformers)
+    transformers.each_pair do |key, transformer|
+      @data[key].map!{|v| transformer.call(v)}
+    end
+  end
+
+
+  def method_missing(method_name, *args, &block)
+    if @data.respond_to?(method_name)
+      self.class.send(:define_method, method_name) do |*method_args, &method_block|
+        @data.send(method_name, *method_args, &method_block)
+      end
+      send(method_name, *args, &block)
+    else
+      super
+    end
+  end
+
+
+  def respond_to_missing?(method_name, include_private = false)
+    @data.respond_to?(method_name) || super
+  end
+
+  #################################################
+  class << self
+
+    def append(base_df, other_df)
+      base_df.append(other_df)
+    end
+
+
+    # TODO: The Data class has its own load which also supports
+    #       YAML by default.  Maybe this method should
+    #       make use of @data = Data.load(source)
+    #
+    def load(source:, mapping: {}, transformers:{})
+      file_type = source.extname[1..].downcase.to_sym
+
+      df = if :csv == file_type
+             from_csv_file(source, mapping: mapping, transformers: transformers)
+           elsif :json == file_type
+             from_json_file(source, mapping: mapping, transformers: transformers)
+           else
+             raise BadParameterError, "unsupported file type: #{file_type}"
+           end
+
+      unless transformers.empty?
+        df.coerce_vectors(transformers)
+      end
+
+      df
+    end
+
+
+    def from_aofh(aofh, mapping: {}, transformers: {})
+      new(
+        aofh_to_hofa(
+          aofh,
+          mapping:      mapping,
+          transformers: transformers
+        )
+      )
+    end
+
+
+    def from_csv_file(source, mapping: {}, transformers: {})
+      aofh = []
+
+      CSV.foreach(source, headers: true) do |row|
+        aofh << row.to_h
+      end
+
+      from_aofh(aofh, mapping: mapping, transformers: transformers)
+    end
+
+
+    def from_json_file(source, mapping: {}, transformers: {})
+      aofh = JSON.parse(source.read)
+
+      from_aofh(aofh, mapping: mapping, transformers: transformers)
+    end
+
+
+    # aofh -- Array of Hashes
+    # hofa -- Hash of Arrays
+    def aofh_to_hofa(aofh, mapping: {}, transformers: {})
+      hofa = {}
+      keys = aofh.first.keys
+
+      keys.each do |key|
+        hofa[key] = []
+      end
+
+      aofh.each do |entry|
+        keys.each do |key|
+          hofa[key] << entry[key]
+        end
+      end
+
+      # SMELL: This might be necessary
+      normalize_keys(hofa, adapter_mapping: mapping)
+    end
+
+
+    def normalize_keys(hofa, adapter_mapping: {})
+      hofa    = rename(adapter_mapping, hofa)
+      mapping = generate_mapping(hofa.keys)
+      rename(mapping, hofa)
+    end
+
+
+    def rename(mapping, hofa)
+      mapping.each_pair do |old_key, new_key|
+        hofa[new_key] = hofa.delete(old_key)
+      end
+
+      hofa
+    end
+
+
+    def generate_mapping(keys)
+      mapping = {}
+
+      keys.each do |key|
+        mapping[key] = underscore_key(sanitize_key(key)) unless key.is_a?(Symbol)
+      end
+
+      mapping
+    end
+
+
+    # returns a snake_case Symbol
+    def underscore_key(key)
+      key.to_s.gsub(/::/, '/').
+        gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
+        gsub(/([a-z\d])([A-Z])/,'\1_\2').
+        tr("-", "_").
+        downcase.to_sym
+    end
+
+
+    # removes punctuation and specal characters,
+    # replaces space with underscore.
+    def sanitize_key(key)
+      key.tr('.():/','').gsub(/^\d+.?\s/, "").tr(' ','_')
+    end
+
+
+    # returns true if key is in a date format
+    # like 2023-06-03
+    def is_date?(key)
+      !/(\d{4}-\d{2}-\d{2})/.match(key.to_s).nil?
+    end
     end
   end
 end
+
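A closing sketch of how the rewritten class fits together, inferred only from the methods shown in the hunk above (the sample rows are invented):

```ruby
require 'sqa'

# Array-of-hashes input, e.g. what the scrapers or CSV readers produce.
aofh = [
  { 'Timestamp' => '2023-06-01', 'Adj Close' => '180.0157' },
  { 'Timestamp' => '2023-06-02', 'Adj Close' => '182.01'   }
]

# normalize_keys snake_cases the string keys into Symbols.
df = SQA::DataFrame.from_aofh(aofh)

# coerce_vectors applies a proc per column, mutating in place.
df.coerce_vectors(adj_close: -> (v) { v.to_f.round(3) })

df.keys                          #=> [:timestamp, :adj_close]
df.size                          #=> 2
df.row(0)                        #=> ["2023-06-01", 180.016]
df.row(timestamp: '2023-06-02')  #=> {timestamp: "2023-06-02", adj_close: 182.01}
```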