bardata 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bardata/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ """
2
+ bardata package
3
+
4
+ basic bar data manipulation routines based on pandas
5
+ """
6
+
7
+ from .tickers import get_tickers
8
+ from .prices import get_prices, price_engine
9
+
bardata/dates.py ADDED
@@ -0,0 +1,113 @@
1
+ """ date utils """
2
+
3
+ import re
4
+ import datetime
5
+
6
+ from dateutil.tz import gettz
7
+ from dateutil.relativedelta import relativedelta
8
+
9
+ DEFAULT_CLOSING = 1700
10
+
11
+ NEW_YORK = gettz('America/New_York')
12
+
13
+
14
def last_busday(date):
    """
    Most recent business day on or before ``date``.

    Weekdays are returned unchanged; a Saturday or Sunday is moved back
    to the preceding Friday. Holidays are not taken into account.
    """

    # weekday(): Monday == 0 ... Friday == 4, Saturday == 5, Sunday == 6
    days_past_friday = date.weekday() - 4

    if days_past_friday <= 0:
        return date

    return date - datetime.timedelta(days=days_past_friday)
22
+
23
+
24
def add_busdays(date, nbdays=0):
    """
    Shift ``date`` by ``nbdays`` business days (Monday to Friday).

    The date is first expressed as a number of business days since the
    Monday of its week, the shift is added, and the total is converted
    back to calendar days. A Saturday or Sunday counts as five business
    days past Monday, so a weekend start date behaves like the following
    Monday (``add_busdays(saturday, 0)`` returns that Monday).
    Negative ``nbdays`` move backwards. Holidays are not considered.
    """

    weekday = date.weekday()  # calendar days since Monday

    # whole business weeks and leftover business days, counted from Monday
    weeks, extra = divmod(nbdays + min(weekday, 5), 5)

    shift = 7 * weeks + extra - weekday

    return date + datetime.timedelta(days=shift)
33
+
34
+
35
def last_busday_close(asof: datetime.datetime = None, *,
                      closing_time: int = None, tzinfo: str = None,
                      days_back: int = 0):
    """
    Last business day close for specified closing time, backtracks on weekends.

    Args:
        asof: reference datetime, defaults to the current time in ``tzinfo``.
        closing_time: closing time as an ``HHMM`` integer (e.g. ``1700``) or a
            ``datetime.time``; defaults to ``DEFAULT_CLOSING``. When an aware
            time is given, its timezone replaces ``tzinfo``.
        tzinfo: timezone name or tzinfo object, defaults to ``NEW_YORK``.
        days_back: extra calendar days to step back before the weekend
            adjustment.

    Returns:
        A datetime combining the resulting date, the closing time and the
        timezone.

    Raises:
        ValueError: if ``closing_time`` is neither an int nor a time, or if
            ``asof`` is not a datetime.
    """

    if closing_time is None:
        closing_time = DEFAULT_CLOSING

    if tzinfo is None:
        tzinfo = NEW_YORK

    if isinstance(tzinfo, str):
        tzinfo = gettz(tzinfo)

    if isinstance(closing_time, int):
        # HHMM integer, e.g. 1700 -> 17:00
        hour, minute = divmod(closing_time, 100)
        closing_time = datetime.time(hour=hour, minute=minute)

    if not isinstance(closing_time, datetime.time):
        raise ValueError(f"Expected an int or time, got {closing_time!r}")

    # Keep closing_time a naive time. time objects are immutable, so the
    # result of replace() must be assigned back; otherwise the comparison
    # with the naive asof.time() below raises TypeError.
    if closing_time.tzinfo is not None:
        tzinfo = closing_time.tzinfo
        closing_time = closing_time.replace(tzinfo=None)

    if asof is None:
        asof = datetime.datetime.now(tzinfo)

    if not isinstance(asof, datetime.datetime):
        raise ValueError(f"Expected a datetime value {asof}")

    if tzinfo is not None:
        asof = asof.astimezone(tzinfo)

    # before today's close the last available close is the previous day's
    if asof.time() < closing_time:
        days_back += 1

    date = asof.date()

    if days_back:
        date -= datetime.timedelta(days=days_back)

    # backtrack to last business day (Saturday -> -1 day, Sunday -> -2 days)
    weekend = max(date.weekday() - 4, 0)
    if weekend > 0:
        date -= datetime.timedelta(days=weekend)

    return datetime.datetime.combine(date, closing_time, tzinfo)
88
+
89
+
90
def quick_timedelta(period: str) -> relativedelta:
    """
    relativedelta from a period specification string like 1D, 1W, 1M, 4H, 300T etc ...

    The string is a positive integer count followed by a single unit letter:
    D (days), W (weeks), M (months), Y (years), H (hours), T (minutes).

    Raises:
        ValueError: if the string does not match that pattern or the unit
            letter is not one of the above.
    """

    if match := re.fullmatch(r"(\d+)(\w)", period):
        count, freq = int(match.group(1)), match.group(2)
    else:
        raise ValueError(f"Invalid period {period!r}")

    # unit letter -> (relativedelta keyword, multiplier)
    units = {
        "D": ("days", 1),
        "W": ("weeks", 1),
        "M": ("months", 1),
        "Y": ("months", 12),  # years are expressed as 12 months each
        "H": ("hours", 1),
        "T": ("minutes", 1),
    }

    if freq not in units:
        raise ValueError(f"Invalid period {period!r}")

    keyword, multiplier = units[freq]

    # kwargs must be a plain mapping: it is unpacked into relativedelta here
    kwargs = {keyword: count * multiplier}

    return relativedelta(**kwargs)
bardata/errors.py ADDED
@@ -0,0 +1,6 @@
1
+ """ exceptions """
2
+
3
+
4
class DataNotFoundError(Exception):
    """ Error type for data that could not be found (plain Exception subclass) """
bardata/freqs.py ADDED
@@ -0,0 +1,48 @@
1
+ """ frequency utils """
2
+
3
+ # see https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
4
+
5
+ import re
6
+
7
+ PANDAS_FREQ = dict(day='B', week='W', month='M', year='Y', hour='H', minute='T')
8
+
9
+
10
# accepted spellings for each frequency unit, mapped to the long name
_FREQ_ALIASES = {
    'D': 'day', 'day': 'day', 'daily': 'day',
    'W': 'week', 'week': 'week', 'weekly': 'week',
    'M': 'month', 'month': 'month', 'monthly': 'month',
    'Y': 'year', 'year': 'year', 'yearly': 'year',
    'H': 'hour', 'hour': 'hour', 'hourly': 'hour',
    'T': 'minute', 'min': 'minute', 'minute': 'minute',
}


def split_frequency(freq: str) -> tuple[int, str]:
    """
    Split freq string into count and frequency (long name).

    The string is an optional integer count followed by a unit alias,
    e.g. ``'daily'``, ``'5min'``, ``'2W'``. The count defaults to 1.

    Returns:
        ``(count, unit)`` where unit is one of
        ``day``, ``week``, ``month``, ``year``, ``hour``, ``minute``.

    Raises:
        ValueError: if the string is malformed or the unit is unknown.
    """

    spec = freq  # kept so errors report the full original string

    if match := re.fullmatch(r"(\d+)(\w+)", spec):
        count, alias = int(match.group(1)), match.group(2)
    elif match := re.fullmatch(r"(\w+)", spec):
        count, alias = 1, match.group(1)
    else:
        raise ValueError(f"Invalid freq {spec!r}")

    unit = _FREQ_ALIASES.get(alias)

    if unit is None:
        raise ValueError(f"Invalid freq {spec!r}")

    return count, unit
36
+
37
+
38
def pandas_freq(freq: str) -> str:
    """
    Translate a frequency string into a pandas offset alias.

    The string is parsed with ``split_frequency`` and the unit is looked
    up in ``PANDAS_FREQ``. The count is prepended only when it differs
    from 1 (``'daily'`` gives ``'B'``, ``'5min'`` gives ``'5T'``).
    """

    count, unit = split_frequency(freq)
    alias = PANDAS_FREQ.get(unit)

    return alias if count == 1 else f"{count}{alias}"
48
+
bardata/model.py ADDED
@@ -0,0 +1,33 @@
1
+ """ model classes """
2
+
3
+ from abc import ABC, abstractmethod
4
+
5
+
6
class PriceEngine(ABC):
    """
    Abstract base class for price engines.

    Subclasses must implement ``get_prices``. Instances remember the
    ``source`` value they were created with.
    """

    # class-level default, not read anywhere in this module
    priority: int = 0

    def __init__(self, source=None):
        """ store the source identifier on the instance """
        self.source = source

    @abstractmethod
    def get_prices(self,
                   ticker: str, freq: str = 'daily', *,
                   start_date=None, end_date=None,
                   max_bars=None, adjusted=True):
        """ fetch prices data for ticker, to be implemented by subclasses """
21
+
22
+
23
class TickerHandler(ABC):
    """
    Abstract base class for ticker handlers.

    Subclasses must implement ``get_tickers``. Instances remember the
    ``source`` value they were created with.
    """

    # class-level default, not read anywhere in this module
    priority: int = 0

    def __init__(self, source=None):
        """ store the source identifier on the instance """
        self.source = source

    @abstractmethod
    def get_tickers(self, moniker: str):
        """ tickers for the given moniker, to be implemented by subclasses """
bardata/prices.py ADDED
@@ -0,0 +1,159 @@
1
+ """ price data """
2
+
3
+ import sys
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+ from functools import lru_cache
9
+
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+
12
+ from .freqs import pandas_freq, split_frequency
13
+ from .dates import quick_timedelta
14
+ from .model import PriceEngine
15
+
16
+ if sys.version_info < (3, 10):
17
+ from importlib_metadata import entry_points
18
+ else:
19
+ from importlib.metadata import entry_points
20
+
21
+ # MAYBE add caching (boolean) parameter ... where ?
22
+ # MAYBE add source, start_date and end_date to combine_prices ?
23
+ # MAYBE can we remove resample param from combine prices ???
24
+
25
+
26
+ DEFAULT_PRICES = 'rawdata'
27
+ DEFAULT_INTRADAY = 'polygon'
28
+
29
+ ENTRY_POINTS = "bardata_prices"
30
+
31
+
32
@lru_cache
def default_source(freq: str = None) -> str:
    """
    Default source name for the given frequency.

    Hourly and minute frequencies map to ``DEFAULT_INTRADAY``, everything
    else (including a missing freq, treated as daily) to ``DEFAULT_PRICES``.
    """

    _, unit = split_frequency(freq or 'daily')
    intraday = unit in ('hour', 'minute')

    return DEFAULT_INTRADAY if intraday else DEFAULT_PRICES
42
+
43
+
44
@lru_cache
def price_engine(source: str = None) -> PriceEngine:
    """
    Price engine instance for the given source (cached per source).

    Looks up the entry point named ``source`` in the ``ENTRY_POINTS`` group,
    loads it and calls the loaded object with ``source=source``. When
    several entry points match, the first one is used.

    Raises:
        ValueError: if no entry point matches the source.
    """

    if source is None:
        source = default_source()

    found = tuple(entry_points(group=ENTRY_POINTS, name=source))

    if not found:
        raise ValueError(f"No price engine for {source=}")

    factory = found[0].load()

    return factory(source=source)
61
+
62
+
63
def get_prices(ticker: str, freq: str = 'daily', *,
               source=None, start_date=None, end_date=None,
               max_bars=None, adjusted=True):
    """
    Prices for a ticker, fetched through the engine of the given source.

    A missing ``freq`` is treated as ``'daily'``; a missing ``source`` is
    resolved with ``default_source`` for that frequency. All other
    arguments are forwarded unchanged to the engine's ``get_prices``.
    """

    freq = 'daily' if freq is None else freq

    if source is None:
        source = default_source(freq=freq)

    options = dict(
        freq=freq,
        start_date=start_date,
        end_date=end_date,
        max_bars=max_bars,
        adjusted=adjusted,
    )

    return price_engine(source).get_prices(ticker, **options)
83
+
84
+
85
def collect_prices(tickers, *, freq='daily', source=None,
                   start_date=None, end_date=None,
                   max_bars=None, use_threads=True):
    """
    Yields (ticker, prices) pairs over a list of tickers.

    Prices are fetched with ``get_prices``; tickers whose result is None
    are skipped. With ``use_threads`` the requests are submitted to a
    thread pool and pairs are yielded in completion order, otherwise
    tickers are fetched one by one in input order.
    """

    if source is None:
        source = default_source(freq=freq)

    kwds = dict(
        freq=freq,
        source=source,
        start_date=start_date,
        end_date=end_date,
        max_bars=max_bars
    )

    if not use_threads:
        for ticker in tickers:
            prices = get_prices(ticker, **kwds)
            if prices is not None:
                yield ticker, prices
        return

    # the context manager shuts the pool down once the generator is
    # exhausted or closed, instead of leaving worker threads behind
    with ThreadPoolExecutor() as executor:
        pending = {
            executor.submit(get_prices, ticker, **kwds): ticker
            for ticker in tickers
        }

        for future in as_completed(pending):
            prices = future.result()
            if prices is not None:
                yield pending[future], prices
119
+
120
+
121
def combine_prices(tickers, *, freq='daily', source=None, item='close',
                   max_bars=None, period=None, resample=None,
                   pct_change=False, log_returns=False,
                   use_threads=True):
    """
    Matrix of prices (one column per ticker) aligned by date.

    To ensure better alignment use resample instead of freq.

    Args:
        tickers: tickers to fetch with ``collect_prices``.
        freq: frequency passed to ``collect_prices``.
        source: source passed to ``collect_prices``.
        item: price column taken from each ticker's data.
        max_bars: maximum number of bars passed to ``collect_prices``.
        period: optional period string (see ``quick_timedelta``); only rows
            within that period before the last date are kept.
        resample: optional frequency string; rows are resampled with the
            last value of each bin.
        pct_change: return simple returns instead of prices.
        log_returns: return log returns instead of prices.
        use_threads: passed to ``collect_prices``.

    Raises:
        ValueError: if both ``pct_change`` and ``log_returns`` are set.
    """

    # applying both transforms in sequence would compute returns of returns
    if pct_change and log_returns:
        raise ValueError("pct_change and log_returns are mutually exclusive")

    data = {
        ticker: prices[item]
        for ticker, prices in collect_prices(
            tickers,
            freq=freq,
            source=source,
            max_bars=max_bars,
            use_threads=use_threads
        )
    }

    # keep only dates where every ticker has a value
    result = pd.DataFrame(data).dropna()

    if resample is not None:
        rule = pandas_freq(resample)
        result = result.resample(rule).agg('last')

    # an empty frame has no last date to anchor the period on
    if period and not result.empty:
        begdate = result.index[-1] - quick_timedelta(period)
        result = result.loc[begdate:]

    if log_returns:
        result = result.apply(np.log).diff().dropna()
    elif pct_change:
        result = result.apply(np.log).diff().dropna().apply(np.exp) - 1

    return result
bardata/sample.py ADDED
@@ -0,0 +1,170 @@
1
+ """ sample routines """
2
+
3
+ import string
4
+ import itertools
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+ import datetime as dt
9
+
10
+ from . import model
11
+ from .freqs import pandas_freq, split_frequency
12
+
13
+ DEFAULT_MAX_DATES = 500
14
+
15
+ MAXIMUM_PERIODS = dict(
16
+ day=5000,
17
+ week=1000,
18
+ month=200,
19
+ hour=20000,
20
+ minute=20000
21
+ )
22
+
23
+
24
def maximum_periods(freq: str) -> int:
    """
    Maximum number of periods for a frequency string.

    The per-unit limit from ``MAXIMUM_PERIODS`` (200 for units not listed)
    is divided by the frequency count. Integer division is used because
    the result ends up as the ``periods`` argument of ``pd.date_range``.
    """

    count, unit = split_frequency(freq)

    return MAXIMUM_PERIODS.get(unit, 200) // count
27
+
28
+
29
def sample_dates(max_dates=None, freq=None, *,
                 start_date=None, end_date=None):
    """
    Sample dates (wrapper around pandas date_range).

    Args:
        max_dates: maximum number of dates returned.
        freq: frequency string, defaults to ``'daily'``.
        start_date: optional first date (string or date-like).
        end_date: optional last date (string or date-like); defaults to
            today when neither bound is given.

    When both bounds are given the range is defined by them. Otherwise
    ``max_dates`` (or ``DEFAULT_MAX_DATES``) periods are generated, capped
    by ``maximum_periods(freq)``. In all cases at most ``max_dates`` of the
    latest dates are returned.
    """

    if freq is None:
        freq = 'daily'

    if not start_date and not end_date:
        end_date = dt.date.today()

    if start_date and end_date:
        # the two bounds define the range; date_range needs periods=None
        periods = None
    else:
        periods = max_dates or DEFAULT_MAX_DATES
        # date_range needs an integer number of periods
        periods = min(periods, int(maximum_periods(freq)))

    freq = pandas_freq(freq)

    if isinstance(start_date, str):
        start_date = pd.to_datetime(start_date)

    if isinstance(end_date, str):
        end_date = pd.to_datetime(end_date)

    dates = pd.date_range(periods=periods, start=start_date, end=end_date, freq=freq)

    if max_dates and len(dates) > max_dates:
        dates = dates[-max_dates:]

    return dates
64
+
65
+
66
def random_walk(max_bars=None, freq='daily', *,
                start_date=None, end_date=None,
                start_value=100.0, volatility=0.20, fwd_rate=0.10,
                name=None, seed=None):
    """
    Series of random walk prices.

    Args:
        max_bars: maximum number of bars (see ``sample_dates``).
        freq: frequency string.
        start_date: optional first date.
        end_date: optional last date.
        start_value: value of the first bar.
        volatility: yearly volatility of the log changes.
        fwd_rate: yearly forward (growth) rate.
        name: name of the resulting series.
        seed: seed for the numpy random generator.

    Returns:
        A series of prices indexed by ``date``.
    """

    generator = np.random.default_rng(seed)

    dates = sample_dates(freq=freq, max_dates=max_bars, start_date=start_date, end_date=end_date)
    count = len(dates)

    if count == 0:
        # nothing to simulate; standard_normal(-1) below would fail
        empty = pd.Series([], index=dates.values, dtype=float, name=name)
        return empty.rename_axis(index='date')

    # average spacing between dates; to_series().diff() also works on
    # pandas versions where DatetimeIndex has no diff method
    avgdelta = dates.to_series().diff().mean()
    sampling = pd.Timedelta(days=365) / avgdelta  # bars per year

    fwd = np.log(1 + fwd_rate) / sampling  # log drift per bar
    std = volatility / np.sqrt(sampling)  # volatility per bar

    # fwd is already a log drift, so it is added as is (no second log)
    change = generator.standard_normal(count - 1) * std + fwd
    price = start_value * np.exp(np.r_[0.0, change.cumsum(0)])

    series = pd.Series(price, index=dates.values, name=name).rename_axis(index='date')

    return series
89
+
90
+
91
def sample_prices(max_bars=None, freq='daily', *,
                  start_date=None, end_date=None,
                  start_value=100.0, volatility=0.20, fwd_rate=0.10,
                  skip=0, volume_as_int=True, seed=None):
    """
    Dataframe of random prices.

    Args:
        max_bars: maximum number of bars (see ``sample_dates``).
        freq: frequency string.
        start_date: optional first date.
        end_date: optional last date.
        start_value: base value of the price path.
        volatility: yearly volatility.
        fwd_rate: yearly forward (growth) rate.
        skip: when positive the first ``skip`` rows are set to NaN, when
            negative the last ``-skip`` rows are.
        volume_as_int: cast volume to int instead of rounding to 2 decimals.
        seed: seed for the numpy random generator.

    Returns:
        A dataframe indexed by ``date`` with ``open``, ``high``, ``low``,
        ``close`` and ``volume`` columns.
    """

    generator = np.random.default_rng(seed)

    dates = sample_dates(freq=freq, max_dates=max_bars, start_date=start_date, end_date=end_date)
    count = len(dates)

    # average spacing between dates; to_series().diff() also works on
    # pandas versions where DatetimeIndex has no diff method
    avgdelta = dates.to_series().diff().mean()
    sampling = pd.Timedelta(days=365) / avgdelta  # bars per year

    fwd = np.log(1 + fwd_rate) / sampling
    std = volatility / np.sqrt(sampling)

    # four cumulative random steps per bar: first is the open, last the close
    rnd = generator.standard_normal((count, 4)).cumsum(1) * std + fwd

    # cumulative log level at the start of each bar (sum of previous closes)
    cum = np.r_[0.0, rnd[:, -1].cumsum(0)[:-1]]

    op = start_value * np.exp(rnd[:, 0] + cum)
    hi = start_value * np.exp(rnd.max(1) + cum)
    lo = start_value * np.exp(rnd.min(1) + cum)
    cl = start_value * np.exp(rnd[:, -1] + cum)

    vol = np.exp(generator.standard_normal(count) * 0.2 + 1.0) * 50000.0

    data = dict()

    data['date'] = dates.values
    data['open'] = op.round(2)
    data['high'] = hi.round(2)
    data['low'] = lo.round(2)
    data['close'] = cl.round(2)
    data['volume'] = vol.astype(int) if volume_as_int else vol.round(2)

    prices = pd.DataFrame(data).set_index('date')

    if skip > 0:
        prices.iloc[:skip] = np.nan

    if skip < 0:
        prices.iloc[skip:] = np.nan

    return prices
136
+
137
+
138
def sample_tickers(count=10, *, ticker_length=3):
    """
    List of ``count`` sample tickers made of uppercase ASCII letters.

    Tickers are generated in alphabetical order starting at length
    ``ticker_length`` (AAA, AAB, ...); once every combination of a length
    is used up the length grows by one.
    """

    alphabet = string.ascii_uppercase

    # lazy, endless stream of candidates; islice takes only what is needed
    candidates = (
        "".join(letters)
        for size in itertools.count(ticker_length)
        for letters in itertools.product(alphabet, repeat=size)
    )

    return list(itertools.islice(candidates, count))
149
+
150
+
151
class Prices(model.PriceEngine):
    """ Price engine returning randomly generated sample prices """

    def get_prices(self,
                   ticker: str, freq: str = 'daily', *,
                   start_date=None, end_date=None,
                   max_bars=None, adjusted=True):
        """
        Random prices from ``sample_prices``.

        Only ``freq``, ``max_bars``, ``start_date`` and ``end_date`` are
        used; ``ticker`` and ``adjusted`` are accepted but ignored.
        """
        return sample_prices(
            freq=freq,
            max_bars=max_bars,
            start_date=start_date,
            end_date=end_date,
        )
161
+
162
+
163
class Tickers(model.TickerHandler):
    """ Ticker handler returning generated sample tickers """

    def get_tickers(self, moniker: str = None):
        """
        Sample tickers from ``sample_tickers``.

        Raises:
            ValueError: if a moniker is given; this handler supports none.
        """
        if moniker is not None:
            raise ValueError(f"Moniker {moniker!r} unexpected")
        return sample_tickers()
bardata/tickers.py ADDED
@@ -0,0 +1,45 @@
1
+ """ ticker data """
2
+
3
+ import sys
4
+
5
+ from functools import lru_cache
6
+
7
+ from .model import TickerHandler
8
+
9
+ if sys.version_info < (3, 10):
10
+ from importlib_metadata import entry_points
11
+ else:
12
+ from importlib.metadata import entry_points
13
+
14
+ DEFAULT_SOURCE = "rawdata"
15
+ ENTRY_POINTS = "bardata_tickers"
16
+
17
+
18
@lru_cache
def default_source() -> str:
    """ name of the source used when none is specified """
    return DEFAULT_SOURCE
21
+
22
+
23
@lru_cache
def ticker_handler(source: str = None) -> TickerHandler:
    """
    Ticker handler instance for the given source (cached per source).

    Looks up the entry point named ``source`` in the ``ENTRY_POINTS`` group,
    loads it and calls the loaded object with the source as its only
    argument. When several entry points match, the first one is used.

    Raises:
        ValueError: if no entry point matches the source.
    """

    if source is None:
        source = default_source()

    found = list(entry_points(group=ENTRY_POINTS, name=source))

    if not found:
        raise ValueError(f"No ticker engine for {source=}")

    factory = found[0].load()

    return factory(source)
38
+
39
+
40
def get_tickers(moniker: str = None, *, source: str = None):
    """
    Tickers for an index/portfolio symbol.

    The handler for ``source`` is obtained with ``ticker_handler`` and
    asked for the tickers of ``moniker``.
    """

    return ticker_handler(source=source).get_tickers(moniker)
bardata/utils.py ADDED
@@ -0,0 +1,289 @@
1
+ """ utility routines """
2
+
3
+ import logging
4
+ import warnings
5
+
6
+ import numpy as np
7
+ import pandas as pd
8
+
9
+ from .freqs import pandas_freq
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def extract_dates(prices):
    """
    Series of dates taken from the ``date`` column or index level.

    A ``date`` column takes precedence over an index level of that name.
    The result is indexed like ``prices``.

    Raises:
        ValueError: if there is neither a ``date`` column nor index level.
    """

    if 'date' in prices:
        return prices['date']

    if 'date' not in prices.index.names:
        raise ValueError("Dataframe has no date column or index!")

    level = prices.index.get_level_values('date')

    return level.to_series(index=prices.index)
27
+
28
+
29
def date_offset(prices, *, delta=None, warn=True):
    """
    Relative date offset from a prices dataframe.

    Each date is expressed as its distance to the latest date, measured
    in multiples of ``delta``. When ``delta`` is not given, the smallest
    difference between consecutive distinct dates is used.

    Args:
        prices: dataframe with a ``date`` column or index level.
        delta: spacing between bars (a timedelta).
        warn: warn when delta is not one of the standard bar sizes
            (1, 2, 5, 10, 15, 30, 60, 120 or 1440 minutes).

    Returns:
        a series holding the result of dividing the timedeltas by delta
    """

    standard_minutes = (1, 2, 5, 10, 15, 30, 60, 120, 1440)

    dates = extract_dates(prices)

    if delta is None:
        unique_sorted = dates.drop_duplicates().sort_values()
        delta = unique_sorted.diff().min()

    minutes = delta / pd.Timedelta(minutes=1)

    if warn and minutes not in standard_minutes:
        message = f"Implied timedelta {delta} not standard. Use delta parameter!"
        warnings.warn(message, stacklevel=2)

    latest = dates.max()

    return (latest - dates) / delta
55
+
56
+
57
def get_sampling(prices, basis=365):
    """
    Average yearly sampling rate.

    Computed as ``basis`` days divided by the mean difference between
    consecutive dates of ``prices``.
    """

    mean_step = extract_dates(prices).diff().mean()
    year = pd.Timedelta(days=basis)

    return year / mean_step
65
+
66
+
67
def resample_prices(prices, freq):
    """
    Resample prices to the given frequency.

    Bars are aggregated as first open, highest high, lowest low, last
    close and summed volume. Bins without a close are dropped.
    """

    rule = pandas_freq(freq)

    how = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }

    bars = prices.resample(rule).agg(how)

    return bars.dropna(subset=['close'])
76
+
77
+
78
def concat_prices(frames, convert_utc=True, remove_duplicates=True):
    """
    Concatenate price frames and remove duplicates.

    Args:
        frames: iterable of dataframes.
        convert_utc: convert each frame's index to UTC with ``tz_convert``
            before concatenating.
        remove_duplicates: for repeated index values keep only the last
            occurrence.
    """

    if convert_utc:
        frames = [frame.tz_convert('UTC') for frame in frames]

    combined = pd.concat(frames)

    if not remove_duplicates:
        return combined

    is_last = ~combined.index.duplicated(keep='last')

    return combined[is_last]
90
+
91
+
92
def adjust_prices(prices, splits=None, include_divs=False, inplace=False):
    """
    Adjust prices using split_factor and div_pct.

    Args:
        prices: dataframe of prices.
        splits: object providing ``split_factor`` (and optionally
            ``div_pct``); defaults to ``prices`` itself.
        include_divs: also fold ``div_pct`` into the adjustment factor.
        inplace: modify ``prices`` directly instead of a copy.

    Returns:
        the adjusted dataframe; price columns are divided by the
        adjustment factor and volume is multiplied by it.
    """

    events = prices if splits is None else splits

    factor = events.split_factor.fillna(1.0)

    if include_divs and hasattr(events, 'div_pct'):
        factor = factor * (events.div_pct.fillna(0.0) + 1.0)

    # product of the factors from each row to the end of the series
    backward = factor[::-1].cumprod()[::-1]

    # factor for prices dated before the first event row
    oldest = backward.iloc[0]

    # each row is adjusted by the factors of the rows after it only
    adj = backward.shift(-1).fillna(1.0)

    # align on the price dates
    adj = adj.asof(prices.index).fillna(oldest)

    target = prices if inplace else prices.copy()

    for column in ('open', 'high', 'low', 'close', 'vwap', 'price', 'mid'):
        if column in target:
            target[column] /= adj

    if 'volume' in target:
        target['volume'] *= adj

    return target
123
+
124
+
125
def extract_splits(prices):
    """
    Extract split & dividend rows from prices.

    A row is kept when its ``split_factor`` differs from 1 or its dividend
    percentage differs from 0. The dividend percentage is ``div_pct`` when
    present, else ``div_cash / close`` when ``div_cash`` is present.

    Returns:
        a dataframe with the split factor (and dividend percentage, when
        available) of the kept rows, or None when no row qualifies.
    """

    split_factor = prices.split_factor.fillna(1.0)
    parts = [split_factor]
    keep = split_factor != 1.0

    if hasattr(prices, 'div_pct'):
        dividends = prices.div_pct
    elif hasattr(prices, 'div_cash'):
        dividends = prices.div_cash / prices.close
    else:
        dividends = None

    if dividends is not None:
        dividends = dividends.fillna(0.0)
        parts.append(dividends)
        keep = keep | (dividends != 0.0)

    if not np.any(keep):
        return None

    return pd.concat(parts, axis=1)[keep]
149
+
150
+
151
def price_gaps(prices, resample_freq=None):
    """
    Price gaps in series.

    A bar-to-bar change of the close is reported as a gap when its
    absolute value exceeds all of: 25%, five times the 30-bar rolling
    standard deviation of changes, and twice the bar's high/low range.
    The ``adj_`` columns are used when ``adj_close`` is present.

    Args:
        prices: dataframe ordered by increasing index.
        resample_freq: optional frequency to resample prices to first.

    Returns:
        a series named "price gap" holding the changes flagged as gaps.

    Raises:
        ValueError: if the index is not monotonically increasing.
    """

    max_change = 0.25

    if resample_freq is not None:
        prices = resample_prices(prices, freq=resample_freq)

    if not prices.index.is_monotonic_increasing:
        raise ValueError("Data is not ordered!")

    prefix = 'adj_' if 'adj_close' in prices else ''

    close = prices[prefix + 'close']
    trange = prices[prefix + 'high'] / prices[prefix + 'low'] - 1.0

    change = close.pct_change()
    std = change.rolling(30).std()

    size = change.abs()
    is_gap = (size > max_change) & (size > std * 5.0) & (size > trange * 2.0)

    return change[is_gap].rename("price gap")
180
+
181
+
182
def time_gaps(prices, max_days=7):
    """
    Time gaps in series.

    Returns a series named "time gap" with the index differences that are
    longer than ``max_days`` days, located at the row following each gap.
    """

    threshold = pd.Timedelta(days=max_days)
    spans = prices.index.to_series().diff()

    return spans[spans > threshold].rename("time gap")
191
+
192
+
193
def check_prices(prices, ticker='series', warn=True, verbose=False):
    """
    Check prices for possible gaps in price or time.

    Args:
        prices: dataframe of prices, or None.
        ticker: label used in the warning messages.
        warn: issue a warning for each kind of gap found.
        verbose: print the gaps found.

    Returns:
        True when neither ``price_gaps`` nor ``time_gaps`` reports
        anything, False otherwise (also when prices is None).
    """

    if prices is None:
        return False

    findings = (
        ("price", price_gaps(prices)),
        ("time", time_gaps(prices)),
    )

    clean = True

    for label, gaps in findings:
        if not len(gaps):
            continue

        clean = False

        if warn:
            warnings.warn(
                f"{ticker} has {len(gaps)} {label} gaps!",
                stacklevel=2
            )

        if verbose:
            print(gaps)

    return clean
225
+
226
+
227
def fix_price_gaps(prices, gaps=None, verbose=None):
    """
    Fixes price gaps by rescaling the bars that precede each gap.

    Args:
        prices: dataframe of prices; it is modified in place.
        gaps: series of relative price changes to remove, indexed like
            prices; computed with ``price_gaps`` when omitted.
        verbose: print each gap being corrected.

    Returns:
        the same ``prices`` dataframe, with open/high/low/close divided
        and volume multiplied by the cumulative correction factor.
    """

    if gaps is None:
        gaps = price_gaps(prices)

    if gaps.empty:
        logger.debug("No gaps found!")
        return prices

    gaps = gaps.reindex_like(prices).fillna(0.0)

    if verbose:
        # Series.iteritems() was removed in pandas 2.0, items() is the replacement
        for ts, gap in gaps[gaps != 0].items():
            print("Correcting gap of {gap} on {date}...".format(date=ts.date(), gap=gap))

    # a gap affects the bars before it, so the factor is shifted back one
    # row and accumulated from the end of the series
    factor = 1.0 / (1.0 + gaps)
    factor = factor.shift(-1, fill_value=1.0)
    factor = factor[::-1].cumprod()[::-1]

    for col in ['open', 'high', 'low', 'close']:
        if col in prices:
            prices[col] /= factor

    for col in ['volume']:
        if col in prices:
            prices[col] *= factor

    return prices
256
+
257
+
258
def append_quote(prices, quote):
    """
    Append quote (dict with timestamp) to prices (df), in place.

    The row date is taken from the first 10 characters of the quote's
    ``timestamp``. Nothing happens when that date is not later than the
    last index value of prices. The new row takes the quote's ``last``
    as ``close``, a ``split_factor`` of 1.0 and a ``div_pct`` of 0.0,
    restricted to the columns prices already has.

    Raises:
        ValueError: if the quote has no timestamp.
    """

    stamp = quote.get('timestamp')

    if not stamp:
        raise ValueError("timestamp is missing!")

    day = pd.to_datetime(stamp[:10])

    if day <= prices.index[-1]:
        return None

    fields = dict(quote, close=quote['last'], split_factor=1.0, div_pct=0.0)

    prices.loc[day] = {
        column: fields.get(column)
        for column in prices.columns
        if column in fields
    }
275
+
276
+
277
def slice_prices(prices, start_date=None, end_date=None, max_bars=None):
    """
    Slice prices dataframe.

    Rows are restricted to labels from ``start_date`` and up to
    ``end_date`` (each bound optional), then to the last ``max_bars``
    rows when max_bars is given.
    """

    window = prices

    if start_date is not None:
        window = window.loc[start_date:]

    if end_date is not None:
        window = window.loc[:end_date]

    return window.tail(max_bars) if max_bars else window
@@ -0,0 +1,66 @@
1
+ Metadata-Version: 2.1
2
+ Name: bardata
3
+ Version: 0.0.1
4
+ Summary: Bar data manipulation utilities
5
+ Author-email: Furechan <furechan@xsmail.com>
6
+ License: Copyright (c) 2016 The Python Packaging Authority (PyPA)
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
9
+ this software and associated documentation files (the "Software"), to deal in
10
+ the Software without restriction, including without limitation the rights to
11
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12
+ of the Software, and to permit persons to whom the Software is furnished to do
13
+ so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
25
+ License-File: LICENSE.txt
26
+ Keywords: data,finance
27
+ Classifier: Programming Language :: Python :: 3
28
+ Requires-Python: >=3.9
29
+ Requires-Dist: importlib-metadata; python_version < '3.10'
30
+ Requires-Dist: numpy
31
+ Requires-Dist: pandas
32
+ Requires-Dist: python-dateutil
33
+ Requires-Dist: requests
34
+ Description-Content-Type: text/markdown
35
+
36
+ # Bar data utilities in Python
37
+
38
+ This package includes utilities and a plugin model to fetch and manipulate bar data
39
+ for stocks and other financial series. This is work in progress.
40
+
41
+ Bar data is represented as a dataframe with a `date` timestamp index
42
+ and `open`, `high`, `low`, `close`, `volume` columns all in lower case.
43
+ Data for stocks prices is always assumed adjusted.
44
+
45
+ The main conventions are:
46
+ - data sources are referenced by an opaque `source` parameter
47
+ - flexible definition of frequencies with the `freq` parameter
48
+ - maximum number of bars is indicated with the `max_bars` parameter
49
+ - date range is optional with `start_date` and `end_date` parameters
50
+
51
+ ## Typical Usage
52
+
53
+ ```python
54
+ from bardata import get_tickers, get_prices
55
+
56
+ SOURCE = 'sample'
57
+ FREQ = 'daily'
58
+
59
+ tickers = get_tickers(source=SOURCE)
60
+
61
+ for ticker in tickers:
62
+ prices = get_prices(ticker, freq=FREQ, source=SOURCE)
63
+ ...
64
+ ```
65
+
66
+
@@ -0,0 +1,14 @@
1
+ bardata/__init__.py,sha256=6riuYC6jI1-k1ezMLCxkFwOtqjIMuBUqemnIS-cS67w,158
2
+ bardata/dates.py,sha256=wLdElD_wHfAVVkif6pI5KULqyPImdsMz0IDZULBgKK0,3122
3
+ bardata/errors.py,sha256=rgCGFKV4iffDAwhK32WLKFFaditPa7zv-FPZ39T8tqI,99
4
+ bardata/freqs.py,sha256=IOxsuDChmKKI2QPQjbBq5w4kUIH0cZeLsZ3c82G5waw,1292
5
+ bardata/model.py,sha256=Cl-vbVcqiSHM5tEGzlTyLTbUErI1h1ctTTz1nJqLVvY,668
6
+ bardata/prices.py,sha256=DHLy-wBzzS2BjNaIzKR4zamGeM-IxJ4hofRlYENg3vI,3909
7
+ bardata/sample.py,sha256=PmmIj_PGKiG2vhoX_zEezRK_GM_mzX5hvDmN-MOsJdM,4636
8
+ bardata/tickers.py,sha256=kzeSnpp6ItkxhMdPpy7D6QVVWN1RJo1Ll-0GImSSFDo,926
9
+ bardata/utils.py,sha256=3kLIYqbr3oNr7wvFyPZSzQhpge_Oi0YhQKXEXwkGapo,6823
10
+ bardata-0.0.1.dist-info/METADATA,sha256=JxfM_8-laLtgLleyCt_gf_EkY0Jgu3j6zYAYEmN_ICg,2593
11
+ bardata-0.0.1.dist-info/WHEEL,sha256=mRYSEL3Ih6g5a_CVMIcwiF__0Ae4_gLYh01YFNwiq1k,87
12
+ bardata-0.0.1.dist-info/entry_points.txt,sha256=HPQcVQg4NHicE3kKH6xE7k7eK5-NVeU99tg2ctcFVNI,99
13
+ bardata-0.0.1.dist-info/licenses/LICENSE.txt,sha256=ceC9ZJOV9H6CtQDcYmHOS46NA3dHJ_WD4J9blH513pc,1081
14
+ bardata-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.21.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,5 @@
1
+ [bardata_prices]
2
+ sample = bardata.sample:Prices
3
+
4
+ [bardata_tickers]
5
+ sample = bardata.sample:Tickers
@@ -0,0 +1,19 @@
1
+ Copyright (c) 2016 The Python Packaging Authority (PyPA)
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the "Software"), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.