readabs 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- readabs/__init__.py +4 -0
- readabs/abs_catalogue_map.py +56 -0
- readabs/abs_meta_data_support.py +40 -0
- readabs/download_cache.py +218 -0
- readabs/generate_catalogue_map.py +56 -0
- readabs/get_data_links.py +121 -0
- readabs/read_abs_cat.py +389 -0
- readabs/read_abs_series.py +95 -0
- readabs/read_support.py +31 -0
- readabs/readabs.py +40 -0
- readabs/readabs.pyi +26 -0
- readabs/utilities.py +98 -0
- readabs-0.0.2.dist-info/LICENSE +8 -0
- readabs-0.0.2.dist-info/METADATA +13 -0
- readabs-0.0.2.dist-info/RECORD +17 -0
- readabs-0.0.2.dist-info/WHEEL +5 -0
- readabs-0.0.2.dist-info/top_level.txt +1 -0
readabs/__init__.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Catalogue map for ABS data."""
|
|
2
|
+
|
|
3
|
+
from io import StringIO
|
|
4
|
+
|
|
5
|
+
from pandas import DataFrame, read_csv
|
|
6
|
+
def catalogue_map() -> DataFrame:
|
|
7
|
+
"""Return the catalogue map."""
|
|
8
|
+
|
|
9
|
+
csv = """Catalogue ID,Theme,Parent Topic,Topic,URL,Status
|
|
10
|
+
1364.0.15.003,Economy,National Accounts,Modellers Database,https://www.abs.gov.au/statistics/economy/national-accounts/modellers-database/latest-release,
|
|
11
|
+
3101.0,People,Population,National State And Territory Population,https://www.abs.gov.au/statistics/people/population/national-state-and-territory-population/latest-release,
|
|
12
|
+
3222.0,People,Population,Population Projections Australia,https://www.abs.gov.au/statistics/people/population/population-projections-australia/latest-release,
|
|
13
|
+
3401.0,Industry,Tourism And Transport,Overseas Arrivals And Departures Australia,https://www.abs.gov.au/statistics/industry/tourism-and-transport/overseas-arrivals-and-departures-australia/latest-release,
|
|
14
|
+
5204.0,Economy,National Accounts,Australian System National Accounts,https://www.abs.gov.au/statistics/economy/national-accounts/australian-system-national-accounts/latest-release,
|
|
15
|
+
5206.0,Economy,National Accounts,Australian National Accounts National Income Expenditure And Product,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-national-income-expenditure-and-product/latest-release,
|
|
16
|
+
5220.0,Economy,National Accounts,Australian National Accounts State Accounts,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-state-accounts/latest-release,
|
|
17
|
+
5232.0,Economy,National Accounts,Australian National Accounts Finance And Wealth,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-finance-and-wealth/latest-release,
|
|
18
|
+
5232.0.55.001,Economy,Finance,Assets And Liabilities Australian Securitisers,https://www.abs.gov.au/statistics/economy/finance/assets-and-liabilities-australian-securitisers/latest-release,
|
|
19
|
+
5302.0,Economy,International Trade,Balance Payments And International Investment Position Australia,https://www.abs.gov.au/statistics/economy/international-trade/balance-payments-and-international-investment-position-australia/latest-release,
|
|
20
|
+
5368.0,Economy,International Trade,International Trade Goods And Services Australia,https://www.abs.gov.au/statistics/economy/international-trade/international-trade-goods-and-services-australia/latest-release,
|
|
21
|
+
5368.0.55.024,Economy,International Trade,International Merchandise Trade Preliminary Australia,https://www.abs.gov.au/statistics/economy/international-trade/international-merchandise-trade-preliminary-australia/latest-release,
|
|
22
|
+
5601.0,Economy,Finance,Lending Indicators,https://www.abs.gov.au/statistics/economy/finance/lending-indicators/latest-release,
|
|
23
|
+
5625.0,Economy,Business Indicators,Private New Capital Expenditure And Expected Expenditure Australia,https://www.abs.gov.au/statistics/economy/business-indicators/private-new-capital-expenditure-and-expected-expenditure-australia/latest-release,
|
|
24
|
+
5655.0,Economy,Finance,Managed Funds Australia,https://www.abs.gov.au/statistics/economy/finance/managed-funds-australia/latest-release,
|
|
25
|
+
5676.0,Economy,Business Indicators,Business Indicators Australia,https://www.abs.gov.au/statistics/economy/business-indicators/business-indicators-australia/latest-release,
|
|
26
|
+
5681.0,Economy,Business Indicators,Monthly Business Turnover Indicator,https://www.abs.gov.au/statistics/economy/business-indicators/monthly-business-turnover-indicator/latest-release,
|
|
27
|
+
5682.0,Economy,Finance,Monthly Household Spending Indicator,https://www.abs.gov.au/statistics/economy/finance/monthly-household-spending-indicator/latest-release,
|
|
28
|
+
6202.0,Labour,Employment And Unemployment,Labour Force Australia,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia/latest-release,
|
|
29
|
+
6150.0.55.003,Labour,Labour Accounts,Labour Account Australia,https://www.abs.gov.au/statistics/labour/labour-accounts/labour-account-australia/latest-release,
|
|
30
|
+
6248.0.55.002,Labour,Employment And Unemployment,Public Sector Employment And Earnings,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/public-sector-employment-and-earnings/latest-release,
|
|
31
|
+
6291.0.55.001,Labour,Employment And Unemployment,Labour Force Australia Detailed,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia-detailed/latest-release,
|
|
32
|
+
6302.0,Labour,Earnings And Working Conditions,Average Weekly Earnings Australia,https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/average-weekly-earnings-australia/latest-release,
|
|
33
|
+
6321.0.55.001,Labour,Earnings And Working Conditions,Industrial Disputes Australia,https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/industrial-disputes-australia/latest-release,
|
|
34
|
+
6345.0,Economy,Price Indexes And Inflation,Wage Price Index Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/wage-price-index-australia/latest-release,
|
|
35
|
+
6354.0,Labour,Jobs,Job Vacancies Australia,https://www.abs.gov.au/statistics/labour/jobs/job-vacancies-australia/latest-release,
|
|
36
|
+
6401.0,Economy,Price Indexes And Inflation,Consumer Price Index Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/consumer-price-index-australia/latest-release,
|
|
37
|
+
6416.0,Economy,Price Indexes And Inflation,Residential Property Price Indexes Eight Capital Cities,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/residential-property-price-indexes-eight-capital-cities/latest-release,Ceased
|
|
38
|
+
6427.0,Economy,Price Indexes And Inflation,Producer Price Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/producer-price-indexes-australia/latest-release,
|
|
39
|
+
6432.0,Economy,Price Indexes And Inflation,Total Value Dwellings,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/total-value-dwellings/latest-release,
|
|
40
|
+
6457.0,Economy,Price Indexes And Inflation,International Trade Price Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/international-trade-price-indexes-australia/latest-release,
|
|
41
|
+
6467.0,Economy,Price Indexes And Inflation,Selected Living Cost Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/selected-living-cost-indexes-australia/latest-release,
|
|
42
|
+
6484.0,Economy,Price Indexes And Inflation,Monthly Consumer Price Index Indicator,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/monthly-consumer-price-index-indicator/latest-release,
|
|
43
|
+
7215.0,Industry,Agriculture,Livestock Products Australia,https://www.abs.gov.au/statistics/industry/agriculture/livestock-products-australia/latest-release,
|
|
44
|
+
7218.0.55.001,Industry,Agriculture,Livestock And Meat Australia,https://www.abs.gov.au/statistics/industry/agriculture/livestock-and-meat-australia/latest-release,Ceased
|
|
45
|
+
8155.0,Industry,Industry Overview,Australian Industry,https://www.abs.gov.au/statistics/industry/industry-overview/australian-industry/latest-release,
|
|
46
|
+
8165.0,Economy,Business Indicators,Counts Australian Businesses Including Entries And Exits,https://www.abs.gov.au/statistics/economy/business-indicators/counts-australian-businesses-including-entries-and-exits/latest-release,
|
|
47
|
+
8412.0,Industry,Mining,Mineral And Petroleum Exploration Australia,https://www.abs.gov.au/statistics/industry/mining/mineral-and-petroleum-exploration-australia/latest-release,
|
|
48
|
+
8501.0,Industry,Retail And Wholesale Trade,Retail Trade Australia,https://www.abs.gov.au/statistics/industry/retail-and-wholesale-trade/retail-trade-australia/latest-release,
|
|
49
|
+
8701.0,Industry,Building And Construction,Estimated Dwelling Stock,https://www.abs.gov.au/statistics/industry/building-and-construction/estimated-dwelling-stock/latest-release,
|
|
50
|
+
8731.0,Industry,Building And Construction,Building Approvals Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/building-approvals-australia/latest-release,
|
|
51
|
+
8752.0,Industry,Building And Construction,Building Activity Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/building-activity-australia/latest-release,
|
|
52
|
+
8755.0,Industry,Building And Construction,Construction Work Done Australia Preliminary,https://www.abs.gov.au/statistics/industry/building-and-construction/construction-work-done-australia-preliminary/latest-release,
|
|
53
|
+
8762.0,Industry,Building And Construction,Engineering Construction Activity Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/engineering-construction-activity-australia/latest-release,
|
|
54
|
+
8782.0.65.001,Industry,Building And Construction,Construction Activity Chain Volume Measures Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/construction-activity-chain-volume-measures-australia/jun-2020,Ceased
|
|
55
|
+
"""
|
|
56
|
+
return read_csv(StringIO(csv), index_col=0)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""abs_meta_data_support.py
|
|
2
|
+
|
|
3
|
+
Support for working with ABS meta data."""
|
|
4
|
+
|
|
5
|
+
from collections import namedtuple
|
|
6
|
+
|
|
7
|
+
# Attribute name -> column heading, in the field order of the Metacol tuple.
_METACOL_HEADINGS = {
    "did": "Data Item Description",
    "stype": "Series Type",
    "id": "Series ID",
    "start": "Series Start",
    "end": "Series End",
    "num": "No. Obs.",
    "unit": "Unit",
    "dtype": "Data Type",
    "freq": "Freq.",
    "cmonth": "Collection Month",
    "table": "Table",
    "tdesc": "Table Description",
    "cat": "Catalogue number",
}

# The tuple type: one field per meta data column.
Metacol = namedtuple("Metacol", list(_METACOL_HEADINGS))

# The instance that carries the column headings (eg. metacol.did).
metacol = Metacol(**_METACOL_HEADINGS)
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""download_cache.py - a module for downloading and caching data from the web.
|
|
2
|
+
|
|
3
|
+
The default cache directory can be specified by setting the environment
|
|
4
|
+
variable READABS_CACHE_DIR."""
|
|
5
|
+
|
|
6
|
+
# --- imports
|
|
7
|
+
# system imports
|
|
8
|
+
from hashlib import md5
|
|
9
|
+
import re
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from os import utime, getenv
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
# data imports
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import requests
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# --- constants
|
|
21
|
+
# define the default cache directory
|
|
22
|
+
DEFAULT_CACHE_DIR = "./.readabs_cache"
|
|
23
|
+
READABS_CACHE_DIR = getenv("READABS_CACHE_DIR", DEFAULT_CACHE_DIR)
|
|
24
|
+
READABS_CACHE_PATH = Path(READABS_CACHE_DIR)
|
|
25
|
+
|
|
26
|
+
DOWNLOAD_TIMEOUT = 60 # seconds
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# --- Exception classes
|
|
30
|
+
class HttpError(Exception):
|
|
31
|
+
"""A problem retrieving data from HTTP."""
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class CacheError(Exception):
|
|
35
|
+
"""A problem retrieving data from the cache."""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# --- functions
|
|
39
|
+
def check_for_bad_response(
    url: str,
    response: requests.Response,
    **kwargs: Any,
) -> bool:
    """Report whether the response to a request for url is unusable.

    A response is good when its status code is 200 and it has headers;
    in that case False is returned. Otherwise HttpError is raised, unless
    the "ignore_errors" keyword argument is True, in which case the
    problem is printed and True is returned."""

    status = response.status_code
    if status == 200 and response.headers is not None:
        return False

    problem = f"Problem {status} accessing: {url}."
    if kwargs.get("ignore_errors", False):
        print(problem)
        return True
    raise HttpError(problem)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def request_get(
    url: str,
    **kwargs: Any,
) -> bytes:
    """Download the contents of url with the python requests package.

    If the download fails, HttpError is raised, unless the "ignore_errors"
    keyword argument is True, in which case a message is printed and an
    empty bytes object is returned. If "verbose" is True, the URL is
    printed before the download starts."""

    if kwargs.get("verbose", False):
        print(f"About to request/download: {url}")

    try:
        reply = requests.get(url, allow_redirects=True, timeout=DOWNLOAD_TIMEOUT)
    except requests.exceptions.RequestException as e:
        error = f"request_get(): there was a problem downloading {url}."
        if kwargs.get("ignore_errors", False):
            print(error)
            return b""
        raise HttpError(error) from e

    # check_for_bad_response() raises an exception when it finds a problem
    # and ignore_errors is False; otherwise it prints a message and
    # returns True, and we hand back empty bytes.
    failed = check_for_bad_response(url, reply, **kwargs)
    return b"" if failed else reply.content
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def save_to_cache(
    file: Path,
    contents: bytes,
    **kwargs: Any,
) -> None:
    """Save bytes to the file-system.

    Arguments:
    file -- the path of the cache file to write (any existing file at
        that path is removed first).
    contents -- the bytes to save; when empty, nothing is written and
        any existing cache file is left untouched.
    kwargs -- only "verbose" is read; if True, progress is printed."""

    verbose = kwargs.get("verbose", False)
    if not contents:
        # don't save empty files (probably caused by ignoring errors)
        return
    if file.exists():
        if verbose:
            print("Removing old cache file.")
        file.unlink()
    if verbose:
        print(f"About to save to cache: {file}")
    # write_bytes() creates, writes and closes the file itself, so no
    # separate open() call is needed (one would leak an open handle).
    file.write_bytes(contents)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def retrieve_from_cache(file: Path, **kwargs: Any) -> bytes:
    """Read and return the bytes held in a cache file.

    If the path does not exist or is not a regular file, CacheError is
    raised, unless the "ignore_errors" keyword argument is True, in which
    case a message is printed and empty bytes are returned. If "verbose"
    is True, the file being read is printed."""

    if file.exists() and file.is_file():
        if kwargs.get("verbose", False):
            print(f"Retrieving from cache: {file}")
        return file.read_bytes()

    message = f"Cached file not available: {file.name}"
    if kwargs.get("ignore_errors", False):
        print(message)
        return b""
    raise CacheError(message)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def get_file(
    url: str,
    cache_dir: Path = READABS_CACHE_PATH,
    cache_prefix: str = "cache",
    **kwargs: Any,
) -> bytes:
    """Get a file from URL or local file-system cache, depending on freshness.

    The "Last-Modified" header from an HTTP HEAD request is compared with
    the modification time of the cached copy. The file is downloaded (and
    cached) when there is no cached copy, or when the URL reports a more
    recent modification time; otherwise the cached copy is returned.

    Arguments:
    url -- the web address of the file.
    cache_dir -- the cache directory; it is created if it does not exist.
    cache_prefix -- text placed at the start of the cache file name.
    kwargs -- "verbose" and "ignore_errors" are read here and by the
        helper functions this function calls.

    Returns: the contents of the file as bytes.

    Raises: CacheError if cache_dir is not usable as a directory;
    HttpError if the URL cannot be accessed and "ignore_errors" is not True."""

    def get_fpath() -> Path:
        """Convert URL string into a cache file name,
        then return as a Path object."""
        bad_cache_pattern = r'[~"#%&*:<>?\\{|}]+'  # chars to remove from name
        hash_name = md5(url.encode("utf-8")).hexdigest()
        tail_name = url.split("/")[-1].split("?")[0]
        file_name = re.sub(
            bad_cache_pattern, "", f"{cache_prefix}--{hash_name}--{tail_name}"
        )
        return Path(cache_dir / file_name)

    # create and check cache_dir is a directory
    try:
        cache_dir.mkdir(parents=True, exist_ok=True)
    except FileExistsError as e:
        # mkdir() raises this when the path exists but is not a directory
        raise CacheError(f"Cache path is not a directory: {cache_dir.name}") from e
    if not cache_dir.is_dir():
        raise CacheError(f"Cache path is not a directory: {cache_dir.name}")

    # get URL modification time in UTC
    source_time = None
    try:
        response = requests.head(url, allow_redirects=True, timeout=20)
    except requests.exceptions.RequestException as e:
        # treat a failed HEAD request in the same way request_get()
        # treats a failed GET request
        error = f"get_file(): there was a problem accessing {url}."
        if not kwargs.get("ignore_errors", False):
            raise HttpError(error) from e
        print(error)
    else:
        if not check_for_bad_response(url, response, **kwargs):
            source_time = response.headers.get("Last-Modified", None)
    source_mtime = (
        None if source_time is None else pd.to_datetime(source_time, utc=True)
    )

    # get cache modification time in UTC
    target_mtime: datetime | None = None
    file_path = get_fpath()
    if file_path.exists() and file_path.is_file():
        target_mtime = pd.to_datetime(
            datetime.fromtimestamp(file_path.stat().st_mtime, tz=timezone.utc), utc=True
        )

    # get and save URL source data
    if target_mtime is None or (  # cache is empty, or
        source_mtime is not None
        and source_mtime > target_mtime  # URL is fresher than cache
    ):
        url_bytes = request_get(url, **kwargs)  # raises exception if it fails
        save_to_cache(file_path, url_bytes, **kwargs)
        # - change file mod time to reflect mtime at URL
        #   (nothing was saved if url_bytes is empty)
        if source_mtime is not None and len(url_bytes) > 0:
            unixtime = source_mtime.value / 1_000_000_000  # convert to seconds
            utime(file_path, (unixtime, unixtime))
        return url_bytes

    # return the data that has been cached previously
    return retrieve_from_cache(file_path, **kwargs)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# --- preliminary testing:
|
|
193
|
+
DO_TEST = False
|
|
194
|
+
if __name__ == "__main__" and DO_TEST:
|
|
195
|
+
|
|
196
|
+
def cache_test() -> None:
|
|
197
|
+
"""This function provides a quick test of the retrieval
|
|
198
|
+
and caching system. You may need to first clear the
|
|
199
|
+
cache directory to see the effect of the cache."""
|
|
200
|
+
|
|
201
|
+
# prepare the test case
|
|
202
|
+
url1 = (
|
|
203
|
+
"https://www.abs.gov.au/statistics/labour/employment-and-"
|
|
204
|
+
+ "unemployment/labour-force-australia/nov-2023/6202001.xlsx"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
# implement - first retrieval is from the web, second from the cache
|
|
208
|
+
width = 20
|
|
209
|
+
print("Test commencing.")
|
|
210
|
+
for u in (url1, url1):
|
|
211
|
+
print("=" * width)
|
|
212
|
+
content = get_file(u, verbose=True)
|
|
213
|
+
print("-" * width)
|
|
214
|
+
print(f"{len(content)} bytes retrieved from {u}.")
|
|
215
|
+
print("=" * width)
|
|
216
|
+
print("Test completed.")
|
|
217
|
+
|
|
218
|
+
cache_test()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Generate the catalogue_map.py file."""
|
|
2
|
+
|
|
3
|
+
# --- imports
|
|
4
|
+
from io import StringIO
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pandas import DataFrame, Series, Index
|
|
7
|
+
from download_cache import get_file
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# --- functions
|
|
11
|
+
# private
|
|
12
|
+
def _get_abs_directory() -> DataFrame:
    """Return a DataFrame of ABS Catalogue numbers.

    The ABS time series directory web page is obtained with get_file(),
    and the second HTML table on that page is converted into a frame
    indexed by "Catalogue ID", with the columns "Theme", "Parent Topic",
    "Topic", "URL" and "Status". Rows whose link still contains "http"
    after the ABS statistics root has been removed are left out."""

    # get ABS web page of catalogue numbers
    url = "https://www.abs.gov.au/about/data-services/help/abs-time-series-directory"
    page = get_file(url)
    tables = pd.read_html(StringIO(page.decode("utf-8")), extract_links="body")
    links = tables[1]  # second table on the page

    # extract catalogue numbers
    # Note: with extract_links="body" each cell is a (text, link) pair
    cats = links["Catalogue Number"].apply(Series)[0]
    urls = links["Topic"].apply(Series)[1]
    root = "https://www.abs.gov.au/statistics/"
    # regex=False: the root is literal text, not a pattern
    snip = urls.str.replace(root, "", regex=False)
    snip = (
        snip[~snip.str.contains("http")].str.replace("-", " ").str.title()
    )  # remove bad cases
    frame = snip.str.split("/", expand=True).iloc[:, :3]
    frame.columns = Index(["Theme", "Parent Topic", "Topic"])
    frame["URL"] = urls
    cats = cats[frame.index]
    # regex=False: as a pattern, the brackets in "(Ceased)" would form a
    # group and be left behind in the catalogue identifier
    cat_index = cats.str.replace("(Ceased)", "", regex=False).str.strip()
    status = Series(" ", index=cats.index).where(cat_index == cats, "Ceased")
    frame["Status"] = status
    frame.index = Index(cat_index)
    frame.index.name = "Catalogue ID"
    return frame
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def produce_catalogue_map() -> None:
    """Generate the catalogue_map.py file.

    The ABS directory is obtained with _get_abs_directory() and written,
    as CSV text inside a catalogue_map() function, to the file
    "abs_catalogue_map.py" in the current working directory."""

    directory = _get_abs_directory()
    with open("abs_catalogue_map.py", "w", encoding="utf-8") as file:
        file.write('"""Catalogue map for ABS data."""\n\n')
        file.write("from io import StringIO\n\n")
        file.write("from pandas import DataFrame, read_csv\n")
        file.write("def catalogue_map() -> DataFrame:\n")
        # the body of the generated function uses a four space indent
        file.write('    """Return the catalogue map."""\n\n')
        file.write(f'    csv = """{directory.to_csv()}"""\n')
        file.write("    return read_csv(StringIO(csv), index_col=0)\n")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
if __name__ == "__main__":
|
|
56
|
+
produce_catalogue_map()
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""get_data_links.py - scan an ABS web page for links to ZIP and Excel data files."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any
|
|
5
|
+
from bs4 import BeautifulSoup
|
|
6
|
+
|
|
7
|
+
# local imports - ugly, need to find out how to fix this
|
|
8
|
+
if __package__ is None or __package__ == "":
|
|
9
|
+
from download_cache import get_file, HttpError, CacheError
|
|
10
|
+
else:
|
|
11
|
+
from .download_cache import get_file, HttpError, CacheError
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# private
|
|
15
|
+
def _make_absolute_url(url: str, prefix: str = "https://www.abs.gov.au") -> str:
|
|
16
|
+
"""Convert a relative URL address found on the ABS site to
|
|
17
|
+
an absolute URL address."""
|
|
18
|
+
|
|
19
|
+
# remove a prefix if it already exists (just to be sure)
|
|
20
|
+
url = url.replace(prefix, "")
|
|
21
|
+
url = url.replace(prefix.replace("https://", "http://"), "")
|
|
22
|
+
# then add the prefix (back) ...
|
|
23
|
+
return f"{prefix}{url}"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# public (also used by read_abs_cat.py)
|
|
27
|
+
def get_table_name(url: str) -> str:
    """Return the table name found in an ABS URL: the text of the last
    "/" separated part of the URL, up to its first full stop."""

    _, _, last_part = url.rpartition("/")
    name, _, _ = last_part.partition(".")
    return name
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
# private
|
|
36
|
+
def historicise_links(
    link_dict: dict[str, list[str]], history: str
) -> dict[str, list[str]]:
    """Age an ABS link so that it points to a historical version of the data.

    Arguments:
    link_dict -- lists of links, indexed by file type ending.
    history -- the text to place in the second last part of each link.

    Returns: a new dictionary, with the same keys, of new lists of aged
    links. A link with fewer than two "/" separators has no second last
    part to replace, and is returned unchanged.

    Note: the history string is typically in "mon-yr" format, but not always.
    Note: we are also assuming that the date is in the second last part of the URL.
    These assumptions may not always hold."""

    def _age(link: str) -> str:
        """Swap the second last part of one link for the history string."""
        parts = link.rsplit("/", 2)
        if len(parts) < 3:
            return link
        head, _, tail = parts
        return "/".join((head, history, tail))

    return {
        link_type: [_age(link) for link in link_list]
        for link_type, link_list in link_dict.items()
    }
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# public
|
|
57
|
+
def get_data_links(
    url: str,  # the URL of the ABS page to scan
    inspect_file_name: str = "",  # for debugging - save the page to disk
    **kwargs: Any,
) -> dict[str, list[str]]:
    """Scan the webpage at the ABS URL for links to ZIP files and for
    links to Microsoft Excel files.

    Return the links in a dictionary of lists indexed by file type ending
    (".zip" or ".xlsx"). A file type ending is only in the dictionary if
    at least one link of that type was found. Links with "pivot" in the
    file name are ignored.
    Ensure relative links have been fully expanded to be absolute links.

    If the web page cannot be obtained, a message is printed and an
    empty dictionary is returned.

    Keyword arguments read here: "verbose" (if True, print progress) and
    "history" (if not empty, age the links with historicise_links()).
    All of the keyword arguments are also passed to get_file()."""

    # get relevant web-page from ABS website
    verbose = kwargs.get("verbose", False)
    if verbose:
        print("Getting data links from the ABS web page.")
    try:
        page = get_file(url, **kwargs)
    except (HttpError, CacheError) as e:
        print(f"Error when obtaining links from ABS web page: {e}")
        return {}

    # save the HTML webpage to disk for inspection
    if inspect_file_name:
        with open(inspect_file_name, "w", encoding="utf-8") as file_handle:
            file_handle.write(page.decode("utf-8"))

    # remove those pesky span tags - probably not necessary
    page = re.sub(b"<span[^>]*>", b" ", page)
    page = re.sub(b"</span>", b" ", page)
    page = re.sub(b"\\s+", b" ", page)  # tidy up white space

    # capture all links (of ZIP and Microsoft Excel types)
    link_types = (
        ".zip",
        ".xlsx",
    )  # must be lower case
    soup = BeautifulSoup(page, features="lxml")
    link_dict: dict[str, list[str]] = {}
    for anchor in soup.find_all("a"):
        # Note: a separate name is used so that the url argument
        # is not overwritten while scanning the page
        href = anchor.get("href")
        if href is None:
            # ignore silly cases
            continue
        if "pivot" in href.rsplit("/", 1)[-1].lower():
            # ignore pivot tables
            continue
        for link_type in link_types:
            if href.lower().endswith(link_type):
                link_dict.setdefault(link_type, []).append(_make_absolute_url(href))
                break

    # age links if required
    history = kwargs.get("history", "")
    if history:
        link_dict = historicise_links(link_dict, history)

    if verbose:
        print("Found links to the following ABS data tables:")
        for link_type, link_list in link_dict.items():
            summary = [get_table_name(x) for x in link_list]  # just the file name
            print(f"Found: {len(link_list)} items of type {link_type}: {summary}")
        print()

    return link_dict
|
readabs/read_abs_cat.py
ADDED
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
"""read_abs_cat.py
|
|
2
|
+
|
|
3
|
+
Download all/selected timeseries data from the
|
|
4
|
+
Australian Bureau of Statistics (ABS) for a specified
|
|
5
|
+
ABS catalogue identifier and package that data into a
|
|
6
|
+
dictionary of DataFrames."""
|
|
7
|
+
|
|
8
|
+
# --- imports ---
|
|
9
|
+
# standard library imports
|
|
10
|
+
import calendar
|
|
11
|
+
import zipfile
|
|
12
|
+
from functools import cache
|
|
13
|
+
from io import BytesIO
|
|
14
|
+
from typing import Any, Callable, cast
|
|
15
|
+
|
|
16
|
+
# analytic imports
|
|
17
|
+
import pandas as pd
|
|
18
|
+
from pandas import DataFrame
|
|
19
|
+
|
|
20
|
+
# local imports - ugly, need to find out how to fix this
|
|
21
|
+
#print(f"in read_abs_cat.py: __main__={__name__}, __package__={__package__}")
|
|
22
|
+
if __package__ is None or __package__ == "":
|
|
23
|
+
from abs_meta_data_support import metacol
|
|
24
|
+
from get_data_links import get_data_links, get_table_name
|
|
25
|
+
from abs_catalogue_map import catalogue_map
|
|
26
|
+
from read_support import check_kwargs, get_args
|
|
27
|
+
from download_cache import get_file
|
|
28
|
+
else:
|
|
29
|
+
from .abs_meta_data_support import metacol
|
|
30
|
+
from .get_data_links import get_data_links, get_table_name
|
|
31
|
+
from .abs_catalogue_map import catalogue_map
|
|
32
|
+
from .read_support import check_kwargs, get_args
|
|
33
|
+
from .download_cache import get_file
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# --- functions ---
|
|
37
|
+
# private
|
|
38
|
+
def _get_meta_from_excel(
    excel: pd.ExcelFile,
    table: str,
    tab_desc: str,
    cat_id: str,
) -> pd.DataFrame:
    """Capture the metadata from the Index sheet of an ABS excel file.

    Arguments:
    excel -- the ABS workbook; its "Index" sheet is read.
    table -- the table name, added to every row of the metadata.
    tab_desc -- the table description, added to every row of the metadata.
    cat_id -- the catalogue identifier, added to every row of the metadata.

    Returns a DataFrame specific to the current excel file.
    Returning an empty DataFrame, means that the metadata could not
    be identified. Meta data for each ABS data item is organised by row."""

    # Unfortunately, the header for some of the 3401.0
    # spreadsheets starts on row 10
    starting_rows = 9, 10
    required = metacol.did, metacol.id, metacol.stype, metacol.unit
    required_set = set(required)
    for header_row in starting_rows:
        # Note: the deprecated infer_datetime_format argument is not used
        file_meta = excel.parse(
            "Index",
            header=header_row,
            parse_dates=True,
            converters={"Unit": str},
        )
        file_meta = file_meta.iloc[1:-2]  # drop first and last 2
        file_meta = file_meta.dropna(axis="columns", how="all")

        if required_set.issubset(set(file_meta.columns)):
            break
    else:
        # none of the candidate header rows had the required columns
        print(f"Could not find metadata for {cat_id}-{tab_desc}")
        return pd.DataFrame()

    # add the table name and table description to the metadata
    file_meta[metacol.table] = table.strip()
    file_meta[metacol.tdesc] = tab_desc.strip()
    file_meta[metacol.cat] = cat_id.strip()

    # make damn sure there are no rogue white spaces
    for col in required:
        file_meta[col] = file_meta[col].str.strip()

    return file_meta
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# private
|
|
85
|
+
def _unpack_excel_into_df(
|
|
86
|
+
excel: pd.ExcelFile,
|
|
87
|
+
meta: DataFrame,
|
|
88
|
+
freq: str,
|
|
89
|
+
verbose: bool,
|
|
90
|
+
) -> DataFrame:
|
|
91
|
+
"""Take an ABS excel file and put all the Data sheets into a single
|
|
92
|
+
pandas DataFrame and return that DataFrame."""
|
|
93
|
+
|
|
94
|
+
data = DataFrame()
|
|
95
|
+
data_sheets = [x for x in excel.sheet_names if cast(str, x).startswith("Data")]
|
|
96
|
+
for sheet_name in data_sheets:
|
|
97
|
+
sheet_data = excel.parse(
|
|
98
|
+
sheet_name,
|
|
99
|
+
header=9,
|
|
100
|
+
index_col=0,
|
|
101
|
+
).dropna(how="all", axis="index")
|
|
102
|
+
data.index = pd.to_datetime(data.index)
|
|
103
|
+
|
|
104
|
+
# merge data into a large dataframe
|
|
105
|
+
if len(data) == 0:
|
|
106
|
+
data = sheet_data
|
|
107
|
+
else:
|
|
108
|
+
data = pd.merge(
|
|
109
|
+
left=data,
|
|
110
|
+
right=sheet_data,
|
|
111
|
+
how="outer",
|
|
112
|
+
left_index=True,
|
|
113
|
+
right_index=True,
|
|
114
|
+
suffixes=("", ""),
|
|
115
|
+
)
|
|
116
|
+
if freq:
|
|
117
|
+
if freq in ("Q", "A"):
|
|
118
|
+
month = calendar.month_abbr[
|
|
119
|
+
cast(pd.PeriodIndex, data.index).month.max()
|
|
120
|
+
].upper()
|
|
121
|
+
freq = f"{freq}-{month}"
|
|
122
|
+
if isinstance(data.index, pd.DatetimeIndex):
|
|
123
|
+
data = data.to_period(freq=freq)
|
|
124
|
+
|
|
125
|
+
# check for NA columns - rarely happens
|
|
126
|
+
# Note: these empty columns are not removed,
|
|
127
|
+
# but it is useful to know they are there
|
|
128
|
+
if data.isna().all().any() and verbose:
|
|
129
|
+
cols = data.columns[data.isna().all()]
|
|
130
|
+
print(
|
|
131
|
+
"Caution: these columns are all NA in "
|
|
132
|
+
+ f"{meta[metacol.table].iloc[0]}: {cols}"
|
|
133
|
+
)
|
|
134
|
+
|
|
135
|
+
# check for duplicate columns - should not happen
|
|
136
|
+
# Note: these duplicate columns are removed
|
|
137
|
+
duplicates = data.columns.duplicated()
|
|
138
|
+
if duplicates.any():
|
|
139
|
+
if verbose:
|
|
140
|
+
dup_table = meta[metacol.table].iloc[0]
|
|
141
|
+
print(
|
|
142
|
+
f"Note: duplicates removed from {dup_table}: "
|
|
143
|
+
+ f"{data.columns[duplicates]}"
|
|
144
|
+
)
|
|
145
|
+
data = data.loc[:, ~duplicates].copy()
|
|
146
|
+
return data
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# private
|
|
150
|
+
def _extract_data_from_excel(
    raw_bytes: bytes, table_name: str, **kwargs: Any
) -> tuple[DataFrame, DataFrame]:
    """Convert the raw bytes of an Excel file into pandas DataFrames.

    Parameters
    ----------
    raw_bytes : bytes
        The binary content of an Excel workbook.
    table_name : str
        The table name; used in messages and handed to the meta data reader.
    **kwargs : Any
        Only "ignore_errors" and "verbose" are read here.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        The actual data and the meta data, in that order. Two empty
        DataFrames are returned when the workbook cannot be opened (and
        ignore_errors is set), when it has no 'Index' sheet, when no meta
        data rows are found, or when the data frequency is not recognised.

    Raises
    ------
    RuntimeError
        If the bytes cannot be opened as an Excel file and ignore_errors
        is not set."""

    ignore_errors = kwargs.get("ignore_errors", False)

    # convert the raw bytes into a pandas ExcelFile - this is done once,
    # and the same workbook object is reused for every read below
    try:
        excel = pd.ExcelFile(BytesIO(raw_bytes))
    except Exception as e:
        message = f"With {table_name}: could not convert raw bytes to ExcelFile.\n{e}"
        if ignore_errors:
            print(message)
            return pd.DataFrame(), pd.DataFrame()
        raise RuntimeError(message) from e

    # get table information (ie. the meta data)
    if "Index" not in excel.sheet_names:
        print(
            "Caution: Could not find the 'Index' "
            f"sheet in {table_name}. File not included"
        )
        return pd.DataFrame(), pd.DataFrame()

    # get table header information: only the first 8 rows are read, and
    # cells [3, 1] and [4, 1] are taken as the catalogue id and the
    # table description respectively
    header = excel.parse("Index", nrows=8)
    cat_id = header.iat[3, 1].split(" ")[0].strip()
    tab_desc = header.iat[4, 1].split(".", 1)[-1].strip()

    # get the metadata rows
    file_meta = _get_meta_from_excel(excel, table_name, tab_desc, cat_id)
    if len(file_meta) == 0:
        return pd.DataFrame(), pd.DataFrame()

    # establish freq - used for making the index a PeriodIndex
    # NOTE(review): _unpack_excel_into_df only anchors freq values in
    # ("Q", "A") to a month, so the "Y" used here for annual data is
    # passed through unanchored - confirm that is intended.
    freq_dict = {"annual": "Y", "biannual": "Q", "quarter": "Q", "month": "M"}
    freqlist = file_meta["Freq."].str.lower().unique()
    if len(freqlist) != 1 or freqlist[0] not in freq_dict:
        print(f"Unrecognised data frequency {freqlist} for {tab_desc}")
        return pd.DataFrame(), pd.DataFrame()
    freq = freq_dict[freqlist[0]]

    data = _unpack_excel_into_df(
        excel, file_meta, freq, verbose=kwargs.get("verbose", False)
    )

    return data, file_meta
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
# private
|
|
204
|
+
def _process_zip_binary(
    zip_contents: bytes,
    **kwargs: Any,
) -> tuple[dict[str, DataFrame], DataFrame]:
    """Extract the contents of a ZIP file into a tuple, where the
    first element is a dictionary of DataFrames; and the second
    element is the related ABS meta data in a DataFrame.

    Parameters
    ----------
    zip_contents : bytes
        The binary content of the zip file.
    **kwargs : Any
        "verbose" is read here; all keyword arguments are passed on to
        _extract_data_from_excel.

    Returns
    -------
    tuple[dict[str, DataFrame], DataFrame]
        The tables keyed by table name, and the concatenated meta data.
        Archive members that yield no data are left out of both."""

    verbose = kwargs.get("verbose", False)
    if verbose:
        print("Extracting DataFrames from the zip-file binary.")
    returnable_data: dict[str, DataFrame] = {}
    returnable_meta = DataFrame()

    with zipfile.ZipFile(BytesIO(zip_contents)) as zipped:
        for count, element in enumerate(zipped.infolist()):
            # directory entries hold no workbook, so there is nothing to parse
            if element.is_dir():
                continue

            # get the zipfile into pandas
            table_name = get_table_name(url=element.filename)
            raw_bytes = zipped.read(element.filename)
            excel_df, file_meta = _extract_data_from_excel(
                raw_bytes, table_name, **kwargs
            )
            if len(excel_df) == 0:
                # this table could not be captured
                continue

            # fix tabulation if ABS used the same table numbers for data
            if table_name in returnable_data:
                # This really just should not happen, but if it does, we need to fix it
                tmp = f"{table_name}-{count}"
                if verbose:
                    print(f"Changing duplicate table name from {table_name} to {tmp}.")
                table_name = tmp
                file_meta[metacol.table] = table_name

            # aggregate the meta data
            returnable_meta = pd.concat([returnable_meta, file_meta])

            # add the table to the returnable dictionary
            returnable_data[table_name] = excel_df

    return returnable_data, returnable_meta
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
# private
|
|
249
|
+
def _add_zip(
    link: str, abs_dict: dict[str, DataFrame], abs_meta: DataFrame, **args
) -> tuple[dict[str, DataFrame], DataFrame]:
    """Fetch the zip file at link and fold its tables into abs_dict and
    its meta data rows into abs_meta. Both are returned unchanged when
    the download yields no bytes."""

    payload = get_file(link, **args)
    if len(payload) != 0:
        tables, meta_rows = _process_zip_binary(payload, **args)
        abs_dict.update(tables)
        abs_meta = pd.concat([abs_meta, meta_rows], axis=0)
    return abs_dict, abs_meta
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# private
|
|
265
|
+
def _add_excel(
    link: str,
    abs_dict: dict[str, DataFrame],
    abs_meta: DataFrame,
    **args: Any,
) -> tuple[dict[str, DataFrame], DataFrame]:
    """Fetch the Excel file at link and add its table to abs_dict and
    its meta data rows to abs_meta.

    Nothing is changed when the table name is already a key of abs_dict,
    when the download yields no bytes, or when no data could be
    extracted from the workbook."""

    table = get_table_name(link)
    if table not in abs_dict:
        content = get_file(link, **args)
        if len(content) != 0:
            frame, meta_rows = _extract_data_from_excel(content, table, **args)
            if len(frame) != 0:
                abs_dict[table] = frame
                abs_meta = pd.concat([abs_meta, meta_rows], axis=0)
    return abs_dict, abs_meta
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
# private
|
|
292
|
+
def _add_single(
    name: str,
    abs_dict: dict[str, DataFrame],
    abs_meta: DataFrame,
    links: dict[str, list[str]],
    typology: str,  # ".zip" or ".xlsx"
    **args,
) -> tuple[dict[str, DataFrame], DataFrame]:
    """Load exactly one named zip or excel file into the dictionary of
    DataFrames, together with its meta data.

    Raises ValueError when no link of the requested typology has that
    table name, unless args["ignore_errors"] is set, in which case the
    message is printed and the inputs are returned unchanged."""

    if typology == ".zip":
        loader: Callable = _add_zip
    else:
        loader = _add_excel

    # map each table name to the link it came from
    candidates = {}
    for url in links.get(typology, []):
        candidates[get_table_name(url)] = url

    if name in candidates:
        abs_dict, abs_meta = loader(candidates[name], abs_dict, abs_meta, **args)
        return abs_dict, abs_meta

    message = f"File ({name}{typology}) not found on ABS web page."
    if args["ignore_errors"]:
        print(message)
        return abs_dict, abs_meta
    raise ValueError(message)
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
# public -- primary entry point for this module
|
|
316
|
+
@cache  # minimise slowness with repeat business
def read_abs_cat(
    cat: str, **kwargs: Any  # ABS catalogue number  # keyword arguments
) -> tuple[dict[str, DataFrame], DataFrame]:
    """Read the ABS data for a catalogue id and return the data.

    Results are memoised with functools.cache, so a repeated call with
    the same arguments returns the very same objects; callers should
    not modify them in place. The arguments must be hashable.

    Parameters
    ----------
    cat : str
        The ABS catalogue number.
    **kwargs : Any
        Keyword arguments for the read_abs_cat function. Those read
        directly here are get_zip, get_excel, get_excel_if_no_zip,
        single_zip_only and single_excel_only; the full validated set
        is passed on to the download and parsing helpers.

    Returns
    -------
    tuple[dict[str, DataFrame], DataFrame]
        A dictionary of DataFrames and a DataFrame of the meta data.
        The dictionary is indexed by table names, which can be found
        in the meta data DataFrame. An empty dictionary and an empty
        DataFrame are returned when no data links are found.

    Raises
    ------
    ValueError
        If get_zip, get_excel and get_excel_if_no_zip are all false, or
        if cat is not in the catalogue map."""

    # check/get the keyword arguments
    check_kwargs(kwargs, "read_abs_cat")
    args = get_args(kwargs)

    if (
        not args["get_zip"]
        and not args["get_excel"]
        and not args["get_excel_if_no_zip"]
    ):
        raise ValueError(
            "read_abs_cat: at least one of get_zip, get_excel or "
            "get_excel_if_no_zip must be True."
        )

    # convert the catalogue number to the ABS webpage URL
    cm = catalogue_map()
    if cat not in cm.index:
        raise ValueError(f"ABS catalogue number {cat} not found.")
    url = cm["URL"].astype(str)[cat]

    # get the URL links to the relevant ABS data files on that webpage
    links = get_data_links(url, **args)
    if not links:
        print(f"No data files found for catalogue number {cat}")
        return {}, DataFrame()  # return an empty dictionary, DataFrame

    # read the data files into a dictionary of DataFrames
    abs_dict: dict[str, DataFrame] = {}
    abs_meta: DataFrame = DataFrame()

    if args["single_excel_only"]:
        abs_dict, abs_meta = _add_single(
            args["single_excel_only"], abs_dict, abs_meta, links, ".xlsx", **args
        )

    elif args["single_zip_only"]:
        abs_dict, abs_meta = _add_single(
            args["single_zip_only"], abs_dict, abs_meta, links, ".zip", **args
        )

    else:
        zip_links = links.get(".zip", [])

        # .zip must come first: _add_excel skips tables already loaded
        if args["get_zip"]:
            for link in zip_links:
                abs_dict, abs_meta = _add_zip(link, abs_dict, abs_meta, **args)

        # excel files are a fallback when zips are not wanted or not listed
        fallback_to_excel = args["get_excel_if_no_zip"] and (
            not args["get_zip"] or not zip_links
        )
        if args["get_excel"] or fallback_to_excel:
            for link in links.get(".xlsx", []):
                # NOTE(review): links=links travels on inside **args to the
                # helpers _add_excel calls; _add_single does not pass it -
                # confirm whether it is needed.
                abs_dict, abs_meta = _add_excel(
                    link, abs_dict, abs_meta, links=links, **args
                )

    # reset the index of the metadata
    return abs_dict, abs_meta.reset_index()
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""read_abs_series.py
|
|
2
|
+
|
|
3
|
+
Get specific ABS data series by their ABS series identifiers."""
|
|
4
|
+
|
|
5
|
+
# --- imports
|
|
6
|
+
# system imports
|
|
7
|
+
from typing import Any, Sequence, cast
|
|
8
|
+
|
|
9
|
+
# analytic imports
|
|
10
|
+
from pandas import DataFrame, PeriodIndex, concat
|
|
11
|
+
|
|
12
|
+
# local imports
|
|
13
|
+
if __package__ is None or __package__ == "":
|
|
14
|
+
from read_abs_cat import read_abs_cat
|
|
15
|
+
from read_support import check_kwargs, get_args
|
|
16
|
+
from abs_meta_data_support import metacol
|
|
17
|
+
else:
|
|
18
|
+
from .read_abs_cat import read_abs_cat
|
|
19
|
+
from .read_support import check_kwargs, get_args
|
|
20
|
+
from .abs_meta_data_support import metacol
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# --- functions
|
|
24
|
+
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Any,
) -> tuple[DataFrame, DataFrame]:
    """Get specific ABS data series by their ABS catalogue ID and series ID

    Parameters
    ----------
    cat : str
        The ABS catalogue ID.
    series_id : str | Sequence[str]
        An ABS series ID or a sequence of ABS series IDs.
    **kwargs : Any
        Keyword arguments for the read_abs_series function,
        which are the same as the keyword arguments for the
        read_abs_cat function.

    Returns
    -------
    tuple[DataFrame, DataFrame]
        The ABS series data (one column per series ID) and the
        associated meta data (one row per series ID).

    Raises
    ------
    ValueError
        If a series ID is not in the catalogue, or its index frequency
        differs from the series already collected, and ignore_errors
        is not set.
    """

    # check for unexpected keyword arguments/get defaults
    check_kwargs(kwargs, "read_abs_series")
    args = get_args(kwargs)

    # read the ABS category data
    cat_data, cat_meta = read_abs_cat(cat, **args)

    # drop repeated series_ids in the meta data, make series_ids the index.
    # read_abs_cat is cached, so the returned meta data is shared between
    # calls: build a re-indexed copy rather than assigning to its index.
    cat_meta = cat_meta.set_index(metacol.id, drop=False)
    cat_meta = cat_meta.groupby(cat_meta.index).first()

    # get the ABS series data
    if isinstance(series_id, str):
        series_id = [series_id]
    return_data, return_meta = DataFrame(), DataFrame()
    for identifier in series_id:

        # confirm that the series ID is in the catalogue
        if identifier not in cat_meta.index:
            if args["verbose"]:
                print(f"Series ID {identifier} not found in ABS catalogue ID {cat}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Series ID {identifier} not found in catalogue {cat}")

        # confirm that the index of the series is compatible
        table = cat_meta.loc[identifier, metacol.table]
        data_series = cat_data[table][identifier]
        if (
            len(return_data) > 0
            and cast(PeriodIndex, return_data.index).freq
            != cast(PeriodIndex, data_series.index).freq
        ):
            if args["verbose"]:
                print(f"Frequency mismatch for series ID {identifier}")
            if args["ignore_errors"]:
                continue
            raise ValueError(f"Frequency mismatch for series ID {identifier}")

        # add the series data and meta data to the return values;
        # widen the index first so no periods of the new series are lost
        if len(return_data) > 0:
            return_data = return_data.reindex(
                return_data.index.union(data_series.index)
            )
        return_data[identifier] = data_series
        return_meta = concat([return_meta, cat_meta.loc[identifier]], axis=1)

    return return_data, return_meta.T
|
readabs/read_support.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Support for the read ABS data functions, all of which take the
|
|
2
|
+
same keyword arguments. This module provides a way to check for
|
|
3
|
+
unexpected keyword arguments and to provide default values for
|
|
4
|
+
those arguments that are not provided."""
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
# Every keyword argument the read functions accept, with its default value.
DEFAULTS: dict[str, Any] = dict(
    verbose=False,  # print progress and caution messages
    ignore_errors=False,  # print a message rather than raise, where supported
    get_zip=True,  # load the .zip links
    get_excel_if_no_zip=True,  # load .xlsx links when zips are not used/listed
    get_excel=False,  # load the .xlsx links
    single_zip_only="",  # if set, load only the zip file with this name
    single_excel_only="",  # if set, load only the excel file with this name
    history="",  # not read in this module - presumably used when finding links
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def check_kwargs(kwargs: dict[str, Any], name: str) -> None:
    """Print a warning, prefixed with name, for each key of kwargs
    that is not a key of DEFAULTS. Nothing is raised or removed."""
    unexpected = (key for key in kwargs if key not in DEFAULTS)
    for key in unexpected:
        print(f"{name}: Unexpected keyword argument {key}")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_args(kwargs: dict[str, Any]) -> dict[str, Any]:
    """Build the complete argument dictionary: one entry per key of
    DEFAULTS, holding the value from kwargs when supplied and the
    default otherwise. Keys of kwargs not in DEFAULTS are dropped."""
    resolved: dict[str, Any] = {}
    for key, fallback in DEFAULTS.items():
        resolved[key] = kwargs.get(key, fallback)
    return resolved
|
readabs/readabs.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Read time series data from the Australian Bureau of Statistics (ABS)."""
|
|
2
|
+
|
|
3
|
+
# --- imports
|
|
4
|
+
# system imports
|
|
5
|
+
|
|
6
|
+
# analytic imports
|
|
7
|
+
|
|
8
|
+
# local imports
|
|
9
|
+
from .abs_catalogue_map import catalogue_map
|
|
10
|
+
from .get_data_links import get_data_links
|
|
11
|
+
from .read_abs_cat import read_abs_cat
|
|
12
|
+
from .read_abs_series import read_abs_series
|
|
13
|
+
from .abs_meta_data_support import metacol
|
|
14
|
+
from .utilities import (
|
|
15
|
+
percent_change,
|
|
16
|
+
annualise_rates,
|
|
17
|
+
annualise_percentages,
|
|
18
|
+
qtly_to_monthly,
|
|
19
|
+
monthly_to_qtly,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# The names below are imported only so that they are available from this
# module; referencing them once in a throw-away tuple stops linters and
# type checkers from reporting the imports as unused.
_ = (
    # silence linters/checkers
    get_data_links,
    metacol,
    read_abs_cat,
    read_abs_series,
    percent_change,
    annualise_rates,
    annualise_percentages,
    qtly_to_monthly,
    monthly_to_qtly,
)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# --- functions
|
|
37
|
+
def print_abs_catalogue() -> None:
    """Print the ABS catalogue map as a markdown table,
    leaving out the URL column."""
    table = catalogue_map()
    keep = table.columns != "URL"
    print(table.loc[:, keep].to_markdown())
|
readabs/readabs.pyi
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""Stubs for readabs."""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Sequence
|
|
4
|
+
from pandas import DataFrame, Series
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# TO DO: metacol
|
|
8
|
+
|
|
9
|
+
# Returns the catalogue map as a DataFrame.
def catalogue_map() -> DataFrame: ...

# Prints the catalogue; returns nothing.
def print_abs_catalogue() -> None: ...

# Returns the data-file links found at url.
# NOTE(review): presumably keyed by file extension - confirm.
def get_data_links(
    url: str, inspect_file_name: str = "", **kwargs: Any,
) -> dict[str, list[str]]: ...

# Returns (tables keyed by table name, meta data).
def read_abs_cat(
    cat: str, **kwargs: Any,
) -> tuple[dict[str, DataFrame], DataFrame]: ...

# Returns (series data, meta data) for the requested series IDs.
def read_abs_series(
    cat: str,
    series_id: str | Sequence[str],
    **kwargs: Any,
) -> tuple[DataFrame, DataFrame]: ...
|
|
25
|
+
|
|
26
|
+
|
readabs/utilities.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""utilities.py
|
|
2
|
+
|
|
3
|
+
This module provides a small number of utilities for
|
|
4
|
+
working with ABS timeseries data."""
|
|
5
|
+
|
|
6
|
+
# --- imports
|
|
7
|
+
from typing import TypeVar, Optional, cast
|
|
8
|
+
from pandas import Series, DataFrame, PeriodIndex, DatetimeIndex
|
|
9
|
+
from numpy import nan
|
|
10
|
+
|
|
11
|
+
# - define a useful typevar for working with both Series and DataFrames
|
|
12
|
+
DataT = TypeVar("DataT", Series, DataFrame)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# --- functions
|
|
16
|
+
def percent_change(data: DataT, m_periods: int) -> DataT:
    """Calculate the percentage change in data over m_periods periods.

    Each value is compared with the value m_periods rows earlier; the
    leading rows, which have no earlier value, come out as NA."""

    earlier = data.shift(m_periods)
    ratio = data / earlier
    return (ratio - 1) * 100
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def annualise_rates(data: DataT, periods_per_year: int | float = 12) -> DataT:
    """Compound a per-period growth rate over periods_per_year periods.

    Note: the input is a rate (eg. 0.01), but the value returned
    is a percentage (eg. 12.68)!"""

    growth_factor = (1 + data) ** periods_per_year
    return (growth_factor - 1) * 100
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def annualise_percentages(data: DataT, periods_per_year: int | float = 12) -> DataT:
    """Annualise a per-period growth rate that is expressed as a
    percentage. The result is also a percentage."""

    return annualise_rates(data / 100.0, periods_per_year)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def qtly_to_monthly(
|
|
37
|
+
data: DataT,
|
|
38
|
+
interpolate: bool = True,
|
|
39
|
+
limit: Optional[int] = 2, # only used if interpolate is True
|
|
40
|
+
dropna: bool = True,
|
|
41
|
+
) -> DataT:
|
|
42
|
+
"""Convert a pandas timeseries with a Quarterly PeriodIndex to an
|
|
43
|
+
timeseries with a Monthly PeriodIndex.
|
|
44
|
+
|
|
45
|
+
Arguments:
|
|
46
|
+
==========
|
|
47
|
+
data - either a pandas Series or DataFrame - assumes the index is unique.
|
|
48
|
+
interpolate - whether to interpolate the missing monthly data.
|
|
49
|
+
dropna - whether to drop NA data
|
|
50
|
+
|
|
51
|
+
Notes:
|
|
52
|
+
======
|
|
53
|
+
Necessitated by Pandas 2.2, which removed .resample()
|
|
54
|
+
from pandas objects with a PeriodIndex."""
|
|
55
|
+
|
|
56
|
+
# sanity checks
|
|
57
|
+
assert isinstance(data.index, PeriodIndex)
|
|
58
|
+
assert data.index.freqstr[0] == "Q"
|
|
59
|
+
assert data.index.is_unique
|
|
60
|
+
assert data.index.is_monotonic_increasing
|
|
61
|
+
|
|
62
|
+
def set_axis_monthly_periods(x: DataT) -> DataT:
|
|
63
|
+
"""Convert a DatetimeIndex to a Monthly PeriodIndex."""
|
|
64
|
+
|
|
65
|
+
return x.set_axis(
|
|
66
|
+
labels=cast(DatetimeIndex, x.index).to_period(freq="M"), axis="index"
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# do the heavy lifting
|
|
70
|
+
data = (
|
|
71
|
+
data.set_axis(
|
|
72
|
+
labels=data.index.to_timestamp(how="end"), axis="index", copy=True
|
|
73
|
+
)
|
|
74
|
+
.resample(rule="ME") # adds in every missing month
|
|
75
|
+
.first(min_count=1) # generates nans for new months
|
|
76
|
+
# assumes only one value per quarter (ie. unique index)
|
|
77
|
+
.pipe(set_axis_monthly_periods)
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
if interpolate:
|
|
81
|
+
data = data.interpolate(limit_area="inside", limit=limit)
|
|
82
|
+
if dropna:
|
|
83
|
+
data = data.dropna()
|
|
84
|
+
|
|
85
|
+
return data
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def monthly_to_qtly(data: DataT, q_ending="DEC", f: str = "mean") -> DataT:
    """Convert monthly data to quarterly data by aggregating the
    three months in each quarter.

    Arguments:
    ==========
    data - a pandas timeseries with a monthly index.
    q_ending - the three-letter month in which the final quarter of
               the year ends (used to build the "Q-XXX" frequency).
    f - the name of the aggregation applied to each quarter: "mean"
        by default; change f to "sum" for a quarterly sum.

    Notes:
    ======
    Quarters with less than three months data are ignored, and NA
    items are dropped.
    NOTE(review): the row-wise selection below reads single "count"
    and f columns, which matches Series input; DataFrame input does
    not look supported - confirm."""

    quarters = PeriodIndex(data.index, freq=f"Q-{q_ending}")
    return (
        data.groupby(quarters)
        .agg([f, "count"])
        # pick the column for the requested aggregate: this was hard-coded
        # to "mean", which raised a KeyError for any other value of f
        .apply(lambda x: x[f] if x["count"] == 3 else nan, axis=1)
        .dropna()
    )
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Copyright 2024 Bryan Palmer (Canberra Australia)
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
8
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: readabs
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: Get ABS timeseries data in pandas DataFrames
|
|
5
|
+
Author-email: Bryan Palmer <palmer.bryan@gmail.com>
|
|
6
|
+
Maintainer-email: Bryan Palmer <palmer.bryan@gmail.com>
|
|
7
|
+
Project-URL: Homepage, https://github.com/bpalmer4/readabs
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.9
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
readabs/__init__.py,sha256=oRPBeNE3YqW2BPHpluXF39OfwCFGrVws2dMTeXyhnhM,68
|
|
2
|
+
readabs/abs_catalogue_map.py,sha256=XrTc844NEV0g6CaxadS0HsCJweOGUQwpRMEdi2I5iBY,8922
|
|
3
|
+
readabs/abs_meta_data_support.py,sha256=PgVOWIGb3_axFwYDnCIZ0IBJ5WUTtBIfNwkdjFfQ9zs,705
|
|
4
|
+
readabs/download_cache.py,sha256=h_ElUmdJJuBm5DAB9KefShOWxCvMHskHDwVlV6L5IiE,6960
|
|
5
|
+
readabs/generate_catalogue_map.py,sha256=5Air2d4fvZVezJt9fzUQc7WLX1aHsv2y4Yn0SJtXbRk,2011
|
|
6
|
+
readabs/get_data_links.py,sha256=wLL2p8cZMUVM-PYCoh5XKO2-lt3J9QwEGL17CFaYDq4,4116
|
|
7
|
+
readabs/read_abs_cat.py,sha256=6Sb1meL_NlcjXRB3J7VbjbtDvIlRP5GzKzmKD5ZpUGI,13464
|
|
8
|
+
readabs/read_abs_series.py,sha256=R0ogok9Wm4fgL59ZYcI2TObPLEQjAH5QxRZ2qoUjhmc,3175
|
|
9
|
+
readabs/read_support.py,sha256=AQAvOQ-FlpQBRHrLQHx44OiUDgN1twRPrwVHcekh6Bk,1007
|
|
10
|
+
readabs/readabs.py,sha256=D1zJXYbrc1mipDCp9rKmPjuGx91MLIKlRMHKH-pq0ig,909
|
|
11
|
+
readabs/readabs.pyi,sha256=N6psbIWpV5obaltk0BbFl2Eip0LUaPN0hCFzhr5wQXs,524
|
|
12
|
+
readabs/utilities.py,sha256=-L2kSe-141l-8s5fKj-bSPxs7o5VKFDU5JKaqU5rGDU,3124
|
|
13
|
+
readabs-0.0.2.dist-info/LICENSE,sha256=YMg097MHV-y9Yg1sZK7T9nueRGswD4cEcCRtst9FGxE,1082
|
|
14
|
+
readabs-0.0.2.dist-info/METADATA,sha256=cLZ2HVbv1q19NV_89nhnftjpdmuo8BDrtcir7XaNkiA,464
|
|
15
|
+
readabs-0.0.2.dist-info/WHEEL,sha256=mguMlWGMX-VHnMpKOjjQidIo1ssRlCFu4a4mBpz1s2M,91
|
|
16
|
+
readabs-0.0.2.dist-info/top_level.txt,sha256=lA7BwCI3L6fvTyx0HcMTcS3FhgXkCiEL3sXUQ2WtLbE,8
|
|
17
|
+
readabs-0.0.2.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
readabs
|