readabs 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- readabs-0.0.1/LICENSE +8 -0
- readabs-0.0.1/PKG-INFO +148 -0
- readabs-0.0.1/README.md +135 -0
- readabs-0.0.1/readabs.egg-info/PKG-INFO +148 -0
- readabs-0.0.1/readabs.egg-info/SOURCES.txt +26 -0
- readabs-0.0.1/readabs.egg-info/dependency_links.txt +1 -0
- readabs-0.0.1/readabs.egg-info/top_level.txt +1 -0
- readabs-0.0.1/setup.cfg +4 -0
- readabs-0.0.1/setup.py +22 -0
- readabs-0.0.1/src/__init__.py +1 -0
- readabs-0.0.1/src/abs_catalogue_map.py +57 -0
- readabs-0.0.1/src/abs_meta_data_support.py +40 -0
- readabs-0.0.1/src/download_cache.py +218 -0
- readabs-0.0.1/src/generate_catalogue_map.py +55 -0
- readabs-0.0.1/src/get_data_links.py +116 -0
- readabs-0.0.1/src/read_abs_cat.py +383 -0
- readabs-0.0.1/src/read_abs_series.py +90 -0
- readabs-0.0.1/src/read_support.py +31 -0
- readabs-0.0.1/src/readabs.py +40 -0
- readabs-0.0.1/src/utilities.py +98 -0
- readabs-0.0.1/tests/test_get_data_links_1.py +22 -0
- readabs-0.0.1/tests/test_get_data_links_2.py +27 -0
- readabs-0.0.1/tests/test_print_abs_catalogue.py +14 -0
- readabs-0.0.1/tests/test_read_abs_cat_1.py +39 -0
- readabs-0.0.1/tests/test_read_abs_cat_2.py +34 -0
- readabs-0.0.1/tests/test_read_abs_cat_3.py +25 -0
- readabs-0.0.1/tests/test_read_abs_series_1.py +28 -0
- readabs-0.0.1/tests/test_read_abs_series_2.py +42 -0
readabs-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Copyright 2024 Bryan Palmer (Canberra Australia)
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
8
|
+
|
readabs-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: readabs
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Read ABS time series data
|
|
5
|
+
Home-page: https://github.com/bpalmer4/readabs
|
|
6
|
+
Author: Bryan Palmer
|
|
7
|
+
Author-email: palmer.bryan@gmail.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
|
|
14
|
+
# readabs
|
|
15
|
+
|
|
16
|
+
readabs is an open-source python package to download and work with
|
|
17
|
+
timeseries data from the Australian Bureau of Statistics (ABS),
|
|
18
|
+
using pandas DataFrames.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Usage:
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Standard import arrangements
|
|
28
|
+
```python
|
|
29
|
+
import readabs as ra
|
|
30
|
+
from readabs import metacol # short column names for meta data DataFrames
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Print a list of available catalogue identifiers from the ABS. You may need
|
|
35
|
+
this to get the catalogue identifier/number for the data you want to download.
|
|
36
|
+
```python
|
|
37
|
+
ra.print_abs_catalogue()
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
Get the ABS catalogue map as a pandas DataFrame.
|
|
42
|
+
```python
|
|
43
|
+
cat_map = ra.catalogue_map()
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
Get all of the data tables associated with a particular catalogue identifier.
|
|
48
|
+
The catalogue identifier is a string with the standard ABS identifier. For example,
|
|
49
|
+
the catalogue identifier for the monthly labour force survey is "6202.0".
|
|
50
|
+
Returns a tuple. The first element of the tuple is a dictionary of DataFrames.
|
|
51
|
+
The dictionary is indexed by table names (which can be found in the meta data).
|
|
52
|
+
The second element is a DataFrame for the meta data. Note: with some ABS
|
|
53
|
+
catalogues, a specific series may be repeated in more than one table.
|
|
54
|
+
```python
|
|
55
|
+
abs_dict, meta = ra.read_abs_cat(cat="id")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
Get two DataFrames in a tuple, the first containing the actual data, and the
|
|
60
|
+
second containing the meta data for one or more specified ABS series identifiers.
|
|
61
|
+
```python
|
|
62
|
+
data, meta = ra.read_abs_series(cat="id", series="id1")
|
|
63
|
+
data, meta = ra.read_abs_series(cat="id", series=("id1", "id2", ...))
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Additional utility functions
|
|
67
|
+
While not necessary for working with ABS data, the package includes some useful
|
|
68
|
+
functions for manipulating ABS data:
|
|
69
|
+
|
|
70
|
+
Calculate percentage change over n_periods.
|
|
71
|
+
```python
|
|
72
|
+
change_data = percentage_change(data, n_periods)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Annualise monthly or quarterly percentage rates.
|
|
76
|
+
```python
|
|
77
|
+
annualised = annualise_percentages(data, periods_per_year)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Convert a pandas timeseries with a Quarterly PeriodIndex to a
|
|
81
|
+
timeseries with a Monthly PeriodIndex.
|
|
82
|
+
```python
|
|
83
|
+
monthly_data = qtly_to_monthly(
|
|
84
|
+
quarterly_data,
|
|
85
|
+
interpolate, # default is True
|
|
86
|
+
limit, # default is 2, only used if interpolate is True
|
|
87
|
+
dropna, # default is True,
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Convert monthly data to quarterly data by taking the mean or sum of
|
|
92
|
+
the three months in each quarter. Ignore quarters with less than
|
|
93
|
+
three months data. Drop NA items.
|
|
94
|
+
```python
|
|
95
|
+
quarterly_data = monthly_to_qtly(
|
|
96
|
+
data,
|
|
97
|
+
q_ending, # default is "DEC"
|
|
98
|
+
f, # the function to apply ("sum" or "mean"),
|
|
99
|
+
# the default is "mean"
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Notes:
|
|
106
|
+
|
|
107
|
+
* This package largely does not manipulate the ABS data. The data is returned as it
|
|
108
|
+
was downloaded. This includes any NA-only (ie. empty) columns where they occur.
|
|
109
|
+
* This package only downloads timeseries data tables. Other data tables (for example,
|
|
110
|
+
pivot tables) are ignored.
|
|
111
|
+
* The index for all of the downloaded tables should be a pandas PeriodIndex, with an
|
|
112
|
+
appropriately selected frequency.
|
|
113
|
+
* In the process of data retrieval, the ABS data tables are downloaded and stored in a
|
|
114
|
+
local cache. By default the cache directory is "./.readabs_cache/".
|
|
115
|
+
You can change the default directory name by setting the environment variable
|
|
116
|
+
"READABS_CACHE_DIR" with the name of the preferred directory.
|
|
117
|
+
* the "read" functions have a number of standard keyword arguments (with default
|
|
118
|
+
settings as follows):
|
|
119
|
+
- `history=""` - provide a month-year string to extract historical ABS data.
|
|
120
|
+
For example, you can set history="dec-2023" to get the ABS data for a
|
|
121
|
+
catalogue identifier that was originally published in respect of Q4 of 2023.
|
|
122
|
+
Note: not all ABS data sources are structured so that this technique works
|
|
123
|
+
in every case; but most are.
|
|
124
|
+
- `verbose=False` - Do not print detailed information on the data retrieval process.
|
|
125
|
+
Setting this to true may help diagnose why something might be going wrong with the
|
|
126
|
+
data retrieval process.
|
|
127
|
+
- `ignore_errors=False` - Cease downloading when an error is encountered. However,
|
|
128
|
+
sometimes the ABS website has malformed links, and changing this setting is
|
|
129
|
+
necessitated. (Note: if you drop a message to the ABS, they will usually fix
|
|
130
|
+
broken links within a business day).
|
|
131
|
+
- `get_zip=True` - Download .zip files.
|
|
132
|
+
- `get_excel_if_no_zip=True` - Only try to download .xlsx files if there are no
|
|
133
|
+
zip files available to be downloaded.
|
|
134
|
+
- `get_excel=False` - Do not automatically download .xlsx files.
|
|
135
|
+
Note at least one of get_zip, get_excel_if_no_zip, or get_excel must be true.
|
|
136
|
+
For most ABS catalogue items, it is sufficient to just download the one zip
|
|
137
|
+
file. But note, some catalogue items do not have a zip file. Others have
|
|
138
|
+
quite a number of zip files.
|
|
139
|
+
- `single_excel_only=""` - if this argument is set to a table name (without the
|
|
140
|
+
.xlsx extension), only that excel file will be downloaded. If set, and only a
|
|
141
|
+
limited subset of available data is needed, this can speed up download
|
|
142
|
+
times significantly. Note: overrides get_zip, get_excel_if_no_zip, get_excel and
|
|
143
|
+
single_zip_only.
|
|
144
|
+
- `single_zip_only=""` - if this argument is set to a zip file name (without
|
|
145
|
+
the .zip extension), only that zip file will be downloaded. If set, and only a
|
|
146
|
+
limited subset of available data is needed, this can speed up download times
|
|
147
|
+
significantly. Note: overrides get_zip, get_excel_if_no_zip, and get_excel.
|
|
148
|
+
|
readabs-0.0.1/README.md
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
# readabs
|
|
2
|
+
|
|
3
|
+
readabs is an open-source python package to download and work with
|
|
4
|
+
timeseries data from the Australian Bureau of Statistics (ABS),
|
|
5
|
+
using pandas DataFrames.
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## Usage:
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
Standard import arrangements
|
|
15
|
+
```python
|
|
16
|
+
import readabs as ra
|
|
17
|
+
from readabs import metacol # short column names for meta data DataFrames
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
Print a list of available catalogue identifiers from the ABS. You may need
|
|
22
|
+
this to get the catalogue identifier/number for the data you want to download.
|
|
23
|
+
```python
|
|
24
|
+
ra.print_abs_catalogue()
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
Get the ABS catalogue map as a pandas DataFrame.
|
|
29
|
+
```python
|
|
30
|
+
cat_map = ra.catalogue_map()
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Get all of the data tables associated with a particular catalogue identifier.
|
|
35
|
+
The catalogue identifier is a string with the standard ABS identifier. For example,
|
|
36
|
+
the catalogue identifier for the monthly labour force survey is "6202.0".
|
|
37
|
+
Returns a tuple. The first element of the tuple is a dictionary of DataFrames.
|
|
38
|
+
The dictionary is indexed by table names (which can be found in the meta data).
|
|
39
|
+
The second element is a DataFrame for the meta data. Note: with some ABS
|
|
40
|
+
catalogues, a specific series may be repeated in more than one table.
|
|
41
|
+
```python
|
|
42
|
+
abs_dict, meta = ra.read_abs_cat(cat="id")
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
Get two DataFrames in a tuple, the first containing the actual data, and the
|
|
47
|
+
second containing the meta data for one or more specified ABS series identifiers.
|
|
48
|
+
```python
|
|
49
|
+
data, meta = ra.read_abs_series(cat="id", series="id1")
|
|
50
|
+
data, meta = ra.read_abs_series(cat="id", series=("id1", "id2", ...))
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Additional utility functions
|
|
54
|
+
While not necessary for working with ABS data, the package includes some useful
|
|
55
|
+
functions for manipulating ABS data:
|
|
56
|
+
|
|
57
|
+
Calculate percentage change over n_periods.
|
|
58
|
+
```python
|
|
59
|
+
change_data = percentage_change(data, n_periods)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Annualise monthly or quarterly percentage rates.
|
|
63
|
+
```python
|
|
64
|
+
annualised = annualise_percentages(data, periods_per_year)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Convert a pandas timeseries with a Quarterly PeriodIndex to a
|
|
68
|
+
timeseries with a Monthly PeriodIndex.
|
|
69
|
+
```python
|
|
70
|
+
monthly_data = qtly_to_monthly(
|
|
71
|
+
quarterly_data,
|
|
72
|
+
interpolate, # default is True
|
|
73
|
+
limit, # default is 2, only used if interpolate is True
|
|
74
|
+
dropna, # default is True,
|
|
75
|
+
)
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Convert monthly data to quarterly data by taking the mean or sum of
|
|
79
|
+
the three months in each quarter. Ignore quarters with less than
|
|
80
|
+
three months data. Drop NA items.
|
|
81
|
+
```python
|
|
82
|
+
quarterly_data = monthly_to_qtly(
|
|
83
|
+
data,
|
|
84
|
+
q_ending, # default is "DEC"
|
|
85
|
+
f, # the function to apply ("sum" or "mean"),
|
|
86
|
+
# the default is "mean"
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Notes:
|
|
93
|
+
|
|
94
|
+
* This package largely does not manipulate the ABS data. The data is returned as it
|
|
95
|
+
was downloaded. This includes any NA-only (ie. empty) columns where they occur.
|
|
96
|
+
* This package only downloads timeseries data tables. Other data tables (for example,
|
|
97
|
+
pivot tables) are ignored.
|
|
98
|
+
* The index for all of the downloaded tables should be a pandas PeriodIndex, with an
|
|
99
|
+
appropriately selected frequency.
|
|
100
|
+
* In the process of data retrieval, the ABS data tables are downloaded and stored in a
|
|
101
|
+
local cache. By default the cache directory is "./.readabs_cache/".
|
|
102
|
+
You can change the default directory name by setting the environment variable
|
|
103
|
+
"READABS_CACHE_DIR" with the name of the preferred directory.
|
|
104
|
+
* the "read" functions have a number of standard keyword arguments (with default
|
|
105
|
+
settings as follows):
|
|
106
|
+
- `history=""` - provide a month-year string to extract historical ABS data.
|
|
107
|
+
For example, you can set history="dec-2023" to get the ABS data for a
|
|
108
|
+
catalogue identifier that was originally published in respect of Q4 of 2023.
|
|
109
|
+
Note: not all ABS data sources are structured so that this technique works
|
|
110
|
+
in every case; but most are.
|
|
111
|
+
- `verbose=False` - Do not print detailed information on the data retrieval process.
|
|
112
|
+
Setting this to true may help diagnose why something might be going wrong with the
|
|
113
|
+
data retrieval process.
|
|
114
|
+
- `ignore_errors=False` - Cease downloading when an error is encountered. However,
|
|
115
|
+
sometimes the ABS website has malformed links, and changing this setting is
|
|
116
|
+
necessitated. (Note: if you drop a message to the ABS, they will usually fix
|
|
117
|
+
broken links within a business day).
|
|
118
|
+
- `get_zip=True` - Download .zip files.
|
|
119
|
+
- `get_excel_if_no_zip=True` - Only try to download .xlsx files if there are no
|
|
120
|
+
zip files available to be downloaded.
|
|
121
|
+
- `get_excel=False` - Do not automatically download .xlsx files.
|
|
122
|
+
Note at least one of get_zip, get_excel_if_no_zip, or get_excel must be true.
|
|
123
|
+
For most ABS catalogue items, it is sufficient to just download the one zip
|
|
124
|
+
file. But note, some catalogue items do not have a zip file. Others have
|
|
125
|
+
quite a number of zip files.
|
|
126
|
+
- `single_excel_only=""` - if this argument is set to a table name (without the
|
|
127
|
+
.xlsx extension), only that excel file will be downloaded. If set, and only a
|
|
128
|
+
limited subset of available data is needed, this can speed up download
|
|
129
|
+
times significantly. Note: overrides get_zip, get_excel_if_no_zip, get_excel and
|
|
130
|
+
single_zip_only.
|
|
131
|
+
- `single_zip_only=""` - if this argument is set to a zip file name (without
|
|
132
|
+
the .zip extension), only that zip file will be downloaded. If set, and only a
|
|
133
|
+
limited subset of available data is needed, this can speed up download times
|
|
134
|
+
significantly. Note: overrides get_zip, get_excel_if_no_zip, and get_excel.
|
|
135
|
+
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: readabs
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Read ABS time series data
|
|
5
|
+
Home-page: https://github.com/bpalmer4/readabs
|
|
6
|
+
Author: Bryan Palmer
|
|
7
|
+
Author-email: palmer.bryan@gmail.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Description-Content-Type: text/markdown
|
|
12
|
+
License-File: LICENSE
|
|
13
|
+
|
|
14
|
+
# readabs
|
|
15
|
+
|
|
16
|
+
readabs is an open-source python package to download and work with
|
|
17
|
+
timeseries data from the Australian Bureau of Statistics (ABS),
|
|
18
|
+
using pandas DataFrames.
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Usage:
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
Standard import arrangements
|
|
28
|
+
```python
|
|
29
|
+
import readabs as ra
|
|
30
|
+
from readabs import metacol # short column names for meta data DataFrames
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
Print a list of available catalogue identifiers from the ABS. You may need
|
|
35
|
+
this to get the catalogue identifier/number for the data you want to download.
|
|
36
|
+
```python
|
|
37
|
+
ra.print_abs_catalogue()
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
Get the ABS catalogue map as a pandas DataFrame.
|
|
42
|
+
```python
|
|
43
|
+
cat_map = ra.catalogue_map()
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
Get all of the data tables associated with a particular catalogue identifier.
|
|
48
|
+
The catalogue identifier is a string with the standard ABS identifier. For example,
|
|
49
|
+
the catalogue identifier for the monthly labour force survey is "6202.0".
|
|
50
|
+
Returns a tuple. The first element of the tuple is a dictionary of DataFrames.
|
|
51
|
+
The dictionary is indexed by table names (which can be found in the meta data).
|
|
52
|
+
The second element is a DataFrame for the meta data. Note: with some ABS
|
|
53
|
+
catalogues, a specific series may be repeated in more than one table.
|
|
54
|
+
```python
|
|
55
|
+
abs_dict, meta = ra.read_abs_cat(cat="id")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
Get two DataFrames in a tuple, the first containing the actual data, and the
|
|
60
|
+
second containing the meta data for one or more specified ABS series identifiers.
|
|
61
|
+
```python
|
|
62
|
+
data, meta = ra.read_abs_series(cat="id", series="id1")
|
|
63
|
+
data, meta = ra.read_abs_series(cat="id", series=("id1", "id2", ...))
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### Additional utility functions
|
|
67
|
+
While not necessary for working with ABS data, the package includes some useful
|
|
68
|
+
functions for manipulating ABS data:
|
|
69
|
+
|
|
70
|
+
Calculate percentage change over n_periods.
|
|
71
|
+
```python
|
|
72
|
+
change_data = percentage_change(data, n_periods)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Annualise monthly or quarterly percentage rates.
|
|
76
|
+
```python
|
|
77
|
+
annualised = annualise_percentages(data, periods_per_year)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Convert a pandas timeseries with a Quarterly PeriodIndex to a
|
|
81
|
+
timeseries with a Monthly PeriodIndex.
|
|
82
|
+
```python
|
|
83
|
+
monthly_data = qtly_to_monthly(
|
|
84
|
+
quarterly_data,
|
|
85
|
+
interpolate, # default is True
|
|
86
|
+
limit, # default is 2, only used if interpolate is True
|
|
87
|
+
dropna, # default is True,
|
|
88
|
+
)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
Convert monthly data to quarterly data by taking the mean or sum of
|
|
92
|
+
the three months in each quarter. Ignore quarters with less than
|
|
93
|
+
three months data. Drop NA items.
|
|
94
|
+
```python
|
|
95
|
+
quarterly_data = monthly_to_qtly(
|
|
96
|
+
data,
|
|
97
|
+
q_ending, # default is "DEC"
|
|
98
|
+
f, # the function to apply ("sum" or "mean"),
|
|
99
|
+
# the default is "mean"
|
|
100
|
+
)
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## Notes:
|
|
106
|
+
|
|
107
|
+
* This package largely does not manipulate the ABS data. The data is returned as it
|
|
108
|
+
was downloaded. This includes any NA-only (ie. empty) columns where they occur.
|
|
109
|
+
* This package only downloads timeseries data tables. Other data tables (for example,
|
|
110
|
+
pivot tables) are ignored.
|
|
111
|
+
* The index for all of the downloaded tables should be a pandas PeriodIndex, with an
|
|
112
|
+
appropriately selected frequency.
|
|
113
|
+
* In the process of data retrieval, the ABS data tables are downloaded and stored in a
|
|
114
|
+
local cache. By default the cache directory is "./.readabs_cache/".
|
|
115
|
+
You can change the default directory name by setting the environment variable
|
|
116
|
+
"READABS_CACHE_DIR" with the name of the preferred directory.
|
|
117
|
+
* the "read" functions have a number of standard keyword arguments (with default
|
|
118
|
+
settings as follows):
|
|
119
|
+
- `history=""` - provide a month-year string to extract historical ABS data.
|
|
120
|
+
For example, you can set history="dec-2023" to get the ABS data for a
|
|
121
|
+
catalogue identifier that was originally published in respect of Q4 of 2023.
|
|
122
|
+
Note: not all ABS data sources are structured so that this technique works
|
|
123
|
+
in every case; but most are.
|
|
124
|
+
- `verbose=False` - Do not print detailed information on the data retrieval process.
|
|
125
|
+
Setting this to true may help diagnose why something might be going wrong with the
|
|
126
|
+
data retrieval process.
|
|
127
|
+
- `ignore_errors=False` - Cease downloading when an error is encountered. However,
|
|
128
|
+
sometimes the ABS website has malformed links, and changing this setting is
|
|
129
|
+
necessitated. (Note: if you drop a message to the ABS, they will usually fix
|
|
130
|
+
broken links within a business day).
|
|
131
|
+
- `get_zip=True` - Download .zip files.
|
|
132
|
+
- `get_excel_if_no_zip=True` - Only try to download .xlsx files if there are no
|
|
133
|
+
zip files available to be downloaded.
|
|
134
|
+
- `get_excel=False` - Do not automatically download .xlsx files.
|
|
135
|
+
Note at least one of get_zip, get_excel_if_no_zip, or get_excel must be true.
|
|
136
|
+
For most ABS catalogue items, it is sufficient to just download the one zip
|
|
137
|
+
file. But note, some catalogue items do not have a zip file. Others have
|
|
138
|
+
quite a number of zip files.
|
|
139
|
+
- `single_excel_only=""` - if this argument is set to a table name (without the
|
|
140
|
+
.xlsx extension), only that excel file will be downloaded. If set, and only a
|
|
141
|
+
limited subset of available data is needed, this can speed up download
|
|
142
|
+
times significantly. Note: overrides get_zip, get_excel_if_no_zip, get_excel and
|
|
143
|
+
single_zip_only.
|
|
144
|
+
- `single_zip_only=""` - if this argument is set to a zip file name (without
|
|
145
|
+
the .zip extension), only that zip file will be downloaded. If set, and only a
|
|
146
|
+
limited subset of available data is needed, this can speed up download times
|
|
147
|
+
significantly. Note: overrides get_zip, get_excel_if_no_zip, and get_excel.
|
|
148
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
setup.py
|
|
4
|
+
readabs.egg-info/PKG-INFO
|
|
5
|
+
readabs.egg-info/SOURCES.txt
|
|
6
|
+
readabs.egg-info/dependency_links.txt
|
|
7
|
+
readabs.egg-info/top_level.txt
|
|
8
|
+
src/__init__.py
|
|
9
|
+
src/abs_catalogue_map.py
|
|
10
|
+
src/abs_meta_data_support.py
|
|
11
|
+
src/download_cache.py
|
|
12
|
+
src/generate_catalogue_map.py
|
|
13
|
+
src/get_data_links.py
|
|
14
|
+
src/read_abs_cat.py
|
|
15
|
+
src/read_abs_series.py
|
|
16
|
+
src/read_support.py
|
|
17
|
+
src/readabs.py
|
|
18
|
+
src/utilities.py
|
|
19
|
+
tests/test_get_data_links_1.py
|
|
20
|
+
tests/test_get_data_links_2.py
|
|
21
|
+
tests/test_print_abs_catalogue.py
|
|
22
|
+
tests/test_read_abs_cat_1.py
|
|
23
|
+
tests/test_read_abs_cat_2.py
|
|
24
|
+
tests/test_read_abs_cat_3.py
|
|
25
|
+
tests/test_read_abs_series_1.py
|
|
26
|
+
tests/test_read_abs_series_2.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
src
|
readabs-0.0.1/setup.cfg
ADDED
readabs-0.0.1/setup.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import setuptools
|
|
2
|
+
|
|
3
|
+
# Use the README as the long description shown on the package index page.
# The explicit encoding keeps the build independent of the platform default.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# NOTE(review): find_packages() discovers the top-level ``src`` package
# (top_level.txt lists ``src``), so the code appears to install as ``src``
# rather than ``readabs`` -- confirm this is intended.
setuptools.setup(
    name="readabs",
    version="0.0.1",
    author="Bryan Palmer",
    author_email="palmer.bryan@gmail.com",
    description="Read ABS time series data",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/bpalmer4/readabs",
    packages=setuptools.find_packages(),
    # A list rather than a tuple: distutils/setuptools warn when
    # ``classifiers`` is not a list.
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
)
|
|
22
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package name, exposed as a module-level attribute.
name = "readabs"
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Catalogue map for ABS data."""
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from io import StringIO
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def catalogue_map() -> pd.DataFrame:
    """Build the ABS catalogue map as a pandas DataFrame.

    The table is embedded below as CSV text. Its first column
    ("Catalogue ID") becomes the index of the returned frame; the
    remaining columns are Theme, Parent Topic, Topic, URL and Status.
    """

    catalogue_csv = """Catalogue ID,Theme,Parent Topic,Topic,URL,Status
1364.0.15.003,Economy,National Accounts,Modellers Database,https://www.abs.gov.au/statistics/economy/national-accounts/modellers-database/latest-release,
3101.0,People,Population,National State And Territory Population,https://www.abs.gov.au/statistics/people/population/national-state-and-territory-population/latest-release,
3222.0,People,Population,Population Projections Australia,https://www.abs.gov.au/statistics/people/population/population-projections-australia/latest-release,
3401.0,Industry,Tourism And Transport,Overseas Arrivals And Departures Australia,https://www.abs.gov.au/statistics/industry/tourism-and-transport/overseas-arrivals-and-departures-australia/latest-release,
5204.0,Economy,National Accounts,Australian System National Accounts,https://www.abs.gov.au/statistics/economy/national-accounts/australian-system-national-accounts/latest-release,
5206.0,Economy,National Accounts,Australian National Accounts National Income Expenditure And Product,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-national-income-expenditure-and-product/latest-release,
5220.0,Economy,National Accounts,Australian National Accounts State Accounts,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-state-accounts/latest-release,
5232.0,Economy,National Accounts,Australian National Accounts Finance And Wealth,https://www.abs.gov.au/statistics/economy/national-accounts/australian-national-accounts-finance-and-wealth/latest-release,
5232.0.55.001,Economy,Finance,Assets And Liabilities Australian Securitisers,https://www.abs.gov.au/statistics/economy/finance/assets-and-liabilities-australian-securitisers/latest-release,
5302.0,Economy,International Trade,Balance Payments And International Investment Position Australia,https://www.abs.gov.au/statistics/economy/international-trade/balance-payments-and-international-investment-position-australia/latest-release,
5368.0,Economy,International Trade,International Trade Goods And Services Australia,https://www.abs.gov.au/statistics/economy/international-trade/international-trade-goods-and-services-australia/latest-release,
5368.0.55.024,Economy,International Trade,International Merchandise Trade Preliminary Australia,https://www.abs.gov.au/statistics/economy/international-trade/international-merchandise-trade-preliminary-australia/latest-release,
5601.0,Economy,Finance,Lending Indicators,https://www.abs.gov.au/statistics/economy/finance/lending-indicators/latest-release,
5625.0,Economy,Business Indicators,Private New Capital Expenditure And Expected Expenditure Australia,https://www.abs.gov.au/statistics/economy/business-indicators/private-new-capital-expenditure-and-expected-expenditure-australia/latest-release,
5655.0,Economy,Finance,Managed Funds Australia,https://www.abs.gov.au/statistics/economy/finance/managed-funds-australia/latest-release,
5676.0,Economy,Business Indicators,Business Indicators Australia,https://www.abs.gov.au/statistics/economy/business-indicators/business-indicators-australia/latest-release,
5681.0,Economy,Business Indicators,Monthly Business Turnover Indicator,https://www.abs.gov.au/statistics/economy/business-indicators/monthly-business-turnover-indicator/latest-release,
5682.0,Economy,Finance,Monthly Household Spending Indicator,https://www.abs.gov.au/statistics/economy/finance/monthly-household-spending-indicator/latest-release,
6202.0,Labour,Employment And Unemployment,Labour Force Australia,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia/latest-release,
6150.0.55.003,Labour,Labour Accounts,Labour Account Australia,https://www.abs.gov.au/statistics/labour/labour-accounts/labour-account-australia/latest-release,
6248.0.55.002,Labour,Employment And Unemployment,Public Sector Employment And Earnings,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/public-sector-employment-and-earnings/latest-release,
6291.0.55.001,Labour,Employment And Unemployment,Labour Force Australia Detailed,https://www.abs.gov.au/statistics/labour/employment-and-unemployment/labour-force-australia-detailed/latest-release,
6302.0,Labour,Earnings And Working Conditions,Average Weekly Earnings Australia,https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/average-weekly-earnings-australia/latest-release,
6321.0.55.001,Labour,Earnings And Working Conditions,Industrial Disputes Australia,https://www.abs.gov.au/statistics/labour/earnings-and-working-conditions/industrial-disputes-australia/latest-release,
6345.0,Economy,Price Indexes And Inflation,Wage Price Index Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/wage-price-index-australia/latest-release,
6354.0,Labour,Jobs,Job Vacancies Australia,https://www.abs.gov.au/statistics/labour/jobs/job-vacancies-australia/latest-release,
6401.0,Economy,Price Indexes And Inflation,Consumer Price Index Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/consumer-price-index-australia/latest-release,
6416.0,Economy,Price Indexes And Inflation,Residential Property Price Indexes Eight Capital Cities,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/residential-property-price-indexes-eight-capital-cities/latest-release,Ceased
6427.0,Economy,Price Indexes And Inflation,Producer Price Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/producer-price-indexes-australia/latest-release,
6432.0,Economy,Price Indexes And Inflation,Total Value Dwellings,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/total-value-dwellings/latest-release,
6457.0,Economy,Price Indexes And Inflation,International Trade Price Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/international-trade-price-indexes-australia/latest-release,
6467.0,Economy,Price Indexes And Inflation,Selected Living Cost Indexes Australia,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/selected-living-cost-indexes-australia/latest-release,
6484.0,Economy,Price Indexes And Inflation,Monthly Consumer Price Index Indicator,https://www.abs.gov.au/statistics/economy/price-indexes-and-inflation/monthly-consumer-price-index-indicator/latest-release,
7215.0,Industry,Agriculture,Livestock Products Australia,https://www.abs.gov.au/statistics/industry/agriculture/livestock-products-australia/latest-release,
7218.0.55.001,Industry,Agriculture,Livestock And Meat Australia,https://www.abs.gov.au/statistics/industry/agriculture/livestock-and-meat-australia/latest-release,Ceased
8155.0,Industry,Industry Overview,Australian Industry,https://www.abs.gov.au/statistics/industry/industry-overview/australian-industry/latest-release,
8165.0,Economy,Business Indicators,Counts Australian Businesses Including Entries And Exits,https://www.abs.gov.au/statistics/economy/business-indicators/counts-australian-businesses-including-entries-and-exits/latest-release,
8412.0,Industry,Mining,Mineral And Petroleum Exploration Australia,https://www.abs.gov.au/statistics/industry/mining/mineral-and-petroleum-exploration-australia/latest-release,
8501.0,Industry,Retail And Wholesale Trade,Retail Trade Australia,https://www.abs.gov.au/statistics/industry/retail-and-wholesale-trade/retail-trade-australia/latest-release,
8701.0,Industry,Building And Construction,Estimated Dwelling Stock,https://www.abs.gov.au/statistics/industry/building-and-construction/estimated-dwelling-stock/latest-release,
8731.0,Industry,Building And Construction,Building Approvals Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/building-approvals-australia/latest-release,
8752.0,Industry,Building And Construction,Building Activity Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/building-activity-australia/latest-release,
8755.0,Industry,Building And Construction,Construction Work Done Australia Preliminary,https://www.abs.gov.au/statistics/industry/building-and-construction/construction-work-done-australia-preliminary/latest-release,
8762.0,Industry,Building And Construction,Engineering Construction Activity Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/engineering-construction-activity-australia/latest-release,
8782.0.65.001,Industry,Building And Construction,Construction Activity Chain Volume Measures Australia,https://www.abs.gov.au/statistics/industry/building-and-construction/construction-activity-chain-volume-measures-australia/jun-2020,Ceased
"""
    # Parse the embedded text through an in-memory, file-like buffer.
    buffer = StringIO(catalogue_csv)
    frame = pd.read_csv(buffer, index_col=0)
    return frame
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""abs_meta_data_sypport.py
|
|
2
|
+
|
|
3
|
+
Support for working with ABS meta data."""
|
|
4
|
+
|
|
5
|
+
from collections import namedtuple
|
|
6
|
+
|
|
7
|
+
# Record type whose fields are short aliases for the columns of an ABS
# meta data DataFrame. The field order is part of the type.
Metacol = namedtuple(
    "Metacol",
    "did stype id start end num unit dtype freq cmonth table tdesc cat",
)

# Shared instance mapping each short alias to the full column label.
# Values are given positionally, in the field order declared above.
metacol = Metacol(
    "Data Item Description",  # did
    "Series Type",  # stype
    "Series ID",  # id
    "Series Start",  # start
    "Series End",  # end
    "No. Obs.",  # num
    "Unit",  # unit
    "Data Type",  # dtype
    "Freq.",  # freq
    "Collection Month",  # cmonth
    "Table",  # table
    "Table Description",  # tdesc
    "Catalogue number",  # cat
)
|