readabs 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {readabs-0.2.1 → readabs-0.2.2}/CHANGELOG.md +11 -0
- {readabs-0.2.1 → readabs-0.2.2}/PKG-INFO +13 -4
- {readabs-0.2.1 → readabs-0.2.2}/README.md +12 -3
- {readabs-0.2.1 → readabs-0.2.2}/pyproject.toml +1 -1
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/splice.py +60 -23
- {readabs-0.2.1 → readabs-0.2.2}/uv.lock +1 -1
- {readabs-0.2.1 → readabs-0.2.2}/.gitignore +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/.pylintrc +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/.python-version +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/LICENSE +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/build-all.sh +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/build-docs.sh +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/index.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/abs_catalogue.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/grab_abs_url.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/print_abs_catalogue.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/rba_catalogue.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_by_desc.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_cat.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_series.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_rba_table.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/recalibrate.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/search_abs_meta.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/splice.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/readabs.html +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/docs/search.js +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/.test-data/Qrtly-CPI-Time-series-spreadsheets-all.zip +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/__init__.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/abs_catalogue.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/abs_meta_data.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/datatype.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/download_cache.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/get_abs_links.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/grab_abs_url.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/lint-all.sh +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/print_abs_catalogue.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/py.typed +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/rba_catalogue.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/rba_meta_data.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_by_desc.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_cat.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_series.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_rba_table.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_support.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/recalibrate.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/search_abs_meta.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/src/readabs/utilities.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/test/test.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/test/test_url_retrieval.py +0 -0
- {readabs-0.2.1 → readabs-0.2.2}/uv-upgrade.sh +0 -0
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
Version 0.2.2 released 04-Jun-2026 (Canberra Australia)
|
|
2
|
+
|
|
3
|
+
- A *selector* (in `select_one()`, `select()` and `select_and_splice()` sources)
|
|
4
|
+
can now be a bare ABS Series ID string (e.g. `"A2325846C"`) as well as the
|
|
5
|
+
`{search_value: meta_column}` dict form. A string is matched exactly against
|
|
6
|
+
the metadata's Series ID column via the same `find_abs_id` machinery (so the
|
|
7
|
+
cross-table de-duplication and uniqueness guarantees are unchanged), and an
|
|
8
|
+
unknown ID raises. The two forms mix freely across sources.
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
1
12
|
Version 0.2.1 released 03-Jun-2026 (Canberra Australia)
|
|
2
13
|
|
|
3
14
|
- `splice()` and `select_and_splice()` now default to `rebase=False` — segments
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: readabs
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: Get ABS timeseries data in pandas DataFrames
|
|
5
5
|
Project-URL: Repository, https://github.com/bpalmer4/readabs
|
|
6
6
|
Project-URL: Homepage, https://github.com/bpalmer4/readabs
|
|
@@ -220,9 +220,18 @@ Four composable functions:
|
|
|
220
220
|
| `splice(segments)` | Splice an iterable of series, highest priority first → `(series, report)` |
|
|
221
221
|
| `select_and_splice(sources)` | `select` then `splice` for the no-transform case; checks units → `(series, unit, report)` |
|
|
222
222
|
|
|
223
|
-
A *selector* is the `{search_value: column}` form used by `find_abs_id`
|
|
224
|
-
(with `validate_unique=True`, so it de-duplicates on Series ID and raises on
|
|
225
|
-
ambiguity rather than guessing)
|
|
223
|
+
A *selector* is either the `{search_value: column}` form used by `find_abs_id`
|
|
224
|
+
(with `validate_unique=True`, so it de-duplicates on Series ID and raises on
|
|
225
|
+
genuine ambiguity rather than guessing), **or a bare ABS Series ID string**
|
|
226
|
+
(e.g. `"A2325846C"`, matched exactly) for when you already know precisely which
|
|
227
|
+
series you want. The two forms mix freely across sources:
|
|
228
|
+
|
|
229
|
+
```python
|
|
230
|
+
series, unit, report = ra.select_and_splice([
|
|
231
|
+
(cur, cmeta, base | {"Month": mc.freq}), # by description
|
|
232
|
+
(cur, cmeta, "A2325846C"), # by Series ID (quarterly All groups CPI)
|
|
233
|
+
], rebase=True)
|
|
234
|
+
```
|
|
226
235
|
|
|
227
236
|
By default `select` **raises if the selected series carry different ABS units** —
|
|
228
237
|
coherence is required to splice. Pass `require_same_units=False` to select
|
|
@@ -198,9 +198,18 @@ Four composable functions:
|
|
|
198
198
|
| `splice(segments)` | Splice an iterable of series, highest priority first → `(series, report)` |
|
|
199
199
|
| `select_and_splice(sources)` | `select` then `splice` for the no-transform case; checks units → `(series, unit, report)` |
|
|
200
200
|
|
|
201
|
-
A *selector* is the `{search_value: column}` form used by `find_abs_id`
|
|
202
|
-
(with `validate_unique=True`, so it de-duplicates on Series ID and raises on
|
|
203
|
-
ambiguity rather than guessing)
|
|
201
|
+
A *selector* is either the `{search_value: column}` form used by `find_abs_id`
|
|
202
|
+
(with `validate_unique=True`, so it de-duplicates on Series ID and raises on
|
|
203
|
+
genuine ambiguity rather than guessing), **or a bare ABS Series ID string**
|
|
204
|
+
(e.g. `"A2325846C"`, matched exactly) for when you already know precisely which
|
|
205
|
+
series you want. The two forms mix freely across sources:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
series, unit, report = ra.select_and_splice([
|
|
209
|
+
(cur, cmeta, base | {"Month": mc.freq}), # by description
|
|
210
|
+
(cur, cmeta, "A2325846C"), # by Series ID (quarterly All groups CPI)
|
|
211
|
+
], rebase=True)
|
|
212
|
+
```
|
|
204
213
|
|
|
205
214
|
By default `select` **raises if the selected series carry different ABS units** —
|
|
206
215
|
coherence is required to splice. Pass `require_same_units=False` to select
|
|
@@ -15,7 +15,9 @@ This module has two layers:
|
|
|
15
15
|
(carrying each series' ABS unit on ``.attrs["unit"]``), so the common case —
|
|
16
16
|
splice a few ABS series selected by description/frequency — is one call,
|
|
17
17
|
while ``select`` stays exposed for when you need a transform between
|
|
18
|
-
selecting and splicing.
|
|
18
|
+
selecting and splicing. A *selector* is either a ``{search_value:
|
|
19
|
+
meta_column}`` dict, or a bare ABS Series ID string when you already know
|
|
20
|
+
exactly which series you want.
|
|
19
21
|
|
|
20
22
|
Splice design
|
|
21
23
|
-------------
|
|
@@ -68,6 +70,7 @@ from typing import Literal, cast
|
|
|
68
70
|
import pandas as pd
|
|
69
71
|
from pandas import DataFrame, PeriodIndex, Series
|
|
70
72
|
|
|
73
|
+
from readabs.abs_meta_data import metacol as mc # used by the select() layer
|
|
71
74
|
from readabs.search_abs_meta import find_abs_id # used by the select() layer
|
|
72
75
|
|
|
73
76
|
# Frequency rank — higher number = finer frequency.
|
|
@@ -146,9 +149,7 @@ def _to_grid(s: Series, target: str, agg: str) -> Series:
|
|
|
146
149
|
return out.sort_index()
|
|
147
150
|
|
|
148
151
|
|
|
149
|
-
def _rebase_factor(
|
|
150
|
-
result: Series, seg: Series
|
|
151
|
-
) -> tuple[float, str, int, pd.Period | None, pd.Period | None]:
|
|
152
|
+
def _rebase_factor(result: Series, seg: Series) -> tuple[float, str, int, pd.Period | None, pd.Period | None]:
|
|
152
153
|
"""Compute the factor to bring *seg* onto *result*'s level.
|
|
153
154
|
|
|
154
155
|
Measured as the ratio of mean levels over the overlapping *date span*, so
|
|
@@ -294,19 +295,31 @@ def splice(
|
|
|
294
295
|
return result, report
|
|
295
296
|
|
|
296
297
|
|
|
297
|
-
# A select_and_splice() source: the fetched data dict, its meta, and a
|
|
298
|
-
# {search_value: meta_column} selector (readabs' find_abs_id convention)
|
|
299
|
-
|
|
298
|
+
# A select_and_splice() source: the fetched data dict, its meta, and either a
|
|
299
|
+
# {search_value: meta_column} selector (readabs' find_abs_id convention) or a
|
|
300
|
+
# bare ABS Series ID string (matched exactly against the Series ID column).
|
|
301
|
+
Source = tuple[dict[str, DataFrame], DataFrame, dict[str, str] | str]
|
|
300
302
|
|
|
301
303
|
|
|
302
|
-
def select_one(data: dict[str, DataFrame], meta: DataFrame, selector: dict[str, str]) -> Series:
|
|
304
|
+
def select_one(data: dict[str, DataFrame], meta: DataFrame, selector: dict[str, str] | str) -> Series:
|
|
303
305
|
"""Select the single Series for one ``(data, meta, selector)`` — the single-source wrapper.
|
|
304
306
|
|
|
305
307
|
Convenience for the common one-selector case; equivalent to
|
|
306
|
-
``select([(data, meta, selector)])[0]``.
|
|
307
|
-
|
|
308
|
+
``select([(data, meta, selector)])[0]``. The *selector* is either a
|
|
309
|
+
``{search_value: meta_column}`` dict for ``find_abs_id``, or a bare ABS
|
|
310
|
+
Series ID string, matched exactly against the metadata's Series ID column.
|
|
311
|
+
Returns the Series named by its Series ID, with its ABS unit on
|
|
312
|
+
``.attrs["unit"]``.
|
|
308
313
|
"""
|
|
309
|
-
|
|
314
|
+
if isinstance(selector, str):
|
|
315
|
+
# A bare Series ID — same find_abs_id machinery, but exact-match on the
|
|
316
|
+
# Series ID column so one ID cannot substring-match another.
|
|
317
|
+
try:
|
|
318
|
+
table, series_id, unit = find_abs_id(meta, {selector: mc.id}, exact_match=True, validate_unique=True)
|
|
319
|
+
except ValueError as exc:
|
|
320
|
+
raise ValueError(f"select: series ID {selector!r} not found in the supplied metadata.") from exc
|
|
321
|
+
else:
|
|
322
|
+
table, series_id, unit = find_abs_id(meta, selector, validate_unique=True)
|
|
310
323
|
s = data[table][series_id].copy()
|
|
311
324
|
s.name = series_id
|
|
312
325
|
s.attrs["unit"] = str(unit)
|
|
@@ -333,7 +346,8 @@ def select(sources: Iterable[Source], *, require_same_units: bool = True) -> lis
|
|
|
333
346
|
- ``meta`` — the matching metadata DataFrame.
|
|
334
347
|
- ``selector`` — ``{search_value: meta_column}`` for ``find_abs_id``, e.g.
|
|
335
348
|
``{"Index Numbers ; All groups CPI ; Australia ;": mc.did,
|
|
336
|
-
"Index Numbers": mc.unit, "Quarter": mc.freq}
|
|
349
|
+
"Index Numbers": mc.unit, "Quarter": mc.freq}``; or a bare ABS Series
|
|
350
|
+
ID string (e.g. ``"A2325846C"``), matched exactly.
|
|
337
351
|
require_same_units
|
|
338
352
|
If ``True`` (default) **raise** when the selected series do not all share
|
|
339
353
|
the same ABS unit — units must cohere to be spliced. Set ``False`` when
|
|
@@ -380,7 +394,7 @@ def select_and_splice(
|
|
|
380
394
|
) -> tuple[Series, str, DataFrame]:
|
|
381
395
|
"""Select one series per source and :func:`splice` them — the no-transform case.
|
|
382
396
|
|
|
383
|
-
Sugar for ``splice(select(
|
|
397
|
+
Sugar for ``splice(select(sources))`` with a unit guard. When
|
|
384
398
|
you need a transform *between* selecting and splicing (e.g. a growth rate),
|
|
385
399
|
compose :func:`select` and :func:`splice` directly instead — that is the whole
|
|
386
400
|
reason :func:`select` is exposed separately.
|
|
@@ -395,7 +409,8 @@ def select_and_splice(
|
|
|
395
409
|
- ``meta`` — the matching metadata DataFrame.
|
|
396
410
|
- ``selector`` — ``{search_value: meta_column}`` for ``find_abs_id``,
|
|
397
411
|
e.g. ``{"Index Numbers ; All groups CPI ; Australia ;": mc.did,
|
|
398
|
-
"Index Numbers": mc.unit, "Quarter": mc.freq}
|
|
412
|
+
"Index Numbers": mc.unit, "Quarter": mc.freq}``; or a bare ABS Series
|
|
413
|
+
ID string (e.g. ``"A2325846C"``), matched exactly. In the common case
|
|
399
414
|
the only thing differing between two sources is the frequency, so a
|
|
400
415
|
shared *base* selector composes with ``base | {"Quarter": mc.freq}``.
|
|
401
416
|
target, rebase, agg, output, fill, name
|
|
@@ -416,9 +431,7 @@ def select_and_splice(
|
|
|
416
431
|
segments = select(sources, require_same_units=require_same_units)
|
|
417
432
|
units = [str(s.attrs.get("unit", "")) for s in segments]
|
|
418
433
|
|
|
419
|
-
result, report = splice(
|
|
420
|
-
segments, target=target, rebase=rebase, agg=agg, output=output, fill=fill, name=name
|
|
421
|
-
)
|
|
434
|
+
result, report = splice(segments, target=target, rebase=rebase, agg=agg, output=output, fill=fill, name=name)
|
|
422
435
|
# Audit trail: which Series ID / unit did each reported (lower-priority) segment use?
|
|
423
436
|
if len(report):
|
|
424
437
|
seg = [int(i) for i in report["segment"]]
|
|
@@ -455,10 +468,7 @@ if __name__ == "__main__":
|
|
|
455
468
|
)
|
|
456
469
|
out, rep = splice([m, q], rebase=True) # monthly priority, quarterly fills the back-history
|
|
457
470
|
_show("Case 1 — M (priority) spliced with Q-DEC, auto-grid", out, rep)
|
|
458
|
-
print(
|
|
459
|
-
f"check: rebased Q value at 2018-03 = {out.loc['2018-03']:.3f} "
|
|
460
|
-
f"(monthly 2018-01 = {m.iloc[0]:.3f})"
|
|
461
|
-
)
|
|
471
|
+
print(f"check: rebased Q value at 2018-03 = {out.loc['2018-03']:.3f} (monthly 2018-01 = {m.iloc[0]:.3f})")
|
|
462
472
|
|
|
463
473
|
# --- Case 2: the anchor clash — Q-NOV vs Q-DEC, overlapping in time
|
|
464
474
|
q_dec = Series(
|
|
@@ -503,12 +513,39 @@ if __name__ == "__main__":
|
|
|
503
513
|
out4, rep4 = splice([new_m, indic, old_q], name="cpi_long", rebase=True)
|
|
504
514
|
_show("Case 4 — 3-way: new monthly + indicator + quarterly", out4, rep4)
|
|
505
515
|
print(
|
|
506
|
-
f"\nfull series spans {out4.index.min()} .. {out4.index.max()}, "
|
|
507
|
-
f"{out4.notna().sum()} observations present"
|
|
516
|
+
f"\nfull series spans {out4.index.min()} .. {out4.index.max()}, {out4.notna().sum()} observations present"
|
|
508
517
|
)
|
|
509
518
|
|
|
510
519
|
# --- Case 5: same, but ask for a clean quarterly output (downsample)
|
|
511
520
|
out5, rep5 = splice([new_m, indic, old_q], output="Q-DEC", name="cpi_long_q", rebase=True)
|
|
512
521
|
_show("Case 5 — same 3-way, resampled to a clean Q-DEC output", out5, rep5)
|
|
513
522
|
|
|
523
|
+
# --- Case 6: the select() layer — dict selector vs bare Series ID string
|
|
524
|
+
fake_meta = DataFrame(
|
|
525
|
+
{
|
|
526
|
+
mc.did: ["Index Numbers ; All groups CPI ; Australia ;"] * 2,
|
|
527
|
+
mc.id: ["A2325846C", "A128478317T"],
|
|
528
|
+
mc.unit: ["Index Numbers", "Index Numbers"],
|
|
529
|
+
mc.freq: ["Quarter", "Month"],
|
|
530
|
+
mc.table: ["640101", "648601"],
|
|
531
|
+
}
|
|
532
|
+
)
|
|
533
|
+
fake_data = {
|
|
534
|
+
"640101": DataFrame({"A2325846C": q.to_numpy()[:40]}, index=q.index[:40]),
|
|
535
|
+
"648601": DataFrame({"A128478317T": m.to_numpy()}, index=m.index),
|
|
536
|
+
}
|
|
537
|
+
by_id = select_one(fake_data, fake_meta, "A2325846C") # bare Series ID string
|
|
538
|
+
by_dict = select_one(fake_data, fake_meta, {"Month": mc.freq}) # selector dict
|
|
539
|
+
print(f"\n{'=' * 70}\nCase 6 — select_one: bare Series ID vs selector dict\n{'=' * 70}")
|
|
540
|
+
print(f"by ID: name={by_id.name} unit={by_id.attrs['unit']!r} n={len(by_id)}")
|
|
541
|
+
print(f"by dict: name={by_dict.name} unit={by_dict.attrs['unit']!r} n={len(by_dict)}")
|
|
542
|
+
out6, unit6, rep6 = select_and_splice(
|
|
543
|
+
[(fake_data, fake_meta, "A128478317T"), (fake_data, fake_meta, "A2325846C")], rebase=True
|
|
544
|
+
)
|
|
545
|
+
_show(f"Case 6b — select_and_splice by bare Series IDs (unit={unit6!r})", out6, rep6)
|
|
546
|
+
try:
|
|
547
|
+
select_one(fake_data, fake_meta, "NOSUCHID") # unknown ID -> fail loud
|
|
548
|
+
except ValueError as exc:
|
|
549
|
+
print(f"unknown ID correctly raised:\n {exc}")
|
|
550
|
+
|
|
514
551
|
print("\nAll cases ran.")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{readabs-0.2.1 → readabs-0.2.2}/src/readabs/.test-data/Qrtly-CPI-Time-series-spreadsheets-all.zip
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|