readabs 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {readabs-0.2.1 → readabs-0.2.2}/CHANGELOG.md +11 -0
  2. {readabs-0.2.1 → readabs-0.2.2}/PKG-INFO +13 -4
  3. {readabs-0.2.1 → readabs-0.2.2}/README.md +12 -3
  4. {readabs-0.2.1 → readabs-0.2.2}/pyproject.toml +1 -1
  5. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/splice.py +60 -23
  6. {readabs-0.2.1 → readabs-0.2.2}/uv.lock +1 -1
  7. {readabs-0.2.1 → readabs-0.2.2}/.gitignore +0 -0
  8. {readabs-0.2.1 → readabs-0.2.2}/.pylintrc +0 -0
  9. {readabs-0.2.1 → readabs-0.2.2}/.python-version +0 -0
  10. {readabs-0.2.1 → readabs-0.2.2}/LICENSE +0 -0
  11. {readabs-0.2.1 → readabs-0.2.2}/build-all.sh +0 -0
  12. {readabs-0.2.1 → readabs-0.2.2}/build-docs.sh +0 -0
  13. {readabs-0.2.1 → readabs-0.2.2}/docs/index.html +0 -0
  14. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/abs_catalogue.html +0 -0
  15. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/grab_abs_url.html +0 -0
  16. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/print_abs_catalogue.html +0 -0
  17. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/rba_catalogue.html +0 -0
  18. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_by_desc.html +0 -0
  19. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_cat.html +0 -0
  20. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_abs_series.html +0 -0
  21. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/read_rba_table.html +0 -0
  22. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/recalibrate.html +0 -0
  23. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/search_abs_meta.html +0 -0
  24. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs/splice.html +0 -0
  25. {readabs-0.2.1 → readabs-0.2.2}/docs/readabs.html +0 -0
  26. {readabs-0.2.1 → readabs-0.2.2}/docs/search.js +0 -0
  27. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/.test-data/Qrtly-CPI-Time-series-spreadsheets-all.zip +0 -0
  28. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/__init__.py +0 -0
  29. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/abs_catalogue.py +0 -0
  30. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/abs_meta_data.py +0 -0
  31. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/datatype.py +0 -0
  32. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/download_cache.py +0 -0
  33. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/get_abs_links.py +0 -0
  34. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/grab_abs_url.py +0 -0
  35. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/lint-all.sh +0 -0
  36. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/print_abs_catalogue.py +0 -0
  37. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/py.typed +0 -0
  38. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/rba_catalogue.py +0 -0
  39. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/rba_meta_data.py +0 -0
  40. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_by_desc.py +0 -0
  41. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_cat.py +0 -0
  42. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_abs_series.py +0 -0
  43. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_rba_table.py +0 -0
  44. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/read_support.py +0 -0
  45. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/recalibrate.py +0 -0
  46. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/search_abs_meta.py +0 -0
  47. {readabs-0.2.1 → readabs-0.2.2}/src/readabs/utilities.py +0 -0
  48. {readabs-0.2.1 → readabs-0.2.2}/test/test.py +0 -0
  49. {readabs-0.2.1 → readabs-0.2.2}/test/test_url_retrieval.py +0 -0
  50. {readabs-0.2.1 → readabs-0.2.2}/uv-upgrade.sh +0 -0
@@ -1,3 +1,14 @@
1
+ Version 0.2.2 released 04-Jun-2026 (Canberra Australia)
2
+
3
+ - A *selector* (in `select_one()`, `select()` and `select_and_splice()` sources)
4
+ can now be a bare ABS Series ID string (e.g. `"A2325846C"`) as well as the
5
+ `{search_value: meta_column}` dict form. A string is matched exactly against
6
+ the metadata's Series ID column via the same `find_abs_id` machinery (so the
7
+ cross-table de-duplication and uniqueness guarantees are unchanged), and an
8
+ unknown ID raises. The two forms mix freely across sources.
9
+
10
+ ---
11
+
1
12
  Version 0.2.1 released 03-Jun-2026 (Canberra Australia)
2
13
 
3
14
  - `splice()` and `select_and_splice()` now default to `rebase=False` — segments
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: readabs
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Get ABS timeseries data in pandas DataFrames
5
5
  Project-URL: Repository, https://github.com/bpalmer4/readabs
6
6
  Project-URL: Homepage, https://github.com/bpalmer4/readabs
@@ -220,9 +220,18 @@ Four composable functions:
220
220
  | `splice(segments)` | Splice an iterable of series, highest priority first → `(series, report)` |
221
221
  | `select_and_splice(sources)` | `select` then `splice` for the no-transform case; checks units → `(series, unit, report)` |
222
222
 
223
- A *selector* is the `{search_value: column}` form used by `find_abs_id` (with
224
- `validate_unique=True`, so it de-duplicates on Series ID and raises on genuine
225
- ambiguity rather than guessing).
223
+ A *selector* is either the `{search_value: column}` form used by `find_abs_id`
224
+ (with `validate_unique=True`, so it de-duplicates on Series ID and raises on
225
+ genuine ambiguity rather than guessing), **or a bare ABS Series ID string**
226
+ (e.g. `"A2325846C"`, matched exactly) for when you already know precisely which
227
+ series you want. The two forms mix freely across sources:
228
+
229
+ ```python
230
+ series, unit, report = ra.select_and_splice([
231
+ (cur, cmeta, base | {"Month": mc.freq}), # by description
232
+ (cur, cmeta, "A2325846C"), # by Series ID (quarterly All groups CPI)
233
+ ], rebase=True)
234
+ ```
226
235
 
227
236
  By default `select` **raises if the selected series carry different ABS units** —
228
237
  coherence is required to splice. Pass `require_same_units=False` to select
@@ -198,9 +198,18 @@ Four composable functions:
198
198
  | `splice(segments)` | Splice an iterable of series, highest priority first → `(series, report)` |
199
199
  | `select_and_splice(sources)` | `select` then `splice` for the no-transform case; checks units → `(series, unit, report)` |
200
200
 
201
- A *selector* is the `{search_value: column}` form used by `find_abs_id` (with
202
- `validate_unique=True`, so it de-duplicates on Series ID and raises on genuine
203
- ambiguity rather than guessing).
201
+ A *selector* is either the `{search_value: column}` form used by `find_abs_id`
202
+ (with `validate_unique=True`, so it de-duplicates on Series ID and raises on
203
+ genuine ambiguity rather than guessing), **or a bare ABS Series ID string**
204
+ (e.g. `"A2325846C"`, matched exactly) for when you already know precisely which
205
+ series you want. The two forms mix freely across sources:
206
+
207
+ ```python
208
+ series, unit, report = ra.select_and_splice([
209
+ (cur, cmeta, base | {"Month": mc.freq}), # by description
210
+ (cur, cmeta, "A2325846C"), # by Series ID (quarterly All groups CPI)
211
+ ], rebase=True)
212
+ ```
204
213
 
205
214
  By default `select` **raises if the selected series carry different ABS units** —
206
215
  coherence is required to splice. Pass `require_same_units=False` to select
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "readabs"
3
- version = "0.2.1"
3
+ version = "0.2.2"
4
4
  description = "Get ABS timeseries data in pandas DataFrames"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -15,7 +15,9 @@ This module has two layers:
15
15
  (carrying each series' ABS unit on ``.attrs["unit"]``), so the common case —
16
16
  splice a few ABS series selected by description/frequency — is one call,
17
17
  while ``select`` stays exposed for when you need a transform between
18
- selecting and splicing.
18
+ selecting and splicing. A *selector* is either a ``{search_value:
19
+ meta_column}`` dict, or a bare ABS Series ID string when you already know
20
+ exactly which series you want.
19
21
 
20
22
  Splice design
21
23
  -------------
@@ -68,6 +70,7 @@ from typing import Literal, cast
68
70
  import pandas as pd
69
71
  from pandas import DataFrame, PeriodIndex, Series
70
72
 
73
+ from readabs.abs_meta_data import metacol as mc # used by the select() layer
71
74
  from readabs.search_abs_meta import find_abs_id # used by the select() layer
72
75
 
73
76
  # Frequency rank — higher number = finer frequency.
@@ -146,9 +149,7 @@ def _to_grid(s: Series, target: str, agg: str) -> Series:
146
149
  return out.sort_index()
147
150
 
148
151
 
149
- def _rebase_factor(
150
- result: Series, seg: Series
151
- ) -> tuple[float, str, int, pd.Period | None, pd.Period | None]:
152
+ def _rebase_factor(result: Series, seg: Series) -> tuple[float, str, int, pd.Period | None, pd.Period | None]:
152
153
  """Compute the factor to bring *seg* onto *result*'s level.
153
154
 
154
155
  Measured as the ratio of mean levels over the overlapping *date span*, so
@@ -294,19 +295,31 @@ def splice(
294
295
  return result, report
295
296
 
296
297
 
297
- # A select_and_splice() source: the fetched data dict, its meta, and a
298
- # {search_value: meta_column} selector (readabs' find_abs_id convention).
299
- Source = tuple[dict[str, DataFrame], DataFrame, dict[str, str]]
298
+ # A select_and_splice() source: the fetched data dict, its meta, and either a
299
+ # {search_value: meta_column} selector (readabs' find_abs_id convention) or a
300
+ # bare ABS Series ID string (matched exactly against the Series ID column).
301
+ Source = tuple[dict[str, DataFrame], DataFrame, dict[str, str] | str]
300
302
 
301
303
 
302
- def select_one(data: dict[str, DataFrame], meta: DataFrame, selector: dict[str, str]) -> Series:
304
+ def select_one(data: dict[str, DataFrame], meta: DataFrame, selector: dict[str, str] | str) -> Series:
303
305
  """Select the single Series for one ``(data, meta, selector)`` — the single-source wrapper.
304
306
 
305
307
  Convenience for the common one-selector case; equivalent to
306
- ``select([(data, meta, selector)])[0]``. Returns the Series named by its
307
- Series ID, with its ABS unit on ``.attrs["unit"]``.
308
+ ``select([(data, meta, selector)])[0]``. The *selector* is either a
309
+ ``{search_value: meta_column}`` dict for ``find_abs_id``, or a bare ABS
310
+ Series ID string, matched exactly against the metadata's Series ID column.
311
+ Returns the Series named by its Series ID, with its ABS unit on
312
+ ``.attrs["unit"]``.
308
313
  """
309
- table, series_id, unit = find_abs_id(meta, selector, validate_unique=True)
314
+ if isinstance(selector, str):
315
+ # A bare Series ID — same find_abs_id machinery, but exact-match on the
316
+ # Series ID column so one ID cannot substring-match another.
317
+ try:
318
+ table, series_id, unit = find_abs_id(meta, {selector: mc.id}, exact_match=True, validate_unique=True)
319
+ except ValueError as exc:
320
+ raise ValueError(f"select: series ID {selector!r} not found in the supplied metadata.") from exc
321
+ else:
322
+ table, series_id, unit = find_abs_id(meta, selector, validate_unique=True)
310
323
  s = data[table][series_id].copy()
311
324
  s.name = series_id
312
325
  s.attrs["unit"] = str(unit)
@@ -333,7 +346,8 @@ def select(sources: Iterable[Source], *, require_same_units: bool = True) -> lis
333
346
  - ``meta`` — the matching metadata DataFrame.
334
347
  - ``selector`` — ``{search_value: meta_column}`` for ``find_abs_id``, e.g.
335
348
  ``{"Index Numbers ; All groups CPI ; Australia ;": mc.did,
336
- "Index Numbers": mc.unit, "Quarter": mc.freq}``.
349
+ "Index Numbers": mc.unit, "Quarter": mc.freq}``; or a bare ABS Series
350
+ ID string (e.g. ``"A2325846C"``), matched exactly.
337
351
  require_same_units
338
352
  If ``True`` (default) **raise** when the selected series do not all share
339
353
  the same ABS unit — units must cohere to be spliced. Set ``False`` when
@@ -380,7 +394,7 @@ def select_and_splice(
380
394
  ) -> tuple[Series, str, DataFrame]:
381
395
  """Select one series per source and :func:`splice` them — the no-transform case.
382
396
 
383
- Sugar for ``splice(select(*src) for src in sources)`` with a unit guard. When
397
+ Sugar for ``splice(select(sources))`` with a unit guard. When
384
398
  you need a transform *between* selecting and splicing (e.g. a growth rate),
385
399
  compose :func:`select` and :func:`splice` directly instead — that is the whole
386
400
  reason :func:`select` is exposed separately.
@@ -395,7 +409,8 @@ def select_and_splice(
395
409
  - ``meta`` — the matching metadata DataFrame.
396
410
  - ``selector`` — ``{search_value: meta_column}`` for ``find_abs_id``,
397
411
  e.g. ``{"Index Numbers ; All groups CPI ; Australia ;": mc.did,
398
- "Index Numbers": mc.unit, "Quarter": mc.freq}``. In the common case
412
+ "Index Numbers": mc.unit, "Quarter": mc.freq}``; or a bare ABS Series
413
+ ID string (e.g. ``"A2325846C"``), matched exactly. In the common case
399
414
  the only thing differing between two sources is the frequency, so a
400
415
  shared *base* selector composes with ``base | {"Quarter": mc.freq}``.
401
416
  target, rebase, agg, output, fill, name
@@ -416,9 +431,7 @@ def select_and_splice(
416
431
  segments = select(sources, require_same_units=require_same_units)
417
432
  units = [str(s.attrs.get("unit", "")) for s in segments]
418
433
 
419
- result, report = splice(
420
- segments, target=target, rebase=rebase, agg=agg, output=output, fill=fill, name=name
421
- )
434
+ result, report = splice(segments, target=target, rebase=rebase, agg=agg, output=output, fill=fill, name=name)
422
435
  # Audit trail: which Series ID / unit did each reported (lower-priority) segment use?
423
436
  if len(report):
424
437
  seg = [int(i) for i in report["segment"]]
@@ -455,10 +468,7 @@ if __name__ == "__main__":
455
468
  )
456
469
  out, rep = splice([m, q], rebase=True) # monthly priority, quarterly fills the back-history
457
470
  _show("Case 1 — M (priority) spliced with Q-DEC, auto-grid", out, rep)
458
- print(
459
- f"check: rebased Q value at 2018-03 = {out.loc['2018-03']:.3f} "
460
- f"(monthly 2018-01 = {m.iloc[0]:.3f})"
461
- )
471
+ print(f"check: rebased Q value at 2018-03 = {out.loc['2018-03']:.3f} (monthly 2018-01 = {m.iloc[0]:.3f})")
462
472
 
463
473
  # --- Case 2: the anchor clash — Q-NOV vs Q-DEC, overlapping in time
464
474
  q_dec = Series(
@@ -503,12 +513,39 @@ if __name__ == "__main__":
503
513
  out4, rep4 = splice([new_m, indic, old_q], name="cpi_long", rebase=True)
504
514
  _show("Case 4 — 3-way: new monthly + indicator + quarterly", out4, rep4)
505
515
  print(
506
- f"\nfull series spans {out4.index.min()} .. {out4.index.max()}, "
507
- f"{out4.notna().sum()} observations present"
516
+ f"\nfull series spans {out4.index.min()} .. {out4.index.max()}, {out4.notna().sum()} observations present"
508
517
  )
509
518
 
510
519
  # --- Case 5: same, but ask for a clean quarterly output (downsample)
511
520
  out5, rep5 = splice([new_m, indic, old_q], output="Q-DEC", name="cpi_long_q", rebase=True)
512
521
  _show("Case 5 — same 3-way, resampled to a clean Q-DEC output", out5, rep5)
513
522
 
523
+ # --- Case 6: the select() layer — dict selector vs bare Series ID string
524
+ fake_meta = DataFrame(
525
+ {
526
+ mc.did: ["Index Numbers ; All groups CPI ; Australia ;"] * 2,
527
+ mc.id: ["A2325846C", "A128478317T"],
528
+ mc.unit: ["Index Numbers", "Index Numbers"],
529
+ mc.freq: ["Quarter", "Month"],
530
+ mc.table: ["640101", "648601"],
531
+ }
532
+ )
533
+ fake_data = {
534
+ "640101": DataFrame({"A2325846C": q.to_numpy()[:40]}, index=q.index[:40]),
535
+ "648601": DataFrame({"A128478317T": m.to_numpy()}, index=m.index),
536
+ }
537
+ by_id = select_one(fake_data, fake_meta, "A2325846C") # bare Series ID string
538
+ by_dict = select_one(fake_data, fake_meta, {"Month": mc.freq}) # selector dict
539
+ print(f"\n{'=' * 70}\nCase 6 — select_one: bare Series ID vs selector dict\n{'=' * 70}")
540
+ print(f"by ID: name={by_id.name} unit={by_id.attrs['unit']!r} n={len(by_id)}")
541
+ print(f"by dict: name={by_dict.name} unit={by_dict.attrs['unit']!r} n={len(by_dict)}")
542
+ out6, unit6, rep6 = select_and_splice(
543
+ [(fake_data, fake_meta, "A128478317T"), (fake_data, fake_meta, "A2325846C")], rebase=True
544
+ )
545
+ _show(f"Case 6b — select_and_splice by bare Series IDs (unit={unit6!r})", out6, rep6)
546
+ try:
547
+ select_one(fake_data, fake_meta, "NOSUCHID") # unknown ID -> fail loud
548
+ except ValueError as exc:
549
+ print(f"unknown ID correctly raised:\n {exc}")
550
+
514
551
  print("\nAll cases ran.")
@@ -1198,7 +1198,7 @@ wheels = [
1198
1198
 
1199
1199
  [[package]]
1200
1200
  name = "readabs"
1201
- version = "0.2.1"
1201
+ version = "0.2.2"
1202
1202
  source = { editable = "." }
1203
1203
  dependencies = [
1204
1204
  { name = "bs4" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes