reg-meta 0.9.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reg_meta-0.9.0/.gitignore +95 -0
- reg_meta-0.9.0/DESIGN.md +863 -0
- reg_meta-0.9.0/PKG-INFO +119 -0
- reg_meta-0.9.0/README.md +101 -0
- reg_meta-0.9.0/pyproject.toml +38 -0
- reg_meta-0.9.0/src/reg_meta/__init__.py +91 -0
- reg_meta-0.9.0/src/reg_meta/__main__.py +4 -0
- reg_meta-0.9.0/src/reg_meta/catalog.py +1195 -0
- reg_meta-0.9.0/src/reg_meta/cli.py +2895 -0
- reg_meta-0.9.0/src/reg_meta/cli_common.py +328 -0
- reg_meta-0.9.0/src/reg_meta/db.py +275 -0
- reg_meta-0.9.0/src/reg_meta/doc_db.py +127 -0
- reg_meta-0.9.0/src/reg_meta/doc_queries.py +197 -0
- reg_meta-0.9.0/src/reg_meta/download.py +474 -0
- reg_meta-0.9.0/src/reg_meta/errors.py +29 -0
- reg_meta-0.9.0/src/reg_meta/fqid.py +517 -0
- reg_meta-0.9.0/src/reg_meta/queries.py +2269 -0
- reg_meta-0.9.0/src/reg_meta/update.py +361 -0
- reg_meta-0.9.0/tests/conftest.py +20 -0
- reg_meta-0.9.0/tests/test_catalog.py +1119 -0
- reg_meta-0.9.0/tests/test_catalog_listing.py +237 -0
- reg_meta-0.9.0/tests/test_commands.py +2059 -0
- reg_meta-0.9.0/tests/test_doc_commands.py +493 -0
- reg_meta-0.9.0/tests/test_fqid.py +567 -0
- reg_meta-0.9.0/tests/test_fqid_output.py +96 -0
- reg_meta-0.9.0/tests/test_hints.py +123 -0
- reg_meta-0.9.0/tests/test_integration.py +123 -0
- reg_meta-0.9.0/tests/test_search_split_siblings.py +111 -0
- reg_meta-0.9.0/tests/test_update.py +549 -0
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
# Root-anchored: only the repo-root packaging dir, NOT the real
|
|
7
|
+
# `reg_monabundle/src/reg_monabundle/build/` source package (the bundle
|
|
8
|
+
# amalgamator). `__pycache__/` above still ignores byte-code anywhere.
|
|
9
|
+
/build/
|
|
10
|
+
|
|
11
|
+
# Environments
|
|
12
|
+
.venv/
|
|
13
|
+
.env
|
|
14
|
+
.envrc
|
|
15
|
+
|
|
16
|
+
# Tools
|
|
17
|
+
.ruff_cache/
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.mypy_cache/
|
|
20
|
+
node_modules/
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.idea/
|
|
24
|
+
.vscode/
|
|
25
|
+
*.swp
|
|
26
|
+
|
|
27
|
+
# OS
|
|
28
|
+
.DS_Store
|
|
29
|
+
Thumbs.db
|
|
30
|
+
|
|
31
|
+
# Project
|
|
32
|
+
.tmp/
|
|
33
|
+
|
|
34
|
+
# Input data (SCB CSVs, Socialstyrelsen metadata, source PDFs — not committed).
|
|
35
|
+
# Ignored by contents (`input_data/*`, not `input_data/`) so the maintainer-curated
|
|
36
|
+
# classifications/ folder can be re-included by the `!` rule below (git doesn't traverse
|
|
37
|
+
# into a parent dir that's been ignored at the directory level).
|
|
38
|
+
reg_meta_build/input_data/*
|
|
39
|
+
!reg_meta_build/input_data/classifications/
|
|
40
|
+
# Under classifications/ only the normalized CSVs (+ manifest.json) are tracked.
|
|
41
|
+
# The raw SOS source workbooks the fetch script downloads are not — they
|
|
42
|
+
# can be regenerated by running scripts/fetch_sos_classifications.py.
|
|
43
|
+
reg_meta_build/input_data/classifications/sos/*.xls
|
|
44
|
+
reg_meta_build/input_data/classifications/sos/*.xlsx
|
|
45
|
+
# landskoder.csv is fetched but deliberately not seeded (see
|
|
46
|
+
# reg_meta_build/CLASSIFICATIONS.md); a bare fetch run regenerates it —
|
|
47
|
+
# keep it out of status noise.
|
|
48
|
+
reg_meta_build/input_data/classifications/sos/landskoder.csv
|
|
49
|
+
|
|
50
|
+
# mock-data-wizard generated output
|
|
51
|
+
mock_data/
|
|
52
|
+
mock_output/
|
|
53
|
+
mdw_runner.py
|
|
54
|
+
mdw_step1_discovery.json
|
|
55
|
+
mdw_step2_config.json
|
|
56
|
+
mdw_step3_stats.json
|
|
57
|
+
extract_stats*.R
|
|
58
|
+
|
|
59
|
+
# mock-data-wizard runtime artifacts (transient — fcntl sidecar, run logs)
|
|
60
|
+
.mock_data_config.lock
|
|
61
|
+
mdw_log_*.txt
|
|
62
|
+
|
|
63
|
+
# Local test workspaces (real user data, not part of the toolkit)
|
|
64
|
+
/covid-education-immigrants-test/
|
|
65
|
+
|
|
66
|
+
# Personal exploration scripts (not part of the toolkit's curated scripts/)
|
|
67
|
+
scripts/sample_*.py
|
|
68
|
+
|
|
69
|
+
# MONA probe artefacts -- can contain workspace metadata (paths,
|
|
70
|
+
# hostnames, DSNs); inspect before sharing. The findings that matter
|
|
71
|
+
# live in mock_data_wizard/DESIGN.md.
|
|
72
|
+
mdw_probe_*.log
|
|
73
|
+
mdw_python_probe_*.log
|
|
74
|
+
mdw_py_probe_*.csv
|
|
75
|
+
mdw_upload_probe_*.txt
|
|
76
|
+
|
|
77
|
+
# reg_meta database (built from SCB exports, not committed)
|
|
78
|
+
*.db
|
|
79
|
+
|
|
80
|
+
# SCB source PDFs (binary, copyrighted, not committed)
|
|
81
|
+
*.pdf
|
|
82
|
+
|
|
83
|
+
# Marker raw output (regenerable from PDFs + parser)
|
|
84
|
+
reg_meta_build/docs/_raw/
|
|
85
|
+
|
|
86
|
+
# Archive (concluded investigations, ad-hoc scripts, internal notes)
|
|
87
|
+
archive/
|
|
88
|
+
*.har
|
|
89
|
+
|
|
90
|
+
# Claude Code harness state (transient locks, per-user settings, worktree shims).
|
|
91
|
+
# `.claude/skills/` is intentionally tracked for shared skills, so ignore the
|
|
92
|
+
# transient pieces individually rather than blanket-ignoring `.claude/`.
|
|
93
|
+
.claude/scheduled_tasks.lock
|
|
94
|
+
.claude/worktrees/
|
|
95
|
+
.claude/settings.local.json
|