integer-atlas-algos 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. integer_atlas_algos-0.1.0/.gitignore +7 -0
  2. integer_atlas_algos-0.1.0/COMMANDS.md +167 -0
  3. integer_atlas_algos-0.1.0/INTERFACE.md +114 -0
  4. integer_atlas_algos-0.1.0/PKG-INFO +117 -0
  5. integer_atlas_algos-0.1.0/PUBLISHING.md +85 -0
  6. integer_atlas_algos-0.1.0/README.md +96 -0
  7. integer_atlas_algos-0.1.0/integer_atlas_algos/__init__.py +3 -0
  8. integer_atlas_algos-0.1.0/integer_atlas_algos/_lib/__init__.py +0 -0
  9. integer_atlas_algos-0.1.0/integer_atlas_algos/_lib/blake3_py.py +172 -0
  10. integer_atlas_algos-0.1.0/integer_atlas_algos/_lib/factorization.py +87 -0
  11. integer_atlas_algos-0.1.0/integer_atlas_algos/_lib/multiplicative.py +9 -0
  12. integer_atlas_algos-0.1.0/integer_atlas_algos/context.py +28 -0
  13. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/__init__.py +0 -0
  14. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/__main__.py +6 -0
  15. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/atomicio.py +91 -0
  16. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/backends/__init__.py +24 -0
  17. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/backends/csv_backend.py +67 -0
  18. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/backends/parquet_backend.py +87 -0
  19. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/cli.py +147 -0
  20. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/compute.py +251 -0
  21. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/estimate.py +58 -0
  22. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/manifest.py +87 -0
  23. integer_atlas_algos-0.1.0/integer_atlas_algos/executor/verify.py +77 -0
  24. integer_atlas_algos-0.1.0/integer_atlas_algos/precomputed/primes_le_31623.txt +3401 -0
  25. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/__init__.py +12 -0
  26. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/abs_n.py +7 -0
  27. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/abundance_class.py +13 -0
  28. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/abundancy_index.py +12 -0
  29. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/aliquot_sum.py +12 -0
  30. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/binary_popcount.py +7 -0
  31. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/binary_repr.py +7 -0
  32. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/bit_length.py +7 -0
  33. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/carmichael_lambda.py +19 -0
  34. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/collatz_stopping_time.py +15 -0
  35. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/decimal_digit_count.py +7 -0
  36. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/digit_sum.py +7 -0
  37. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/digital_root.py +8 -0
  38. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/divisor_count.py +13 -0
  39. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/divisor_sum.py +11 -0
  40. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/dyadic_valuation.py +11 -0
  41. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/euler_phi.py +14 -0
  42. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/gcd_sum_pillai.py +17 -0
  43. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/hex_repr.py +7 -0
  44. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/integer_sqrt.py +9 -0
  45. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_even.py +7 -0
  46. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_fibonacci.py +15 -0
  47. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_happy.py +14 -0
  48. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_harshad.py +10 -0
  49. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_odd.py +7 -0
  50. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_palindrome.py +8 -0
  51. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_perfect.py +12 -0
  52. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_perfect_power.py +17 -0
  53. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_powerful.py +10 -0
  54. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_practical.py +18 -0
  55. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_prime.py +9 -0
  56. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_prime_power.py +8 -0
  57. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_square.py +10 -0
  58. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_squarefree.py +10 -0
  59. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/is_triangular.py +11 -0
  60. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/largest_prime_factor.py +11 -0
  61. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/liouville_lambda.py +8 -0
  62. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/mobius.py +13 -0
  63. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/octal_repr.py +7 -0
  64. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/omega_big.py +8 -0
  65. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/omega_distinct.py +8 -0
  66. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/partition_count.py +33 -0
  67. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/radical.py +12 -0
  68. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/sign.py +7 -0
  69. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/smallest_prime_factor.py +11 -0
  70. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/sum_of_two_squares_count.py +18 -0
  71. integer_atlas_algos-0.1.0/integer_atlas_algos/properties/von_mangoldt.py +14 -0
  72. integer_atlas_algos-0.1.0/integer_atlas_algos/registry.py +53 -0
  73. integer_atlas_algos-0.1.0/pyproject.toml +47 -0
  74. integer_atlas_algos-0.1.0/tests/__init__.py +0 -0
  75. integer_atlas_algos-0.1.0/tests/manifests/base_1_1000.json +7 -0
  76. integer_atlas_algos-0.1.0/tests/manifests/base_negatives.json +7 -0
  77. integer_atlas_algos-0.1.0/tests/manifests/base_single_row.json +7 -0
  78. integer_atlas_algos-0.1.0/tests/manifests/base_zero.json +7 -0
  79. integer_atlas_algos-0.1.0/tests/manifests/unknown_column.json +7 -0
  80. integer_atlas_algos-0.1.0/tests/test_blake3.py +58 -0
  81. integer_atlas_algos-0.1.0/tests/test_compute.py +93 -0
  82. integer_atlas_algos-0.1.0/tests/test_estimate.py +45 -0
  83. integer_atlas_algos-0.1.0/tests/test_parquet.py +60 -0
  84. integer_atlas_algos-0.1.0/tests/test_properties.py +106 -0
  85. integer_atlas_algos-0.1.0/tests/test_resume.py +75 -0
  86. integer_atlas_algos-0.1.0/tests/test_verify.py +90 -0
  87. integer_atlas_algos-0.1.0/tools/bench.py +248 -0
  88. integer_atlas_algos-0.1.0/tools/make_work_order.py +57 -0
  89. integer_atlas_algos-0.1.0/tools/perfrun.py +93 -0
@@ -0,0 +1,7 @@
1
+ __pycache__/
2
+ *.pyc
3
+ dist/
4
+ build/
5
+ *.egg-info/
6
+ .venv/
7
+ .pytest_cache/
@@ -0,0 +1,167 @@
1
+ # Algos — complete command reference
2
+
3
+ Every command runnable in this repo: the executor CLI (`compute`, `verify`), the
4
+ dev tools, and the test runner.
5
+
6
+ **Environment**
7
+ - Python 3.10+. The CLI is the `atlas-algos` console command. Get it in either of two ways:
8
+ - **Installed:** `pip install integer-atlas-algos` (or `pip install -e .` from a
9
+ source checkout) — then `atlas-algos …` works anywhere.
10
+ - **From source without installing:** run from the `algos/` directory as
11
+ `python3 -m integer_atlas_algos.executor …` (same subcommands and options).
12
+ The synopses below use `atlas-algos`; substitute `python3 -m integer_atlas_algos.executor`
13
+ if you haven't installed it.
14
+ - CSV format needs no dependencies. Parquet needs `pyarrow` (`pip install
15
+ integer-atlas-algos[parquet]`). A native `blake3` (`[hash]` extra) is an optional
16
+ fast path; otherwise the bundled pure-Python BLAKE3 is used.
17
+ - First factorization uses `integer_atlas_algos/precomputed/primes_le_31623.txt`
18
+ (shipped with the package; regenerated by sieve if missing, kept in memory if the
19
+ dir is read-only).
20
+
21
+ ---
22
+
23
+ ## 1. `compute` — generate a shard
24
+
25
+ ```
26
+ atlas-algos compute --manifest <work-order.json> --out <path> [options]
27
+ ```
28
+
29
+ | Option | Type | Default | Example | Meaning |
30
+ | --- | --- | --- | --- | --- |
31
+ | `--manifest` | path | required | `tests/manifests/base_1_1000.json` | Work order: JSON `{start, end, columns}` (+ optional `id`, `algorithm_release`). |
32
+ | `--out` | path | required | `/tmp/shard` | Destination shard path; extension is normalized to the format. |
33
+ | `--format` | `parquet`\|`csv` | `parquet` | `--format csv` | Shard format. `parquet` needs pyarrow; `csv` is dependency-free. |
34
+ | `--chunk-size` | int | `50000` | `--chunk-size 100000` | Rows per part = checkpoint/resume granularity **and** the memory knob. |
35
+ | `--force` | flag | off | `--force` | Discard any existing output + checkpoint and rebuild from scratch. |
36
+ | `--no-resume` | flag | off (resume on) | `--no-resume` | Ignore an existing checkpoint and recompute (still writes to `--out`). |
37
+ | `--keep-build` | flag | off | `--keep-build` | Keep the `.build/` working dir after success (default: deleted). |
38
+ | `--dry-run` | flag | off | `--dry-run` | Print the work estimate as JSON and exit; compute nothing. |
39
+ | `--log-file` | path | none | `--log-file run.log` | Also write logs here. |
40
+ | `--log-level` | level | `INFO` | `--log-level WARNING` | `DEBUG`/`INFO`/`WARNING`/`ERROR`/`CRITICAL`. |
41
+
42
+ **Main effect:** computes the requested columns for every integer in `[start, end]`
43
+ and writes one shard file.
44
+
45
+ **Side effects:**
46
+ - Creates `<out>.<ext>` (the shard) and `<out>.<ext>.manifest.json` (columns, types,
47
+ row count, SHA256/SHA512/BLAKE3, range, `algorithm_release`, `verification.status:"computed"`).
48
+ - Creates a transient `<out>.<ext>.build/` (part files + `checkpoint.json`) during the
49
+ run; removed on success unless `--keep-build`.
50
+ - **stdout:** one JSON line — `{"status":"ok"|"noop","shard","manifest","row_count","hashes"}`.
51
+ - **stderr:** an estimate banner, the "safe to Ctrl-C / resume with …" message, and
52
+ per-chunk progress (`chunk i/N P% (rows, eta)`).
53
+ - **Exit codes:** `0` ok/noop · `2` bad input or unknown column · `130` interrupted (Ctrl-C).
54
+ - Re-running a completed work order is a **no-op** (`status:"noop"`). Ctrl-C is safe at
55
+ any point; rerun the same command to resume.
56
+
57
+ **When to use:** to produce a shard (contributor), or any time you need property values
58
+ for a range. Use `--dry-run` first to size a job; `--force` to rebuild; `--chunk-size`
59
+ smaller to cap memory, larger to reduce overhead.
60
+
61
+ ---
62
+
63
+ ## 2. `verify` — validate a shard
64
+
65
+ ```
66
+ atlas-algos verify --manifest <entry.json> --shard <shard> [options]
67
+ ```
68
+
69
+ | Option | Type | Default | Example | Meaning |
70
+ | --- | --- | --- | --- | --- |
71
+ | `--manifest` | path | required | `/tmp/shard.csv.manifest.json` | Manifest entry for the shard (`range_start`/`range_end`/`columns`). |
72
+ | `--shard` | path | required | `/tmp/shard.csv` | The shard file to check. |
73
+ | `--degree` | float | `0.1` | `--degree 1.0` | Fraction of rows independently recomputed (0.1 sampled … 1.0 full). |
74
+ | `--seed` | int | `0` | `--seed 42` | Deterministic sample seed. |
75
+ | `--format` | `parquet`\|`csv` | inferred from extension | `--format csv` | Override the shard format. |
76
+ | `--log-file` | path | none | | Also write logs here. |
77
+ | `--log-level` | level | `INFO` | | Log verbosity. |
78
+
79
+ **Main effect:** recomputes a sample of the requested columns and compares against the
80
+ shard; checks row count, column presence, and `n` contiguity; at `--degree 1.0` also
81
+ re-checks the file's SHA256 if the manifest carries one.
82
+
83
+ **Side effects:** writes nothing. **stdout:** one JSON line —
84
+ `{"status":"pass"|"fail","degree","checked_rows","sampled","row_count","failures":[…]}`.
85
+ **Exit codes:** `0` pass · `3` verification mismatch · `2` bad input.
86
+
87
+ **When to use:** contributor self-check before submitting (`--degree 0.1`); maintainer/CI
88
+ validation; `--degree 1.0` for disputed or release-critical shards.
89
+
90
+ ---
91
+
92
+ ## 3. `tools/make_work_order.py` — build a work order
93
+
94
+ ```
95
+ python3 tools/make_work_order.py --start <S> --end <E> [options]
96
+ ```
97
+
98
+ | Option | Type | Default | Example | Meaning |
99
+ | --- | --- | --- | --- | --- |
100
+ | `--start` | int | required | `0` | Range start (inclusive). |
101
+ | `--end` | int | required | `100000` | Range end (inclusive). |
102
+ | `--columns` | csv | all registered | `--columns is_prime,euler_phi` | Explicit column list. |
103
+ | `--exclude` | csv | none | `--exclude partition_count` | Drop columns from the default-all set. |
104
+ | `--id` | str | `all_<start>_<end>` | `--id demo` | Work-order id. |
105
+ | `--algorithm-release` | str | `dev` | `--algorithm-release algos-2026.06` | Pinned release stamped into the order. |
106
+ | `--out` | path | stdout | `--out /tmp/wo.json` | Write here instead of printing. |
107
+
108
+ **Main effect:** emits a work-order JSON requesting all registered columns (minus
109
+ `--exclude`) or the given `--columns`. **Side effects:** writes `--out` or prints to
110
+ stdout (a one-line note to stderr when `--out` is used). **When to use:** to create the
111
+ input for `compute`/`perfrun`; it emulates what the Shards planner will emit.
112
+
113
+ ---
114
+
115
+ ## 4. `tools/perfrun.py` — benchmark a compute run
116
+
117
+ ```
118
+ python3 tools/perfrun.py --start <S> --end <E> [options]
119
+ ```
120
+
121
+ | Option | Type | Default | Example | Meaning |
122
+ | --- | --- | --- | --- | --- |
123
+ | `--start` / `--end` | int | required | `0` / `100000` | Range to compute. |
124
+ | `--columns` | csv | all registered | | Explicit column list. |
125
+ | `--exclude` | csv | none | `--exclude partition_count` | Drop columns (use this to skip the super-linear ones). |
126
+ | `--format` | `parquet`\|`csv` | `csv` | `--format parquet` | Shard format. |
127
+ | `--chunk-size` | int | `50000` | `--chunk-size 20000` | Part/memory granularity. |
128
+ | `--out` | path | temp file | `--out /tmp/perf` | Output path. |
129
+
130
+ **Main effect:** runs `compute` in-process (with `--force`) and prints wall time,
131
+ user/sys CPU, CPU utilization, throughput (rows/s, µs/row), peak RSS, and output size.
132
+ **Side effects:** writes a shard (a temp file by default). **When to use:** to measure
133
+ performance and seed the planner cost model. For CPU/memory cross-checks also run the
134
+ plain `compute` under `/usr/bin/time -l` (macOS) or `/usr/bin/time -v` (Linux).
135
+
136
+ ---
137
+
138
+ ## 5. `tools/bench.py` — per-property micro-benchmark
139
+
140
+ ```
141
+ python3 tools/bench.py [n]
142
+ ```
143
+
144
+ | Argument | Type | Default | Example | Meaning |
145
+ | --- | --- | --- | --- | --- |
146
+ | `n` | int (positional) | `1000` | `python3 tools/bench.py 1000000` | The integer each property is timed at. |
147
+
148
+ **Main effect:** prints a TSV of `column big_o usec` (best-of-5 per-call timing).
149
+ **Side effects:** none (no files written). **When to use:** to measure per-property cost
150
+ and refine the cost model; note this is the standalone cost (each factor column re-runs
151
+ factorization here, whereas the executor shares one factorization per n).
152
+
153
+ ---
154
+
155
+ ## 6. Tests
156
+
157
+ ```
158
+ python3 -m unittest discover -s tests -t . # all tests
159
+ python3 -m unittest tests.test_properties # one module
160
+ python3 -m unittest tests.test_properties.PropertyTest.test_first_100_all_properties
161
+ /path/to/venv/bin/python -m unittest discover -s tests -t . # include parquet (needs pyarrow)
162
+ ```
163
+
164
+ **Main effect:** runs the unit suite (property test vectors, compute/verify, resume,
165
+ estimate, BLAKE3 vectors, parquet round-trip). **Side effects:** uses temp dirs; the
166
+ first run regenerates `precomputed/primes_le_31623.txt` if it is missing. Parquet and native-blake3 tests
167
+ **skip** when those libraries are absent. **When to use:** after any change.
@@ -0,0 +1,114 @@
1
+ # Algos executor — interface
2
+
3
+ The executor is a **Python CLI**, invoked as a module today and as a console
4
+ script (`atlas-algos`) once installed. The Go CLI will wrap these same two
5
+ commands later (`integer-atlas compute` / `verify` shell out to this). It is
6
+ **stateless**: every run is a pure function of its input manifest.
7
+
8
+ ```
9
+ atlas-algos compute --manifest <work-order.json> --out <shard> [options]
10
+ atlas-algos verify --manifest <entry.json> --shard <shard> [options]
11
+ ```
12
+
13
+ ## compute — generate a shard
14
+
15
+ | Option | Default | Meaning |
16
+ | --- | --- | --- |
17
+ | `--manifest PATH` | required | work order: `{start, end, columns}` (+ optional `id`, `algorithm_release`) |
18
+ | `--out PATH` | required | destination shard path (extension normalized to the format) |
19
+ | `--format {parquet,csv}` | `parquet` | shard format. `parquet` needs pyarrow; `csv` is stdlib (dev/tests) |
20
+ | `--chunk-size N` | `50000` | rows per part / checkpoint interval — the resume granularity |
21
+ | `--force` | off | discard any existing output/checkpoint and rebuild |
22
+ | `--no-resume` | off | ignore the checkpoint and recompute from scratch (keeps `--out`) |
23
+ | `--keep-build` | off | keep the `.build` working dir after success (default: removed) |
24
+ | `--dry-run` | off | print the work estimate and exit without computing |
25
+ | `--log-file PATH` | none | also write logs here |
26
+ | `--log-level LEVEL` | `INFO` | `DEBUG`/`INFO`/`WARNING`/… |
27
+
28
+ ## verify — validate a shard
29
+
30
+ | Option | Default | Meaning |
31
+ | --- | --- | --- |
32
+ | `--manifest PATH` | required | manifest entry for the shard (`range_start`/`range_end`/`columns`) |
33
+ | `--shard PATH` | required | the shard file to check |
34
+ | `--degree F` | `0.1` | share of rows independently recomputed (0.1 sampled … 1.0 full) |
35
+ | `--seed N` | `0` | sample seed (deterministic) |
36
+ | `--format {parquet,csv}` | inferred from extension | override the shard format |
37
+ | `--log-file`, `--log-level` | | as above |
38
+
39
+ ## Output
40
+
41
+ - **Shard file** at `--out` (e.g. `shard.parquet`), written atomically.
42
+ - **Manifest sidecar** at `<out>.manifest.json` — columns, types, row count, hashes
43
+ (sha256, sha512, blake3 — native if installed, otherwise the bundled pure-Python fallback), range, `algorithm_release`, `verification.status: computed`.
44
+ - **Working dir** `<out>.build/` exists only while running (parts + `checkpoint.json`);
45
+ removed on success unless `--keep-build`.
46
+
47
+ ## stdout / stderr / logs
48
+
49
+ - **stdout**: exactly one line of JSON — the result summary — for machine consumption
50
+ (the Go CLI parses this). Nothing else goes to stdout.
51
+ - compute: `{"status": "ok|noop", "shard", "manifest", "row_count", "hashes"}`
52
+ - verify: `{"status": "pass|fail", "degree", "checked_rows", "sampled", "row_count", "failures": [...]}`
53
+ - **stderr**: human-readable logs and per-chunk progress (`chunk 3/4 done (n=501..750, rows 750/1000)`).
54
+ - **log file**: same records as stderr when `--log-file` is given.
55
+
56
+ ## Exit codes
57
+
58
+ | Code | Meaning |
59
+ | --- | --- |
60
+ | `0` | success (compute ok/noop, or verify pass) |
61
+ | `2` | bad input — malformed work order or unknown column |
62
+ | `3` | verification mismatch (verify only) |
63
+ | `130` | interrupted (Ctrl-C) — progress checkpointed |
64
+ | `1` | other error |
65
+
66
+ ## Recovery model
67
+
68
+ Parquet cannot be appended atomically per row, so resumability is by **chunked
69
+ commit + checkpoint**:
70
+
71
+ 1. `[start, end]` is split into chunks of `--chunk-size` rows (≈ one row group each).
72
+ 2. Each chunk is computed in memory and written to its own part file **atomically**
73
+ (temp → `fsync` → `rename`), then `checkpoint.json` is advanced atomically.
74
+ 3. On restart, the executor **checks** the checkpoint, validates existing parts
75
+ (presence + row counts), discards any partial/trailing temp or part beyond the
76
+ trustworthy watermark, and **resumes from the next chunk**. At most one chunk is
77
+ ever recomputed.
78
+ 4. Finalize concatenates parts into the single shard (atomic), hashes it, and writes
79
+ the manifest. The checkpoint phase moves `computing → finalizing → done`.
80
+
81
+ Re-running a completed work order is a **no-op** (`status: noop`) — detected from the
82
+ existing shard + manifest, so it is safe to retry blindly. Because methods are pure
83
+ and chunk ranges are deterministic, a resumed run produces byte-identical output to a
84
+ clean run (verified by the test suite).
85
+
86
+ ## Work estimate, progress & interrupting
87
+
88
+ Every `compute` run prints a banner to stderr **before doing any work**:
89
+
90
+ ```
91
+ Estimated work: 1,000,000 rows x 7 column(s), ~5 min (coarse).
92
+ Safe to interrupt (Ctrl-C) at any time — progress is checkpointed.
93
+ Resume with:
94
+ atlas-algos compute --manifest wo.json --out shard.parquet --chunk-size 300
95
+ ```
96
+
97
+ - **Estimate**: derived from each column's static `complexity` × row count (machine-independent, coarse). Super-linear columns (e.g. `partition_count`) are flagged because a flat per-row cost under-models them. `--dry-run` prints the full estimate as JSON and stops.
98
+ - **Progress**: each chunk logs `chunk i/N P% (done/total rows, eta …)` to stderr (and `--log-file`).
99
+ - **Interrupting**: Ctrl-C is safe at any moment — the chunk in flight is simply not committed, and the checkpoint always reflects the last durably-written chunk. On interrupt the executor prints the percentage reached and the exact **resume command** (re-running `compute` resumes by default), and exits with code 130.
100
+
101
+ ## Running
102
+
103
+ ```
104
+ # tests (stdlib unittest, zero dependencies)
105
+ python3 -m unittest discover -s tests -t .
106
+
107
+ # try it (CSV, no pyarrow needed)
108
+ atlas-algos compute --manifest tests/manifests/base_1_1000.json --out /tmp/shard --format csv
109
+ atlas-algos verify --manifest /tmp/shard.csv.manifest.json --shard /tmp/shard.csv --degree 1.0
110
+
111
+ # real format
112
+ pip install pyarrow # or: uv sync --extra parquet
113
+ atlas-algos compute --manifest <work-order>.json --out shard.parquet
114
+ ```
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: integer-atlas-algos
3
+ Version: 0.1.0
4
+ Summary: Integer Atlas — stateless property methods and shard executor
5
+ Project-URL: Homepage, https://github.com/outcompute/integer-atlas-algos
6
+ Project-URL: Source, https://github.com/outcompute/integer-atlas-algos
7
+ License: Apache-2.0
8
+ Keywords: dataset,integers,number-theory,parquet,shards
9
+ Classifier: License :: OSI Approved :: Apache Software License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
13
+ Requires-Python: >=3.10
14
+ Provides-Extra: dev
15
+ Requires-Dist: pytest>=8; extra == 'dev'
16
+ Provides-Extra: hash
17
+ Requires-Dist: blake3>=0.4; extra == 'hash'
18
+ Provides-Extra: parquet
19
+ Requires-Dist: pyarrow>=15; extra == 'parquet'
20
+ Description-Content-Type: text/markdown
21
+
22
+ # Integer Atlas — Algos
23
+
24
+ > Directory name is provisional and will be renamed later.
25
+
26
+ The property methods and the compute engine (the executor). This repository is **independent and stateless**: nothing here imports the other repos, and it stores no information about any shard, pack, release, or lifecycle. It is used exactly twice per shard — `compute` (by a contributor) and `verify` (by a maintainer). Master design: [`../Integer Atlas Documentation.docx.md`](../Integer%20Atlas%20Documentation.docx.md) (§16, §11, §17, §2.5).
27
+
28
+ ## Purpose
29
+
30
+ - Define each integer property as a small registered method.
31
+ - Provide the executor that computes shards and verifies them.
32
+ - Cut algorithm releases that downstream shards pin to.
33
+
34
+ ## What lives here
35
+
36
+ Algorithms are a **single flat directory** — one method per file, the filename matching the column it produces. Pack, shard, and release names never appear in the layout. Shared helpers live in `_lib`. There are no pack, coverage, or planner files here — those belong to the Shards repo.
37
+
38
+ ```
39
+ integer_atlas_algos/ the installable package (atlas-algos)
40
+ registry.py @property_method + the flat column registry
41
+ context.py per-n memoized context (factorization, divisors)
42
+ properties/ one method per file; filename = column name (46 columns)
43
+ _lib/ shared helpers
44
+ factorization.py prime-table factorization
45
+ multiplicative.py sigma() shared by divisor functions
46
+ blake3_py.py pure-Python BLAKE3 fallback
47
+ precomputed/ regenerable resource cache (not state)
48
+ primes_le_31623.txt base primes up to ceil(sqrt(1e9))
49
+ executor/ the stateless engine
50
+ cli.py argparse CLI (compute / verify, estimate, resume)
51
+ compute.py chunked, resumable, crash-safe, streaming finalize
52
+ verify.py sampled recompute + compare
53
+ estimate.py pre-run work estimate from static complexity
54
+ manifest.py work-order loading, draft manifest, hashing
55
+ atomicio.py atomic write / checkpoint primitives
56
+ backends/ csv_backend (stdlib) + parquet_backend (pyarrow)
57
+ tools/ bench.py, perfrun.py, make_work_order.py (dev only)
58
+ tests/ unittest suite + sample work-order manifests
59
+ pyproject.toml package metadata, console script, extras
60
+ COMMANDS.md INTERFACE.md PUBLISHING.md reference docs
61
+ ```
62
+
63
+ Run it with `pip install -e .` then `atlas-algos …`, or from a source checkout as
64
+ `python3 -m integer_atlas_algos.executor …` (run from the `algos/` directory).
65
+
66
+ All 46 agreed properties are implemented. See [INTERFACE.md](INTERFACE.md) for the
67
+ complete command reference, output layout, exit codes, and the resume model.
68
+
69
+ ## Precomputed data
70
+
71
+ To factor any n up to BOUND² you only need primes up to BOUND; with
72
+ BOUND = 31623 (≥ √1e9) that is ~3401 primes covering the whole 0..1e9 range.
73
+ `precomputed/primes_le_31623.txt` holds them — a deterministic, regenerable cache
74
+ (sieved on first use if missing), not state about any shard or pack. It bounds
75
+ worst-case factorization to ~3401 trial divisions regardless of n's size in range.
76
+
77
+ ## Status and known limitations
78
+
79
+ Done and tested: 46 properties, the stateless executor (compute/verify/estimate,
80
+ resumable + atomic + streaming finalize), CSV and **validated Parquet** backends,
81
+ SHA256/SHA512/BLAKE3 hashing. Remaining performance work (not correctness):
82
+ per-shard run is single-threaded pure Python (~3700 rows/s/core — scale by running
83
+ many shards in parallel, one executor each); a segmented-sieve batch fast-path and
84
+ gmpy2 would speed factorization further; `n` is int64 (covers 0..2^63−1, hence the
85
+ 0..1e9 target) — int128 only needed beyond that; `partition_count` is small-n only
86
+ (its values explode) even though its per-row recompute is now memoized.
87
+
88
+ ## Method contract
89
+
90
+ - Signature is `f(n, ctx) -> value`. The method **name becomes the column name**.
91
+ - `ctx` is a memoized context of shared intermediates (factorization, divisor list, binary representation) declared via `requires`, so expensive work is computed once per number, not once per method.
92
+ - Metadata (canonical column id, dtype, nullable, zero/negative behavior, `requires`, test vectors) is registered next to the function, so the schema, column ids, and manifest columns are generated from code. A method declares **only its own column** — it says nothing about packs, shards, releases, or any other entity.
93
+ - A method may provide an optional **vectorized fast-path** for speed; the **scalar form is always the verification ground truth**.
94
+ - **Test vectors live with the method** and are reused by both the property-proposal gate and shard verification.
95
+
96
+ ## Executor verbs
97
+
98
+ The executor is **stateless** — both verbs are pure functions of an input manifest that names a `start`, an `end`, and the requested columns. It does not interpret packs, the grid, or policy; if the manifest does not follow project conventions that is fine, and it errors only if a requested column name is unknown.
99
+
100
+ - `compute --manifest <work-order> --out <shard>` — run the requested column functions over `[start, end]` and write the shard, filling the output manifest's column types and hashes from the method metadata.
101
+ - `verify --manifest <entry> --shard <file> --degree <fraction>` — recompute a share of the requested columns (0.1 sampled … 1.0 full) and compare against the shard; report pass/fail.
102
+
103
+ ## Releases
104
+
105
+ Cut an algorithm release once enough methods have merged. It is stamped with the commit id and PR URLs, and is what a shard's `algorithm_versions` pins to. Only released methods are eligible for official shards; unreleased methods exist only for local side-loading.
106
+
107
+ ## Packaging
108
+
109
+ Distributed with **uv**: each release pins `uv.lock`, so `integer-atlas algos sync` (from the CLI) materializes the exact released code and dependencies on any platform — fast, reproducible, no system Python required. Optional PyInstaller one-file artifacts and an OCI image are conveniences; uv is canonical. Compute speed comes from native math libraries (e.g. gmpy2, primesieve) and the vectorized fast-paths, not from the packaging format.
110
+
111
+ ## State: none
112
+
113
+ This repo stores no state about any shard, pack, release, or lifecycle. Planning, pack definitions, the coverage policy, and the manifest sets all live in the Shards repo. Algos sees a given shard exactly twice — `compute` (contributor) and `verify` (maintainer).
114
+
115
+ ## Contributing
116
+
117
+ PRs add algorithms (the code pipeline). Include the method, its metadata, and its test vectors. See §10.1 and §17 in the master doc.
@@ -0,0 +1,85 @@
1
+ # Publishing Algos to PyPI — GitHub-managed
2
+
3
+ Goal: `pip install integer-atlas-algos`, releases driven entirely from GitHub via
4
+ Actions + **PyPI Trusted Publishing** (OIDC — no API tokens stored anywhere).
5
+
6
+ Assumes the **Algos repo is its own GitHub repository** (repo root = this `algos/`
7
+ directory). If you instead keep a monorepo with `algos/` as a subdirectory, add
8
+ `defaults.run.working-directory: algos` to the workflow jobs and a `paths:` filter
9
+ to `ci.yml`.
10
+
11
+ ## Status: packaging is done
12
+
13
+ - Installable package `integer_atlas_algos/` (flat layout), all imports package-qualified.
14
+ - `pyproject.toml` configured: hatchling, dynamic version from
15
+ `integer_atlas_algos/__init__.py`, `atlas-algos` console script, `parquet`/`hash`/`dev`
16
+ extras, primes table shipped via `artifacts`.
17
+ - Build + clean-venv install verified locally (`atlas-algos` runs standalone, all three
18
+ hashes populate, primes data ships).
19
+ - Workflows are committed under `.github/workflows/`: `ci.yml`, `release.yml`, `testpypi.yml`.
20
+
21
+ ## One-time setup
22
+
23
+ 1. **Create the GitHub repo** and push this directory to it (`main` branch).
24
+ 2. **Fill metadata** in `pyproject.toml`: `authors`, and `[project.urls]`
25
+ (Homepage/Source → the new repo). Confirm `license`.
26
+ 3. **Create GitHub Environments** (repo → Settings → Environments): `pypi` and
27
+ (optional) `testpypi`. Add required reviewers on `pypi` if you want a manual approval
28
+ gate before each publish.
29
+ 4. **Configure PyPI Trusted Publishing** (do this *before* the first release, as a
30
+ "pending publisher"): on https://pypi.org → Account → Publishing → Add a pending
31
+ publisher:
32
+ - PyPI Project Name: `integer-atlas-algos`
33
+ - Owner: your GitHub user/org
34
+ - Repository name: the Algos repo
35
+ - Workflow filename: `release.yml`
36
+ - Environment name: `pypi`
37
+ 5. (Optional) Repeat step 4 on https://test.pypi.org with Environment `testpypi` and
38
+ workflow `testpypi.yml`.
39
+
40
+ No tokens are created or stored — OIDC handles auth at publish time.
41
+
42
+ ## The workflows
43
+
44
+ - **`ci.yml`** — on every push to `main` and every PR: installs `.[parquet,hash,dev]`
45
+ across Python 3.10–3.13 and runs the unittest suite (so parquet + native blake3 paths
46
+ are covered in CI).
47
+ - **`release.yml`** — on a pushed tag `v*`: builds the wheel + sdist and publishes to
48
+ PyPI via OIDC (environment `pypi`).
49
+ - **`testpypi.yml`** — manual (`workflow_dispatch`): same build, publishes to TestPyPI
50
+ (environment `testpypi`) for a dry run.
51
+
52
+ ## Cutting a release
53
+
54
+ 1. Bump `__version__` in `integer_atlas_algos/__init__.py` (SemVer). PyPI rejects
55
+ re-uploads of an existing version.
56
+ 2. Open a PR; merge to `main` once CI is green.
57
+ 3. (Optional) Run the **TestPyPI** workflow from the Actions tab and verify
58
+ `pip install -i https://test.pypi.org/simple/ integer-atlas-algos`.
59
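+ 4. Tag the merged commit and push the tag (the tag should match the `__version__` bumped in step 1, since the published version is read from `__init__.py`, not from the tag):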
60
+ ```
61
+ git tag v0.1.0
62
+ git push origin v0.1.0
63
+ ```
64
+ `release.yml` builds and publishes to PyPI automatically.
65
+ 5. (Optional) Create a GitHub Release from the tag for changelog/notes.
66
+
67
+ ## Local manual fallback (if ever needed)
68
+
69
+ ```
70
+ cd algos
71
+ python3 -m pip install --upgrade build twine
72
+ rm -rf dist && python3 -m build
73
+ python3 -m twine check dist/*
74
+ python3 -m twine upload dist/* # needs TWINE_USERNAME=__token__ / TWINE_PASSWORD=<token>
75
+ ```
76
+
77
+ ## Notes
78
+
79
+ - PyPI **project name** `integer-atlas-algos` must be available; **import name** is
80
+ `integer_atlas_algos`; **command** is `atlas-algos`.
81
+ - `dist/`, `build/`, `*.egg-info/` are git-ignored (see `.gitignore`).
82
+ - `tools/` and `tests/` ship in the sdist but not the wheel.
83
+ - This flow is for the **Python package only** (Algos). The CLI repo (Go) will use a
84
+ different release path (e.g. GoReleaser → GitHub Releases); see the project-level
85
+ integration TODO.
@@ -0,0 +1,96 @@
1
+ # Integer Atlas — Algos
2
+
3
+ > Directory name is provisional and will be renamed later.
4
+
5
+ The property methods and the compute engine (the executor). This repository is **independent and stateless**: nothing here imports the other repos, and it stores no information about any shard, pack, release, or lifecycle. It is used exactly twice per shard — `compute` (by a contributor) and `verify` (by a maintainer). Master design: [`../Integer Atlas Documentation.docx.md`](../Integer%20Atlas%20Documentation.docx.md) (§16, §11, §17, §2.5).
6
+
7
+ ## Purpose
8
+
9
+ - Define each integer property as a small registered method.
10
+ - Provide the executor that computes shards and verifies them.
11
+ - Cut algorithm releases that downstream shards pin to.
12
+
13
+ ## What lives here
14
+
15
+ Algorithms are a **single flat directory** — one method per file, the filename matching the column it produces. Pack, shard, and release names never appear in the layout. Shared helpers live in `_lib`. There are no pack, coverage, or planner files here — those belong to the Shards repo.
16
+
17
+ ```
18
+ integer_atlas_algos/ the installable package (atlas-algos)
19
+ registry.py @property_method + the flat column registry
20
+ context.py per-n memoized context (factorization, divisors)
21
+ properties/ one method per file; filename = column name (46 columns)
22
+ _lib/ shared helpers
23
+ factorization.py prime-table factorization
24
+ multiplicative.py sigma() shared by divisor functions
25
+ blake3_py.py pure-Python BLAKE3 fallback
26
+ precomputed/ regenerable resource cache (not state)
27
+ primes_le_31623.txt base primes up to ceil(sqrt(1e9))
28
+ executor/ the stateless engine
29
+ cli.py argparse CLI (compute / verify, estimate, resume)
30
+ compute.py chunked, resumable, crash-safe, streaming finalize
31
+ verify.py sampled recompute + compare
32
+ estimate.py pre-run work estimate from static complexity
33
+ manifest.py work-order loading, draft manifest, hashing
34
+ atomicio.py atomic write / checkpoint primitives
35
+ backends/ csv_backend (stdlib) + parquet_backend (pyarrow)
36
+ tools/ bench.py, perfrun.py, make_work_order.py (dev only)
37
+ tests/ unittest suite + sample work-order manifests
38
+ pyproject.toml package metadata, console script, extras
39
+ COMMANDS.md INTERFACE.md PUBLISHING.md reference docs
40
+ ```
41
+
42
+ Run it with `pip install -e .` then `atlas-algos …`, or from a source checkout as
43
+ `python3 -m integer_atlas_algos.executor …` (run from the `algos/` directory).
44
+
45
+ All 46 agreed properties are implemented. See [INTERFACE.md](INTERFACE.md) for the
46
+ complete command reference, output layout, exit codes, and the resume model.
47
+
48
+ ## Precomputed data
49
+
50
+ To factor any n up to BOUND² you only need primes up to BOUND; with
51
+ BOUND = 31623 (≥ √1e9) that is ~3401 primes covering the whole 0..1e9 range.
52
+ `precomputed/primes_le_31623.txt` holds them — a deterministic, regenerable cache
53
+ (sieved on first use if missing), not state about any shard or pack. It bounds
54
+ worst-case factorization to ~3401 trial divisions regardless of n's size in range.
55
+
56
+ ## Status and known limitations
57
+
58
+ Done and tested: 46 properties, the stateless executor (compute/verify/estimate,
59
+ resumable + atomic + streaming finalize), CSV and **validated Parquet** backends,
60
+ SHA256/SHA512/BLAKE3 hashing. Remaining performance work (not correctness):
61
+ per-shard run is single-threaded pure Python (~3700 rows/s/core — scale by running
62
+ many shards in parallel, one executor each); a segmented-sieve batch fast-path and
63
+ gmpy2 would speed factorization further; `n` is int64 (covers 0..2^63−1, which includes the
64
+ 0..1e9 target) — int128 is only needed beyond that; `partition_count` is small-n only
65
+ (its values explode) even though its per-row recompute is now memoized.
66
+
67
+ ## Method contract
68
+
69
+ - Signature is `f(n, ctx) -> value`. The method **name becomes the column name**.
70
+ - `ctx` is a memoized context of shared intermediates (factorization, divisor list, binary representation) declared via `requires`, so expensive work is computed once per number, not once per method.
71
+ - Metadata (canonical column id, dtype, nullable, zero/negative behavior, `requires`, test vectors) is registered next to the function, so the schema, column ids, and manifest columns are generated from code. A method declares **only its own column** — it says nothing about packs, shards, releases, or any other entity.
72
+ - A method may provide an optional **vectorized fast-path** for speed; the **scalar form is always the verification ground truth**.
73
+ - **Test vectors live with the method** and are reused by both the property-proposal gate and shard verification.
74
+
75
+ ## Executor verbs
76
+
77
+ The executor is **stateless** — both verbs are pure functions of an input manifest that names a `start`, an `end`, and the requested columns. It does not interpret packs, the grid, or policy: a manifest that departs from project conventions is still accepted, and the executor errors only if a requested column name is unknown.
78
+
79
+ - `compute --manifest <work-order> --out <shard>` — run the requested column functions over `[start, end]` and write the shard, filling the output manifest's column types and hashes from the method metadata.
80
+ - `verify --manifest <entry> --shard <file> --degree <fraction>` — recompute a share of the requested columns (0.1 sampled … 1.0 full) and compare against the shard; report pass/fail.
81
+
82
+ ## Releases
83
+
84
+ Cut an algorithm release once enough methods have merged. It is stamped with the commit id and PR URLs, and is what a shard's `algorithm_versions` pins to. Only released methods are eligible for official shards; unreleased methods exist only for local side-loading.
85
+
86
+ ## Packaging
87
+
88
+ Distributed with **uv**: each release pins `uv.lock`, so `integer-atlas algos sync` (from the CLI) materializes the exact released code and dependencies on any platform — fast, reproducible, no system Python required. Optional PyInstaller one-file artifacts and an OCI image are conveniences; uv is canonical. Compute speed comes from native math libraries (e.g. gmpy2, primesieve) and the vectorized fast-paths, not from the packaging format.
89
+
90
+ ## State: none
91
+
92
+ This repo stores no state about any shard, pack, release, or lifecycle. Planning, pack definitions, the coverage policy, and the manifest sets all live in the Shards repo. Algos sees a given shard exactly twice — `compute` (contributor) and `verify` (maintainer).
93
+
94
+ ## Contributing
95
+
96
+ PRs add algorithms (the code pipeline). Include the method, its metadata, and its test vectors. See §10.1 and §17 in the master doc.
@@ -0,0 +1,3 @@
1
+ """Integer Atlas — stateless property methods and shard executor."""
2
+
3
+ __version__ = "0.1.0"