oeis-seek 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oeis_seek-0.1.0/.github/workflows/ci.yml +27 -0
- oeis_seek-0.1.0/.github/workflows/release.yml +44 -0
- oeis_seek-0.1.0/.gitignore +14 -0
- oeis_seek-0.1.0/LICENSE +21 -0
- oeis_seek-0.1.0/PKG-INFO +134 -0
- oeis_seek-0.1.0/README.md +109 -0
- oeis_seek-0.1.0/pyproject.toml +60 -0
- oeis_seek-0.1.0/src/oeis_seek/__init__.py +7 -0
- oeis_seek-0.1.0/src/oeis_seek/cli.py +185 -0
- oeis_seek-0.1.0/src/oeis_seek/core.py +63 -0
- oeis_seek-0.1.0/src/oeis_seek/data/core_sequences.txt +187 -0
- oeis_seek-0.1.0/src/oeis_seek/download.py +77 -0
- oeis_seek-0.1.0/src/oeis_seek/index.py +154 -0
- oeis_seek-0.1.0/src/oeis_seek/matcher.py +32 -0
- oeis_seek-0.1.0/src/oeis_seek/models.py +41 -0
- oeis_seek-0.1.0/src/oeis_seek/rank.py +106 -0
- oeis_seek-0.1.0/src/oeis_seek/transforms/__init__.py +31 -0
- oeis_seek-0.1.0/src/oeis_seek/transforms/builtin.py +105 -0
- oeis_seek-0.1.0/src/oeis_seek/transforms/normalize.py +23 -0
- oeis_seek-0.1.0/tests/conftest.py +25 -0
- oeis_seek-0.1.0/tests/fixtures/mini-names.txt +7 -0
- oeis_seek-0.1.0/tests/fixtures/mini-stripped.txt +8 -0
- oeis_seek-0.1.0/tests/test_cli.py +143 -0
- oeis_seek-0.1.0/tests/test_matcher.py +46 -0
- oeis_seek-0.1.0/tests/test_rank.py +83 -0
- oeis_seek-0.1.0/tests/test_transforms.py +85 -0
- oeis_seek-0.1.0/tools/generate_core_set.py +123 -0
- oeis_seek-0.1.0/uv.lock +124 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
# Least privilege: lint and tests only read the checkout; they publish nothing.
|
|
8
|
+
permissions:
|
|
9
|
+
contents: read
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
lint-and-test:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
timeout-minutes: 10
|
|
15
|
+
steps:
|
|
16
|
+
- name: Check out the repository
|
|
17
|
+
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
|
|
18
|
+
with:
|
|
19
|
+
persist-credentials: false
|
|
20
|
+
- name: Install uv
|
|
21
|
+
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1
|
|
22
|
+
- name: Sync dependencies
|
|
23
|
+
run: uv sync --frozen
|
|
24
|
+
- name: Lint
|
|
25
|
+
run: uv run ruff check .
|
|
26
|
+
- name: Test
|
|
27
|
+
run: uv run pytest
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
# Publish to PyPI when a version tag (v*) is pushed. Trusted Publishing (OIDC)
|
|
4
|
+
# mints a short-lived token at run time, so no API token is ever stored in the
|
|
5
|
+
# repository or its secrets. The PyPI trusted publisher must be registered first
|
|
6
|
+
# (project: oeis-seek, workflow: release.yml, environment: pypi). Tests run before
|
|
7
|
+
# the build because a published version is immutable and cannot be replaced.
|
|
8
|
+
#
|
|
9
|
+
# pypa/gh-action-pypi-publish is a drop-in alternative if uv-native publishing is
|
|
10
|
+
# ever unavailable.
|
|
11
|
+
|
|
12
|
+
on:
|
|
13
|
+
push:
|
|
14
|
+
tags:
|
|
15
|
+
- "v*"
|
|
16
|
+
|
|
17
|
+
# Least privilege: the workflow needs nothing from the repository token. The OIDC
|
|
18
|
+
# id-token is granted only to the release job, the sole job that publishes.
|
|
19
|
+
permissions: {}
|
|
20
|
+
|
|
21
|
+
jobs:
|
|
22
|
+
release:
|
|
23
|
+
runs-on: ubuntu-latest
|
|
24
|
+
timeout-minutes: 10
|
|
25
|
+
environment:
|
|
26
|
+
name: pypi
|
|
27
|
+
url: https://pypi.org/p/oeis-seek
|
|
28
|
+
permissions:
|
|
29
|
+
id-token: write # mint the OIDC token Trusted Publishing exchanges with PyPI
|
|
30
|
+
steps:
|
|
31
|
+
- name: Check out the repository
|
|
32
|
+
uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
|
|
33
|
+
with:
|
|
34
|
+
persist-credentials: false
|
|
35
|
+
- name: Install uv
|
|
36
|
+
uses: astral-sh/setup-uv@f0ec1fc3b38f5e7cd731bb6ce540c5af426746bb # v6.1
|
|
37
|
+
- name: Sync dependencies
|
|
38
|
+
run: uv sync --frozen
|
|
39
|
+
- name: Test
|
|
40
|
+
run: uv run pytest
|
|
41
|
+
- name: Build the sdist and wheel
|
|
42
|
+
run: uv build
|
|
43
|
+
- name: Publish to PyPI via Trusted Publishing
|
|
44
|
+
run: uv publish --trusted-publishing always
|
oeis_seek-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 niarenaw
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
oeis_seek-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oeis-seek
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Transform-aware identifier for OEIS integer sequences
|
|
5
|
+
Project-URL: Homepage, https://github.com/niarenaw/oeis-seek
|
|
6
|
+
Project-URL: Repository, https://github.com/niarenaw/oeis-seek
|
|
7
|
+
Project-URL: Issues, https://github.com/niarenaw/oeis-seek/issues
|
|
8
|
+
Author: niarenaw
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: cli,integer-sequences,mathematics,oeis,sequence
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: Science/Research
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
22
|
+
Requires-Python: >=3.11
|
|
23
|
+
Requires-Dist: platformdirs>=4.0
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
|
|
26
|
+
# oeis-seek
|
|
27
|
+
|
|
28
|
+
[CI](https://github.com/niarenaw/oeis-seek/actions/workflows/ci.yml)
|
|
29
|
+
[PyPI](https://pypi.org/project/oeis-seek/)
|
|
30
|
+
[License: MIT](LICENSE)
|
|
31
|
+
|
|
32
|
+
Identify the OEIS sequence a list of integers belongs to - even when the raw
|
|
33
|
+
numbers are not in OEIS but a simple transform of them is.
|
|
34
|
+
|
|
35
|
+
`oeis-seek` works offline against a local copy of the
|
|
36
|
+
[OEIS](https://oeis.org/) bulk dump. You download the dump once, then look up
|
|
37
|
+
sequences with no network calls and no rate limits.
|
|
38
|
+
|
|
39
|
+
## Install
|
|
40
|
+
|
|
41
|
+
Install the released CLI from PyPI (the package is `oeis-seek`; it installs a
|
|
42
|
+
`oeis-seek` command):
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
uv tool install oeis-seek # or: pipx install oeis-seek, or: pip install oeis-seek
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or work from a clone:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
uv sync
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Usage
|
|
55
|
+
|
|
56
|
+
First, download and index the OEIS dump (one time; re-run to refresh):
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
oeis-seek update
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Then identify a sequence:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
oeis-seek 0,1,1,2,3,5,8,13 # -> A000045 (Fibonacci), raw match
|
|
66
|
+
oeis-seek 2,6,12,20,30,42 # -> hit via first differences
|
|
67
|
+
echo "1 2 6 24 120" | oeis-seek # terms from stdin
|
|
68
|
+
oeis-seek 2,6,12,20 --json # machine-readable output
|
|
69
|
+
oeis-seek 2,6,12,20 --limit 5 # cap results (default 10)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Terms may be comma- or whitespace-separated, passed as arguments or piped on
|
|
73
|
+
stdin. At least four terms are required by default (`--min-terms` lowers it).
|
|
74
|
+
|
|
75
|
+
## How it works
|
|
76
|
+
|
|
77
|
+
Three ideas do the work.
|
|
78
|
+
|
|
79
|
+
### The matcher (framed-substring contiguous match)
|
|
80
|
+
|
|
81
|
+
The index stores each sequence's terms as a comma-framed string,
|
|
82
|
+
`,0,1,1,2,3,5,`. A lookup frames the query the same way, `,1,2,3,5,`, and asks
|
|
83
|
+
which stored sequences contain that substring. Framing the run on both sides
|
|
84
|
+
makes substring containment exactly the contiguous-subsequence predicate OEIS
|
|
85
|
+
itself uses: term boundaries hold (`2,3` cannot match inside `23`), terms must be
|
|
86
|
+
adjacent, and signs are respected. At ~370k sequences this resolves well under a
|
|
87
|
+
second, so the MVP needs no inverted index.
|
|
88
|
+
|
|
89
|
+
### The transforms
|
|
90
|
+
|
|
91
|
+
Before matching, `oeis-seek` also tries simple transforms of your input and looks
|
|
92
|
+
each result up:
|
|
93
|
+
|
|
94
|
+
- **raw** - the input as given
|
|
95
|
+
- **first differences** - `a(n+1) - a(n)`
|
|
96
|
+
- **partial sums** - running totals
|
|
97
|
+
- **consecutive ratios** - `a(n+1) / a(n)` (later over earlier), used only when
|
|
98
|
+
every ratio divides exactly, e.g. `1, 2, 6, 24` becomes `2, 3, 4`
|
|
99
|
+
- **higher-order differences** - first differences applied repeatedly (orders 2
|
|
100
|
+
and 3)
|
|
101
|
+
- **a(n) - n** and **a(n) / n** - subtract or divide by the 0-based index (the
|
|
102
|
+
latter integer-only)
|
|
103
|
+
- **absolute values** - `abs(a(n))`, to recognize a signed sequence by magnitude
|
|
104
|
+
|
|
105
|
+
Transforms live in a registry, so adding more later is one function plus one
|
|
106
|
+
registration.
|
|
107
|
+
|
|
108
|
+
### The ranking
|
|
109
|
+
|
|
110
|
+
Every hit gets a deterministic, explainable score. Matches needing no transform
|
|
111
|
+
rank above transformed ones, more matched terms rank higher, and sequences in
|
|
112
|
+
OEIS's curated `core` set are boosted so canonical sequences outrank obscure ones
|
|
113
|
+
that merely share a run. A run found near a sequence's opening is weak extra
|
|
114
|
+
evidence over one buried deep inside, and ties resolve by ascending A-number so
|
|
115
|
+
output is stable. All scoring weights live in one place, and each result tells
|
|
116
|
+
you which transform found it and why it ranked where it did.
|
|
117
|
+
|
|
118
|
+
## Development
|
|
119
|
+
|
|
120
|
+
Lint, format, and test:
|
|
121
|
+
|
|
122
|
+
```bash
|
|
123
|
+
uv run ruff check .
|
|
124
|
+
uv run ruff format .
|
|
125
|
+
uv run pytest
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
CI runs ruff and pytest on every push and pull request.
|
|
129
|
+
|
|
130
|
+
To refresh the embedded OEIS core set:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
uv run python tools/generate_core_set.py
|
|
134
|
+
```
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# oeis-seek
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/niarenaw/oeis-seek/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/oeis-seek/)
|
|
5
|
+
[License: MIT](LICENSE)
|
|
6
|
+
|
|
7
|
+
Identify the OEIS sequence a list of integers belongs to - even when the raw
|
|
8
|
+
numbers are not in OEIS but a simple transform of them is.
|
|
9
|
+
|
|
10
|
+
`oeis-seek` works offline against a local copy of the
|
|
11
|
+
[OEIS](https://oeis.org/) bulk dump. You download the dump once, then look up
|
|
12
|
+
sequences with no network calls and no rate limits.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
Install the released CLI from PyPI (the package is `oeis-seek`; it installs a
|
|
17
|
+
`oeis-seek` command):
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
uv tool install oeis-seek # or: pipx install oeis-seek, or: pip install oeis-seek
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or work from a clone:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
uv sync
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
First, download and index the OEIS dump (one time; re-run to refresh):
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
oeis-seek update
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Then identify a sequence:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
oeis-seek 0,1,1,2,3,5,8,13 # -> A000045 (Fibonacci), raw match
|
|
41
|
+
oeis-seek 2,6,12,20,30,42 # -> hit via first differences
|
|
42
|
+
echo "1 2 6 24 120" | oeis-seek # terms from stdin
|
|
43
|
+
oeis-seek 2,6,12,20 --json # machine-readable output
|
|
44
|
+
oeis-seek 2,6,12,20 --limit 5 # cap results (default 10)
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Terms may be comma- or whitespace-separated, passed as arguments or piped on
|
|
48
|
+
stdin. At least four terms are required by default (`--min-terms` lowers it).
|
|
49
|
+
|
|
50
|
+
## How it works
|
|
51
|
+
|
|
52
|
+
Three ideas do the work.
|
|
53
|
+
|
|
54
|
+
### The matcher (framed-substring contiguous match)
|
|
55
|
+
|
|
56
|
+
The index stores each sequence's terms as a comma-framed string,
|
|
57
|
+
`,0,1,1,2,3,5,`. A lookup frames the query the same way, `,1,2,3,5,`, and asks
|
|
58
|
+
which stored sequences contain that substring. Framing the run on both sides
|
|
59
|
+
makes substring containment exactly the contiguous-subsequence predicate OEIS
|
|
60
|
+
itself uses: term boundaries hold (`2,3` cannot match inside `23`), terms must be
|
|
61
|
+
adjacent, and signs are respected. At ~370k sequences this resolves well under a
|
|
62
|
+
second, so the MVP needs no inverted index.
|
|
63
|
+
|
|
64
|
+
### The transforms
|
|
65
|
+
|
|
66
|
+
Before matching, `oeis-seek` also tries simple transforms of your input and looks
|
|
67
|
+
each result up:
|
|
68
|
+
|
|
69
|
+
- **raw** - the input as given
|
|
70
|
+
- **first differences** - `a(n+1) - a(n)`
|
|
71
|
+
- **partial sums** - running totals
|
|
72
|
+
- **consecutive ratios** - `a(n+1) / a(n)` (later over earlier), used only when
|
|
73
|
+
every ratio divides exactly, e.g. `1, 2, 6, 24` becomes `2, 3, 4`
|
|
74
|
+
- **higher-order differences** - first differences applied repeatedly (orders 2
|
|
75
|
+
and 3)
|
|
76
|
+
- **a(n) - n** and **a(n) / n** - subtract or divide by the 0-based index (the
|
|
77
|
+
latter integer-only)
|
|
78
|
+
- **absolute values** - `abs(a(n))`, to recognize a signed sequence by magnitude
|
|
79
|
+
|
|
80
|
+
Transforms live in a registry, so adding more later is one function plus one
|
|
81
|
+
registration.
|
|
82
|
+
|
|
83
|
+
### The ranking
|
|
84
|
+
|
|
85
|
+
Every hit gets a deterministic, explainable score. Matches needing no transform
|
|
86
|
+
rank above transformed ones, more matched terms rank higher, and sequences in
|
|
87
|
+
OEIS's curated `core` set are boosted so canonical sequences outrank obscure ones
|
|
88
|
+
that merely share a run. A run found near a sequence's opening is weak extra
|
|
89
|
+
evidence over one buried deep inside, and ties resolve by ascending A-number so
|
|
90
|
+
output is stable. All scoring weights live in one place, and each result tells
|
|
91
|
+
you which transform found it and why it ranked where it did.
|
|
92
|
+
|
|
93
|
+
## Development
|
|
94
|
+
|
|
95
|
+
Lint, format, and test:
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
uv run ruff check .
|
|
99
|
+
uv run ruff format .
|
|
100
|
+
uv run pytest
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
CI runs ruff and pytest on every push and pull request.
|
|
104
|
+
|
|
105
|
+
To refresh the embedded OEIS core set:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
uv run python tools/generate_core_set.py
|
|
109
|
+
```
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "oeis-seek"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Transform-aware identifier for OEIS integer sequences"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [{ name = "niarenaw" }]
|
|
10
|
+
keywords = ["oeis", "integer-sequences", "cli", "mathematics", "sequence"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 4 - Beta",
|
|
13
|
+
"Environment :: Console",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"Operating System :: OS Independent",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"platformdirs>=4.0",
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/niarenaw/oeis-seek"
|
|
29
|
+
Repository = "https://github.com/niarenaw/oeis-seek"
|
|
30
|
+
Issues = "https://github.com/niarenaw/oeis-seek/issues"
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
oeis-seek = "oeis_seek.cli:main"
|
|
34
|
+
|
|
35
|
+
[dependency-groups]
|
|
36
|
+
dev = [
|
|
37
|
+
"pytest>=8.0",
|
|
38
|
+
"ruff>=0.10",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[tool.ruff]
|
|
42
|
+
line-length = 100
|
|
43
|
+
src = ["src", "tests"]
|
|
44
|
+
|
|
45
|
+
[tool.ruff.lint]
|
|
46
|
+
select = ["E", "F", "I", "UP", "B", "SIM"]
|
|
47
|
+
|
|
48
|
+
[tool.uv]
|
|
49
|
+
# Dependency cooldown to mitigate supply-chain attacks (https://cooldowns.dev/).
|
|
50
|
+
# This uv version (0.9.7) requires a fixed date rather than a relative duration,
|
|
51
|
+
# so this is pinned to three days before the last dependency review and must be
|
|
52
|
+
# advanced when dependencies are intentionally updated.
|
|
53
|
+
exclude-newer = "2026-06-01T00:00:00Z"
|
|
54
|
+
|
|
55
|
+
[build-system]
|
|
56
|
+
requires = ["hatchling"]
|
|
57
|
+
build-backend = "hatchling.build"
|
|
58
|
+
|
|
59
|
+
[tool.hatch.build.targets.wheel]
|
|
60
|
+
packages = ["src/oeis_seek"]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""The ``oeis-seek`` command: a thin formatter over :func:`oeis_seek.core.identify`.
|
|
2
|
+
|
|
3
|
+
Input parsing, the minimum-terms rule, input bounds, output formatting, and the
|
|
4
|
+
``update`` subcommand all live here. Identification itself is delegated to the
|
|
5
|
+
library core, so the deferred web/API/MCP surfaces reuse that core unchanged.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import json
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
|
|
16
|
+
from oeis_seek import core, download, index
|
|
17
|
+
from oeis_seek.models import Result
|
|
18
|
+
|
|
19
|
+
DEFAULT_MIN_TERMS = 4
|
|
20
|
+
DEFAULT_LIMIT = 10
|
|
21
|
+
MAX_TERMS = 10_000
|
|
22
|
+
MAX_TERM_DIGITS = 5_000
|
|
23
|
+
|
|
24
|
+
_TOKEN_SPLIT = re.compile(r"[,\s]+")
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_terms(raw: str) -> list[int]:
|
|
28
|
+
"""Parse comma- and/or whitespace-separated integers, with sane bounds."""
|
|
29
|
+
tokens = [t for t in _TOKEN_SPLIT.split(raw.strip()) if t]
|
|
30
|
+
if len(tokens) > MAX_TERMS:
|
|
31
|
+
raise ValueError(f"too many terms (max {MAX_TERMS})")
|
|
32
|
+
terms: list[int] = []
|
|
33
|
+
for token in tokens:
|
|
34
|
+
if len(token.lstrip("-")) > MAX_TERM_DIGITS:
|
|
35
|
+
raise ValueError(f"term too large (max {MAX_TERM_DIGITS} digits)")
|
|
36
|
+
terms.append(int(token))
|
|
37
|
+
return terms
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _read_input(args_terms: list[str]) -> str:
    """Return the raw query text from the arguments, else piped stdin, else ``""``.

    Positional arguments win and are joined with single spaces. Stdin is read
    only when no arguments were given and stdin is not a terminal.
    """
    if not args_terms:
        return "" if sys.stdin.isatty() else sys.stdin.read()
    return " ".join(args_terms)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Every field label is padded to the width of the longest one, so the values
# line up in a single column down each result block.
_LABEL_WIDTH = max(len(label) for label in ("transform", "confidence", "matched", "why"))
# Number of matched terms shown in the human-readable preview.
_PREVIEW_TERMS = 8
# ANSI SGR codes, applied only on a TTY.
_BOLD = "1"
_DIM = "2"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _use_color() -> bool:
    """Return True when ANSI color should be emitted.

    Color is used only when stdout is a terminal and ``NO_COLOR`` is absent or
    empty. Per the NO_COLOR convention only a non-empty value opts out, so an
    exported-but-empty ``NO_COLOR=`` leaves color enabled.
    """
    if not sys.stdout.isatty():
        return False
    return not os.environ.get("NO_COLOR")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _wrap(text: str, code: str, *, enabled: bool) -> str:
    """Surround ``text`` with the ANSI SGR ``code`` and a reset, if ``enabled``."""
    if not enabled:
        return text
    return f"\033[{code}m{text}\033[0m"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _field(label: str, value: str) -> str:
    """Render one ``label value`` line, left-justifying the label to ``_LABEL_WIDTH``."""
    padded_label = f"{label:<{_LABEL_WIDTH}}"
    return f" {padded_label} {value}"
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def format_human(results: list[Result], snapshot: str, *, color: bool | None = None) -> str:
    """Render a scannable, aligned result block per match; plain when not a TTY.

    Args:
        results: Ranked results, best first.
        snapshot: Snapshot label printed in the header line.
        color: Force ANSI styling on or off; ``None`` defers to ``_use_color()``.

    Returns:
        The header, a blank line, then either ``No matches found.`` or one
        numbered block per result, with blocks separated by blank lines.
    """
    use_color = color if color is not None else _use_color()
    header = f"Snapshot: {snapshot}"
    if not results:
        return f"{header}\n\nNo matches found."

    def render(position: int, result: Result) -> str:
        # Only the first few matched terms are shown to keep each block short.
        shown_terms = ", ".join(map(str, result.matched_terms[:_PREVIEW_TERMS]))
        lines = [
            f"{position}. {_wrap(result.a_number, _BOLD, enabled=use_color)} {result.name}",
            _field("transform", result.transform),
            _field("confidence", f"{result.score:g}"),
            _field("matched", shown_terms),
            _field("why", result.explanation),
            f" {_wrap(result.url, _DIM, enabled=use_color)}",
        ]
        return "\n".join(lines)

    body = "\n\n".join(render(i, r) for i, r in enumerate(results, start=1))
    return f"{header}\n\n{body}"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def format_json(results: list[Result], snapshot: str) -> str:
    """Serialize the snapshot label and results as 2-space-indented JSON.

    Each result becomes an object with the keys ``a_number``, ``name``,
    ``transform``, ``confidence`` (the result's score), ``matched_terms``,
    ``url`` and ``explanation``, in that order.
    """
    entries = []
    for result in results:
        entries.append(
            {
                "a_number": result.a_number,
                "name": result.name,
                "transform": result.transform,
                "confidence": result.score,
                "matched_terms": result.matched_terms,
                "url": result.url,
                "explanation": result.explanation,
            }
        )
    return json.dumps({"snapshot": snapshot, "results": entries}, indent=2)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _run_update() -> int:
    """Download the OEIS dump, rebuild the index, and report what was indexed.

    Progress goes to stderr; the final summary line goes to stdout. Returns the
    process exit code ``0``.
    """
    print("Downloading OEIS dump ...", file=sys.stderr)
    download.download()

    print("Building index ...", file=sys.stderr)
    sequence_count = index.build()

    # Reopen the freshly built index only to read its snapshot date.
    opened = index.open_index()
    try:
        summary = f"Indexed {sequence_count} sequences (snapshot: {opened.snapshot_date})."
        print(summary)
    finally:
        opened.close()
    return 0
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _run_lookup(args: argparse.Namespace) -> int:
    """Identify the terms given on the command line or stdin and print the results.

    Exit codes: ``0`` when at least one match is printed, ``1`` when there are no
    matches, ``2`` for missing, invalid, or too few terms, and ``3`` when the
    local index has not been built yet.
    """

    def fail(message: str, code: int) -> int:
        # All diagnostics go to stderr so stdout stays clean for results.
        print(message, file=sys.stderr)
        return code

    raw = _read_input(args.terms)
    if not raw.strip():
        return fail("No terms given. Example: oeis-seek 2,6,12,20,30", 2)

    try:
        terms = parse_terms(raw)
    except ValueError as exc:
        return fail(f"Invalid input: {exc}", 2)

    if len(terms) < args.min_terms:
        return fail(
            f"Need at least {args.min_terms} terms (got {len(terms)}). "
            "Use --min-terms to lower the threshold.",
            2,
        )

    try:
        handle = index.open_index()
    except FileNotFoundError:
        return fail("No local data - run `oeis-seek update` first.", 3)

    try:
        results = core.identify(terms, handle, limit=args.limit)
        snapshot = handle.snapshot_date
    finally:
        handle.close()

    render = format_json if args.json else format_human
    print(render(results, snapshot))
    return 0 if results else 1
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _positive_int(text: str) -> int:
    """argparse ``type`` callback: parse ``text`` as an integer that is at least 1."""
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be at least 1 (got {value})")
    return value


def build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the lookup command.

    ``--limit`` must be a positive integer: the limit is later used as a slice
    bound, so zero would hide every result and a negative value would silently
    drop results from the end of the ranking.
    """
    parser = argparse.ArgumentParser(
        prog="oeis-seek",
        description="Identify the OEIS sequence a list of integers belongs to.",
        epilog="Run `oeis-seek update` to download and index the OEIS bulk dump.",
    )
    parser.add_argument("terms", nargs="*", help="integer terms (comma/space separated)")
    parser.add_argument("--limit", type=_positive_int, default=DEFAULT_LIMIT, help="max results")
    parser.add_argument(
        "--min-terms", type=int, default=DEFAULT_MIN_TERMS, help="minimum input terms"
    )
    parser.add_argument("--json", action="store_true", help="emit JSON")
    return parser
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return its exit code.

    ``argv`` defaults to ``sys.argv[1:]``. A leading ``update`` argument selects
    the update command; anything else is parsed as a lookup.
    """
    arguments = list(argv) if argv is not None else list(sys.argv[1:])
    if arguments[:1] == ["update"]:
        return _run_update()
    return _run_lookup(build_parser().parse_args(arguments))


if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""The identification fan-out: the single public entry point.
|
|
2
|
+
|
|
3
|
+
``identify`` normalizes the input, runs every registered transform, matches each
|
|
4
|
+
candidate against the index, dedupes the same sequence to its best-scoring hit,
|
|
5
|
+
and returns ranked results. Keeping the fan-out and dedup here (rather than in the
|
|
6
|
+
ranking module) gives the transform loop one home and keeps ranking a pure
|
|
7
|
+
function. Every future surface (CLI today; web/API/MCP later) calls this.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from oeis_seek import rank
|
|
13
|
+
from oeis_seek.index import Index
|
|
14
|
+
from oeis_seek.matcher import find_matches
|
|
15
|
+
from oeis_seek.models import Result
|
|
16
|
+
from oeis_seek.transforms import REGISTRY
|
|
17
|
+
from oeis_seek.transforms.normalize import strip_leading_zeros_and_ones
|
|
18
|
+
|
|
19
|
+
def _transform_distance(item: tuple[str, object]):
    """Sort key: the rank distance of a ``(name, transform)`` registry entry."""
    return rank.distance(item[0])


# Computed once, lowest-distance transforms first: a candidate produced by several
# transforms is then scanned and attributed to its strongest (lowest-distance) one.
_ORDERED_TRANSFORMS = sorted(REGISTRY.items(), key=_transform_distance)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def identify(terms: list[int], index: Index, limit: int = 10) -> list[Result]:
    """Identify the OEIS sequences ``terms`` most likely belong to.

    Runs the input and each transform of it against ``index``, ranking all hits.
    The caller owns the index handle; acquisition (and its "no local data" error)
    is a boundary concern, so every surface opens the index itself.

    Args:
        terms: The query terms; each is coerced with ``int``.
        index: An open index handle, owned and closed by the caller.
        limit: Maximum number of results to return. Zero or a negative value
            yields an empty list instead of slicing results off the end.

    Returns:
        At most ``limit`` results, highest score first, with ties broken by
        ascending A-number. Each sequence appears once, as its best-scoring hit.
    """
    # A non-positive limit can never return anything useful; without this guard a
    # negative value would silently drop results from the end of the ranking.
    if limit <= 0:
        return []

    normalized = strip_leading_zeros_and_ones([int(t) for t in terms])
    best: dict[str, Result] = {}
    seen: set[tuple[int, ...]] = set()

    # The seen-set skips a redundant index scan when two transforms yield the same
    # candidate; because _ORDERED_TRANSFORMS runs lowest-distance first, the kept
    # scan is the strongest one. Winner selection across distinct candidates is the
    # best-per-A-number-by-score comparison below.
    for name, transform in _ORDERED_TRANSFORMS:
        candidate = transform(normalized)
        if not candidate:
            continue
        key = tuple(candidate)
        if key in seen:
            continue
        seen.add(key)
        for match in find_matches(candidate, index):
            # Score first and build the Result (and its explanation) only for a
            # hit that actually replaces the incumbent for this A-number.
            # NOTE(review): assumes rank.score and rank.explain are side-effect
            # free, as the module docstring describes - confirm.
            score = rank.score(match, name)
            incumbent = best.get(match.a_number)
            if incumbent is None or score > incumbent.score:
                best[match.a_number] = Result(
                    a_number=match.a_number,
                    name=match.name,
                    transform=name,
                    score=score,
                    matched_terms=match.matched_terms,
                    explanation=rank.explain(match, name),
                )

    # Highest score first, ascending A-number as the deterministic tiebreak so
    # equal-scoring results always order the same way.
    ranked = sorted(best.values(), key=lambda r: (-r.score, r.a_number))
    return ranked[:limit]
|