kindling-rec 1.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. kindling_rec-1.0.1/LICENSE +17 -0
  2. kindling_rec-1.0.1/PKG-INFO +179 -0
  3. kindling_rec-1.0.1/README.md +136 -0
  4. kindling_rec-1.0.1/native/Cargo.lock +1161 -0
  5. kindling_rec-1.0.1/native/Cargo.toml +34 -0
  6. kindling_rec-1.0.1/native/kindling_core/Cargo.toml +36 -0
  7. kindling_rec-1.0.1/native/kindling_core/src/engine/channels.rs +107 -0
  8. kindling_rec-1.0.1/native/kindling_core/src/engine/mod.rs +20 -0
  9. kindling_rec-1.0.1/native/kindling_core/src/engine/native_state.rs +711 -0
  10. kindling_rec-1.0.1/native/kindling_core/src/engine/recommend.rs +155 -0
  11. kindling_rec-1.0.1/native/kindling_core/src/lib.rs +33 -0
  12. kindling_rec-1.0.1/native/kindling_core/src/loaders/mod.rs +32 -0
  13. kindling_rec-1.0.1/native/kindling_core/src/repeat/mod.rs +23 -0
  14. kindling_rec-1.0.1/native/kindling_core/src/repeat/multiplier.rs +271 -0
  15. kindling_rec-1.0.1/native/kindling_core/src/repeat/period.rs +173 -0
  16. kindling_rec-1.0.1/native/kindling_core/src/repeat/profile.rs +97 -0
  17. kindling_rec-1.0.1/native/kindling_core/src/score/calibrator.rs +375 -0
  18. kindling_rec-1.0.1/native/kindling_core/src/score/layered.rs +251 -0
  19. kindling_rec-1.0.1/native/kindling_core/src/score/mod.rs +23 -0
  20. kindling_rec-1.0.1/native/kindling_core/src/signals/cooc_transform.rs +66 -0
  21. kindling_rec-1.0.1/native/kindling_core/src/signals/cooccurrence.rs +341 -0
  22. kindling_rec-1.0.1/native/kindling_core/src/signals/directional_cooc.rs +272 -0
  23. kindling_rec-1.0.1/native/kindling_core/src/signals/ease.rs +249 -0
  24. kindling_rec-1.0.1/native/kindling_core/src/signals/metadata_knn.rs +137 -0
  25. kindling_rec-1.0.1/native/kindling_core/src/signals/mod.rs +29 -0
  26. kindling_rec-1.0.1/native/kindling_core/src/signals/session_cooccurrence.rs +193 -0
  27. kindling_rec-1.0.1/pyproject.toml +159 -0
  28. kindling_rec-1.0.1/src/kindling/__init__.py +25 -0
  29. kindling_rec-1.0.1/src/kindling/_native.py +34 -0
  30. kindling_rec-1.0.1/src/kindling/_native_engine.py +155 -0
  31. kindling_rec-1.0.1/src/kindling/activation.py +151 -0
  32. kindling_rec-1.0.1/src/kindling/benchmarks/__init__.py +6 -0
  33. kindling_rec-1.0.1/src/kindling/benchmarks/baselines.py +199 -0
  34. kindling_rec-1.0.1/src/kindling/benchmarks/comparison.py +209 -0
  35. kindling_rec-1.0.1/src/kindling/benchmarks/metrics.py +42 -0
  36. kindling_rec-1.0.1/src/kindling/benchmarks/parity.py +51 -0
  37. kindling_rec-1.0.1/src/kindling/blend/__init__.py +7 -0
  38. kindling_rec-1.0.1/src/kindling/blend/layer_scoring.py +166 -0
  39. kindling_rec-1.0.1/src/kindling/engine.py +1865 -0
  40. kindling_rec-1.0.1/src/kindling/explain/__init__.py +5 -0
  41. kindling_rec-1.0.1/src/kindling/explain/templates.py +53 -0
  42. kindling_rec-1.0.1/src/kindling/graph/__init__.py +5 -0
  43. kindling_rec-1.0.1/src/kindling/graph/cooc_transform.py +81 -0
  44. kindling_rec-1.0.1/src/kindling/graph/item_graph.py +155 -0
  45. kindling_rec-1.0.1/src/kindling/graph/metadata_smoothing.py +223 -0
  46. kindling_rec-1.0.1/src/kindling/ingest/__init__.py +16 -0
  47. kindling_rec-1.0.1/src/kindling/ingest/contract.py +140 -0
  48. kindling_rec-1.0.1/src/kindling/ingest/sessions.py +216 -0
  49. kindling_rec-1.0.1/src/kindling/item_features.py +324 -0
  50. kindling_rec-1.0.1/src/kindling/lifecycle/__init__.py +17 -0
  51. kindling_rec-1.0.1/src/kindling/lifecycle/decay.py +96 -0
  52. kindling_rec-1.0.1/src/kindling/loaders/__init__.py +33 -0
  53. kindling_rec-1.0.1/src/kindling/loaders/_base.py +48 -0
  54. kindling_rec-1.0.1/src/kindling/loaders/amazon.py +142 -0
  55. kindling_rec-1.0.1/src/kindling/loaders/amazon_chrono.py +106 -0
  56. kindling_rec-1.0.1/src/kindling/loaders/dunnhumby.py +114 -0
  57. kindling_rec-1.0.1/src/kindling/loaders/gowalla.py +144 -0
  58. kindling_rec-1.0.1/src/kindling/loaders/instacart.py +141 -0
  59. kindling_rec-1.0.1/src/kindling/loaders/movielens.py +172 -0
  60. kindling_rec-1.0.1/src/kindling/loaders/retailrocket.py +151 -0
  61. kindling_rec-1.0.1/src/kindling/loaders/steam.py +103 -0
  62. kindling_rec-1.0.1/src/kindling/loaders/synthetic.py +163 -0
  63. kindling_rec-1.0.1/src/kindling/loaders/tafeng.py +117 -0
  64. kindling_rec-1.0.1/src/kindling/loaders/yelp.py +136 -0
  65. kindling_rec-1.0.1/src/kindling/persist.py +76 -0
  66. kindling_rec-1.0.1/src/kindling/preprocess.py +134 -0
  67. kindling_rec-1.0.1/src/kindling/serving.py +191 -0
  68. kindling_rec-1.0.1/src/kindling/serving_app.py +79 -0
@@ -0,0 +1,17 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Copyright 2026 Robert Hoekstra (Awry Labs)
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
@@ -0,0 +1,179 @@
1
+ Metadata-Version: 2.4
2
+ Name: kindling-rec
3
+ Version: 1.0.1
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: License :: OSI Approved :: Apache Software License
6
+ Classifier: Programming Language :: Python :: 3.11
7
+ Classifier: Programming Language :: Python :: 3.12
8
+ Classifier: Programming Language :: Python :: 3.13
9
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
10
+ Requires-Dist: numpy>=1.26
11
+ Requires-Dist: pandas>=2.1
12
+ Requires-Dist: scipy>=1.11
13
+ Requires-Dist: implicit>=0.7 ; extra == 'baselines'
14
+ Requires-Dist: requests>=2.31 ; extra == 'bench'
15
+ Requires-Dist: tqdm>=4.66 ; extra == 'bench'
16
+ Requires-Dist: matplotlib>=3.8 ; extra == 'bench'
17
+ Requires-Dist: pytest>=8.0 ; extra == 'dev'
18
+ Requires-Dist: pytest-cov>=5.0 ; extra == 'dev'
19
+ Requires-Dist: hypothesis>=6.100 ; extra == 'dev'
20
+ Requires-Dist: ruff>=0.5 ; extra == 'dev'
21
+ Requires-Dist: mypy>=1.11 ; extra == 'dev'
22
+ Requires-Dist: pandas-stubs ; extra == 'dev'
23
+ Requires-Dist: types-toml ; extra == 'dev'
24
+ Requires-Dist: umap-learn>=0.5 ; extra == 'personas'
25
+ Requires-Dist: hdbscan>=0.8 ; extra == 'personas'
26
+ Requires-Dist: scikit-learn>=1.3 ; extra == 'personas'
27
+ Requires-Dist: fastapi>=0.110 ; extra == 'serve'
28
+ Requires-Dist: uvicorn>=0.29 ; extra == 'serve'
29
+ Provides-Extra: baselines
30
+ Provides-Extra: bench
31
+ Provides-Extra: dev
32
+ Provides-Extra: personas
33
+ Provides-Extra: serve
34
+ License-File: LICENSE
35
+ Summary: A hybrid recommender system that grows with your data
36
+ Author-email: Robert Hoekstra <rhoekstr@gmail.com>
37
+ License-Expression: Apache-2.0
38
+ Requires-Python: >=3.11
39
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
40
+ Project-URL: Homepage, https://github.com/rhoekstr/kindling
41
+ Project-URL: Issues, https://github.com/rhoekstr/kindling/issues
42
+
43
+ # kindling
44
+
45
+ A hybrid recommender that grows with your data — closed-form, no training
46
+ loop, no GPU. One fused base score per (user, item) built from EASE /
47
+ wilson-cooccurrence plus auto-gated z-normalized channels (trend,
48
+ last-item, transitions, user-CF), with a Rust core for the numerics.
49
+
50
+ **Design goals (learned the hard way — see [`docs/EXPERIMENTS.md`](docs/EXPERIMENTS.md)):**
51
+ 1. **A wheel that imports is a wheel that works.** numpy / pandas / scipy
52
+ only; the linear algebra that matters (the EASE inversion) runs on a
53
+ pure-Rust core (`kindling_core`). No PyTorch, no BLAS system deps.
54
+ 2. **Closed-form shallow models, gated per dataset, beat speculative
55
+ complexity.** Every channel is closed-form or a counting statistic;
56
+ every channel is activated by a measurable property of the data; every
57
+ gate exists because the ungated version measurably hurt somewhere.
58
+
59
+ ## Install
60
+
61
+ ```bash
62
+ pip install kindling-rec # from PyPI
63
+ pip install -e ".[dev]" # dev / from source
64
+ pip install -e ".[dev,bench]" # + benchmark harness
65
+ ```
66
+
67
+ ## Quickstart
68
+
69
+ ```python
70
+ from kindling import Engine
71
+ from kindling.loaders import movielens
72
+
73
+ interactions = movielens.load_1m() # entity_id, item_id, timestamp[, rating]
74
+
75
+ engine = Engine()
76
+ engine.fit(interactions)
77
+
78
+ for rec in engine.recommend(entity_id=42, n=10):
79
+ print(rec.item_id, rec.score, rec.base_kind)
80
+
81
+ # Many users at once — runs in parallel in the Rust core (GIL released).
82
+ batches = engine.recommend_batch([42, 99, 7], n=10)
83
+ ```
84
+
85
+ Recommendation is served end-to-end by the Rust core (`kindling_core`): the
86
+ EASE/cooc base, the channel blend, the boost layer, and cold-slots all run
87
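+ natively. Single recommend p50 ranges from sub-millisecond to a few milliseconds depending on the dataset (see the serving-performance table below); batch is the parallel path.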
88
+
89
+ **New / anonymous users** (absent from training) are served from ad-hoc
90
+ seed items with no per-user training — and a zero/all-unknown seed set
91
+ falls back to popularity:
92
+
93
+ ```python
94
+ engine.recommend_for_items(item_ids=[101, 205], n=10) # personalized from seeds
95
+ engine.recommend_for_items(item_ids=[], n=10) # → popularity fallback
96
+ ```
97
+
98
+ ## Intelligent activation
99
+
100
+ Channels turn on by *regime*, not configuration. The base is EASE for
101
+ catalogs ≤ 20k items and wilson-normalized cooccurrence above that;
102
+ the trend channel needs timestamps; transitions additionally need the
103
+ data not to be a rating-burst; user-CF activates only on sparse-history
104
+ data; rating-weighting engages only when true ratings are present. Each
105
+ decision is made from the data at `fit()` time. See
106
+ [`docs/REFERENCE.md`](docs/REFERENCE.md) §2 for the gate table.
107
+
108
+ ## Where it stands (full-ranking NDCG@10, engine defaults)
109
+
110
+ > Full results — discovery growth **and** the repeat-regime dominance — in [`docs/RESULTS.md`](docs/RESULTS.md).
111
+
112
+ | dataset | NDCG@10 | notes |
113
+ |---|---:|---|
114
+ | movielens-1m | 0.293 | rating-weighted EASE |
115
+ | amazon-beauty | 0.033 | + user-CF channel |
116
+ | steam (realistic tier) | 0.066 | open-catalog + cold slots |
117
+ | amazon-book-chrono | 0.032 | timestamps activate trend/transitions |
118
+
119
+ Strongest personalized model on all four; beats implicit ALS everywhere;
120
+ wins cold-*user* buckets on cold-heavy catalogs. The full benchmark
121
+ record — including the negative results, which are half the value — is in
122
+ [`docs/EXPERIMENTS.md`](docs/EXPERIMENTS.md).
123
+
124
+ On **repeat-regime** datasets (grocery/retail), a held-out gate turns on reorder
125
+ recommendation; under repeat-aware eval kindling separates from the field —
126
+ e.g. Dunnhumby 0.48 NDCG@10 vs ~0.05 for every baseline — while correctly
127
+ *declining* on fake-repeat data like Steam (re-logs aren't repurchases). See
128
+ [`docs/REPEAT-GATE.md`](docs/REPEAT-GATE.md). An opt-in EASE+ (EDLAE) base is
129
+ available but off by default ([`docs/EASE-VARIANTS-ASSESSMENT.md`](docs/EASE-VARIANTS-ASSESSMENT.md)).
130
+
131
+ ### Growth curves
132
+
133
+ How accuracy grows from cold to hot, against the standard baselines
134
+ (`bench/plot_growth_curves.py`):
135
+
136
+ ![growth curves](bench/reports/growth_curves_grid.png)
137
+
138
+ ### Serving performance (native engine, `bench/final_state_perf.py`)
139
+
140
+ | dataset | fit | single recommend p50 | batch throughput | NDCG@10 |
141
+ |---|---:|---:|---:|---:|
142
+ | movielens-1m | 4.2 s | 0.17 ms | 15.4k recs/s | 0.2928 |
143
+ | amazon-beauty | 13.1 s | 1.21 ms | 3.0k recs/s | 0.0328 |
144
+ | steam | 110 s | 5.81 ms | 0.8k recs/s | 0.0659 |
145
+
146
+ The recommend path is pure Rust with the GIL released for the batch path —
147
+ single recommend dropped from ~200 ms (the earlier Python path) to
148
+ sub-millisecond to single-digit milliseconds (per the table above), with byte-identical rankings.
149
+
150
+ ### Serving
151
+
152
+ Persist a fit as a self-contained artifact and serve it with no re-fit:
153
+
154
+ ```python
155
+ from kindling.serving import KindlingServer
156
+ KindlingServer.from_engine(engine).save("artifact/")
157
+ # ── in the serving process ──
158
+ server = KindlingServer.load("artifact/")
159
+ server.recommend("user-42", n=10)
160
+ ```
161
+
162
+ A FastAPI example (`kindling.serving_app`) ships behind the optional
163
+ `serve` extra: `pip install 'kindling-rec[serve]'`.
164
+
165
+ ## Project layout
166
+
167
+ ```
168
+ src/kindling/ library source (engine, serving, Rust bindings, loaders)
169
+ native/kindling_core/ Rust core (EASE, cooccurrence, channel blend, recommend)
170
+ bench/ regression gate (bench/verify.py) + frozen reports + plots
171
+ docs/ RESULTS.md (what it brings) · REFERENCE.md (architecture) ·
172
+ EXPERIMENTS.md (record) · LESSONS.md (what the build taught)
173
+ tests/ unit, property, integration
174
+ ```
175
+
176
+ ## License
177
+
178
+ Apache 2.0.
179
+
@@ -0,0 +1,136 @@
1
+ # kindling
2
+
3
+ A hybrid recommender that grows with your data — closed-form, no training
4
+ loop, no GPU. One fused base score per (user, item) built from EASE /
5
+ wilson-cooccurrence plus auto-gated z-normalized channels (trend,
6
+ last-item, transitions, user-CF), with a Rust core for the numerics.
7
+
8
+ **Design goals (learned the hard way — see [`docs/EXPERIMENTS.md`](docs/EXPERIMENTS.md)):**
9
+ 1. **A wheel that imports is a wheel that works.** numpy / pandas / scipy
10
+ only; the linear algebra that matters (the EASE inversion) runs on a
11
+ pure-Rust core (`kindling_core`). No PyTorch, no BLAS system deps.
12
+ 2. **Closed-form shallow models, gated per dataset, beat speculative
13
+ complexity.** Every channel is closed-form or a counting statistic;
14
+ every channel is activated by a measurable property of the data; every
15
+ gate exists because the ungated version measurably hurt somewhere.
16
+
17
+ ## Install
18
+
19
+ ```bash
20
+ pip install kindling-rec # from PyPI
21
+ pip install -e ".[dev]" # dev / from source
22
+ pip install -e ".[dev,bench]" # + benchmark harness
23
+ ```
24
+
25
+ ## Quickstart
26
+
27
+ ```python
28
+ from kindling import Engine
29
+ from kindling.loaders import movielens
30
+
31
+ interactions = movielens.load_1m() # entity_id, item_id, timestamp[, rating]
32
+
33
+ engine = Engine()
34
+ engine.fit(interactions)
35
+
36
+ for rec in engine.recommend(entity_id=42, n=10):
37
+ print(rec.item_id, rec.score, rec.base_kind)
38
+
39
+ # Many users at once — runs in parallel in the Rust core (GIL released).
40
+ batches = engine.recommend_batch([42, 99, 7], n=10)
41
+ ```
42
+
43
+ Recommendation is served end-to-end by the Rust core (`kindling_core`): the
44
+ EASE/cooc base, the channel blend, the boost layer, and cold-slots all run
45
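+ natively. Single recommend p50 ranges from sub-millisecond to a few milliseconds depending on the dataset (see the serving-performance table below); batch is the parallel path.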
46
+
47
+ **New / anonymous users** (absent from training) are served from ad-hoc
48
+ seed items with no per-user training — and a zero/all-unknown seed set
49
+ falls back to popularity:
50
+
51
+ ```python
52
+ engine.recommend_for_items(item_ids=[101, 205], n=10) # personalized from seeds
53
+ engine.recommend_for_items(item_ids=[], n=10) # → popularity fallback
54
+ ```
55
+
56
+ ## Intelligent activation
57
+
58
+ Channels turn on by *regime*, not configuration. The base is EASE for
59
+ catalogs ≤ 20k items and wilson-normalized cooccurrence above that;
60
+ the trend channel needs timestamps; transitions additionally need the
61
+ data not to be a rating-burst; user-CF activates only on sparse-history
62
+ data; rating-weighting engages only when true ratings are present. Each
63
+ decision is made from the data at `fit()` time. See
64
+ [`docs/REFERENCE.md`](docs/REFERENCE.md) §2 for the gate table.
65
+
66
+ ## Where it stands (full-ranking NDCG@10, engine defaults)
67
+
68
+ > Full results — discovery growth **and** the repeat-regime dominance — in [`docs/RESULTS.md`](docs/RESULTS.md).
69
+
70
+ | dataset | NDCG@10 | notes |
71
+ |---|---:|---|
72
+ | movielens-1m | 0.293 | rating-weighted EASE |
73
+ | amazon-beauty | 0.033 | + user-CF channel |
74
+ | steam (realistic tier) | 0.066 | open-catalog + cold slots |
75
+ | amazon-book-chrono | 0.032 | timestamps activate trend/transitions |
76
+
77
+ Strongest personalized model on all four; beats implicit ALS everywhere;
78
+ wins cold-*user* buckets on cold-heavy catalogs. The full benchmark
79
+ record — including the negative results, which are half the value — is in
80
+ [`docs/EXPERIMENTS.md`](docs/EXPERIMENTS.md).
81
+
82
+ On **repeat-regime** datasets (grocery/retail), a held-out gate turns on reorder
83
+ recommendation; under repeat-aware eval kindling separates from the field —
84
+ e.g. Dunnhumby 0.48 NDCG@10 vs ~0.05 for every baseline — while correctly
85
+ *declining* on fake-repeat data like Steam (re-logs aren't repurchases). See
86
+ [`docs/REPEAT-GATE.md`](docs/REPEAT-GATE.md). An opt-in EASE+ (EDLAE) base is
87
+ available but off by default ([`docs/EASE-VARIANTS-ASSESSMENT.md`](docs/EASE-VARIANTS-ASSESSMENT.md)).
88
+
89
+ ### Growth curves
90
+
91
+ How accuracy grows from cold to hot, against the standard baselines
92
+ (`bench/plot_growth_curves.py`):
93
+
94
+ ![growth curves](bench/reports/growth_curves_grid.png)
95
+
96
+ ### Serving performance (native engine, `bench/final_state_perf.py`)
97
+
98
+ | dataset | fit | single recommend p50 | batch throughput | NDCG@10 |
99
+ |---|---:|---:|---:|---:|
100
+ | movielens-1m | 4.2 s | 0.17 ms | 15.4k recs/s | 0.2928 |
101
+ | amazon-beauty | 13.1 s | 1.21 ms | 3.0k recs/s | 0.0328 |
102
+ | steam | 110 s | 5.81 ms | 0.8k recs/s | 0.0659 |
103
+
104
+ The recommend path is pure Rust with the GIL released for the batch path —
105
+ single recommend dropped from ~200 ms (the earlier Python path) to
106
+ sub-millisecond to single-digit milliseconds (per the table above), with byte-identical rankings.
107
+
108
+ ### Serving
109
+
110
+ Persist a fit as a self-contained artifact and serve it with no re-fit:
111
+
112
+ ```python
113
+ from kindling.serving import KindlingServer
114
+ KindlingServer.from_engine(engine).save("artifact/")
115
+ # ── in the serving process ──
116
+ server = KindlingServer.load("artifact/")
117
+ server.recommend("user-42", n=10)
118
+ ```
119
+
120
+ A FastAPI example (`kindling.serving_app`) ships behind the optional
121
+ `serve` extra: `pip install 'kindling-rec[serve]'`.
122
+
123
+ ## Project layout
124
+
125
+ ```
126
+ src/kindling/ library source (engine, serving, Rust bindings, loaders)
127
+ native/kindling_core/ Rust core (EASE, cooccurrence, channel blend, recommend)
128
+ bench/ regression gate (bench/verify.py) + frozen reports + plots
129
+ docs/ RESULTS.md (what it brings) · REFERENCE.md (architecture) ·
130
+ EXPERIMENTS.md (record) · LESSONS.md (what the build taught)
131
+ tests/ unit, property, integration
132
+ ```
133
+
134
+ ## License
135
+
136
+ Apache 2.0.