rfscorer 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rfscorer-0.1.0/.devcontainer/devcontainer.json +22 -0
- rfscorer-0.1.0/.github/workflows/ci.yml +34 -0
- rfscorer-0.1.0/.gitignore +235 -0
- rfscorer-0.1.0/LICENSE +21 -0
- rfscorer-0.1.0/PKG-INFO +208 -0
- rfscorer-0.1.0/README.md +162 -0
- rfscorer-0.1.0/docs/architecture.md +24 -0
- rfscorer-0.1.0/docs/development-guidelines.md +90 -0
- rfscorer-0.1.0/docs/functional-design.md +248 -0
- rfscorer-0.1.0/docs/glossary.md +53 -0
- rfscorer-0.1.0/docs/product-requirements.md +94 -0
- rfscorer-0.1.0/docs/repository-structure.md +95 -0
- rfscorer-0.1.0/examples/README.md +13 -0
- rfscorer-0.1.0/examples/access_log.csv +325909 -0
- rfscorer-0.1.0/examples/basic_usage.ipynb +707 -0
- rfscorer-0.1.0/img/empirical_probability_surface.png +0 -0
- rfscorer-0.1.0/img/mcc_probability_surface.png +0 -0
- rfscorer-0.1.0/img/mono_probability_surface.png +0 -0
- rfscorer-0.1.0/pyproject.toml +54 -0
- rfscorer-0.1.0/src/rfscorer/__init__.py +3 -0
- rfscorer-0.1.0/src/rfscorer/optimizer.py +274 -0
- rfscorer-0.1.0/src/rfscorer/scorer.py +822 -0
- rfscorer-0.1.0/tests/test_optimizer.py +342 -0
- rfscorer-0.1.0/tests/test_scorer.py +454 -0
- rfscorer-0.1.0/uv.lock +3341 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
|
2
|
+
// README at: https://github.com/devcontainers/templates/tree/main/src/python
|
|
3
|
+
{
|
|
4
|
+
"name": "Python 3",
|
|
5
|
+
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
|
|
6
|
+
"image": "mcr.microsoft.com/devcontainers/python:3-3.14-trixie"
|
|
7
|
+
|
|
8
|
+
// Features to add to the dev container. More info: https://containers.dev/features.
|
|
9
|
+
// "features": {},
|
|
10
|
+
|
|
11
|
+
// Use 'forwardPorts' to make a list of ports inside the container available locally.
|
|
12
|
+
// "forwardPorts": [],
|
|
13
|
+
|
|
14
|
+
// Use 'postCreateCommand' to run commands after the container is created.
|
|
15
|
+
// "postCreateCommand": "pip3 install --user -r requirements.txt",
|
|
16
|
+
|
|
17
|
+
// Configure tool-specific properties.
|
|
18
|
+
// "customizations": {},
|
|
19
|
+
|
|
20
|
+
// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
|
|
21
|
+
// "remoteUser": "root"
|
|
22
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: uv sync --dev
|
|
26
|
+
|
|
27
|
+
- name: Lint
|
|
28
|
+
run: uv run ruff check .
|
|
29
|
+
|
|
30
|
+
- name: Format
|
|
31
|
+
run: uv run ruff format --check .
|
|
32
|
+
|
|
33
|
+
- name: Test
|
|
34
|
+
run: uv run pytest
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# macOS
|
|
2
|
+
.DS_Store
|
|
3
|
+
|
|
4
|
+
# Byte-compiled / optimized / DLL files
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[codz]
|
|
7
|
+
*$py.class
|
|
8
|
+
|
|
9
|
+
# C extensions
|
|
10
|
+
*.so
|
|
11
|
+
|
|
12
|
+
# Distribution / packaging
|
|
13
|
+
.Python
|
|
14
|
+
build/
|
|
15
|
+
develop-eggs/
|
|
16
|
+
dist/
|
|
17
|
+
downloads/
|
|
18
|
+
eggs/
|
|
19
|
+
.eggs/
|
|
20
|
+
lib/
|
|
21
|
+
lib64/
|
|
22
|
+
parts/
|
|
23
|
+
sdist/
|
|
24
|
+
var/
|
|
25
|
+
wheels/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py.cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
cover/
|
|
56
|
+
|
|
57
|
+
# Translations
|
|
58
|
+
*.mo
|
|
59
|
+
*.pot
|
|
60
|
+
|
|
61
|
+
# Django stuff:
|
|
62
|
+
*.log
|
|
63
|
+
local_settings.py
|
|
64
|
+
db.sqlite3
|
|
65
|
+
db.sqlite3-journal
|
|
66
|
+
|
|
67
|
+
# Flask stuff:
|
|
68
|
+
instance/
|
|
69
|
+
.webassets-cache
|
|
70
|
+
|
|
71
|
+
# Scrapy stuff:
|
|
72
|
+
.scrapy
|
|
73
|
+
|
|
74
|
+
# Sphinx documentation
|
|
75
|
+
docs/_build/
|
|
76
|
+
|
|
77
|
+
# PyBuilder
|
|
78
|
+
.pybuilder/
|
|
79
|
+
target/
|
|
80
|
+
|
|
81
|
+
# Jupyter Notebook
|
|
82
|
+
.ipynb_checkpoints
|
|
83
|
+
|
|
84
|
+
# IPython
|
|
85
|
+
profile_default/
|
|
86
|
+
ipython_config.py
|
|
87
|
+
|
|
88
|
+
# pyenv
|
|
89
|
+
# For a library or package, you might want to ignore these files since the code is
|
|
90
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
91
|
+
# .python-version
|
|
92
|
+
|
|
93
|
+
# pipenv
|
|
94
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
95
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
96
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
97
|
+
# install all needed dependencies.
|
|
98
|
+
# Pipfile.lock
|
|
99
|
+
|
|
100
|
+
# UV
|
|
101
|
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
|
102
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
103
|
+
# commonly ignored for libraries.
|
|
104
|
+
# uv.lock
|
|
105
|
+
|
|
106
|
+
# poetry
|
|
107
|
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
|
108
|
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
|
109
|
+
# commonly ignored for libraries.
|
|
110
|
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
|
111
|
+
# poetry.lock
|
|
112
|
+
# poetry.toml
|
|
113
|
+
|
|
114
|
+
# pdm
|
|
115
|
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
|
116
|
+
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
|
117
|
+
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
|
118
|
+
# pdm.lock
|
|
119
|
+
# pdm.toml
|
|
120
|
+
.pdm-python
|
|
121
|
+
.pdm-build/
|
|
122
|
+
|
|
123
|
+
# pixi
|
|
124
|
+
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
|
125
|
+
# pixi.lock
|
|
126
|
+
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
|
127
|
+
# in the .venv directory. It is recommended not to include this directory in version control.
|
|
128
|
+
.pixi
|
|
129
|
+
|
|
130
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
|
131
|
+
__pypackages__/
|
|
132
|
+
|
|
133
|
+
# Celery stuff
|
|
134
|
+
celerybeat-schedule
|
|
135
|
+
celerybeat.pid
|
|
136
|
+
|
|
137
|
+
# Redis
|
|
138
|
+
*.rdb
|
|
139
|
+
*.aof
|
|
140
|
+
*.pid
|
|
141
|
+
|
|
142
|
+
# RabbitMQ
|
|
143
|
+
mnesia/
|
|
144
|
+
rabbitmq/
|
|
145
|
+
rabbitmq-data/
|
|
146
|
+
|
|
147
|
+
# ActiveMQ
|
|
148
|
+
activemq-data/
|
|
149
|
+
|
|
150
|
+
# SageMath parsed files
|
|
151
|
+
*.sage.py
|
|
152
|
+
|
|
153
|
+
# Environments
|
|
154
|
+
.env
|
|
155
|
+
.envrc
|
|
156
|
+
.venv
|
|
157
|
+
env/
|
|
158
|
+
venv/
|
|
159
|
+
ENV/
|
|
160
|
+
env.bak/
|
|
161
|
+
venv.bak/
|
|
162
|
+
|
|
163
|
+
# Spyder project settings
|
|
164
|
+
.spyderproject
|
|
165
|
+
.spyproject
|
|
166
|
+
|
|
167
|
+
# Rope project settings
|
|
168
|
+
.ropeproject
|
|
169
|
+
|
|
170
|
+
# mkdocs documentation
|
|
171
|
+
/site
|
|
172
|
+
|
|
173
|
+
# mypy
|
|
174
|
+
.mypy_cache/
|
|
175
|
+
.dmypy.json
|
|
176
|
+
dmypy.json
|
|
177
|
+
|
|
178
|
+
# Pyre type checker
|
|
179
|
+
.pyre/
|
|
180
|
+
|
|
181
|
+
# pytype static type analyzer
|
|
182
|
+
.pytype/
|
|
183
|
+
|
|
184
|
+
# Cython debug symbols
|
|
185
|
+
cython_debug/
|
|
186
|
+
|
|
187
|
+
# PyCharm
|
|
188
|
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
|
189
|
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
|
190
|
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
|
191
|
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
|
192
|
+
# .idea/
|
|
193
|
+
|
|
194
|
+
# Abstra
|
|
195
|
+
# Abstra is an AI-powered process automation framework.
|
|
196
|
+
# Ignore directories containing user credentials, local state, and settings.
|
|
197
|
+
# Learn more at https://abstra.io/docs
|
|
198
|
+
.abstra/
|
|
199
|
+
|
|
200
|
+
# Visual Studio Code
|
|
201
|
+
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
|
202
|
+
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
|
203
|
+
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
|
204
|
+
# you could uncomment the following to ignore the entire vscode folder
|
|
205
|
+
# .vscode/
|
|
206
|
+
# Temporary file for partial code execution
|
|
207
|
+
tempCodeRunnerFile.py
|
|
208
|
+
|
|
209
|
+
# Ruff stuff:
|
|
210
|
+
.ruff_cache/
|
|
211
|
+
|
|
212
|
+
# PyPI configuration file
|
|
213
|
+
.pypirc
|
|
214
|
+
|
|
215
|
+
# Marimo
|
|
216
|
+
marimo/_static/
|
|
217
|
+
marimo/_lsp/
|
|
218
|
+
__marimo__/
|
|
219
|
+
|
|
220
|
+
# Streamlit
|
|
221
|
+
.streamlit/secrets.toml
|
|
222
|
+
|
|
223
|
+
# for development
|
|
224
|
+
CLAUDE.md
|
|
225
|
+
src/rfscorer/*.csv
|
|
226
|
+
src/rfscorer/*.png
|
|
227
|
+
workspace/
|
|
228
|
+
examples/*.png
|
|
229
|
+
all_probability.csv
|
|
230
|
+
df_recommend_emp.csv
|
|
231
|
+
df_recommend_mcc.csv
|
|
232
|
+
df_recommend_mono.csv
|
|
233
|
+
surface_empirical_probability.png
|
|
234
|
+
surface_mcc_probability.png
|
|
235
|
+
surface_mono_probability.png
|
rfscorer-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 iwanaga
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
rfscorer-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rfscorer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Recency-Frequency based recommendation scoring
|
|
5
|
+
Project-URL: Repository, https://github.com/jiro-iwanaga/rfscorer
|
|
6
|
+
Author-email: Jiro Iwanaga <iwanaga@erdos-the-book.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2026 iwanaga
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Keywords: e-commerce,frequency,recency,recommendation,scoring
|
|
30
|
+
Classifier: Development Status :: 3 - Alpha
|
|
31
|
+
Classifier: Intended Audience :: Developers
|
|
32
|
+
Classifier: Intended Audience :: Science/Research
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Programming Language :: Python :: 3
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
40
|
+
Requires-Python: >=3.10
|
|
41
|
+
Requires-Dist: cvxpy>=1.3
|
|
42
|
+
Requires-Dist: matplotlib>=3.5
|
|
43
|
+
Requires-Dist: numpy>=1.23
|
|
44
|
+
Requires-Dist: pandas>=1.5
|
|
45
|
+
Description-Content-Type: text/markdown
|
|
46
|
+
|
|
47
|
+
# rfscorer
|
|
48
|
+
|
|
49
|
+
[![CI](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml/badge.svg)](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml)
|
|
50
|
+
[![PyPI version](https://badge.fury.io/py/rfscorer.svg)](https://badge.fury.io/py/rfscorer)
|
|
51
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/rfscorer.svg)](https://pypi.org/project/rfscorer/)
|
|
52
|
+
|
|
53
|
+
`rfscorer` is a Python package for Recency-Frequency based recommendation scoring.
|
|
54
|
+
|
|
55
|
+
It estimates **revisit probabilities** — preference scores for each user-item pair, forming a matrix analogous to a rating matrix — from interaction histories, using two simple but powerful behavioral signals: **recency**, which captures how recently a user interacted with an item, and **frequency**, which captures how often the user has interacted with it.
|
|
56
|
+
|
|
57
|
+
The package is designed for product recommendation and revisit modeling, especially in settings where interpretable scoring based on interaction history is preferred over black-box recommendation models.
|
|
58
|
+
|
|
59
|
+
> Note: In this package, **RF** stands for **Recency-Frequency**, not Random Forest.
|
|
60
|
+
|
|
61
|
+
## Features
|
|
62
|
+
|
|
63
|
+
- **scikit-learn-style API** — familiar `fit()` / `transform()` interface makes it easy to integrate into existing data science workflows
|
|
64
|
+
- **Minimal data requirements** — works with any interaction log that has three columns: `user`, `item`, and `datetime`; no ratings or explicit feedback needed
|
|
65
|
+
- **Explainable scoring** — probabilities are derived through mathematical optimization under RF monotonicity constraints, making every score fully traceable and auditable; 3D surface visualization further supports intuitive understanding
|
|
66
|
+
- **Probabilistic output** — revisit probabilities serve as preference scores, enabling expected value calculations and probabilistic ranking of recommendations
|
|
67
|
+
- **Extensible** — the user–item probability matrix produced by `transform()` can be directly used as input to collaborative filtering or other downstream recommendation models
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install rfscorer
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Usage
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from rfscorer import RecencyFrequencyScorer
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Prepare an interaction log with at least three columns: user ID, item ID, and timestamp.
|
|
82
|
+
Split it into a training set and a test set.
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
df_train = ... # training interaction log (columns: user, item, datetime)
|
|
86
|
+
df_test = ... # test interaction log (columns: user, item, datetime)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
| user | item | datetime |
|
|
90
|
+
|-------|-------|------------|
|
|
91
|
+
| u_001 | i_032 | 2026-07-01 |
|
|
92
|
+
| u_001 | i_017 | 2026-07-03 |
|
|
93
|
+
| u_001 | i_032 | 2026-07-05 |
|
|
94
|
+
| u_002 | i_011 | 2026-07-02 |
|
|
95
|
+
| u_002 | i_058 | 2026-07-04 |
|
|
96
|
+
|
|
97
|
+
The same user-item pair may appear multiple times, representing repeat visits.
|
|
98
|
+
|
|
99
|
+
Instantiate the scorer, specifying the column names if they differ from the defaults (`user`, `item`, `datetime`).
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
scorer = RecencyFrequencyScorer()
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Call `fit()` to estimate empirical revisit probabilities from the training log.
|
|
106
|
+
Specify the observation period (from which recency and frequency are computed) and the evaluation period (which provides the ground-truth revisit labels).
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
scorer.fit(
|
|
110
|
+
df_train,
|
|
111
|
+
observation_period=("2026-07-01", "2026-07-07"),
|
|
112
|
+
evaluation_period=("2026-07-08", "2026-07-08"),
|
|
113
|
+
)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
The empirical surface reflects raw revisit rates and may be irregular due to sparse data.
|
|
117
|
+
|
|
118
|
+

|
|
119
|
+
|
|
120
|
+
Optionally, call `optimize()` to smooth the surface under RF monotonicity constraints using convex quadratic programming.
|
|
121
|
+
`kind="mono"` enforces recency and frequency monotonicity.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
scorer.optimize(kind="mono")
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+

|
|
128
|
+
|
|
129
|
+
`kind="mcc"` additionally imposes convexity in recency and concavity in frequency, yielding a smoother surface.
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
scorer.optimize(kind="mcc")
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+

|
|
136
|
+
|
|
137
|
+
Call `transform()` to score each user-item pair in the test log.
|
|
138
|
+
It returns a DataFrame with columns `user`, `item`, `recency`, `frequency`, `probability`, and `order` (rank within each user, sorted by probability descending).
|
|
139
|
+
Pass `kind="empirical"`, `kind="mono"`, or `kind="mcc"` to select which probabilities to use.
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
df_rec_mcc = scorer.transform(df_test, target_date="2026-07-07", kind="mcc")
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
| user | item | recency | frequency | probability | order |
|
|
146
|
+
|--------|--------|--------:|----------:|------------:|------:|
|
|
147
|
+
| u_001 | i_032 | 1 | 4 | 0.1167 | 1 |
|
|
148
|
+
| u_001 | i_017 | 2 | 3 | 0.0789 | 2 |
|
|
149
|
+
| u_001 | i_045 | 3 | 1 | 0.0248 | 3 |
|
|
150
|
+
| u_002 | i_011 | 1 | 2 | 0.0621 | 1 |
|
|
151
|
+
| u_002 | i_058 | 4 | 1 | 0.0182 | 2 |
|
|
152
|
+
|
|
153
|
+
Within each user, rows are sorted by `probability` descending; `order` represents the recommendation rank.
|
|
154
|
+
|
|
155
|
+
## Examples
|
|
156
|
+
|
|
157
|
+
- [examples/basic_usage.ipynb](examples/basic_usage.ipynb) — end-to-end walkthrough: load data, fit, optimize, transform, and evaluate
|
|
158
|
+
|
|
159
|
+
## References
|
|
160
|
+
- [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Estimating product-choice probabilities from recency and frequency of page views,” Knowledge-Based Systems, Volume 99, 2016, Pages 157–167.](https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848)
|
|
161
|
+
|
|
162
|
+
- [Jiro Iwanaga, Kyota Ishihara, Naoki Nishimura, and Ikki Tanaka, *Pythonではじめる数理最適化 ―ケーススタディでモデリングのスキルを身につけよう―* (in Japanese), Ohmsha, 2021.](https://www.ohmsha.co.jp/book/9784274231759/)
|
|
163
|
+
- [Chapter 7: 商品推薦のための興味のスコアリング (in Japanese)](https://github.com/ohmsha/PyOptBook/tree/main/7.recommendation)
|
|
164
|
+
|
|
165
|
+
- [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Improving collaborative filtering recommendations by estimating user preferences from clickstream data,” Electronic Commerce Research and Applications, Volume 37, Article 100877, 2019.](https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
## Citing
|
|
169
|
+
|
|
170
|
+
If you use `rfscorer` in academic work, please cite the following paper:
|
|
171
|
+
|
|
172
|
+
Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
|
|
173
|
+
"Estimating product-choice probabilities from recency and frequency of page views,"
|
|
174
|
+
*Knowledge-Based Systems*, Volume 99, 2016, Pages 157–167.
|
|
175
|
+
|
|
176
|
+
```bibtex
|
|
177
|
+
@article{Iwanaga2016,
|
|
178
|
+
author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
|
|
179
|
+
title = {Estimating product-choice probabilities from recency and frequency of page views},
|
|
180
|
+
journal = {Knowledge-Based Systems},
|
|
181
|
+
volume = {99},
|
|
182
|
+
pages = {157--167},
|
|
183
|
+
year = {2016},
|
|
184
|
+
url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848}
|
|
185
|
+
}
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
If you additionally use the probability matrix as input to a collaborative filtering model, please also cite:
|
|
189
|
+
|
|
190
|
+
Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
|
|
191
|
+
"Improving collaborative filtering recommendations by estimating user preferences from clickstream data,"
|
|
192
|
+
*Electronic Commerce Research and Applications*, Volume 37, Article 100877, 2019.
|
|
193
|
+
|
|
194
|
+
```bibtex
|
|
195
|
+
@article{Iwanaga2019,
|
|
196
|
+
author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
|
|
197
|
+
title = {Improving collaborative filtering recommendations by estimating user preferences from clickstream data},
|
|
198
|
+
journal = {Electronic Commerce Research and Applications},
|
|
199
|
+
volume = {37},
|
|
200
|
+
pages = {100877},
|
|
201
|
+
year = {2019},
|
|
202
|
+
url = {https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547}
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## License
|
|
207
|
+
|
|
208
|
+
MIT License
|
rfscorer-0.1.0/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# rfscorer
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml/badge.svg)](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI version](https://badge.fury.io/py/rfscorer.svg)](https://badge.fury.io/py/rfscorer)
|
|
5
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/rfscorer.svg)](https://pypi.org/project/rfscorer/)
|
|
6
|
+
|
|
7
|
+
`rfscorer` is a Python package for Recency-Frequency based recommendation scoring.
|
|
8
|
+
|
|
9
|
+
It estimates **revisit probabilities** — preference scores for each user-item pair, forming a matrix analogous to a rating matrix — from interaction histories, using two simple but powerful behavioral signals: **recency**, which captures how recently a user interacted with an item, and **frequency**, which captures how often the user has interacted with it.
|
|
10
|
+
|
|
11
|
+
The package is designed for product recommendation and revisit modeling, especially in settings where interpretable scoring based on interaction history is preferred over black-box recommendation models.
|
|
12
|
+
|
|
13
|
+
> Note: In this package, **RF** stands for **Recency-Frequency**, not Random Forest.
|
|
14
|
+
|
|
15
|
+
## Features
|
|
16
|
+
|
|
17
|
+
- **scikit-learn-style API** — familiar `fit()` / `transform()` interface makes it easy to integrate into existing data science workflows
|
|
18
|
+
- **Minimal data requirements** — works with any interaction log that has three columns: `user`, `item`, and `datetime`; no ratings or explicit feedback needed
|
|
19
|
+
- **Explainable scoring** — probabilities are derived through mathematical optimization under RF monotonicity constraints, making every score fully traceable and auditable; 3D surface visualization further supports intuitive understanding
|
|
20
|
+
- **Probabilistic output** — revisit probabilities serve as preference scores, enabling expected value calculations and probabilistic ranking of recommendations
|
|
21
|
+
- **Extensible** — the user–item probability matrix produced by `transform()` can be directly used as input to collaborative filtering or other downstream recommendation models
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install rfscorer
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from rfscorer import RecencyFrequencyScorer
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Prepare an interaction log with at least three columns: user ID, item ID, and timestamp.
|
|
36
|
+
Split it into a training set and a test set.
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
df_train = ... # training interaction log (columns: user, item, datetime)
|
|
40
|
+
df_test = ... # test interaction log (columns: user, item, datetime)
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
| user | item | datetime |
|
|
44
|
+
|-------|-------|------------|
|
|
45
|
+
| u_001 | i_032 | 2026-07-01 |
|
|
46
|
+
| u_001 | i_017 | 2026-07-03 |
|
|
47
|
+
| u_001 | i_032 | 2026-07-05 |
|
|
48
|
+
| u_002 | i_011 | 2026-07-02 |
|
|
49
|
+
| u_002 | i_058 | 2026-07-04 |
|
|
50
|
+
|
|
51
|
+
The same user-item pair may appear multiple times, representing repeat visits.
|
|
52
|
+
|
|
53
|
+
Instantiate the scorer, specifying the column names if they differ from the defaults (`user`, `item`, `datetime`).
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
scorer = RecencyFrequencyScorer()
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Call `fit()` to estimate empirical revisit probabilities from the training log.
|
|
60
|
+
Specify the observation period (from which recency and frequency are computed) and the evaluation period (which provides the ground-truth revisit labels).
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
scorer.fit(
|
|
64
|
+
df_train,
|
|
65
|
+
observation_period=("2026-07-01", "2026-07-07"),
|
|
66
|
+
evaluation_period=("2026-07-08", "2026-07-08"),
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
The empirical surface reflects raw revisit rates and may be irregular due to sparse data.
|
|
71
|
+
|
|
72
|
+

|
|
73
|
+
|
|
74
|
+
Optionally, call `optimize()` to smooth the surface under RF monotonicity constraints using convex quadratic programming.
|
|
75
|
+
`kind="mono"` enforces recency and frequency monotonicity.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
scorer.optimize(kind="mono")
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+

|
|
82
|
+
|
|
83
|
+
`kind="mcc"` additionally imposes convexity in recency and concavity in frequency, yielding a smoother surface.
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
scorer.optimize(kind="mcc")
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+

|
|
90
|
+
|
|
91
|
+
Call `transform()` to score each user-item pair in the test log.
|
|
92
|
+
It returns a DataFrame with columns `user`, `item`, `recency`, `frequency`, `probability`, and `order` (rank within each user, sorted by probability descending).
|
|
93
|
+
Pass `kind="empirical"`, `kind="mono"`, or `kind="mcc"` to select which probabilities to use.
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
df_rec_mcc = scorer.transform(df_test, target_date="2026-07-07", kind="mcc")
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
| user | item | recency | frequency | probability | order |
|
|
100
|
+
|--------|--------|--------:|----------:|------------:|------:|
|
|
101
|
+
| u_001 | i_032 | 1 | 4 | 0.1167 | 1 |
|
|
102
|
+
| u_001 | i_017 | 2 | 3 | 0.0789 | 2 |
|
|
103
|
+
| u_001 | i_045 | 3 | 1 | 0.0248 | 3 |
|
|
104
|
+
| u_002 | i_011 | 1 | 2 | 0.0621 | 1 |
|
|
105
|
+
| u_002 | i_058 | 4 | 1 | 0.0182 | 2 |
|
|
106
|
+
|
|
107
|
+
Within each user, rows are sorted by `probability` descending; `order` represents the recommendation rank.
|
|
108
|
+
|
|
109
|
+
## Examples
|
|
110
|
+
|
|
111
|
+
- [examples/basic_usage.ipynb](examples/basic_usage.ipynb) — end-to-end walkthrough: load data, fit, optimize, transform, and evaluate
|
|
112
|
+
|
|
113
|
+
## References
|
|
114
|
+
- [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Estimating product-choice probabilities from recency and frequency of page views,” Knowledge-Based Systems, Volume 99, 2016, Pages 157–167.](https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848)
|
|
115
|
+
|
|
116
|
+
- [Jiro Iwanaga, Kyota Ishihara, Naoki Nishimura, and Ikki Tanaka, *Pythonではじめる数理最適化 ―ケーススタディでモデリングのスキルを身につけよう―* (in Japanese), Ohmsha, 2021.](https://www.ohmsha.co.jp/book/9784274231759/)
|
|
117
|
+
- [Chapter 7: 商品推薦のための興味のスコアリング (in Japanese)](https://github.com/ohmsha/PyOptBook/tree/main/7.recommendation)
|
|
118
|
+
|
|
119
|
+
- [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Improving collaborative filtering recommendations by estimating user preferences from clickstream data,” Electronic Commerce Research and Applications, Volume 37, Article 100877, 2019.](https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
## Citing
|
|
123
|
+
|
|
124
|
+
If you use `rfscorer` in academic work, please cite the following paper:
|
|
125
|
+
|
|
126
|
+
Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
|
|
127
|
+
"Estimating product-choice probabilities from recency and frequency of page views,"
|
|
128
|
+
*Knowledge-Based Systems*, Volume 99, 2016, Pages 157–167.
|
|
129
|
+
|
|
130
|
+
```bibtex
|
|
131
|
+
@article{Iwanaga2016,
|
|
132
|
+
author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
|
|
133
|
+
title = {Estimating product-choice probabilities from recency and frequency of page views},
|
|
134
|
+
journal = {Knowledge-Based Systems},
|
|
135
|
+
volume = {99},
|
|
136
|
+
pages = {157--167},
|
|
137
|
+
year = {2016},
|
|
138
|
+
url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848}
|
|
139
|
+
}
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
If you additionally use the probability matrix as input to a collaborative filtering model, please also cite:
|
|
143
|
+
|
|
144
|
+
Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
|
|
145
|
+
"Improving collaborative filtering recommendations by estimating user preferences from clickstream data,"
|
|
146
|
+
*Electronic Commerce Research and Applications*, Volume 37, Article 100877, 2019.
|
|
147
|
+
|
|
148
|
+
```bibtex
|
|
149
|
+
@article{Iwanaga2019,
|
|
150
|
+
author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
|
|
151
|
+
title = {Improving collaborative filtering recommendations by estimating user preferences from clickstream data},
|
|
152
|
+
journal = {Electronic Commerce Research and Applications},
|
|
153
|
+
volume = {37},
|
|
154
|
+
pages = {100877},
|
|
155
|
+
year = {2019},
|
|
156
|
+
url = {https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547}
|
|
157
|
+
}
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## License
|
|
161
|
+
|
|
162
|
+
MIT License
|