rfscorer 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/python
3
+ {
4
+ "name": "Python 3",
5
+ // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
6
+ "image": "mcr.microsoft.com/devcontainers/python:3-3.14-trixie"
7
+
8
+ // Features to add to the dev container. More info: https://containers.dev/features.
9
+ // "features": {},
10
+
11
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
12
+ // "forwardPorts": [],
13
+
14
+ // Use 'postCreateCommand' to run commands after the container is created.
15
+ // "postCreateCommand": "pip3 install --user -r requirements.txt",
16
+
17
+ // Configure tool-specific properties.
18
+ // "customizations": {},
19
+
20
+ // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
21
+ // "remoteUser": "root"
22
+ }
@@ -0,0 +1,34 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ runs-on: ubuntu-latest
12
+ strategy:
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12"]
15
+
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Install uv
20
+ uses: astral-sh/setup-uv@v5
21
+ with:
22
+ python-version: ${{ matrix.python-version }}
23
+
24
+ - name: Install dependencies
25
+ run: uv sync --dev
26
+
27
+ - name: Lint
28
+ run: uv run ruff check .
29
+
30
+ - name: Format
31
+ run: uv run ruff format --check .
32
+
33
+ - name: Test
34
+ run: uv run pytest
@@ -0,0 +1,235 @@
1
+ # macOS
2
+ .DS_Store
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[codz]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py.cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+ cover/
56
+
57
+ # Translations
58
+ *.mo
59
+ *.pot
60
+
61
+ # Django stuff:
62
+ *.log
63
+ local_settings.py
64
+ db.sqlite3
65
+ db.sqlite3-journal
66
+
67
+ # Flask stuff:
68
+ instance/
69
+ .webassets-cache
70
+
71
+ # Scrapy stuff:
72
+ .scrapy
73
+
74
+ # Sphinx documentation
75
+ docs/_build/
76
+
77
+ # PyBuilder
78
+ .pybuilder/
79
+ target/
80
+
81
+ # Jupyter Notebook
82
+ .ipynb_checkpoints
83
+
84
+ # IPython
85
+ profile_default/
86
+ ipython_config.py
87
+
88
+ # pyenv
89
+ # For a library or package, you might want to ignore these files since the code is
90
+ # intended to run in multiple environments; otherwise, check them in:
91
+ # .python-version
92
+
93
+ # pipenv
94
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
96
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
97
+ # install all needed dependencies.
98
+ # Pipfile.lock
99
+
100
+ # UV
101
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
102
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
103
+ # commonly ignored for libraries.
104
+ # uv.lock
105
+
106
+ # poetry
107
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
108
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
109
+ # commonly ignored for libraries.
110
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
111
+ # poetry.lock
112
+ # poetry.toml
113
+
114
+ # pdm
115
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
116
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
117
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
118
+ # pdm.lock
119
+ # pdm.toml
120
+ .pdm-python
121
+ .pdm-build/
122
+
123
+ # pixi
124
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
125
+ # pixi.lock
126
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
127
+ # in the .venv directory. It is recommended not to include this directory in version control.
128
+ .pixi
129
+
130
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
131
+ __pypackages__/
132
+
133
+ # Celery stuff
134
+ celerybeat-schedule
135
+ celerybeat.pid
136
+
137
+ # Redis
138
+ *.rdb
139
+ *.aof
140
+ *.pid
141
+
142
+ # RabbitMQ
143
+ mnesia/
144
+ rabbitmq/
145
+ rabbitmq-data/
146
+
147
+ # ActiveMQ
148
+ activemq-data/
149
+
150
+ # SageMath parsed files
151
+ *.sage.py
152
+
153
+ # Environments
154
+ .env
155
+ .envrc
156
+ .venv
157
+ env/
158
+ venv/
159
+ ENV/
160
+ env.bak/
161
+ venv.bak/
162
+
163
+ # Spyder project settings
164
+ .spyderproject
165
+ .spyproject
166
+
167
+ # Rope project settings
168
+ .ropeproject
169
+
170
+ # mkdocs documentation
171
+ /site
172
+
173
+ # mypy
174
+ .mypy_cache/
175
+ .dmypy.json
176
+ dmypy.json
177
+
178
+ # Pyre type checker
179
+ .pyre/
180
+
181
+ # pytype static type analyzer
182
+ .pytype/
183
+
184
+ # Cython debug symbols
185
+ cython_debug/
186
+
187
+ # PyCharm
188
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
189
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
190
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
191
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
192
+ # .idea/
193
+
194
+ # Abstra
195
+ # Abstra is an AI-powered process automation framework.
196
+ # Ignore directories containing user credentials, local state, and settings.
197
+ # Learn more at https://abstra.io/docs
198
+ .abstra/
199
+
200
+ # Visual Studio Code
201
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
202
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
203
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
204
+ # you could uncomment the following to ignore the entire vscode folder
205
+ # .vscode/
206
+ # Temporary file for partial code execution
207
+ tempCodeRunnerFile.py
208
+
209
+ # Ruff stuff:
210
+ .ruff_cache/
211
+
212
+ # PyPI configuration file
213
+ .pypirc
214
+
215
+ # Marimo
216
+ marimo/_static/
217
+ marimo/_lsp/
218
+ __marimo__/
219
+
220
+ # Streamlit
221
+ .streamlit/secrets.toml
222
+
223
+ # for development
224
+ CLAUDE.md
225
+ src/rfscorer/*.csv
226
+ src/rfscorer/*.png
227
+ workspace/
228
+ examples/*.png
229
+ all_probability.csv
230
+ df_recommend_emp.csv
231
+ df_recommend_mcc.csv
232
+ df_recommend_mono.csv
233
+ surface_empirical_probability.png
234
+ surface_mcc_probability.png
235
+ surface_mono_probability.png
rfscorer-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 iwanaga
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,208 @@
1
+ Metadata-Version: 2.4
2
+ Name: rfscorer
3
+ Version: 0.1.0
4
+ Summary: Recency-Frequency based recommendation scoring
5
+ Project-URL: Repository, https://github.com/jiro-iwanaga/rfscorer
6
+ Author-email: Jiro Iwanaga <iwanaga@erdos-the-book.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 iwanaga
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ License-File: LICENSE
29
+ Keywords: e-commerce,frequency,recency,recommendation,scoring
30
+ Classifier: Development Status :: 3 - Alpha
31
+ Classifier: Intended Audience :: Developers
32
+ Classifier: Intended Audience :: Science/Research
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.10
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
39
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
40
+ Requires-Python: >=3.10
41
+ Requires-Dist: cvxpy>=1.3
42
+ Requires-Dist: matplotlib>=3.5
43
+ Requires-Dist: numpy>=1.23
44
+ Requires-Dist: pandas>=1.5
45
+ Description-Content-Type: text/markdown
46
+
47
+ # rfscorer
48
+
49
+ [![CI](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml/badge.svg)](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml)
50
+ [![PyPI version](https://badge.fury.io/py/rfscorer.svg)](https://badge.fury.io/py/rfscorer)
51
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rfscorer.svg)](https://pypi.org/project/rfscorer/)
52
+
53
+ `rfscorer` is a Python package for Recency-Frequency based recommendation scoring.
54
+
55
+ It estimates **revisit probabilities** — the preference score for each user-item pair, forming a matrix analogous to a rating matrix — from interaction histories, using two simple but powerful behavioral signals: **recency**, which captures how recently a user interacted with an item, and **frequency**, which captures how often the user has interacted with it.
56
+
57
+ The package is designed for product recommendation and revisit modeling, especially in settings where interpretable scoring based on interaction history is preferred over black-box recommendation models.
58
+
59
+ > Note: In this package, **RF** stands for **Recency-Frequency**, not Random Forest.
60
+
61
+ ## Features
62
+
63
+ - **scikit-learn-style API** — familiar `fit()` / `transform()` interface makes it easy to integrate into existing data science workflows
64
+ - **Minimal data requirements** — works with any interaction log that has three columns: `user`, `item`, and `datetime`; no ratings or explicit feedback needed
65
+ - **Explainable scoring** — probabilities are derived through mathematical optimization under RF monotonicity constraints, making every score fully traceable and auditable; 3D surface visualization further supports intuitive understanding
66
+ - **Probabilistic output** — revisit probabilities serve as preference scores, enabling expected value calculations and probabilistic ranking of recommendations
67
+ - **Extensible** — the user-item probability matrix produced by `transform()` can be directly used as input to collaborative filtering or other downstream recommendation models
68
+
69
+ ## Installation
70
+
71
+ ```bash
72
+ pip install rfscorer
73
+ ```
74
+
75
+ ## Usage
76
+
77
+ ```python
78
+ from rfscorer import RecencyFrequencyScorer
79
+ ```
80
+
81
+ Prepare an interaction log with at least three columns: user ID, item ID, and timestamp.
82
+ Split it into a training set and a test set.
83
+
84
+ ```python
85
+ df_train = ... # training interaction log (columns: user, item, datetime)
86
+ df_test = ... # test interaction log (columns: user, item, datetime)
87
+ ```
88
+
89
+ | user | item | datetime |
90
+ |-------|-------|------------|
91
+ | u_001 | i_032 | 2026-07-01 |
92
+ | u_001 | i_017 | 2026-07-03 |
93
+ | u_001 | i_032 | 2026-07-05 |
94
+ | u_002 | i_011 | 2026-07-02 |
95
+ | u_002 | i_058 | 2026-07-04 |
96
+
97
+ The same user-item pair may appear multiple times, representing repeat visits.
98
+
99
+ Instantiate the scorer, specifying the column names if they differ from the defaults (`user`, `item`, `datetime`).
100
+
101
+ ```python
102
+ scorer = RecencyFrequencyScorer()
103
+ ```
104
+
105
+ Call `fit()` to estimate empirical revisit probabilities from the training log.
106
+ Specify the observation period (from which recency and frequency are computed) and the evaluation period (which provides the ground-truth revisit labels).
107
+
108
+ ```python
109
+ scorer.fit(
110
+ df_train,
111
+ observation_period=("2026-07-01", "2026-07-07"),
112
+ evaluation_period=("2026-07-08", "2026-07-08"),
113
+ )
114
+ ```
115
+
116
+ The empirical surface reflects raw revisit rates and may be irregular due to sparse data.
117
+
118
+ ![empirical probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/empirical_probability_surface.png)
119
+
120
+ Optionally, call `optimize()` to smooth the surface under RF monotonicity constraints using convex quadratic programming.
121
+ `kind="mono"` enforces recency and frequency monotonicity.
122
+
123
+ ```python
124
+ scorer.optimize(kind="mono")
125
+ ```
126
+
127
+ ![mono probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/mono_probability_surface.png)
128
+
129
+ `kind="mcc"` additionally imposes convexity in recency and concavity in frequency, yielding a smoother surface.
130
+
131
+ ```python
132
+ scorer.optimize(kind="mcc")
133
+ ```
134
+
135
+ ![mcc probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/mcc_probability_surface.png)
136
+
137
+ Call `transform()` to score each user-item pair in the test log.
138
+ It returns a DataFrame with columns `user`, `item`, `recency`, `frequency`, `probability`, and `order` (rank within each user, sorted by probability descending).
139
+ Pass `kind="empirical"`, `kind="mono"`, or `kind="mcc"` to select which probabilities to use.
140
+
141
+ ```python
142
+ df_rec_mcc = scorer.transform(df_test, target_date="2026-07-07", kind="mcc")
143
+ ```
144
+
145
+ | user | item | recency | frequency | probability | order |
146
+ |--------|--------|--------:|----------:|------------:|------:|
147
+ | u_001 | i_032 | 1 | 4 | 0.1167 | 1 |
148
+ | u_001 | i_017 | 2 | 3 | 0.0789 | 2 |
149
+ | u_001 | i_045 | 3 | 1 | 0.0248 | 3 |
150
+ | u_002 | i_011 | 1 | 2 | 0.0621 | 1 |
151
+ | u_002 | i_058 | 4 | 1 | 0.0182 | 2 |
152
+
153
+ Within each user, rows are sorted by `probability` descending; `order` represents the recommendation rank.
154
+
155
+ ## Examples
156
+
157
+ - [examples/basic_usage.ipynb](https://github.com/jiro-iwanaga/rfscorer/blob/main/examples/basic_usage.ipynb) — end-to-end walkthrough: load data, fit, optimize, transform, and evaluate
158
+
159
+ ## References
160
+ - [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Estimating product-choice probabilities from recency and frequency of page views,” Knowledge-Based Systems, Volume 99, 2016, Pages 157–167.](https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848)
161
+
162
+ - [Jiro Iwanaga, Kyota Ishihara, Naoki Nishimura, and Ikki Tanaka, *Pythonではじめる数理最適化 ―ケーススタディでモデリングのスキルを身につけよう―* (in Japanese), Ohmsha, 2021.](https://www.ohmsha.co.jp/book/9784274231759/)
163
+ - [Chapter 7: 商品推薦のための興味のスコアリング (in Japanese)](https://github.com/ohmsha/PyOptBook/tree/main/7.recommendation)
164
+
165
+ - [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Improving collaborative filtering recommendations by estimating user preferences from clickstream data,” Electronic Commerce Research and Applications, Volume 37, Article 100877, 2019.](https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547)
166
+
167
+
168
+ ## Citing
169
+
170
+ If you use `rfscorer` in academic work, please cite the following paper:
171
+
172
+ Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
173
+ "Estimating product-choice probabilities from recency and frequency of page views,"
174
+ *Knowledge-Based Systems*, Volume 99, 2016, Pages 157–167.
175
+
176
+ ```bibtex
177
+ @article{Iwanaga2016,
178
+ author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
179
+ title = {Estimating product-choice probabilities from recency and frequency of page views},
180
+ journal = {Knowledge-Based Systems},
181
+ volume = {99},
182
+ pages = {157--167},
183
+ year = {2016},
184
+ url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848}
185
+ }
186
+ ```
187
+
188
+ If you additionally use the probability matrix as input to a collaborative filtering model, please also cite:
189
+
190
+ Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
191
+ "Improving collaborative filtering recommendations by estimating user preferences from clickstream data,"
192
+ *Electronic Commerce Research and Applications*, Volume 37, Article 100877, 2019.
193
+
194
+ ```bibtex
195
+ @article{Iwanaga2019,
196
+ author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
197
+ title = {Improving collaborative filtering recommendations by estimating user preferences from clickstream data},
198
+ journal = {Electronic Commerce Research and Applications},
199
+ volume = {37},
200
+ pages = {100877},
201
+ year = {2019},
202
+ url = {https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547}
203
+ }
204
+ ```
205
+
206
+ ## License
207
+
208
+ MIT License
@@ -0,0 +1,162 @@
1
+ # rfscorer
2
+
3
+ [![CI](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml/badge.svg)](https://github.com/jiro-iwanaga/rfscorer/actions/workflows/ci.yml)
4
+ [![PyPI version](https://badge.fury.io/py/rfscorer.svg)](https://badge.fury.io/py/rfscorer)
5
+ [![Python Versions](https://img.shields.io/pypi/pyversions/rfscorer.svg)](https://pypi.org/project/rfscorer/)
6
+
7
+ `rfscorer` is a Python package for Recency-Frequency based recommendation scoring.
8
+
9
+ It estimates **revisit probabilities** — the preference score for each user-item pair, forming a matrix analogous to a rating matrix — from interaction histories, using two simple but powerful behavioral signals: **recency**, which captures how recently a user interacted with an item, and **frequency**, which captures how often the user has interacted with it.
10
+
11
+ The package is designed for product recommendation and revisit modeling, especially in settings where interpretable scoring based on interaction history is preferred over black-box recommendation models.
12
+
13
+ > Note: In this package, **RF** stands for **Recency-Frequency**, not Random Forest.
14
+
15
+ ## Features
16
+
17
+ - **scikit-learn-style API** — familiar `fit()` / `transform()` interface makes it easy to integrate into existing data science workflows
18
+ - **Minimal data requirements** — works with any interaction log that has three columns: `user`, `item`, and `datetime`; no ratings or explicit feedback needed
19
+ - **Explainable scoring** — probabilities are derived through mathematical optimization under RF monotonicity constraints, making every score fully traceable and auditable; 3D surface visualization further supports intuitive understanding
20
+ - **Probabilistic output** — revisit probabilities serve as preference scores, enabling expected value calculations and probabilistic ranking of recommendations
21
+ - **Extensible** — the user-item probability matrix produced by `transform()` can be directly used as input to collaborative filtering or other downstream recommendation models
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install rfscorer
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from rfscorer import RecencyFrequencyScorer
33
+ ```
34
+
35
+ Prepare an interaction log with at least three columns: user ID, item ID, and timestamp.
36
+ Split it into a training set and a test set.
37
+
38
+ ```python
39
+ df_train = ... # training interaction log (columns: user, item, datetime)
40
+ df_test = ... # test interaction log (columns: user, item, datetime)
41
+ ```
42
+
43
+ | user | item | datetime |
44
+ |-------|-------|------------|
45
+ | u_001 | i_032 | 2026-07-01 |
46
+ | u_001 | i_017 | 2026-07-03 |
47
+ | u_001 | i_032 | 2026-07-05 |
48
+ | u_002 | i_011 | 2026-07-02 |
49
+ | u_002 | i_058 | 2026-07-04 |
50
+
51
+ The same user-item pair may appear multiple times, representing repeat visits.
52
+
53
+ Instantiate the scorer, specifying the column names if they differ from the defaults (`user`, `item`, `datetime`).
54
+
55
+ ```python
56
+ scorer = RecencyFrequencyScorer()
57
+ ```
58
+
59
+ Call `fit()` to estimate empirical revisit probabilities from the training log.
60
+ Specify the observation period (from which recency and frequency are computed) and the evaluation period (which provides the ground-truth revisit labels).
61
+
62
+ ```python
63
+ scorer.fit(
64
+ df_train,
65
+ observation_period=("2026-07-01", "2026-07-07"),
66
+ evaluation_period=("2026-07-08", "2026-07-08"),
67
+ )
68
+ ```
69
+
70
+ The empirical surface reflects raw revisit rates and may be irregular due to sparse data.
71
+
72
+ ![empirical probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/empirical_probability_surface.png)
73
+
74
+ Optionally, call `optimize()` to smooth the surface under RF monotonicity constraints using convex quadratic programming.
75
+ `kind="mono"` enforces recency and frequency monotonicity.
76
+
77
+ ```python
78
+ scorer.optimize(kind="mono")
79
+ ```
80
+
81
+ ![mono probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/mono_probability_surface.png)
82
+
83
+ `kind="mcc"` additionally imposes convexity in recency and concavity in frequency, yielding a smoother surface.
84
+
85
+ ```python
86
+ scorer.optimize(kind="mcc")
87
+ ```
88
+
89
+ ![mcc probability surface](https://raw.githubusercontent.com/jiro-iwanaga/rfscorer/main/img/mcc_probability_surface.png)
90
+
91
+ Call `transform()` to score each user-item pair in the test log.
92
+ It returns a DataFrame with columns `user`, `item`, `recency`, `frequency`, `probability`, and `order` (rank within each user, sorted by probability descending).
93
+ Pass `kind="empirical"`, `kind="mono"`, or `kind="mcc"` to select which probabilities to use.
94
+
95
+ ```python
96
+ df_rec_mcc = scorer.transform(df_test, target_date="2026-07-07", kind="mcc")
97
+ ```
98
+
99
+ | user | item | recency | frequency | probability | order |
100
+ |--------|--------|--------:|----------:|------------:|------:|
101
+ | u_001 | i_032 | 1 | 4 | 0.1167 | 1 |
102
+ | u_001 | i_017 | 2 | 3 | 0.0789 | 2 |
103
+ | u_001 | i_045 | 3 | 1 | 0.0248 | 3 |
104
+ | u_002 | i_011 | 1 | 2 | 0.0621 | 1 |
105
+ | u_002 | i_058 | 4 | 1 | 0.0182 | 2 |
106
+
107
+ Within each user, rows are sorted by `probability` descending; `order` represents the recommendation rank.
108
+
109
+ ## Examples
110
+
111
+ - [examples/basic_usage.ipynb](https://github.com/jiro-iwanaga/rfscorer/blob/main/examples/basic_usage.ipynb) — end-to-end walkthrough: load data, fit, optimize, transform, and evaluate
112
+
113
+ ## References
114
+ - [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Estimating product-choice probabilities from recency and frequency of page views,” Knowledge-Based Systems, Volume 99, 2016, Pages 157–167.](https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848)
115
+
116
+ - [Jiro Iwanaga, Kyota Ishihara, Naoki Nishimura, and Ikki Tanaka, *Pythonではじめる数理最適化 ―ケーススタディでモデリングのスキルを身につけよう―* (in Japanese), Ohmsha, 2021.](https://www.ohmsha.co.jp/book/9784274231759/)
117
+ - [Chapter 7: 商品推薦のための興味のスコアリング (in Japanese)](https://github.com/ohmsha/PyOptBook/tree/main/7.recommendation)
118
+
119
+ - [Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano, “Improving collaborative filtering recommendations by estimating user preferences from clickstream data,” Electronic Commerce Research and Applications, Volume 37, Article 100877, 2019.](https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547)
120
+
121
+
122
+ ## Citing
123
+
124
+ If you use `rfscorer` in academic work, please cite the following paper:
125
+
126
+ Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
127
+ "Estimating product-choice probabilities from recency and frequency of page views,"
128
+ *Knowledge-Based Systems*, Volume 99, 2016, Pages 157–167.
129
+
130
+ ```bibtex
131
+ @article{Iwanaga2016,
132
+ author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
133
+ title = {Estimating product-choice probabilities from recency and frequency of page views},
134
+ journal = {Knowledge-Based Systems},
135
+ volume = {99},
136
+ pages = {157--167},
137
+ year = {2016},
138
+ url = {https://www.sciencedirect.com/science/article/abs/pii/S0950705116000848}
139
+ }
140
+ ```
141
+
142
+ If you additionally use the probability matrix as input to a collaborative filtering model, please also cite:
143
+
144
+ Jiro Iwanaga, Naoki Nishimura, Noriyoshi Sukegawa, and Yuichi Takano,
145
+ "Improving collaborative filtering recommendations by estimating user preferences from clickstream data,"
146
+ *Electronic Commerce Research and Applications*, Volume 37, Article 100877, 2019.
147
+
148
+ ```bibtex
149
+ @article{Iwanaga2019,
150
+ author = {Jiro Iwanaga and Naoki Nishimura and Noriyoshi Sukegawa and Yuichi Takano},
151
+ title = {Improving collaborative filtering recommendations by estimating user preferences from clickstream data},
152
+ journal = {Electronic Commerce Research and Applications},
153
+ volume = {37},
154
+ pages = {100877},
155
+ year = {2019},
156
+ url = {https://www.sciencedirect.com/science/article/abs/pii/S1567422319300547}
157
+ }
158
+ ```
159
+
160
+ ## License
161
+
162
+ MIT License