cosinefusion 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cosinefusion-0.1.0/LICENSE +21 -0
- cosinefusion-0.1.0/PKG-INFO +106 -0
- cosinefusion-0.1.0/README.md +92 -0
- cosinefusion-0.1.0/pyproject.toml +22 -0
- cosinefusion-0.1.0/setup.cfg +4 -0
- cosinefusion-0.1.0/setup.py +15 -0
- cosinefusion-0.1.0/src/cosinefusion/__init__.py +20 -0
- cosinefusion-0.1.0/src/cosinefusion/core_demo.py +53 -0
- cosinefusion-0.1.0/src/cosinefusion.egg-info/PKG-INFO +106 -0
- cosinefusion-0.1.0/src/cosinefusion.egg-info/SOURCES.txt +13 -0
- cosinefusion-0.1.0/src/cosinefusion.egg-info/dependency_links.txt +1 -0
- cosinefusion-0.1.0/src/cosinefusion.egg-info/requires.txt +1 -0
- cosinefusion-0.1.0/src/cosinefusion.egg-info/top_level.txt +2 -0
- cosinefusion-0.1.0/src/cpp/core_init.cpp +76 -0
- cosinefusion-0.1.0/tests/test_bridge.py +65 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Sam Chaudry
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cosinefusion
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A high-performance C++ accelerated vector score fusion engine.
|
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
|
6
|
+
Classifier: Programming Language :: C++
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: numpy>=1.24.0
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# Cosine Fusion
|
|
16
|
+
|
|
17
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion)
|
|
18
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion/blob/main/LICENSE)
|
|
19
|
+
[](https://www.python.org/)
|
|
20
|
+
[]()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
Cosine Fusion is a lightweight C++/Python cosine similarity engine using **pybind11**. It provides fast, vectorized similarity calculations between user and item feature matrices.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
- C++ backend for high performance
|
|
28
|
+
- Python interface via pybind11
|
|
29
|
+
- Easy integration into Python projects
|
|
30
|
+
- Simple example with user-item preference vectors
|
|
31
|
+
|
|
32
|
+
## Use Cases / Applications
|
|
33
|
+
- Recommender systems for e-commerce or media content
|
|
34
|
+
- Personalization engines based on user preferences
|
|
35
|
+
- Fast similarity search for AI/ML feature matching
|
|
36
|
+
- Any project requiring high-performance cosine similarity computation
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
Clone the repository and install the package:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/CookieMonsteriOS/CosineFusion.git
|
|
43
|
+
cd CosineFusion
|
|
44
|
+
pip install pybind11
|
|
45
|
+
pip install .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Usage Example
|
|
49
|
+
```python
|
|
50
|
+
import numpy as np
|
|
51
|
+
import core_init
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Example item features
|
|
55
|
+
items = np.array([
|
|
56
|
+
[0, 1, 0, 0, 1], # Tea
|
|
57
|
+
[0, 1, 0, 0, 1], # Coffee
|
|
58
|
+
[1, 0, 1, 1, 0], # Jaffa Cake
|
|
59
|
+
[1, 0, 1, 0, 0], # Biscuit
|
|
60
|
+
[1, 0, 1, 1, 1], # Chocolate Bar
|
|
61
|
+
[0, 1, 0, 0, 1], # Espresso
|
|
62
|
+
])
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
item_names = ["Tea", "Coffee", "Jaffa Cake", "Biscuit", "Chocolate Bar", "Espresso"]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
user = np.array([[0, 0, 0, 1, 0]]) # User likes sweet + chocolate
|
|
69
|
+
res = core_init.cosine_similarity(user, items)
|
|
70
|
+
sim = res["similarity_matrix"]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
top_indices = np.argsort(sim[0])[::-1]
|
|
74
|
+
print("Top recommendations for user:")
|
|
75
|
+
for i in top_indices:
|
|
76
|
+
print(f"{item_names[i]}: {sim[0][i]:.2f}") # Sample output showing relationships:
|
|
77
|
+
|
|
78
|
+
# Top recommendations for user:
|
|
79
|
+
# Chocolate Bar: 0.89
|
|
80
|
+
# Jaffa Cake: 0.75 <- shows some similarity to Tea in sweetness
|
|
81
|
+
# Biscuit: 0.65
|
|
82
|
+
# Tea: 0.45
|
|
83
|
+
# Coffee: 0.45
|
|
84
|
+
# Espresso: 0.43
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Project Structure
|
|
88
|
+
```
|
|
89
|
+
CosineFusion/
|
|
90
|
+
├── src/
|
|
91
|
+
│ ├── cpp/core_init.cpp
|
|
92
|
+
│ └── python/core_demo.py
|
|
93
|
+
├── tests/test_bridge.py
|
|
94
|
+
├── setup.py
|
|
95
|
+
├── pyproject.toml
|
|
96
|
+
├── README.md
|
|
97
|
+
├── LICENSE
|
|
98
|
+
└── requirements.txt
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
**Author:** Sam Chaudry
|
|
106
|
+
**GitHub:** [CookieMonsteriOS](https://github.com/CookieMonsteriOS)
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# Cosine Fusion
|
|
2
|
+
|
|
3
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion)
|
|
4
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion/blob/main/LICENSE)
|
|
5
|
+
[](https://www.python.org/)
|
|
6
|
+
[]()
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
## Overview
|
|
10
|
+
Cosine Fusion is a lightweight C++/Python cosine similarity engine using **pybind11**. It provides fast, vectorized similarity calculations between user and item feature matrices.
|
|
11
|
+
|
|
12
|
+
## Features
|
|
13
|
+
- C++ backend for high performance
|
|
14
|
+
- Python interface via pybind11
|
|
15
|
+
- Easy integration into Python projects
|
|
16
|
+
- Simple example with user-item preference vectors
|
|
17
|
+
|
|
18
|
+
## Use Cases / Applications
|
|
19
|
+
- Recommender systems for e-commerce or media content
|
|
20
|
+
- Personalization engines based on user preferences
|
|
21
|
+
- Fast similarity search for AI/ML feature matching
|
|
22
|
+
- Any project requiring high-performance cosine similarity computation
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
Clone the repository and install the package:
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
git clone https://github.com/CookieMonsteriOS/CosineFusion.git
|
|
29
|
+
cd CosineFusion
|
|
30
|
+
pip install pybind11
|
|
31
|
+
pip install .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Usage Example
|
|
35
|
+
```python
|
|
36
|
+
import numpy as np
|
|
37
|
+
from cosinefusion import core_backend as core_init
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Example item features
|
|
41
|
+
items = np.array([
|
|
42
|
+
[0, 1, 0, 0, 1], # Tea
|
|
43
|
+
[0, 1, 0, 0, 1], # Coffee
|
|
44
|
+
[1, 0, 1, 1, 0], # Jaffa Cake
|
|
45
|
+
[1, 0, 1, 0, 0], # Biscuit
|
|
46
|
+
[1, 0, 1, 1, 1], # Chocolate Bar
|
|
47
|
+
[0, 1, 0, 0, 1], # Espresso
|
|
48
|
+
])
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
item_names = ["Tea", "Coffee", "Jaffa Cake", "Biscuit", "Chocolate Bar", "Espresso"]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
user = np.array([[0, 0, 0, 1, 0]]) # User likes sweet + chocolate
|
|
55
|
+
res = core_init.cosine_similarity(user, items)
|
|
56
|
+
sim = res["similarity_matrix"]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
top_indices = np.argsort(sim[0])[::-1]
|
|
60
|
+
print("Top recommendations for user:")
|
|
61
|
+
for i in top_indices:
|
|
62
|
+
print(f"{item_names[i]}: {sim[0][i]:.2f}") # Sample output showing relationships:
|
|
63
|
+
|
|
64
|
+
# Top recommendations for user:
|
|
65
|
+
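# Jaffa Cake: 0.58 <- has the one feature the user likes, spread over the fewest features
|
|
66
|
+
# Chocolate Bar: 0.50
|
|
67
|
+
# Espresso: 0.00 (items sharing no feature with the user score 0; order among ties may vary)
|
|
68
|
+
# Biscuit: 0.00
|
|
69
|
+
# Coffee: 0.00
|
|
70
|
+
# Tea: 0.00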
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Project Structure
|
|
74
|
+
```
|
|
75
|
+
CosineFusion/
|
|
76
|
+
├── src/
|
|
77
|
+
│ ├── cpp/core_init.cpp
|
|
78
|
+
│ └── cosinefusion/core_demo.py
|
|
79
|
+
├── tests/test_bridge.py
|
|
80
|
+
├── setup.py
|
|
81
|
+
├── pyproject.toml
|
|
82
|
+
├── README.md
|
|
83
|
+
├── LICENSE
|
|
84
|
+
└── requirements.txt
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## License
|
|
88
|
+
This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
**Author:** Sam Chaudry
|
|
92
|
+
**GitHub:** [CookieMonsteriOS](https://github.com/CookieMonsteriOS)
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cosinefusion"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A high-performance C++ accelerated vector score fusion engine."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Programming Language :: Python :: 3",
|
|
13
|
+
"Programming Language :: C++",
|
|
14
|
+
"License :: OSI Approved :: MIT License",
|
|
15
|
+
"Operating System :: OS Independent",
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"numpy>=1.24.0"
|
|
19
|
+
]
|
|
20
|
+
|
|
21
|
+
[tool.setuptools.packages.find]
|
|
22
|
+
where = ["src"]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from setuptools import setup, Extension
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
# Build the extension inside the package namespace as
# `cosinefusion.core_backend`, so that `from cosinefusion import core_backend`
# resolves directly to the compiled binary.

# GCC/Clang and MSVC spell the optimisation and language-standard switches
# differently, so pick the spelling that matches the platform's default
# compiler instead of passing GCC-style flags everywhere.
if os.name == 'nt':
    compile_args = ['/O2', '/std:c++17']
else:
    compile_args = ['-O3', '-std=c++17']

# NOTE(review): no include directory for the pybind11 headers is configured
# here, so the build relies on them being on the compiler's default include
# path -- confirm how pybind11 is expected to be located at build time.
module = Extension(
    'cosinefusion.core_backend',
    sources=['src/cpp/core_init.cpp'],
    extra_compile_args=compile_args,
)

setup(
    ext_modules=[module],
)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""cosinefusion package
|
|
2
|
+
|
|
3
|
+
This package exposes the Python layer that interfaces with the compiled
|
|
4
|
+
binary extension module `cosinefusion.core_backend`.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from cosinefusion import recommend
|
|
8
|
+
|
|
9
|
+
The compiled extension is available as `cosinefusion.core_backend` and provides
|
|
10
|
+
low-level routines such as `cosine_similarity(A, B)` which return a dict with
|
|
11
|
+
keys `"similarity_matrix"` and optional `"meta"` information.
|
|
12
|
+
|
|
13
|
+
The Python layer wraps and exposes higher-level helpers implemented in
|
|
14
|
+
`core_demo.py`.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .core_demo import recommend, main # re-export high-level helpers
|
|
18
|
+
from . import core_backend # compiled extension module
|
|
19
|
+
|
|
20
|
+
__all__ = ["recommend", "main", "core_backend"]
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from cosinefusion import core_backend as core_init
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def recommend(user, items, item_names=None, top_k=None):
    """Rank items by cosine similarity to a user vector.

    Parameters
    - user: numpy array of shape (1, D)
    - items: numpy array of shape (N, D)
    - item_names: optional list of names for items; accepted for caller
      convenience but not used by the ranking itself
    - top_k: optional non-negative int to limit returned results

    Returns a dict containing:
    - "similarity_matrix": numpy array of shape (1, N)
    - "top_indices": numpy array of ranked indices (desc), cut to at most
      ``top_k`` entries when ``top_k`` is given
    - "meta": metadata from the backend, or ``{}`` if it supplied none

    Raises
    - ValueError: if ``top_k`` is negative
    """
    # A negative top_k would slice from the end and silently drop the
    # lowest-ranked items instead of limiting the result, so reject it.
    if top_k is not None and top_k < 0:
        raise ValueError(f"top_k must be non-negative, got {top_k}")

    res = core_init.cosine_similarity(user, items)
    sim = res["similarity_matrix"]

    # Only the first row is ranked; argsort is ascending, so reverse it to
    # get the best match first.
    top_indices = np.argsort(sim[0])[::-1]
    if top_k is not None:
        top_indices = top_indices[:top_k]

    return {
        "similarity_matrix": sim,
        "top_indices": top_indices,
        "meta": res.get("meta", {}),
    }
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main():
    """Print the demo items ranked by similarity to one example user."""
    # One row per item, one 0/1 column per feature.
    items = np.array([
        [0, 1, 0, 0, 1],  # Tea
        [0, 1, 0, 0, 1],  # Coffee
        [1, 0, 1, 1, 0],  # Jaffa Cake
        [1, 0, 1, 0, 0],  # Biscuit
        [1, 0, 1, 1, 1],  # Chocolate Bar
        [0, 1, 0, 0, 1],  # Espresso
    ])

    item_names = ["Tea", "Coffee", "Jaffa Cake",
                  "Biscuit", "Chocolate Bar", "Espresso"]

    user = np.array([[0, 0, 0, 1, 0]])

    out = recommend(user, items, item_names)
    sim = out["similarity_matrix"]
    top_indices = out["top_indices"]

    print("Top recommendations for user:")
    for i in top_indices:
        # item_names is always defined above, so no fallback label is needed.
        print(f"{item_names[i]}: {sim[0][i]:.2f}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cosinefusion
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A high-performance C++ accelerated vector score fusion engine.
|
|
5
|
+
Classifier: Programming Language :: Python :: 3
|
|
6
|
+
Classifier: Programming Language :: C++
|
|
7
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: numpy>=1.24.0
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# Cosine Fusion
|
|
16
|
+
|
|
17
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion)
|
|
18
|
+
[](https://github.com/CookieMonsteriOS/CosineFusion/blob/main/LICENSE)
|
|
19
|
+
[](https://www.python.org/)
|
|
20
|
+
[]()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
## Overview
|
|
24
|
+
Cosine Fusion is a lightweight C++/Python cosine similarity engine using **pybind11**. It provides fast, vectorized similarity calculations between user and item feature matrices.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
- C++ backend for high performance
|
|
28
|
+
- Python interface via pybind11
|
|
29
|
+
- Easy integration into Python projects
|
|
30
|
+
- Simple example with user-item preference vectors
|
|
31
|
+
|
|
32
|
+
## Use Cases / Applications
|
|
33
|
+
- Recommender systems for e-commerce or media content
|
|
34
|
+
- Personalization engines based on user preferences
|
|
35
|
+
- Fast similarity search for AI/ML feature matching
|
|
36
|
+
- Any project requiring high-performance cosine similarity computation
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
Clone the repository and install the package:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
git clone https://github.com/CookieMonsteriOS/CosineFusion.git
|
|
43
|
+
cd CosineFusion
|
|
44
|
+
pip install pybind11
|
|
45
|
+
pip install .
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
## Usage Example
|
|
49
|
+
```python
|
|
50
|
+
import numpy as np
|
|
51
|
+
import core_init
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Example item features
|
|
55
|
+
items = np.array([
|
|
56
|
+
[0, 1, 0, 0, 1], # Tea
|
|
57
|
+
[0, 1, 0, 0, 1], # Coffee
|
|
58
|
+
[1, 0, 1, 1, 0], # Jaffa Cake
|
|
59
|
+
[1, 0, 1, 0, 0], # Biscuit
|
|
60
|
+
[1, 0, 1, 1, 1], # Chocolate Bar
|
|
61
|
+
[0, 1, 0, 0, 1], # Espresso
|
|
62
|
+
])
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
item_names = ["Tea", "Coffee", "Jaffa Cake", "Biscuit", "Chocolate Bar", "Espresso"]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
user = np.array([[0, 0, 0, 1, 0]]) # User likes sweet + chocolate
|
|
69
|
+
res = core_init.cosine_similarity(user, items)
|
|
70
|
+
sim = res["similarity_matrix"]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
top_indices = np.argsort(sim[0])[::-1]
|
|
74
|
+
print("Top recommendations for user:")
|
|
75
|
+
for i in top_indices:
|
|
76
|
+
print(f"{item_names[i]}: {sim[0][i]:.2f}") # Sample output showing relationships:
|
|
77
|
+
|
|
78
|
+
# Top recommendations for user:
|
|
79
|
+
# Chocolate Bar: 0.89
|
|
80
|
+
# Jaffa Cake: 0.75 <- shows some similarity to Tea in sweetness
|
|
81
|
+
# Biscuit: 0.65
|
|
82
|
+
# Tea: 0.45
|
|
83
|
+
# Coffee: 0.45
|
|
84
|
+
# Espresso: 0.43
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Project Structure
|
|
88
|
+
```
|
|
89
|
+
CosineFusion/
|
|
90
|
+
├── src/
|
|
91
|
+
│ ├── cpp/core_init.cpp
|
|
92
|
+
│ └── python/core_demo.py
|
|
93
|
+
├── tests/test_bridge.py
|
|
94
|
+
├── setup.py
|
|
95
|
+
├── pyproject.toml
|
|
96
|
+
├── README.md
|
|
97
|
+
├── LICENSE
|
|
98
|
+
└── requirements.txt
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## License
|
|
102
|
+
This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
**Author:** Sam Chaudry
|
|
106
|
+
**GitHub:** [CookieMonsteriOS](https://github.com/CookieMonsteriOS)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
src/cosinefusion/__init__.py
|
|
6
|
+
src/cosinefusion/core_demo.py
|
|
7
|
+
src/cosinefusion.egg-info/PKG-INFO
|
|
8
|
+
src/cosinefusion.egg-info/SOURCES.txt
|
|
9
|
+
src/cosinefusion.egg-info/dependency_links.txt
|
|
10
|
+
src/cosinefusion.egg-info/requires.txt
|
|
11
|
+
src/cosinefusion.egg-info/top_level.txt
|
|
12
|
+
src/cpp/core_init.cpp
|
|
13
|
+
tests/test_bridge.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
numpy>=1.24.0
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
#include <pybind11/pybind11.h>
#include <pybind11/numpy.h>

#include <cmath>
#include <iostream>
#include <stdexcept>
#include <vector>
|
|
6
|
+
|
|
7
|
+
namespace py = pybind11;
|
|
8
|
+
using namespace pybind11::literals;
|
|
9
|
+
|
|
10
|
+
py::dict cosine_similarity(py::array_t<double> A, py::array_t<double> B) {
|
|
11
|
+
|
|
12
|
+
auto a = A.unchecked<2>();
|
|
13
|
+
auto b = B.unchecked<2>();
|
|
14
|
+
|
|
15
|
+
if (a.shape(1) != b.shape(1))
|
|
16
|
+
throw std::runtime_error("Matrix mismatch");
|
|
17
|
+
|
|
18
|
+
ssize_t n_a = a.shape(0); // Count users/rows in A -> eg. Customer
|
|
19
|
+
ssize_t n_b = b.shape(0); // How many items in Row B -> eg. Items
|
|
20
|
+
ssize_t dim = a.shape(1); // Count Number of columns (e.g. 128 features) -> eg. Features data points in each row
|
|
21
|
+
|
|
22
|
+
//Output matrix
|
|
23
|
+
py::array_t<double> result({n_a, n_b});
|
|
24
|
+
auto r = result.mutable_unchecked<2>();
|
|
25
|
+
|
|
26
|
+
// Store user lengths in norm_a and norm_b
|
|
27
|
+
std::vector<double> norm_a(n_a, 0.0), norm_b(n_b, 0.0);
|
|
28
|
+
|
|
29
|
+
// compute norms for rows of A
|
|
30
|
+
for (ssize_t i = 0; i < n_a; ++i) {
|
|
31
|
+
double s = 0.0;
|
|
32
|
+
for (ssize_t d = 0; d < dim; ++d) {
|
|
33
|
+
double v = a(i, d);
|
|
34
|
+
s += v * v;
|
|
35
|
+
}
|
|
36
|
+
norm_a[i] = std::sqrt(s);
|
|
37
|
+
}
|
|
38
|
+
// compute norms for rows of B
|
|
39
|
+
for (ssize_t j = 0; j < n_b; ++j) {
|
|
40
|
+
double s = 0.0;
|
|
41
|
+
for (ssize_t d = 0; d < dim; ++d) {
|
|
42
|
+
double v = b(j, d);
|
|
43
|
+
s += v * v;
|
|
44
|
+
}
|
|
45
|
+
norm_b[j] = std::sqrt(s);
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
// compute cosine similarity for each pair
|
|
49
|
+
for (ssize_t i = 0; i < n_a; ++i) {
|
|
50
|
+
for (ssize_t j = 0; j < n_b; ++j) {
|
|
51
|
+
double dot = 0.0;
|
|
52
|
+
for (ssize_t d = 0; d < dim; ++d)
|
|
53
|
+
dot += a(i, d) * b(j, d);
|
|
54
|
+
double denom = norm_a[i] * norm_b[j];
|
|
55
|
+
r(i, j) = (denom == 0.0) ? 0.0 : dot / denom;
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
py::dict output;
|
|
60
|
+
output["similarity_matrix"] = result;
|
|
61
|
+
output["meta"] = py::dict(
|
|
62
|
+
"n_a"_a = n_a,
|
|
63
|
+
"n_b"_a = n_b,
|
|
64
|
+
"dim"_a = dim,
|
|
65
|
+
"metric"_a = "cosine_similarity"
|
|
66
|
+
);
|
|
67
|
+
|
|
68
|
+
return output;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
// Module entry point: registers the extension under the name `core_backend`.
PYBIND11_MODULE(core_backend, m) {
    m.doc() = "C++ cosine similarity exposed to Python via pybind11 as core_backend";

    // Keyword-argument names are declared with the `_a` literal, which is
    // shorthand for py::arg(...).
    m.def("cosine_similarity",
          &cosine_similarity,
          "Compute cosine similarity",
          "A"_a,
          "B"_a);
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import os
import sys

import numpy as np

# Put the local src/ directory on the import path *before* importing the
# package; inserting it afterwards cannot influence an import that has
# already been resolved.
sys.path.insert(0, os.path.abspath(
    os.path.join(os.path.dirname(__file__), '../src')))

from cosinefusion import core_backend as core_init  # noqa: E402
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_cosine_similarity_shape():
    """The result is (rows of A, rows of B) and the metadata agrees."""
    A = np.random.rand(3, 4)
    B = np.random.rand(5, 4)

    res = core_init.cosine_similarity(A, B)
    sim, meta = res["similarity_matrix"], res["meta"]

    # One row per row of A, one column per row of B.
    assert sim.shape == (3, 5)

    # Each metadata entry must echo the input dimensions / metric name.
    expected_meta = {
        "n_a": 3,
        "n_b": 5,
        "dim": 4,
        "metric": "cosine_similarity",
    }
    for key, value in expected_meta.items():
        assert meta[key] == value
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def test_cosine_similarity_values():
    """Identical vectors score 1; a reordered vector scores a known value."""
    A = np.array([[1, 2, 3]])
    B = np.array([[1, 2, 3], [3, 2, 1]])
    res = core_init.cosine_similarity(A, B)
    sim = res["similarity_matrix"]

    # The first comparison (same vectors) should be 1.0
    np.testing.assert_almost_equal(sim[0, 0], 1.0, decimal=6)

    # The second should be less than 1.0 ...
    assert sim[0, 1] < 1.0
    # ... and specifically dot / (|a| * |b|) = 10 / (sqrt(14) * sqrt(14)).
    np.testing.assert_almost_equal(sim[0, 1], 10 / 14, decimal=6)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_user_item_recommendations():
    """Validate the ranking and scores for the demo user/item features."""
    items = np.array([
        [0, 1, 0, 0, 1],  # Tea
        [0, 1, 0, 0, 1],  # Coffee
        [1, 0, 1, 1, 0],  # Jaffa Cake
        [1, 0, 1, 0, 0],  # Biscuit
        [1, 0, 1, 1, 1],  # Chocolate Bar
        [0, 1, 0, 0, 1],  # Espresso
    ])
    item_names = ["Tea", "Coffee", "Jaffa Cake",
                  "Biscuit", "Chocolate Bar", "Espresso"]

    # The user has a single feature set (column 3)
    user = np.array([[0, 0, 0, 1, 0]])

    res = core_init.cosine_similarity(user, items)
    sim = res["similarity_matrix"]

    top_indices = np.argsort(sim[0])[::-1]
    top_item = item_names[top_indices[0]]

    # Expect Jaffa Cake to be top match (matches cosine similarity scoring)
    assert top_item == "Jaffa Cake"
    # The similarity should be positive and within range
    assert 0.0 <= sim[0][top_indices[0]] <= 1.0

    # Jaffa Cake shares the feature across 3 set features: 1 / sqrt(3).
    np.testing.assert_almost_equal(
        sim[0][top_indices[0]], 1 / np.sqrt(3), decimal=6)

    # Chocolate Bar shares it across 4 set features: 1 / sqrt(4).
    assert item_names[top_indices[1]] == "Chocolate Bar"
    np.testing.assert_almost_equal(sim[0][top_indices[1]], 0.5, decimal=6)

    # Items that do not have the user's feature score zero.
    np.testing.assert_allclose(sim[0][[0, 1, 3, 5]], 0.0)
|