ragit 0.1__tar.gz → 0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragit-0.3/PKG-INFO +163 -0
- {ragit-0.1 → ragit-0.3}/README.md +6 -10
- ragit-0.3/ragit.egg-info/PKG-INFO +163 -0
- {ragit-0.1 → ragit-0.3}/setup.py +7 -2
- ragit-0.1/PKG-INFO +0 -10
- ragit-0.1/ragit.egg-info/PKG-INFO +0 -10
- {ragit-0.1 → ragit-0.3}/ragit/__init__.py +0 -0
- {ragit-0.1 → ragit-0.3}/ragit/main.py +0 -0
- {ragit-0.1 → ragit-0.3}/ragit.egg-info/SOURCES.txt +0 -0
- {ragit-0.1 → ragit-0.3}/ragit.egg-info/dependency_links.txt +0 -0
- {ragit-0.1 → ragit-0.3}/ragit.egg-info/requires.txt +0 -0
- {ragit-0.1 → ragit-0.3}/ragit.egg-info/top_level.txt +0 -0
- {ragit-0.1 → ragit-0.3}/setup.cfg +0 -0
ragit-0.3/PKG-INFO
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: ragit
|
|
3
|
+
Version: 0.3
|
|
4
|
+
Description-Content-Type: text/markdown
|
|
5
|
+
Requires-Dist: sentence-transformers>=3.4.1
|
|
6
|
+
Requires-Dist: pandas>=2.2.3
|
|
7
|
+
Requires-Dist: chromadb>=0.6.3
|
|
8
|
+
Requires-Dist: setuptools>=75.8.0
|
|
9
|
+
Requires-Dist: wheel>=0.45.1
|
|
10
|
+
Requires-Dist: twine>=6.1.0
|
|
11
|
+
Dynamic: description
|
|
12
|
+
Dynamic: description-content-type
|
|
13
|
+
Dynamic: requires-dist
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Ragit
|
|
17
|
+
🚀 Smart, Fast, Scalable Search 🚀
|
|
18
|
+
|
|
19
|
+
GitHub Repo: [stsfaroz/ragit](https://github.com/stsfaroz/ragit)
|
|
20
|
+
|
|
21
|
+
**ragit** is a lightweight Python library that simplifies the management of vector databases using [ChromaDB](https://github.com/chroma-core/chroma). With ragit, you can easily create, update, query, and manage your vector database—all from CSV files containing text data.
|
|
22
|
+
|
|
23
|
+
## Features
|
|
24
|
+
|
|
25
|
+
- **Create a Vector Database:** Build your database from a CSV file with two required columns: `id` and `text`.
|
|
26
|
+
- **Add New Entries:** Insert additional entries from CSV files or add them individually.
|
|
27
|
+
- **Similarity Search:** Find nearby texts using various distance metrics (e.g., cosine, L2) with similarity scores.
|
|
28
|
+
- **Data Retrieval:** Fetch entries by IDs or exact text matches.
|
|
29
|
+
- **Deletion:** Remove single entries or entire collections when needed.
|
|
30
|
+
|
|
31
|
+
## CSV File Format
|
|
32
|
+
ragit expects your CSV file to have exactly two columns: `id` and `text`.
|
|
33
|
+
|
|
34
|
+
## Example CSV (`data.csv`):
|
|
35
|
+
|
|
36
|
+
```csv
|
|
37
|
+
id,text
|
|
38
|
+
1,The quick brown fox jumps over the lazy dog.
|
|
39
|
+
2,Another sample entry for testing.
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
Below are some examples that demonstrate how to use `ragit`. The examples cover creating a database, adding entries, performing similarity searches, and more.
|
|
44
|
+
|
|
45
|
+
### 1. Importing and Initializing
|
|
46
|
+
First, import the `VectorDBManager` class from `ragit` and initialize it:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from ragit import VectorDBManager
|
|
50
|
+
|
|
51
|
+
# Initialize the vector database manager with a custom persistence directory and model
|
|
52
|
+
db_manager = VectorDBManager(
|
|
53
|
+
persist_directory="./my_vector_db", # Optional # default : "./vector_db"
|
|
54
|
+
provider="sentence_transformer", # Optional # default : "sentence_transformer"
|
|
55
|
+
model_name="all-mpnet-base-v2" # Optional # default : "all-mpnet-base-v2"
|
|
56
|
+
)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 2. Creating a Database
|
|
60
|
+
Create a new collection (named `my_collection`) using your CSV file. In this example, the `distance_metric` is set to "cosine" (available options: l2, cosine, ip, l1):
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
db_manager.create_database(
|
|
64
|
+
csv_path="data.csv",
|
|
65
|
+
collection_name="my_collection",
|
|
66
|
+
distance_metric="cosine" # Optional # default : l2
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### 3. Adding a Single Entry
|
|
71
|
+
Add an individual entry to the collection:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
db_manager.add_single_row(
|
|
75
|
+
id_="101",
|
|
76
|
+
text="This is a new test entry for the database.",
|
|
77
|
+
collection_name="my_collection"
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### 4. Adding Multiple Entries from CSV
|
|
82
|
+
You can also add multiple entries from a CSV file. This function skips any entries that already exist in the collection:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
stats = db_manager.add_values_from_csv(
|
|
86
|
+
csv_path="data.csv",
|
|
87
|
+
collection_name="my_collection"
|
|
88
|
+
)
|
|
89
|
+
print(f"Added {stats['new_entries_added']} new entries")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 5. Retrieving Collection Information
|
|
93
|
+
Fetch and display information about your collection:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
info = db_manager.get_collection_info("my_collection")
|
|
97
|
+
print(f"Collection size: {info['count']} entries")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 6. Performing a Similarity Search
|
|
101
|
+
Find texts that are similar to your query. In this example, the query text is "ai", and the search is filtered using the string "Artificial intelligence". The top 2 results are returned:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
results = db_manager.find_nearby_texts(
|
|
105
|
+
text="ai",
|
|
106
|
+
collection_name="my_collection",
|
|
107
|
+
k=2,
|
|
108
|
+
search_string="Artificial intelligence" # Optional
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
print("Results:")
|
|
112
|
+
for item in results:
|
|
113
|
+
print(f"\nID: {item['id']}")
|
|
114
|
+
print(f"Text: {item['text']}")
|
|
115
|
+
print(f"Similarity: {item['similarity']}%")
|
|
116
|
+
print(f"Distance ({item['metric']}): {item['raw_distance']}")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 7. Deleting an Entry
|
|
120
|
+
Remove an entry from the collection by its ID:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
db_manager.delete_entry_by_id(
|
|
124
|
+
id_="1",
|
|
125
|
+
collection_name="my_collection"
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### 8. Fetching Texts by IDs
|
|
130
|
+
Retrieve text entries for a list of IDs:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
ids_to_fetch = ["1", "2", "3"]
|
|
134
|
+
texts = db_manager.get_by_ids(ids_to_fetch, "my_collection")
|
|
135
|
+
print("Texts:", texts)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### 9. Fetching IDs by Texts
|
|
139
|
+
For an exact text match, get the corresponding IDs:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
texts_to_fetch = [
|
|
143
|
+
"Plato was an ancient Greek philosopher of the Classical period who is considered a foundational thinker in Western philosophy"
|
|
144
|
+
]
|
|
145
|
+
ids = db_manager.get_by_texts(texts_to_fetch, "my_collection")
|
|
146
|
+
print("IDs:", ids)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### 10. Deleting a Collection
|
|
150
|
+
Delete an entire collection. **Note:** You must pass `confirmation="yes"` to proceed with deletion.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
db_manager.delete_collection(
|
|
154
|
+
collection_name="my_collection",
|
|
155
|
+
confirmation="yes"
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Contributing
|
|
160
|
+
Contributions are welcome! If you encounter any issues or have suggestions for improvements, please feel free to open an issue or submit a pull request on GitHub.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
This project is licensed under the MIT License. See the `LICENSE` file for details.
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
# ragit
|
|
2
1
|
|
|
3
|
-
|
|
2
|
+
# Ragit
|
|
3
|
+
🚀 Smart, Fast, Scalable Search 🚀
|
|
4
|
+
|
|
5
|
+
GitHub Repo: [stsfaroz/ragit](https://github.com/stsfaroz/ragit)
|
|
6
|
+
|
|
7
|
+
**ragit** is a lightweight Python library that simplifies the management of vector databases using [ChromaDB](https://github.com/chroma-core/chroma). With ragit, you can easily create, update, query, and manage your vector database—all from CSV files containing text data.
|
|
4
8
|
|
|
5
9
|
## Features
|
|
6
10
|
|
|
@@ -10,14 +14,6 @@
|
|
|
10
14
|
- **Data Retrieval:** Fetch entries by IDs or exact text matches.
|
|
11
15
|
- **Deletion:** Remove single entries or entire collections when needed.
|
|
12
16
|
|
|
13
|
-
## Installation
|
|
14
|
-
|
|
15
|
-
Install ragit from PyPI using pip:
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
pip install ragit
|
|
19
|
-
```
|
|
20
|
-
|
|
21
17
|
## CSV File Format
|
|
22
18
|
ragit expects your CSV file to have exactly two columns: `id` and `text`.
|
|
23
19
|
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: ragit
|
|
3
|
+
Version: 0.3
|
|
4
|
+
Description-Content-Type: text/markdown
|
|
5
|
+
Requires-Dist: sentence-transformers>=3.4.1
|
|
6
|
+
Requires-Dist: pandas>=2.2.3
|
|
7
|
+
Requires-Dist: chromadb>=0.6.3
|
|
8
|
+
Requires-Dist: setuptools>=75.8.0
|
|
9
|
+
Requires-Dist: wheel>=0.45.1
|
|
10
|
+
Requires-Dist: twine>=6.1.0
|
|
11
|
+
Dynamic: description
|
|
12
|
+
Dynamic: description-content-type
|
|
13
|
+
Dynamic: requires-dist
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Ragit
|
|
17
|
+
🚀 Smart, Fast, Scalable Search 🚀
|
|
18
|
+
|
|
19
|
+
GitHub Repo: [stsfaroz/ragit](https://github.com/stsfaroz/ragit)
|
|
20
|
+
|
|
21
|
+
**ragit** is a lightweight Python library that simplifies the management of vector databases using [ChromaDB](https://github.com/chroma-core/chroma). With ragit, you can easily create, update, query, and manage your vector database—all from CSV files containing text data.
|
|
22
|
+
|
|
23
|
+
## Features
|
|
24
|
+
|
|
25
|
+
- **Create a Vector Database:** Build your database from a CSV file with two required columns: `id` and `text`.
|
|
26
|
+
- **Add New Entries:** Insert additional entries from CSV files or add them individually.
|
|
27
|
+
- **Similarity Search:** Find nearby texts using various distance metrics (e.g., cosine, L2) with similarity scores.
|
|
28
|
+
- **Data Retrieval:** Fetch entries by IDs or exact text matches.
|
|
29
|
+
- **Deletion:** Remove single entries or entire collections when needed.
|
|
30
|
+
|
|
31
|
+
## CSV File Format
|
|
32
|
+
ragit expects your CSV file to have exactly two columns: `id` and `text`.
|
|
33
|
+
|
|
34
|
+
## Example CSV (`data.csv`):
|
|
35
|
+
|
|
36
|
+
```csv
|
|
37
|
+
id,text
|
|
38
|
+
1,The quick brown fox jumps over the lazy dog.
|
|
39
|
+
2,Another sample entry for testing.
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
Below are some examples that demonstrate how to use `ragit`. The examples cover creating a database, adding entries, performing similarity searches, and more.
|
|
44
|
+
|
|
45
|
+
### 1. Importing and Initializing
|
|
46
|
+
First, import the `VectorDBManager` class from `ragit` and initialize it:
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from ragit import VectorDBManager
|
|
50
|
+
|
|
51
|
+
# Initialize the vector database manager with a custom persistence directory and model
|
|
52
|
+
db_manager = VectorDBManager(
|
|
53
|
+
persist_directory="./my_vector_db", # Optional # default : "./vector_db"
|
|
54
|
+
provider="sentence_transformer", # Optional # default : "sentence_transformer"
|
|
55
|
+
model_name="all-mpnet-base-v2" # Optional # default : "all-mpnet-base-v2"
|
|
56
|
+
)
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### 2. Creating a Database
|
|
60
|
+
Create a new collection (named `my_collection`) using your CSV file. In this example, the `distance_metric` is set to "cosine" (available options: l2, cosine, ip, l1):
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
db_manager.create_database(
|
|
64
|
+
csv_path="data.csv",
|
|
65
|
+
collection_name="my_collection",
|
|
66
|
+
distance_metric="cosine" # Optional # default : l2
|
|
67
|
+
)
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### 3. Adding a Single Entry
|
|
71
|
+
Add an individual entry to the collection:
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
db_manager.add_single_row(
|
|
75
|
+
id_="101",
|
|
76
|
+
text="This is a new test entry for the database.",
|
|
77
|
+
collection_name="my_collection"
|
|
78
|
+
)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### 4. Adding Multiple Entries from CSV
|
|
82
|
+
You can also add multiple entries from a CSV file. This function skips any entries that already exist in the collection:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
stats = db_manager.add_values_from_csv(
|
|
86
|
+
csv_path="data.csv",
|
|
87
|
+
collection_name="my_collection"
|
|
88
|
+
)
|
|
89
|
+
print(f"Added {stats['new_entries_added']} new entries")
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### 5. Retrieving Collection Information
|
|
93
|
+
Fetch and display information about your collection:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
info = db_manager.get_collection_info("my_collection")
|
|
97
|
+
print(f"Collection size: {info['count']} entries")
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 6. Performing a Similarity Search
|
|
101
|
+
Find texts that are similar to your query. In this example, the query text is "ai", and the search is filtered using the string "Artificial intelligence". The top 2 results are returned:
|
|
102
|
+
|
|
103
|
+
```python
|
|
104
|
+
results = db_manager.find_nearby_texts(
|
|
105
|
+
text="ai",
|
|
106
|
+
collection_name="my_collection",
|
|
107
|
+
k=2,
|
|
108
|
+
search_string="Artificial intelligence" # Optional
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
print("Results:")
|
|
112
|
+
for item in results:
|
|
113
|
+
print(f"\nID: {item['id']}")
|
|
114
|
+
print(f"Text: {item['text']}")
|
|
115
|
+
print(f"Similarity: {item['similarity']}%")
|
|
116
|
+
print(f"Distance ({item['metric']}): {item['raw_distance']}")
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### 7. Deleting an Entry
|
|
120
|
+
Remove an entry from the collection by its ID:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
db_manager.delete_entry_by_id(
|
|
124
|
+
id_="1",
|
|
125
|
+
collection_name="my_collection"
|
|
126
|
+
)
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### 8. Fetching Texts by IDs
|
|
130
|
+
Retrieve text entries for a list of IDs:
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
ids_to_fetch = ["1", "2", "3"]
|
|
134
|
+
texts = db_manager.get_by_ids(ids_to_fetch, "my_collection")
|
|
135
|
+
print("Texts:", texts)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### 9. Fetching IDs by Texts
|
|
139
|
+
For an exact text match, get the corresponding IDs:
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
texts_to_fetch = [
|
|
143
|
+
"Plato was an ancient Greek philosopher of the Classical period who is considered a foundational thinker in Western philosophy"
|
|
144
|
+
]
|
|
145
|
+
ids = db_manager.get_by_texts(texts_to_fetch, "my_collection")
|
|
146
|
+
print("IDs:", ids)
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### 10. Deleting a Collection
|
|
150
|
+
Delete an entire collection. **Note:** You must pass `confirmation="yes"` to proceed with deletion.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
db_manager.delete_collection(
|
|
154
|
+
collection_name="my_collection",
|
|
155
|
+
confirmation="yes"
|
|
156
|
+
)
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
## Contributing
|
|
160
|
+
Contributions are welcome! If you encounter any issues or have suggestions for improvements, please feel free to open an issue or submit a pull request on GitHub.
|
|
161
|
+
|
|
162
|
+
## License
|
|
163
|
+
This project is licensed under the MIT License. See the `LICENSE` file for details.
|
{ragit-0.1 → ragit-0.3}/setup.py
RENAMED
|
@@ -1,11 +1,16 @@
|
|
|
1
1
|
from setuptools import setup, find_packages
|
|
2
2
|
|
|
3
|
+
with open("README.md", "r") as f:
|
|
4
|
+
description=f.read()
|
|
5
|
+
|
|
3
6
|
setup(
|
|
4
7
|
name="ragit",
|
|
5
|
-
version="0.1",
|
|
8
|
+
version="0.3",
|
|
6
9
|
packages=find_packages(),
|
|
7
10
|
install_requires = ['sentence-transformers>=3.4.1',
|
|
8
11
|
'pandas>=2.2.3', 'chromadb>=0.6.3',
|
|
9
12
|
'setuptools>=75.8.0',
|
|
10
|
-
'wheel>=0.45.1', 'twine>=6.1.0']
|
|
13
|
+
'wheel>=0.45.1', 'twine>=6.1.0'] ,
|
|
14
|
+
long_description= description,
|
|
15
|
+
long_description_content_type="text/markdown"
|
|
11
16
|
)
|
ragit-0.1/PKG-INFO
DELETED
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: ragit
|
|
3
|
-
Version: 0.1
|
|
4
|
-
Requires-Dist: sentence-transformers>=3.4.1
|
|
5
|
-
Requires-Dist: pandas>=2.2.3
|
|
6
|
-
Requires-Dist: chromadb>=0.6.3
|
|
7
|
-
Requires-Dist: setuptools>=75.8.0
|
|
8
|
-
Requires-Dist: wheel>=0.45.1
|
|
9
|
-
Requires-Dist: twine>=6.1.0
|
|
10
|
-
Dynamic: requires-dist
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: ragit
|
|
3
|
-
Version: 0.1
|
|
4
|
-
Requires-Dist: sentence-transformers>=3.4.1
|
|
5
|
-
Requires-Dist: pandas>=2.2.3
|
|
6
|
-
Requires-Dist: chromadb>=0.6.3
|
|
7
|
-
Requires-Dist: setuptools>=75.8.0
|
|
8
|
-
Requires-Dist: wheel>=0.45.1
|
|
9
|
-
Requires-Dist: twine>=6.1.0
|
|
10
|
-
Dynamic: requires-dist
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|