ai_chatbot 0.1.6.4 → 0.1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ai_chatbot/version.rb +1 -1
- data/lib/ml_model.py +77 -120
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 65a8e9d42a6f0a36cff9a13f7428975232912b4a2decd9509772b38d849f6340
|
4
|
+
data.tar.gz: f54c3642f29fb1f12f49af978f4ae34bd9dcdb85c7d22b48d07b195384354832
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6a7d4c388b24487a16a51dac935eb54d6dfc384f8d478dd80667f5de9a3dc2d8140e2785a0721db5d93f79b29285ad5deb3462042238536925c5b8e66b481939
|
7
|
+
data.tar.gz: e691cb235c3cae3c9b754c30a20cec05ca17bc684e0e7c727528ccf0b8f06e4a100e4473d050793a62029bf3352425f1b1c00540e709f04f868366bb2a0f5559
|
data/lib/ai_chatbot/version.rb
CHANGED
data/lib/ml_model.py
CHANGED
@@ -1,162 +1,119 @@
|
|
1
1
|
import sys
|
2
|
+
import psycopg2
|
3
|
+
import os
|
4
|
+
from dotenv import load_dotenv
|
2
5
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
3
6
|
from sklearn.naive_bayes import MultinomialNB
|
4
7
|
from sklearn.pipeline import make_pipeline
|
5
8
|
from sklearn.metrics.pairwise import cosine_similarity
|
6
|
-
import pickle
|
7
|
-
import os
|
8
9
|
|
9
|
-
#
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
else:
|
19
|
-
questions = [
|
20
|
-
"How to create a new model in Rails?",
|
21
|
-
"What is migration?",
|
22
|
-
"How to add a route?"
|
23
|
-
]
|
24
|
-
answers = [
|
25
|
-
"You can create a model using 'rails generate model'.",
|
26
|
-
"Migration is a database schema change.",
|
27
|
-
"You can add a route in the config/routes.rb file."
|
28
|
-
]
|
29
|
-
|
30
|
-
# Create a pipeline (TF-IDF + MultinomialNB)
|
31
|
-
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
|
32
|
-
model.fit(questions, answers)
|
33
|
-
|
34
|
-
# Populate cache with known questions and answers
|
35
|
-
for q, a in zip(questions, answers):
|
36
|
-
cache[q.lower()] = a
|
37
|
-
|
38
|
-
# Function to predict or retrain the model
|
39
|
-
def main(action, query=None, new_answer=None):
|
40
|
-
if action == "predict":
|
41
|
-
return get_prediction(query)
|
42
|
-
elif action == "train_model":
|
43
|
-
return train_model(query, new_answer)
|
44
|
-
elif action == "update_answer":
|
45
|
-
return update_answer(query, new_answer)
|
46
|
-
elif action == "update_or_delete_question":
|
47
|
-
return update_or_delete_question(query, new_answer)
|
48
|
-
elif action == "list_questions":
|
49
|
-
return list_questions()
|
50
|
-
elif action == "list_answers":
|
51
|
-
return list_answers()
|
10
|
+
# Connect to PostgreSQL
|
11
|
+
conn = psycopg2.connect(
|
12
|
+
dbname= os.getenv("DB_NAME"),
|
13
|
+
user=os.getenv("DB_USERNAME"),
|
14
|
+
password=os.getenv("DB_PASSWORD"),
|
15
|
+
host=os.getenv("DB_HOST"),
|
16
|
+
port=os.getenv("DB_PORT"),
|
17
|
+
)
|
18
|
+
cursor = conn.cursor()
|
52
19
|
|
53
|
-
|
54
|
-
|
55
|
-
|
20
|
+
db_name = os.getenv("DB_NAME")
|
21
|
+
|
22
|
+
cursor.execute("SELECT question, answer FROM qa_data")
|
23
|
+
rows = cursor.fetchall()
|
24
|
+
questions = [row[0] for row in rows]
|
25
|
+
answers = [row[1] for row in rows]
|
26
|
+
|
27
|
+
vectorizer = TfidfVectorizer()
|
28
|
+
question_vecs = vectorizer.fit_transform(questions) if questions else None
|
56
29
|
|
57
|
-
# **Check cache first**
|
58
|
-
if query_lower in cache:
|
59
|
-
return cache[query_lower]
|
60
30
|
|
61
|
-
|
62
|
-
|
31
|
+
def get_prediction(query):
|
32
|
+
if not questions:
|
33
|
+
return "No questions available in the database."
|
34
|
+
|
35
|
+
query_vec = vectorizer.transform([query])
|
36
|
+
similarities = cosine_similarity(query_vec, question_vecs).flatten()
|
63
37
|
|
64
|
-
|
65
|
-
|
66
|
-
max_similarity = similarities.max()
|
38
|
+
max_sim_index = similarities.argmax()
|
39
|
+
max_similarity = similarities[max_sim_index]
|
67
40
|
|
68
41
|
threshold = 0.65
|
69
42
|
if max_similarity < threshold:
|
70
43
|
return "No good match found. Please provide the correct answer."
|
71
44
|
else:
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
cache[query_lower] = prediction
|
76
|
-
|
77
|
-
return prediction
|
78
|
-
|
79
|
-
# Function to train the model with a new question and answer
|
45
|
+
return answers[max_sim_index]
|
46
|
+
|
47
|
+
# Function to train the model with new data
|
80
48
|
def train_model(new_question, new_answer):
|
81
49
|
global questions, answers
|
82
50
|
|
83
|
-
#
|
51
|
+
# Store in database
|
52
|
+
|
53
|
+
cursor.execute("INSERT INTO qa_data (question, answer, created_at,updated_at) VALUES (%s, %s, NOW(),NOW()) ON CONFLICT (question) DO NOTHING",
|
54
|
+
(new_question, new_answer))
|
55
|
+
conn.commit()
|
56
|
+
|
57
|
+
# Update lists and retrain model
|
84
58
|
questions.append(new_question)
|
85
59
|
answers.append(new_answer)
|
86
|
-
|
87
|
-
# Retrain the model
|
88
60
|
model.fit(questions, answers)
|
89
61
|
|
90
|
-
|
91
|
-
cache[new_question.lower()] = new_answer
|
92
|
-
|
93
|
-
# Save the updated model
|
94
|
-
with open("qa_model.pkl", "wb") as f:
|
95
|
-
pickle.dump({"questions": questions, "answers": answers}, f)
|
96
|
-
|
97
|
-
return f"Model retrained with: '{new_question}' -> '{new_answer}'"
|
62
|
+
return f"Added: '{new_question}' -> '{new_answer}'"
|
98
63
|
|
99
64
|
# Function to update an answer
|
100
65
|
def update_answer(existing_question, new_answer):
|
101
|
-
|
66
|
+
cursor.execute("UPDATE qa_data SET answer = %s WHERE question = %s", (new_answer, existing_question))
|
67
|
+
conn.commit()
|
102
68
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
# Retrain the model
|
108
|
-
model.fit(questions, answers)
|
109
|
-
|
110
|
-
# **Update cache**
|
111
|
-
cache[existing_question.lower()] = new_answer
|
69
|
+
# Update lists and retrain model
|
70
|
+
index = questions.index(existing_question)
|
71
|
+
answers[index] = new_answer
|
72
|
+
model.fit(questions, answers)
|
112
73
|
|
113
|
-
|
114
|
-
with open("qa_model.pkl", "wb") as f:
|
115
|
-
pickle.dump({"questions": questions, "answers": answers}, f)
|
116
|
-
|
117
|
-
return f"Answer updated for: '{existing_question}'"
|
118
|
-
|
119
|
-
return "Question not found."
|
74
|
+
return f"Updated: '{existing_question}' -> '{new_answer}'"
|
120
75
|
|
121
|
-
# Function to
|
122
|
-
def
|
123
|
-
|
76
|
+
# Function to delete a question
|
77
|
+
def delete_question(existing_question):
|
78
|
+
cursor.execute("DELETE FROM qa_data WHERE question = %s", (existing_question,))
|
79
|
+
conn.commit()
|
124
80
|
|
125
81
|
if existing_question in questions:
|
126
82
|
index = questions.index(existing_question)
|
127
|
-
|
128
|
-
|
129
|
-
questions[index] = new_question
|
130
|
-
# **Update cache**
|
131
|
-
cache[new_question.lower()] = answers[index]
|
132
|
-
else:
|
133
|
-
# Delete the question
|
134
|
-
del questions[index]
|
135
|
-
del answers[index]
|
136
|
-
|
137
|
-
# Retrain the model
|
83
|
+
del questions[index]
|
84
|
+
del answers[index]
|
138
85
|
model.fit(questions, answers)
|
139
86
|
|
140
|
-
|
141
|
-
if not new_question:
|
142
|
-
cache.pop(existing_question.lower(), None)
|
143
|
-
|
144
|
-
# Save the model
|
145
|
-
with open("qa_model.pkl", "wb") as f:
|
146
|
-
pickle.dump({"questions": questions, "answers": answers}, f)
|
147
|
-
|
148
|
-
return f"Updated question: '{existing_question}' -> '{new_question}'" if new_question else f"Deleted: '{existing_question}'"
|
149
|
-
|
150
|
-
return "Question not found."
|
87
|
+
return f"Deleted: '{existing_question}'"
|
151
88
|
|
89
|
+
# Function to list questions
|
152
90
|
def list_questions():
|
153
|
-
|
91
|
+
cursor.execute("SELECT question FROM qa_data")
|
92
|
+
return [row[0] for row in cursor.fetchall()]
|
154
93
|
|
94
|
+
# Function to list answers
|
155
95
|
def list_answers():
|
156
|
-
|
96
|
+
cursor.execute("SELECT answer FROM qa_data")
|
97
|
+
return [row[0] for row in cursor.fetchall()]
|
157
98
|
|
158
99
|
if __name__ == "__main__":
|
159
100
|
action = sys.argv[1]
|
160
101
|
question = sys.argv[2] if len(sys.argv) > 2 else None
|
161
102
|
answer = sys.argv[3] if len(sys.argv) > 3 else None
|
162
|
-
|
103
|
+
|
104
|
+
if action == "predict":
|
105
|
+
print(get_prediction(question))
|
106
|
+
elif action == "train_model":
|
107
|
+
print(train_model(question, answer))
|
108
|
+
elif action == "update_answer":
|
109
|
+
print(update_answer(question, answer))
|
110
|
+
elif action == "delete_question":
|
111
|
+
print(delete_question(question))
|
112
|
+
elif action == "list_questions":
|
113
|
+
print(list_questions())
|
114
|
+
elif action == "list_answers":
|
115
|
+
print(list_answers())
|
116
|
+
|
117
|
+
# Close DB connection
|
118
|
+
cursor.close()
|
119
|
+
conn.close()
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ai_chatbot
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.6.
|
4
|
+
version: 0.1.6.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sanket
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
11
|
+
date: 2025-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: open3
|
@@ -24,8 +24,8 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
description:
|
28
|
-
|
27
|
+
description: Added caching to avoid redundant DB queries, improving chatbot response
|
28
|
+
time. Also updated unanswered question logging.
|
29
29
|
email:
|
30
30
|
- sanket.tikhande@gmail.com
|
31
31
|
executables: []
|
@@ -58,5 +58,5 @@ requirements: []
|
|
58
58
|
rubygems_version: 3.3.7
|
59
59
|
signing_key:
|
60
60
|
specification_version: 4
|
61
|
-
summary: 'Fix:
|
61
|
+
summary: 'Fix: Added postgres integration'
|
62
62
|
test_files: []
|