llm_memory 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +1 -1
- data/Gemfile.lock +5 -5
- data/lib/llm_memory/broca.rb +4 -4
- data/lib/llm_memory/hippocampus.rb +7 -11
- data/lib/llm_memory/version.rb +1 -1
- data/llm_memory.gemspec +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: aea28ca9fb65d35a8d98964ac433cb7c445d8137a2672bcf542d35fa31935582
+  data.tar.gz: 02d4d7a619eb3031df0310c2fa9bef9105035f732bc1d9e5381a3dd2d8ded836
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d30618749a0b4016a2ca9cd2815cb6b7b4971a46c50ea83fa3e5b30d1e0813127053b576882587634f166cff7312f3898c6183f71a32ea724b53deea5d676936
+  data.tar.gz: 4957d9857a4a5b05cd725b45e44c44b9ecfb695034e6c2e6c45ed80c9c819d0b581fdc4f9977d4af9180b15af983f544df62507e5d25001f0e508ff90acfa8b6
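These checksums cover the metadata.gz and data.tar.gz members packed inside the released .gem archive. As a hedged sketch (not part of the gem itself), the new SHA256 values could be recomputed locally after `gem fetch llm_memory -v 0.1.5`, since a .gem file is a plain tar archive:

require "digest"
require "rubygems/package"

# Recompute the SHA256 digests that checksums.yaml records for the
# members of the fetched .gem archive.
Gem::Package::TarReader.new(File.open("llm_memory-0.1.5.gem", "rb")) do |tar|
  tar.each do |entry|
    next unless ["metadata.gz", "data.tar.gz"].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end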
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
 PATH
   remote: .
   specs:
-    llm_memory (0.1.
+    llm_memory (0.1.5)
       redis (~> 4.6.0)
       ruby-openai (~> 3.7.0)
-
+      tokenizers (~> 0.3.3)
 
 GEM
   remote: https://rubygems.org/
@@ -81,8 +81,8 @@ GEM
     standard-performance (1.0.1)
       lint_roller (~> 1.0)
       rubocop-performance (~> 1.16.0)
-
-
+    tokenizers (0.3.3-arm64-darwin)
+    tokenizers (0.3.3-x86_64-linux)
     unicode-display_width (2.4.2)
     vcr (6.1.0)
     webmock (3.18.1)
@@ -103,7 +103,7 @@ DEPENDENCIES
   rspec (~> 3.0)
   ruby-openai
   standard (~> 1.3)
-
+  tokenizers
   vcr (~> 6.1.0)
   webmock (~> 3.18.1)
 
data/lib/llm_memory/broca.rb
CHANGED
@@ -1,5 +1,5 @@
 require "erb"
-require "
+require "tokenizers"
 
 module LlmMemory
   class Broca
@@ -51,9 +51,9 @@ module LlmMemory
       count = 0
       new_messages = []
       @messages.reverse_each do |message|
-        encoded = tokenizer.encode(message[:content])
+        encoded = tokenizer.encode(message[:content], add_special_tokens: true)
         if count < @max_token
-          count += encoded.length
+          count += encoded.tokens.length
           new_messages.push(message)
         else
           break
@@ -63,7 +63,7 @@ module LlmMemory
     end
 
     def tokenizer
-      @tokenizer ||=
+      @tokenizer ||= Tokenizers.from_pretrained("gpt2")
     end
   end
 end
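The broca.rb change moves token counting to the tokenizers gem: messages are walked newest-first and kept only while the running GPT-2 token count stays under @max_token. A minimal standalone sketch of that trimming loop, assuming tokenizers (~> 0.3.3) is installed; the trim_to_token_budget name is illustrative and not part of the gem's public API:

require "tokenizers"

# Keep the most recent messages whose combined token count fits the budget,
# following the same loop structure as the Broca change above.
def trim_to_token_budget(messages, max_tokens)
  tokenizer = Tokenizers.from_pretrained("gpt2")
  count = 0
  kept = []
  messages.reverse_each do |message|
    encoded = tokenizer.encode(message[:content], add_special_tokens: true)
    if count < max_tokens
      count += encoded.tokens.length
      kept.push(message)
    else
      break
    end
  end
  kept.reverse # back to chronological order
end

trim_to_token_budget([{content: "Hello"}, {content: "How are you?"}], 4096)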
data/lib/llm_memory/hippocampus.rb
CHANGED
@@ -23,7 +23,7 @@ module LlmMemory
       raise "Store '#{store_name}' not found." unless store_class
       @store = store_class.new(index_name: index_name)
 
-      #
+      # char count, not word count
       @chunk_size = chunk_size
       @chunk_overlap = chunk_overlap
     end
@@ -87,18 +87,14 @@ module LlmMemory
       docs.each do |item|
         content = item[:content]
         metadata = item[:metadata]
-
-
-        if words.length > @chunk_size
+        if content.length > @chunk_size
           start_index = 0
-
-
-
-          chunk_words = words[start_index...end_index]
-          chunk = chunk_words.join(" ")
+          while start_index < content.length
+            end_index = [start_index + @chunk_size, content.length].min
+            chunk = content[start_index...end_index]
             result << {content: chunk, metadata: metadata}
-
-          start_index += @chunk_size - @chunk_overlap
+            break if end_index == content.length
+            start_index += @chunk_size - @chunk_overlap
           end
         else
           result << {content: content, metadata: metadata}
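The hippocampus.rb change switches chunking from word counts to character counts: content is sliced into windows of at most @chunk_size characters, and consecutive windows overlap by @chunk_overlap characters. A standalone sketch of the same sliding-window slicing; the chunk_text name is illustrative, not part of the gem:

# Slice content into chunks of at most chunk_size characters,
# overlapping consecutive chunks by chunk_overlap characters.
def chunk_text(content, chunk_size:, chunk_overlap:)
  return [content] if content.length <= chunk_size

  chunks = []
  start_index = 0
  while start_index < content.length
    end_index = [start_index + chunk_size, content.length].min
    chunks << content[start_index...end_index]
    break if end_index == content.length
    start_index += chunk_size - chunk_overlap
  end
  chunks
end

chunk_text("a" * 25, chunk_size: 10, chunk_overlap: 3)
# => four chunks of 10, 10, 10 and 4 characters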
data/lib/llm_memory/version.rb
CHANGED
data/llm_memory.gemspec
CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
 
   # Uncomment to register a new dependency of your gem
   # spec.add_dependency "example-gem", "~> 1.0"
-  spec.add_dependency "
+  spec.add_dependency "tokenizers", "~> 0.3.3"
   spec.add_dependency "ruby-openai", "~> 3.7.0"
   spec.add_dependency "redis", "~> 4.6.0"
 
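With tokenizers declared as a runtime dependency, applications that install this release pull it in automatically. A minimal consumer Gemfile sketch (assumed for illustration, not shipped with the package):

source "https://rubygems.org"

gem "llm_memory", "0.1.5"
# tokenizers (~> 0.3.3), ruby-openai (~> 3.7.0) and redis (~> 4.6.0)
# are installed transitively as runtime dependencies.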
metadata
CHANGED
@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: llm_memory
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.5
 platform: ruby
 authors:
 - Shohei Kameda
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-
+date: 2023-05-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name:
+  name: tokenizers
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.3.3
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.3.3
 - !ruby/object:Gem::Dependency
   name: ruby-openai
   requirement: !ruby/object:Gem::Requirement