llm_memory 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 85f3330ed767cc28b5a3276a7772678c36726ce04562c184a4e6dcf287d14d1d
4
- data.tar.gz: 3e1ae83b2517f7f80bb857de5024b62b886bb6a045428f2d40159e48818cf286
3
+ metadata.gz: aea28ca9fb65d35a8d98964ac433cb7c445d8137a2672bcf542d35fa31935582
4
+ data.tar.gz: 02d4d7a619eb3031df0310c2fa9bef9105035f732bc1d9e5381a3dd2d8ded836
5
5
  SHA512:
6
- metadata.gz: dba158d07c4a97a5b2e6bb1a98987173ba427a18a1a06d0a422f366ab2f7335ac168aea0351657b0c7f957352e6ffe747a2d817deb7c8d5454010c2ec535999a
7
- data.tar.gz: 1db3ccb501f3b56d6e6dc00997f3f085b74f395521ffbbc1775bc5799bc388410523f6f209cfc185efbbf1437177c07dd5ee1725a49f6a27b207c6fc296067aa
6
+ metadata.gz: d30618749a0b4016a2ca9cd2815cb6b7b4971a46c50ea83fa3e5b30d1e0813127053b576882587634f166cff7312f3898c6183f71a32ea724b53deea5d676936
7
+ data.tar.gz: 4957d9857a4a5b05cd725b45e44c44b9ecfb695034e6c2e6c45ed80c9c819d0b581fdc4f9977d4af9180b15af983f544df62507e5d25001f0e508ff90acfa8b6
data/Gemfile CHANGED
@@ -11,7 +11,7 @@ gem "standard", "~> 1.3"
11
11
  gem "vcr", "~> 6.1.0"
12
12
  gem "webmock", "~> 3.18.1"
13
13
  gem "ruby-openai"
14
- gem "tiktoken_ruby"
14
+ gem "tokenizers"
15
15
  gem "redis"
16
16
  # dev
17
17
  gem "dotenv"
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- llm_memory (0.1.3)
4
+ llm_memory (0.1.5)
5
5
  redis (~> 4.6.0)
6
6
  ruby-openai (~> 3.7.0)
7
- tiktoken_ruby (~> 0.0.4)
7
+ tokenizers (~> 0.3.3)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
@@ -81,8 +81,8 @@ GEM
81
81
  standard-performance (1.0.1)
82
82
  lint_roller (~> 1.0)
83
83
  rubocop-performance (~> 1.16.0)
84
- tiktoken_ruby (0.0.4-arm64-darwin)
85
- tiktoken_ruby (0.0.4-x86_64-linux)
84
+ tokenizers (0.3.3-arm64-darwin)
85
+ tokenizers (0.3.3-x86_64-linux)
86
86
  unicode-display_width (2.4.2)
87
87
  vcr (6.1.0)
88
88
  webmock (3.18.1)
@@ -103,7 +103,7 @@ DEPENDENCIES
103
103
  rspec (~> 3.0)
104
104
  ruby-openai
105
105
  standard (~> 1.3)
106
- tiktoken_ruby
106
+ tokenizers
107
107
  vcr (~> 6.1.0)
108
108
  webmock (~> 3.18.1)
109
109
 
@@ -1,5 +1,5 @@
1
1
  require "erb"
2
- require "tiktoken_ruby"
2
+ require "tokenizers"
3
3
 
4
4
  module LlmMemory
5
5
  class Broca
@@ -51,9 +51,9 @@ module LlmMemory
51
51
  count = 0
52
52
  new_messages = []
53
53
  @messages.reverse_each do |message|
54
- encoded = tokenizer.encode(message[:content])
54
+ encoded = tokenizer.encode(message[:content], add_special_tokens: true)
55
55
  if count < @max_token
56
- count += encoded.length
56
+ count += encoded.tokens.length
57
57
  new_messages.push(message)
58
58
  else
59
59
  break
@@ -63,7 +63,7 @@ module LlmMemory
63
63
  end
64
64
 
65
65
  def tokenizer
66
- @tokenizer ||= Tiktoken.encoding_for_model("gpt-4")
66
+ @tokenizer ||= Tokenizers.from_pretrained("gpt2")
67
67
  end
68
68
  end
69
69
  end
@@ -23,7 +23,7 @@ module LlmMemory
23
23
  raise "Store '#{store_name}' not found." unless store_class
24
24
  @store = store_class.new(index_name: index_name)
25
25
 
26
- # word count, not char count
26
+ # char count, not word count
27
27
  @chunk_size = chunk_size
28
28
  @chunk_overlap = chunk_overlap
29
29
  end
@@ -87,18 +87,14 @@ module LlmMemory
87
87
  docs.each do |item|
88
88
  content = item[:content]
89
89
  metadata = item[:metadata]
90
- words = content.split
91
-
92
- if words.length > @chunk_size
90
+ if content.length > @chunk_size
93
91
  start_index = 0
94
-
95
- while start_index < words.length
96
- end_index = [start_index + @chunk_size, words.length].min
97
- chunk_words = words[start_index...end_index]
98
- chunk = chunk_words.join(" ")
92
+ while start_index < content.length
93
+ end_index = [start_index + @chunk_size, content.length].min
94
+ chunk = content[start_index...end_index]
99
95
  result << {content: chunk, metadata: metadata}
100
-
101
- start_index += @chunk_size - @chunk_overlap # Move index to create a overlap
96
+ break if end_index == content.length
97
+ start_index += @chunk_size - @chunk_overlap
102
98
  end
103
99
  else
104
100
  result << {content: content, metadata: metadata}
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module LlmMemory
4
- VERSION = "0.1.3"
4
+ VERSION = "0.1.5"
5
5
  end
data/llm_memory.gemspec CHANGED
@@ -31,7 +31,7 @@ Gem::Specification.new do |spec|
31
31
 
32
32
  # Uncomment to register a new dependency of your gem
33
33
  # spec.add_dependency "example-gem", "~> 1.0"
34
- spec.add_dependency "tiktoken_ruby", "~> 0.0.4"
34
+ spec.add_dependency "tokenizers", "~> 0.3.3"
35
35
  spec.add_dependency "ruby-openai", "~> 3.7.0"
36
36
  spec.add_dependency "redis", "~> 4.6.0"
37
37
 
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: llm_memory
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shohei Kameda
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-10 00:00:00.000000000 Z
11
+ date: 2023-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: tiktoken_ruby
14
+ name: tokenizers
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.0.4
19
+ version: 0.3.3
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.0.4
26
+ version: 0.3.3
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: ruby-openai
29
29
  requirement: !ruby/object:Gem::Requirement