ollama-ruby 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/ollama/client.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'tins/xt/string_camelize'
2
2
  require 'tins/annotate'
3
- require 'excon'
4
3
 
5
4
  class Ollama::Client
6
5
  end
@@ -31,6 +30,8 @@ class Ollama::Client
31
30
 
32
31
  attr_accessor :output
33
32
 
33
+ attr_reader :base_url
34
+
34
35
  def ssl_verify_peer?
35
36
  !!@ssl_verify_peer
36
37
  end
@@ -113,11 +114,15 @@ class Ollama::Client
113
114
 
114
115
  def headers
115
116
  {
116
- 'User-Agent' => '%s/%s' % [ self.class, Ollama::VERSION ],
117
+ 'User-Agent' => self.class.user_agent,
117
118
  'Content-Type' => 'application/json; charset=utf-8',
118
119
  }
119
120
  end
120
121
 
122
# Returns the string sent as the User-Agent header, built from the client
# class name and Ollama::VERSION, e.g. "Ollama::Client/0.1.0".
#
# Inside a singleton method +self+ already is the class. Using +self.class+
# here would evaluate to +Class+ and yield "Class/<version>".
def self.user_agent
  '%s/%s' % [ self, Ollama::VERSION ]
end
125
+
121
126
  def excon(url)
122
127
  params = {
123
128
  connect_timeout: @connect_timeout,
@@ -0,0 +1,44 @@
1
# In-memory cache backed by a Hash.
#
# Keys are stored with the current prefix prepended. The prefix can be changed
# later through #prefix=, so the underlying hash may contain entries of several
# prefixes at the same time. #each, #size and #clear therefore only consider
# entries whose stored key starts with the current prefix.
class Ollama::Documents::MemoryCache
  include Enumerable

  # @param prefix [String] prepended to every key before it is stored
  def initialize(prefix:)
    @prefix = prefix
    @data   = {}
  end

  attr_writer :prefix

  # @return the value stored for +key+ under the current prefix, or nil
  def [](key)
    @data[pre(key)]
  end

  # Stores +value+ for +key+ under the current prefix.
  def []=(key, value)
    @data[pre(key)] = value
  end

  # @return [Boolean] whether +key+ exists under the current prefix
  def key?(key)
    @data.key?(pre(key))
  end

  # Removes +key+ under the current prefix and returns the removed value.
  def delete(key)
    @data.delete(pre(key))
  end

  # @return [Integer] number of entries stored under the current prefix
  def size
    @data.count { |stored_key, _value| own?(stored_key) }
  end

  # Removes the entries of the current prefix only; entries written under
  # another prefix are kept.
  #
  # @return [self]
  def clear
    @data.delete_if { |stored_key, _value| own?(stored_key) }
    self
  end

  # Yields the stored (prefixed) key and the value of every entry of the
  # current prefix.
  def each(&block)
    @data.select { |stored_key, _value| own?(stored_key) }.each(&block)
  end

  private

  # Builds the stored key for +key+.
  def pre(key)
    [ @prefix, key ].join
  end

  # Whether a stored key belongs to the current prefix. The prefix is
  # stringified the same way Array#join does in #pre.
  def own?(stored_key)
    stored_key.start_with?(@prefix.to_s)
  end
end
@@ -0,0 +1,57 @@
1
+ require 'redis'
2
+
3
# Cache that stores its values as JSON strings in redis.
#
# Keys are stored with the current prefix prepended; #size, #clear and #each
# scan for keys matching "<prefix>*".
class Ollama::Documents::RedisCache
  include Enumerable

  # @param prefix [String] prepended to every key
  # @param url [String] redis URL, defaults to ENV['REDIS_URL']
  # @raise [ArgumentError] if no URL is available
  def initialize(prefix:, url: ENV['REDIS_URL'])
    url or raise ArgumentError, 'require redis url'
    @prefix, @url = prefix, url
  end

  attr_writer :prefix

  # @return [Redis] the memoized redis client for the configured URL
  def redis
    @redis ||= Redis.new(url: @url)
  end

  # Returns the record stored for +key+, or nil if there is none.
  #
  # The nil check is required: Kernel#JSON only parses string-like arguments
  # and would otherwise *generate* JSON for nil, handing back a non-nil value
  # for a missing key.
  def [](key)
    value = redis.get(pre(key))
    value.nil? and return nil
    JSON(value, object_class: Ollama::Documents::Record)
  end

  # Serializes +value+ as JSON and stores it for +key+.
  def []=(key, value)
    redis.set(pre(key), JSON(value))
  end

  # @return [Boolean] whether +key+ exists under the current prefix
  def key?(key)
    !!redis.exists?(pre(key))
  end

  # @return [Boolean] true if exactly one key was removed
  def delete(key)
    redis.del(pre(key)) == 1
  end

  # @return [Integer] number of keys matching the current prefix
  def size
    count = 0
    redis.scan_each(match: pattern) { |_key| count += 1 }
    count
  end

  # Deletes every key matching the current prefix.
  #
  # @return [self]
  def clear
    redis.scan_each(match: pattern) { |key| redis.del(key) }
    self
  end

  # Yields the stored (prefixed) key and the parsed value of every key
  # matching the current prefix. Returns an Enumerator without a block.
  #
  # @return [self]
  def each(&block)
    block or return enum_for(:each)
    redis.scan_each(match: pattern) { |key| block.(key, self[unpre(key)]) }
    self
  end

  private

  # Glob pattern used to scan for the keys of the current prefix.
  def pattern
    "#@prefix*"
  end

  # Builds the stored key for +key+.
  def pre(key)
    [ @prefix, key ].join
  end

  # Strips the prefix from a stored key. A literal prefix removal is used so
  # that regexp metacharacters in the prefix cannot change the result.
  def unpre(key)
    key.delete_prefix(@prefix.to_s)
  end
end
@@ -0,0 +1,70 @@
1
module Ollama::Documents::Splitters
  # Splits a text at a separator and joins neighbouring pieces into chunks.
  #
  # Pieces are joined as long as the joined size stays below +chunk_size+; a
  # single piece that is not smaller than +chunk_size+ is returned unchanged.
  class Character
    DEFAULT_SEPARATOR = /(?:\r?\n){2,}/

    # @param separator [Regexp, String] where to split the text
    # @param include_separator [Boolean] keep each separator, appended to the
    #   piece in front of it
    # @param chunk_size [Integer] size limit used when joining pieces
    def initialize(separator: DEFAULT_SEPARATOR, include_separator: false, chunk_size: 4096)
      @separator, @include_separator, @chunk_size = separator, include_separator, chunk_size
      if include_separator
        # Matches a piece only if it is a separator as a whole.
        @separator_only = /\A(?:#{separator})\z/
        # The capture group makes String#split return the separators as well.
        @separator = Regexp.new("(#@separator)")
      end
    end

    # @param text [String] the text to split
    # @return [Array<String>] the non-empty chunks in original order
    def split(text)
      pieces = []
      text.split(@separator) do |piece|
        # The whole piece has to be a separator. An unanchored match would
        # also classify ordinary text containing a match (always the case
        # for /\b/ or //) as separator, and a piece without a predecessor
        # would then be lost.
        if separator?(piece) && !pieces.empty?
          pieces.last << piece
        else
          pieces << piece
        end
      end
      join_pieces(pieces)
    end

    private

    # Whether +piece+ is a separator returned by the capturing split.
    def separator?(piece)
      @include_separator && piece.match?(@separator_only)
    end

    # Joins consecutive pieces into chunks, skipping empty chunks.
    def join_pieces(pieces)
      chunks  = []
      current = +''
      pieces.each do |piece|
        if current.size + piece.size < @chunk_size
          current << piece
        else
          current.empty? or chunks << current
          current = piece.dup
        end
      end
      current.empty? or chunks << current
      chunks
    end
  end

  # Splits a text with the first separator and splits every chunk that is
  # still larger than +chunk_size+ again with the remaining separators.
  class RecursiveCharacter
    DEFAULT_SEPARATORS = [
      /(?:\r?\n){2,}/,
      /\r?\n/,
      /\b/,
      //,
    ].freeze

    # @param separators [Array<Regexp, String>] separators, tried in order
    # @param include_separator [Boolean] passed on to Character
    # @param chunk_size [Integer] passed on to Character
    # @raise [ArgumentError] if +separators+ is empty
    def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, chunk_size: 4096)
      separators.empty? and
        raise ArgumentError, "non-empty array of separators required"
      @separators, @include_separator, @chunk_size =
        separators, include_separator, chunk_size
    end

    # @param text [String] the text to split
    # @param separators [Array<Regexp, String>] separators still to be tried
    # @return [Array<String>] the chunks in original order
    def split(text, separators: @separators)
      separators.empty? and return [ text ]
      separator, *remaining = separators
      texts = Character.new(
        separator:,
        include_separator: @include_separator,
        chunk_size: @chunk_size
      ).split(text)
      texts.empty? and return [ text ]
      texts.flat_map do |t|
        t.size > @chunk_size ? split(t, separators: remaining) : [ t ]
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
module Ollama::Documents::Splitters
  # Splits a text into sentences, embeds them and starts a new chunk wherever
  # the cosine distance between two neighbouring sentences exceeds a
  # threshold derived from all distances.
  class Semantic
    include Ollama::Utils::Math

    DEFAULT_SEPARATOR = /[.!?]\s*(?:\b|\z)/

    # @param ollama client object responding to #embed
    # @param model name of the embedding model
    # @param model_options options passed to #embed
    # @param separator [Regexp] sentence separator
    # @param chunk_size [Integer] size limit used when joining sentences
    def initialize(ollama:, model:, model_options: nil, separator: DEFAULT_SEPARATOR, chunk_size: 4096)
      @ollama, @model, @model_options, @separator, @chunk_size =
        ollama, model, model_options, separator, chunk_size
    end

    # @param text [String] the text to split
    # @param batch_size [Integer] sentences per embedding request
    # @param breakpoint [Symbol] :percentile, :standard_deviation or
    #   :interquartile
    # @param opts :percentile (default 95) or :percentage (default 100),
    #   depending on the breakpoint method
    # @return [Array<String>] the chunks, or the plain sentences if no
    #   distance exceeds the threshold
    # @raise [ArgumentError] for an unknown breakpoint method
    def split(text, batch_size: 100, breakpoint: :percentile, **opts)
      sentences = Ollama::Documents::Splitters::Character.new(
        separator: @separator,
        include_separator: true,
        chunk_size: 1,
      ).split(text)
      # Without at least two sentences there is no distance to evaluate, so
      # neither embeddings nor a threshold are needed.
      sentences.size < 2 and return sentences
      embeddings = sentences.with_infobar(label: 'Split').each_slice(batch_size).inject([]) do |e, batch|
        e.concat sentence_embeddings(batch)
        infobar.progress by: batch.size
        e
      end
      infobar.newline
      distances = embeddings.each_cons(2).map do |a, b|
        1.0 - cosine_similarity(a:, b:)
      end
      max_distance = calculate_breakpoint_threshold(breakpoint, distances, **opts)
      # Gap i separates sentence i from sentence i + 1.
      gaps = distances.each_index.select { |i| distances[i] > max_distance }
      gaps.empty? and return sentences
      # Close the last group; every gap index is smaller than distances.size.
      gaps << distances.size
      # Covers the remaining sentences should fewer embeddings than sentences
      # have been returned.
      if gaps.last < sentences.size - 1
        gaps << sentences.size - 1
      end
      first = 0
      gaps.each_with_object([]) do |last, result|
        result.concat join_sentences(sentences[first..last])
        first = last.succ
      end
    end

    private

    # Joins consecutive sentences of one group into chunks whose size stays
    # below the chunk size where possible, skipping empty chunks.
    def join_sentences(group)
      chunks  = []
      current = +''
      group.each do |sentence|
        if current.size + sentence.size < @chunk_size
          current << sentence
        else
          current.empty? or chunks << current
          current = sentence.dup
        end
      end
      current.empty? or chunks << current
      chunks
    end

    # Computes the distance above which two sentences are put into different
    # chunks.
    def calculate_breakpoint_threshold(breakpoint_method, distances, **opts)
      sequence = MoreMath::Sequence.new(distances)
      case breakpoint_method
      when :percentile
        percentile = opts.fetch(:percentile, 95)
        sequence.percentile(percentile)
      when :standard_deviation
        percentage = opts.fetch(:percentage, 100)
        (
          sequence.mean + sequence.standard_deviation * (percentage / 100.0)
        ).clamp(0, sequence.max)
      when :interquartile
        percentage = opts.fetch(:percentage, 100)
        iqr = sequence.interquartile_range
        max = sequence.max
        (sequence.mean + iqr * (percentage / 100.0)).clamp(0, max)
      else
        raise ArgumentError, "invalid breakpoint method #{breakpoint_method}"
      end
    end

    # Requests the embeddings for +input+ and converts each into a
    # Numo::NArray.
    def sentence_embeddings(input)
      @ollama.embed(model: @model, input:, options: @model_options).embeddings.map! {
        Numo::NArray[*_1]
      }
    end
  end
end
@@ -0,0 +1,172 @@
1
+ require 'numo/narray'
2
+ require 'digest'
3
+
4
+ class Ollama::Documents
5
+ end
6
+ require 'ollama/documents/memory_cache'
7
+ require 'ollama/documents/redis_cache'
8
+ module Ollama::Documents::Splitters
9
+ end
10
+ require 'ollama/documents/splitters/character'
11
+ require 'ollama/documents/splitters/semantic'
12
+
13
# Stores texts together with their embeddings in a cache and finds the stored
# texts most similar to a query.
class Ollama::Documents
  include Ollama::Utils::Math

  # A stored text with its embedding, norm, source and tags.
  class Record < JSON::GenericObject
    def to_s
      my_tags = Ollama::Utils::Tags.new(tags)
      my_tags.empty? or my_tags = " #{my_tags}"
      "#<#{self.class} #{text.inspect}#{my_tags} #{similarity || 'n/a'}>"
    end

    # Records are compared by their text only.
    def ==(other)
      text == other.text
    end

    alias inspect to_s
  end

  # @param ollama client object responding to #embed
  # @param model name of the embedding model
  # @param model_options options passed to #embed
  # @param collection name of the collection, part of the cache prefix
  # @param cache [Class] MemoryCache or RedisCache
  # @param redis_url [String, nil] URL for RedisCache; if nil, RedisCache
  #   uses its own default
  def initialize(ollama:, model:, model_options: nil, collection: :default, cache: MemoryCache, redis_url: nil)
    @ollama, @model, @model_options, @collection = ollama, model, model_options, collection
    # The URL has to be set before the cache is connected.
    @redis_url = redis_url
    @cache     = connect_cache(cache)
  end

  attr_reader :ollama, :model, :collection

  # Switches to another collection by changing the prefix of the cache.
  def collection=(new_collection)
    @collection = new_collection
    @cache.prefix = prefix
  end

  # Embeds the given texts in batches and stores them; texts that already
  # exist are skipped. The passed +inputs+ object is not modified.
  #
  # @param inputs a text, an object responding to #read, or an array of these
  # @param batch_size [Integer] texts per embedding request
  # @param source [String, nil] stored with each record; its basename is
  #   added to the tags
  # @param tags tags stored with each record
  # @return [self]
  def add(inputs, batch_size: 10, source: nil, tags: [])
    tags = Ollama::Utils::Tags.new(tags)
    source and tags.add File.basename(source)
    texts = Array(inputs).map { |i| i.respond_to?(:read) ? i.read : i.to_s }
    # Identical texts share one key, so each needs to be embedded only once.
    texts = texts.uniq.reject { |text| exist?(text) }
    texts.empty? and return self
    batches = texts.each_slice(batch_size).
      with_infobar(
        label: "Add #{tags}",
        total: texts.size
      )
    batches.each do |batch|
      embeddings = fetch_embeddings(model:, options: @model_options, input: batch)
      batch.zip(embeddings) do |text, embedding|
        self[text] = Record[text:, embedding:, norm: norm(embedding), source:, tags: tags.to_a]
      end
      infobar.progress by: batch.size
    end
    infobar.newline
    self
  end
  alias << add

  def [](text)
    @cache[key(text)]
  end

  def []=(text, record)
    @cache[key(text)] = record
  end

  def exist?(text)
    @cache.key?(key(text))
  end

  def delete(text)
    @cache.delete(key(text))
  end

  def size
    @cache.size
  end

  def clear
    @cache.clear
  end

  # Returns the stored records ordered by descending cosine similarity to
  # +string+; each record gets its cache key and similarity assigned.
  #
  # @param string the query text (or a vector)
  # @param tags if given, only records sharing at least one tag are returned
  # @param prompt [String, nil] format string applied to the query
  # @return [Array<Record>]
  def find(string, tags: nil, prompt: nil)
    needle      = convert_to_vector(string, prompt:)
    needle_norm = norm(needle)
    records     = @cache
    if tags
      tags    = Ollama::Utils::Tags.new(tags)
      records = records.select { |_key, record| (tags & record.tags).size >= 1 }
    end
    records = records.sort_by { |key, record|
      record.key = key
      record.similarity = cosine_similarity(
        a: needle,
        b: record.embedding,
        a_norm: needle_norm,
        b_norm: record.norm,
      )
    }
    records.map(&:last).reverse
  end

  # @return [Array] the names of the known collections; for MemoryCache only
  #   the current one
  def collections
    case @cache
    when MemoryCache
      [ @collection ]
    when RedisCache
      prefix = '%s-' % self.class
      # RedisCache is referenced directly: it is resolved in this class,
      # whereas "Documents::RedisCache" is not resolvable from here.
      RedisCache.new(**redis_cache_options(prefix)).filter_map { |key, _record|
        key[/\A#{Regexp.escape(prefix)}(.*)-/, 1]
      }.uniq
    else
      []
    end
  end

  # @return the merged tags of all records in the current collection
  def tags
    @cache.inject(Ollama::Utils::Tags.new) { |t, (_, record)| t.merge(record.tags) }
  end

  private

  # Creates the cache. A RedisCache is probed once so that connection
  # problems show up here; if it cannot be used, a MemoryCache is returned
  # instead. Any cache class other than RedisCache yields a MemoryCache.
  def connect_cache(cache_class)
    cache = nil
    if cache_class == RedisCache
      begin
        cache = cache_class.new(**redis_cache_options(prefix))
        cache.size # forces a request to the server
      rescue Redis::CannotConnectError, ArgumentError
        # Drop the unusable instance, otherwise it would be returned below.
        cache = nil
        STDERR.puts(
          "Cannot connect to redis URL #{(@redis_url || ENV['REDIS_URL']).inspect}, "\
          "falling back to MemoryCache."
        )
      end
    end
    cache || MemoryCache.new(prefix:)
  end

  # Keyword arguments for RedisCache.new. The URL is only passed if one was
  # configured, because an explicit nil would override RedisCache's default.
  def redis_cache_options(prefix)
    options = { prefix: }
    @redis_url and options[:url] = @redis_url
    options
  end

  # Turns a query into a vector. Strings are embedded with the same model
  # options that #add uses for the stored texts.
  def convert_to_vector(input, prompt: nil)
    if prompt
      input = prompt % input
    end
    if input.is_a?(String)
      Numo::NArray[*fetch_embeddings(model:, input:, options: @model_options).first]
    else
      super(input)
    end
  end

  def fetch_embeddings(model:, input:, options: nil)
    @ollama.embed(model:, input:, options:).embeddings
  end

  # Cache prefix of the current collection.
  def prefix
    '%s-%s-' % [ self.class, @collection ]
  end

  # Cache key of a text: its SHA256 hex digest.
  def key(input)
    Digest::SHA256.hexdigest(input)
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'infobar'
2
+ require 'tins/unit'
2
3
 
3
4
  class Ollama::Handlers::Progress
4
5
  include Ollama::Handlers::Concern
@@ -13,7 +14,9 @@ class Ollama::Handlers::Progress
13
14
 
14
15
  def call(response)
15
16
  infobar.display.output = @output
16
- status = response.status
17
+ if status = response.status
18
+ infobar.label = status
19
+ end
17
20
  if response.total && response.completed
18
21
  if !@last_status or @last_status != status
19
22
  @last_status and infobar.newline
@@ -24,13 +27,23 @@ class Ollama::Handlers::Progress
24
27
  end
25
28
  infobar.counter.progress(by: response.completed - @current)
26
29
  @current = response.completed
30
+ infobar.update(
31
+ message: message(response.completed, response.total),
32
+ force: true
33
+ )
27
34
  end
28
- if status
29
- infobar.label = status
30
- infobar.update(message: '%l %c/%t in %te, ETA %e @%E', force: true)
31
- elsif error = response.error
35
+ if error = response.error
32
36
  infobar.puts bold { "Error: " } + red { error }
33
37
  end
34
38
  self
35
39
  end
40
+
41
+ private
42
+
43
# Builds the infobar message template for a transfer: both byte counts are
# formatted with Tins::Unit and placed between the infobar directives.
def message(current, total)
  amounts = [ current, total ].map { |value|
    Tins::Unit.format(value, format: '%.2f %U')
  }
  "%l #{amounts.first}/#{amounts.last} in %te, ETA %e @%E"
end
36
49
  end
data/lib/ollama/image.rb CHANGED
@@ -5,26 +5,35 @@ class Ollama::Image
5
5
  @data = data
6
6
  end
7
7
 
8
+ attr_accessor :path
9
+
8
10
  class << self
9
- def for_base64(data)
10
- new(data)
11
+ def for_base64(data, path: nil)
12
+ obj = new(data)
13
+ obj.path = path
14
+ obj
11
15
  end
12
16
 
13
- def for_string(string)
14
- for_base64(Base64.encode64(string))
17
+ def for_string(string, path: nil)
18
+ for_base64(Base64.encode64(string), path:)
15
19
  end
16
20
 
17
- def for_io(io)
18
- for_string(io.read)
21
+ def for_io(io, path: nil)
22
+ path ||= io.path
23
+ for_string(io.read, path:)
19
24
  end
20
25
 
21
26
  def for_filename(path)
22
- File.open(path, 'rb') { |io| for_io(io) }
27
+ File.open(path, 'rb') { |io| for_io(io, path:) }
23
28
  end
24
29
 
25
30
  private :new
26
31
  end
27
32
 
33
# Two images are equal if they hold the same data.
#
# +other..data+ was a typo: it built a range expression and called an
# undefined +data+, so every comparison raised. The data of the other image
# is read through #to_s, which returns it.
def ==(other)
  other.is_a?(Ollama::Image) && @data == other.to_s
end
36
+
28
37
  def to_s
29
38
  @data
30
39
  end
@@ -0,0 +1,30 @@
1
+ require 'amatch'
2
+ require 'search_ui'
3
+
4
module Ollama::Utils::Chooser
  include SearchUI

  module_function

  # Lets the user pick one of +entries+ in an interactive search on STDOUT.
  # Entries are ranked by their pair distance similarity to the typed answer;
  # if nothing is similar, all entries are offered.
  #
  # @return the chosen entry, or nil if none was chosen
  def choose(entries)
    rank = lambda do |answer|
      matcher = Amatch::PairDistance.new(answer.downcase)
      scored  = entries.map { |candidate| [ candidate, -matcher.similar(candidate.to_s.downcase) ] }
      ranked  = scored.select { |_, score| score < 0 }.sort_by(&:last).map(&:first)
      ranked  = entries if ranked.empty?
      ranked.first(Tins::Terminal.lines - 1)
    end

    render = lambda do |_answer, matches, selector|
      lines = matches.each_with_index.map do |m, i|
        if i == selector
          "#{Term::ANSIColor.blue{?⮕}} #{m.on_blue}"
        else
          " #{m}"
        end
      end
      lines * ?\n
    end

    pick = lambda { |_answer, matches, selector| matches[selector] }

    entry = Search.new(match: rank, query: render, found: pick, output: STDOUT).start
    entry if entry
  end
end
@@ -0,0 +1,42 @@
1
# Renders texts on differently coloured backgrounds, each followed by its
# size, to make the boundaries between the texts visible.
class Ollama::Utils::ColorizeTexts
  include Math
  include Term::ANSIColor
  include Ollama::Utils::Width

  # @param texts texts or arrays of texts. Kernel#Array is used instead of
  #   #to_a so that plain strings, which have no #to_a, are accepted too.
  def initialize(*texts)
    @texts = texts.map { |t| Array(t) }.flatten
  end

  # @return [String] all texts, wrapped to 90 percent width and coloured
  def to_s
    result = +''
    @texts.each_with_index do |text, index|
      background = colors[(text.hash ^ index.hash) % colors.size]
      foreground = text_color(background)
      wrap(text, percentage: 90).each_line { |line|
        result << on_color(background) { color(foreground) { line } }
      }
      result << "\n##{bold{text.size.to_s}} \n\n"
    end
    result
  end

  private

  # Picks black or white, whichever is farther away from +color+.
  def text_color(color)
    color = Term::ANSIColor::Attribute[color]
    [
      Attribute.nearest_rgb_color('#000'),
      Attribute.nearest_rgb_color('#fff'),
    ].max_by { |t| t.distance_to(color) }
  end

  # 256 RGB triples generated from three sine waves of different periods.
  def colors
    @colors ||= (0..255).map { |i|
      [
        128 + 128 * sin(PI * i / 32.0),
        128 + 128 * sin(PI * i / 64.0),
        128 + 128 * sin(PI * i / 128.0),
      ].map { _1.clamp(0, 255).round }
    }
  end
end