ollama-ruby 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/ollama/client.rb CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'tins/xt/string_camelize'
2
2
  require 'tins/annotate'
3
- require 'excon'
4
3
 
5
4
  class Ollama::Client
6
5
  end
@@ -31,6 +30,8 @@ class Ollama::Client
31
30
 
32
31
  attr_accessor :output
33
32
 
33
+ attr_reader :base_url
34
+
34
35
  def ssl_verify_peer?
35
36
  !!@ssl_verify_peer
36
37
  end
@@ -113,11 +114,15 @@ class Ollama::Client
113
114
 
114
115
  def headers
115
116
  {
116
- 'User-Agent' => '%s/%s' % [ self.class, Ollama::VERSION ],
117
+ 'User-Agent' => self.class.user_agent,
117
118
  'Content-Type' => 'application/json; charset=utf-8',
118
119
  }
119
120
  end
120
121
 
122
# Returns the User-Agent value sent in the request headers, formatted as
# "<class name>/<Ollama::VERSION>".
#
# This is a class method, so `self` already is the class. Using
# `self.class` here would interpolate `Class` and produce "Class/<version>".
#
# @return [String]
def self.user_agent
  '%s/%s' % [ self, Ollama::VERSION ]
end
125
+
121
126
  def excon(url)
122
127
  params = {
123
128
  connect_timeout: @connect_timeout,
@@ -0,0 +1,44 @@
1
# Hash-backed cache whose entries are namespaced by a key prefix.
#
# Every key is stored as `prefix + key`. The prefix can be switched with
# #prefix=, so one instance may hold entries for several prefixes; all
# collection-level operations (#each, #size, #clear) only consider the
# entries of the current prefix.
class Ollama::Documents::MemoryCache
  include Enumerable

  # @param prefix [String] namespace prepended to every key
  def initialize(prefix:)
    @prefix = prefix
    @data   = {}
  end

  attr_writer :prefix

  # @return [Object, nil] the value stored under +key+ for the current prefix
  def [](key)
    @data[pre(key)]
  end

  # Stores +value+ under +key+ for the current prefix.
  def []=(key, value)
    @data[pre(key)] = value
  end

  # @return [Boolean] whether +key+ is stored for the current prefix
  def key?(key)
    @data.key?(pre(key))
  end

  # Removes +key+ for the current prefix and returns the removed value
  # (nil if there was none).
  def delete(key)
    @data.delete(pre(key))
  end

  # @return [Integer] number of entries stored under the current prefix
  def size
    @data.count { |key, _| own_key?(key) }
  end

  # Removes the entries of the current prefix only; entries stored under
  # other prefixes are left untouched.
  #
  # @return [self]
  def clear
    @data.delete_if { |key, _| own_key?(key) }
    self
  end

  # Yields the (prefixed key, value) pairs of the current prefix.
  def each(&block)
    @data.select { |key,| own_key?(key) }.each(&block)
  end

  private

  # Whether a stored (already prefixed) key belongs to the current prefix.
  def own_key?(key)
    key.start_with?(@prefix)
  end

  # Builds the storage key for +key+.
  def pre(key)
    [ @prefix, key ].join
  end
end
@@ -0,0 +1,57 @@
1
+ require 'redis'
2
+
3
# Redis-backed cache whose entries are namespaced by a key prefix.
#
# Values are serialised with JSON() on write and parsed back into
# Ollama::Documents::Record objects on read.
class Ollama::Documents::RedisCache
  include Enumerable

  # @param prefix [String] namespace prepended to every key
  # @param url [String] redis URL, defaults to ENV['REDIS_URL']
  # @raise [ArgumentError] if no URL is available
  def initialize(prefix:, url: ENV['REDIS_URL'])
    url or raise ArgumentError, 'require redis url'
    @prefix, @url = prefix, url
  end

  attr_writer :prefix

  # Lazily created redis client for the configured URL.
  def redis
    @redis ||= Redis.new(url: @url)
  end

  # @return [Ollama::Documents::Record, nil] the record stored under +key+,
  #   or nil if the key does not exist
  def [](key)
    value = redis.get(pre(key))
    # JSON(nil) would *generate* the string "null" instead of returning nil.
    value.nil? ? nil : JSON(value, object_class: Ollama::Documents::Record)
  end

  # Stores +value+ as JSON under +key+.
  def []=(key, value)
    redis.set(pre(key), JSON(value))
  end

  # @return [Boolean] whether +key+ exists for the current prefix
  def key?(key)
    !!redis.exists?(pre(key))
  end

  # @return [Boolean] true if exactly one key was removed
  def delete(key)
    redis.del(pre(key)) == 1
  end

  # @return [Integer] number of keys matching the current prefix
  def size
    count = 0
    redis.scan_each(match: "#@prefix*") { count += 1 }
    count
  end

  # Deletes every key matching the current prefix.
  #
  # @return [self]
  def clear
    redis.scan_each(match: "#@prefix*") { |key| redis.del(key) }
    self
  end

  # Yields (prefixed key, record) for every key matching the current prefix.
  # Returns an enumerator when called without a block.
  #
  # @return [self, Enumerator]
  def each(&block)
    block or return enum_for(:each)
    redis.scan_each(match: "#@prefix*") { |key| block.(key, self[unpre(key)]) }
    self
  end

  private

  # Builds the storage key for +key+.
  def pre(key)
    [ @prefix, key ].join
  end

  # Strips the prefix from a storage key. The prefix is removed literally,
  # so regexp metacharacters in it cannot change what is stripped.
  def unpre(key)
    key.delete_prefix(@prefix.to_s)
  end
end
@@ -0,0 +1,70 @@
1
+ module Ollama::Documents::Splitters
2
# Splits a text at a separator and packs the resulting pieces into chunks
# that stay below a size limit where possible.
class Character
  # Two or more consecutive line breaks (LF or CRLF).
  DEFAULT_SEPARATOR = /(?:\r?\n){2,}/

  # @param separator [Regexp, String] where to cut the text
  # @param include_separator [Boolean] if true, each separator is kept and
  #   appended to the piece that precedes it
  # @param chunk_size [Integer] pieces are merged while the merged size
  #   stays below this value
  def initialize(separator: DEFAULT_SEPARATOR, include_separator: false, chunk_size: 4096)
    @separator, @include_separator, @chunk_size = separator, include_separator, chunk_size
    if include_separator
      # Matches a piece that consists of a separator and nothing else.
      @separator_piece = Regexp.new("\\A(?:#{separator})\\z")
      # The capture group makes String#split return the separators as well.
      @separator = Regexp.new("(#{separator})")
    end
  end

  # @param text [String] the text to split
  # @return [Array<String>] the chunks; a single piece longer than the
  #   chunk size is returned as its own (oversized) chunk
  def split(text)
    pieces = []
    text.split(@separator) do |piece|
      # Only a piece that is a separator in full is glued to its
      # predecessor. A text piece that merely contains something looking
      # like a separator (e.g. "a." in "a.! b") must be kept as text.
      if @include_separator && piece.match?(@separator_piece)
        pieces.last&.concat(piece)
      else
        pieces.push(piece)
      end
    end
    chunks  = []
    current = +''
    pieces.each do |piece|
      if current.size + piece.size < @chunk_size
        current += piece
      else
        chunks << current unless current.empty?
        current = piece
      end
    end
    chunks << current unless current.empty?
    chunks
  end
end
35
+
36
# Splits a text with a list of separators, falling back to the next
# separator for every chunk that is still larger than the chunk size.
class RecursiveCharacter
  DEFAULT_SEPARATORS = [
    /(?:\r?\n){2,}/,
    /\r?\n/,
    /\b/,
    //,
  ].freeze

  # @param separators [Array<Regexp, String>] separators, tried in order
  # @param include_separator [Boolean] passed on to Character
  # @param chunk_size [Integer] passed on to Character, also the limit
  #   above which a chunk is split again
  # @raise [ArgumentError] if +separators+ is empty
  def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, chunk_size: 4096)
    if separators.empty?
      raise ArgumentError, "non-empty array of separators required"
    end
    @separators        = separators
    @include_separator = include_separator
    @chunk_size        = chunk_size
  end

  # @param text [String] the text to split
  # @param separators [Array] separators still available at this level
  # @return [Array<String>] the chunks; a chunk stays oversized once all
  #   separators are used up
  def split(text, separators: @separators)
    return [ text ] if separators.empty?
    separator, *remaining = separators
    splitter = Character.new(
      separator:,
      include_separator: @include_separator,
      chunk_size: @chunk_size
    )
    chunks = splitter.split(text)
    return [ text ] if chunks.empty?
    chunks.flat_map do |chunk|
      chunk.size > @chunk_size ? split(chunk, separators: remaining) : [ chunk ]
    end
  end
end
70
+ end
@@ -0,0 +1,90 @@
1
+ module Ollama::Documents::Splitters
2
# Splits a text into sentences, embeds them, and starts a new chunk
# wherever the cosine distance between neighbouring sentences exceeds a
# threshold derived from all distances.
class Semantic
  include Ollama::Utils::Math

  DEFAULT_SEPARATOR = /[.!?]\s*(?:\b|\z)/

  # @param ollama [Object] client that responds to #embed
  # @param model [String] embedding model name
  # @param model_options [Object, nil] options passed to #embed
  # @param separator [Regexp] sentence separator
  # @param chunk_size [Integer] sentences are merged while the merged size
  #   stays below this value
  def initialize(ollama:, model:, model_options: nil, separator: DEFAULT_SEPARATOR, chunk_size: 4096)
    @ollama        = ollama
    @model         = model
    @model_options = model_options
    @separator     = separator
    @chunk_size    = chunk_size
  end

  # @param text [String] the text to split
  # @param batch_size [Integer] number of sentences embedded per request
  # @param breakpoint [Symbol] :percentile, :standard_deviation or
  #   :interquartile
  # @param opts [Hash] :percentile or :percentage, depending on +breakpoint+
  # @return [Array<String>] the chunks, or the plain sentences if no
  #   distance exceeds the threshold
  def split(text, batch_size: 100, breakpoint: :percentile, **opts)
    sentences = Ollama::Documents::Splitters::Character.new(
      separator: @separator,
      include_separator: true,
      chunk_size: 1,
    ).split(text)
    embeddings = sentences.with_infobar(label: 'Split').
      each_slice(batch_size).each_with_object([]) do |batch, collected|
        collected.concat sentence_embeddings(batch)
        infobar.progress by: batch.size
      end
    infobar.newline
    distances = embeddings.each_cons(2).map do |a, b|
      1.0 - cosine_similarity(a:, b:)
    end
    max_distance = calculate_breakpoint_threshold(breakpoint, distances, **opts)
    gaps = distances.each_index.select { |index| distances[index] > max_distance }
    gaps.empty? and return sentences
    gaps << distances.size     if gaps.last < distances.size
    gaps << sentences.size - 1 if gaps.last < sentences.size - 1
    chunks = []
    first  = 0
    buffer = +''
    gaps.each do |gap|
      (first..gap).each do |index|
        sentence = sentences[index]
        if buffer.size + sentence.size < @chunk_size
          buffer += sentence
        else
          chunks << buffer unless buffer.empty?
          buffer = sentence
        end
      end
      # A gap always closes the current chunk.
      unless buffer.empty?
        chunks << buffer
        buffer = +''
      end
      first = gap.succ
    end
    chunks << buffer unless buffer.empty?
    chunks
  end

  private

  # Computes the distance above which two neighbouring sentences are put
  # into different chunks.
  #
  # @raise [ArgumentError] for an unknown +breakpoint_method+
  def calculate_breakpoint_threshold(breakpoint_method, distances, **opts)
    sequence = MoreMath::Sequence.new(distances)
    case breakpoint_method
    when :percentile
      sequence.percentile(opts.fetch(:percentile, 95))
    when :standard_deviation
      factor = opts.fetch(:percentage, 100) / 100.0
      (sequence.mean + sequence.standard_deviation * factor).clamp(0, sequence.max)
    when :interquartile
      factor = opts.fetch(:percentage, 100) / 100.0
      (sequence.mean + sequence.interquartile_range * factor).clamp(0, sequence.max)
    else
      raise ArgumentError, "invalid breakpoint method #{breakpoint_method}"
    end
  end

  # Fetches the embeddings for +input+ and converts each to a Numo::NArray.
  def sentence_embeddings(input)
    response = @ollama.embed(model: @model, input:, options: @model_options)
    response.embeddings.map! { |vector| Numo::NArray[*vector] }
  end
end
90
+ end
@@ -0,0 +1,172 @@
1
+ require 'numo/narray'
2
+ require 'digest'
3
+
4
+ class Ollama::Documents
5
+ end
6
+ require 'ollama/documents/memory_cache'
7
+ require 'ollama/documents/redis_cache'
8
+ module Ollama::Documents::Splitters
9
+ end
10
+ require 'ollama/documents/splitters/character'
11
+ require 'ollama/documents/splitters/semantic'
12
+
13
# Stores texts together with their embeddings in a cache (memory or redis)
# and finds the stored texts most similar to a query.
class Ollama::Documents
  include Ollama::Utils::Math

  # A stored text with its embedding, norm, source and tags.
  class Record < JSON::GenericObject
    def to_s
      my_tags = Ollama::Utils::Tags.new(tags)
      my_tags.empty? or my_tags = " #{my_tags}"
      "#<#{self.class} #{text.inspect}#{my_tags} #{similarity || 'n/a'}>"
    end

    # Records are equal if their texts are equal. Objects that have no
    # +text+ (nil, strings, ...) are simply not equal instead of raising.
    def ==(other)
      other.respond_to?(:text) && text == other.text
    end

    alias inspect to_s
  end

  # @param ollama [Object] client that responds to #embed
  # @param model [String] embedding model name
  # @param model_options [Object, nil] options passed to #embed
  # @param collection [Symbol, String] name of the collection
  # @param cache [Class] MemoryCache or RedisCache
  # @param redis_url [String, nil] redis URL; ENV['REDIS_URL'] is used if nil
  def initialize(ollama:, model:, model_options: nil, collection: :default, cache: MemoryCache, redis_url: nil)
    @ollama, @model, @model_options, @collection = ollama, model, model_options, collection
    # Must be set before connecting, the connection uses it.
    @redis_url = redis_url
    @cache     = connect_cache(cache)
  end

  attr_reader :ollama, :model, :collection

  # Switches to another collection by changing the cache's key prefix.
  def collection=(new_collection)
    @collection = new_collection
    @cache.prefix = prefix
  end

  # Embeds and stores the given inputs, skipping texts already stored.
  # The caller's array is not modified.
  #
  # @param inputs [Object, Array] texts or objects responding to #read
  # @param batch_size [Integer] number of texts embedded per request
  # @param source [String, nil] origin of the inputs; its basename is
  #   added as a tag
  # @param tags [Array] tags attached to every stored record
  # @return [self]
  def add(inputs, batch_size: 10, source: nil, tags: [])
    tags = Ollama::Utils::Tags.new(tags)
    source and tags.add File.basename(source)
    texts = Array(inputs).map { |input|
      input.respond_to?(:read) ? input.read : input.to_s
    }
    texts = texts.reject { |text| exist?(text) }
    texts.empty? and return self
    batches = texts.each_slice(batch_size).
      with_infobar(
        label: "Add #{tags}",
        total: texts.size
      )
    batches.each do |batch|
      embeddings = fetch_embeddings(model:, options: @model_options, input: batch)
      batch.zip(embeddings) do |text, embedding|
        self[text] = Record[
          text:, embedding:, norm: norm(embedding), source:, tags: tags.to_a
        ]
      end
      infobar.progress by: batch.size
    end
    infobar.newline
    self
  end
  alias << add

  # @return [Record, nil] the record stored for +text+
  def [](text)
    @cache[key(text)]
  end

  # Stores +record+ for +text+.
  def []=(text, record)
    @cache[key(text)] = record
  end

  # @return [Boolean] whether +text+ is already stored
  def exist?(text)
    @cache.key?(key(text))
  end

  # Removes the record stored for +text+.
  def delete(text)
    @cache.delete(key(text))
  end

  # @return [Integer] number of entries reported by the cache
  def size
    @cache.size
  end

  # Clears the cache.
  def clear
    @cache.clear
  end

  # Finds the stored records most similar to +string+.
  #
  # @param string [String, Object] query text, or a vector
  # @param tags [Array, nil] if given, only records sharing at least one
  #   of these tags are considered
  # @param prompt [String, nil] format string applied to the query
  # @return [Array<Record>] records ordered from most to least similar
  def find(string, tags: nil, prompt: nil)
    needle = convert_to_vector(string, prompt:)
    needle_norm = norm(needle)
    records = @cache
    if tags
      tags = Ollama::Utils::Tags.new(tags)
      records = records.select { |_key, record| (tags & record.tags).size >= 1 }
    end
    records = records.sort_by { |key, record|
      record.key = key
      record.similarity = cosine_similarity(
        a: needle,
        b: record.embedding,
        a_norm: needle_norm,
        b_norm: record.norm,
      )
    }
    records.transpose.last&.reverse.to_a
  end

  # @return [Array] names of the known collections; for MemoryCache only
  #   the current one
  def collections
    case @cache
    when MemoryCache
      [ @collection ]
    when RedisCache
      prefix = '%s-' % self.class
      # RedisCache is resolved relative to this class; a bare `Documents`
      # constant is not reachable from here unless Ollama is included at
      # the top level.
      RedisCache.new(prefix:, url: resolved_redis_url).
        map { _1[/#{prefix}(.*)-/, 1] }.uniq
    else
      []
    end
  end

  # @return [Ollama::Utils::Tags] all tags of all records in the cache
  def tags
    @cache.inject(Ollama::Utils::Tags.new) { |t, (_, record)| t.merge(record.tags) }
  end

  private

  # Creates the cache. For RedisCache the connection is probed with #size;
  # if redis cannot be used a MemoryCache is returned instead.
  def connect_cache(cache_class)
    cache = nil
    if cache_class == RedisCache
      begin
        cache = cache_class.new(prefix:, url: resolved_redis_url)
        cache.size # forces a connection, the client connects lazily
      rescue Redis::CannotConnectError
        cache = nil # drop the unusable cache so the fallback applies
        STDERR.puts(
          "Cannot connect to redis URL #{resolved_redis_url.inspect}, "\
          "falling back to MemoryCache."
        )
      rescue StandardError => e
        cache = nil
        STDERR.puts(
          "Cannot use redis cache (#{e.class}: #{e.message}), "\
          "falling back to MemoryCache."
        )
      end
    end
    cache || MemoryCache.new(prefix:)
  end

  # The redis URL passed to the constructor, or ENV['REDIS_URL'].
  def resolved_redis_url
    @redis_url || ENV['REDIS_URL']
  end

  # Turns a query into a vector: strings are embedded (after applying
  # +prompt+ as a format string, if given), anything else is handed to the
  # included implementation.
  def convert_to_vector(input, prompt: nil)
    if prompt
      input = prompt % input
    end
    if input.is_a?(String)
      Numo::NArray[*fetch_embeddings(model:, input:).first]
    else
      super(input)
    end
  end

  # Requests the embeddings for +input+ from the client.
  def fetch_embeddings(model:, input:, options: nil)
    @ollama.embed(model:, input:, options:).embeddings
  end

  # Key prefix of the current collection.
  def prefix
    '%s-%s-' % [ self.class, @collection ]
  end

  # Cache key of a text: its SHA256 hex digest.
  def key(input)
    Digest::SHA256.hexdigest(input)
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'infobar'
2
+ require 'tins/unit'
2
3
 
3
4
  class Ollama::Handlers::Progress
4
5
  include Ollama::Handlers::Concern
@@ -13,7 +14,9 @@ class Ollama::Handlers::Progress
13
14
 
14
15
  def call(response)
15
16
  infobar.display.output = @output
16
- status = response.status
17
+ if status = response.status
18
+ infobar.label = status
19
+ end
17
20
  if response.total && response.completed
18
21
  if !@last_status or @last_status != status
19
22
  @last_status and infobar.newline
@@ -24,13 +27,23 @@ class Ollama::Handlers::Progress
24
27
  end
25
28
  infobar.counter.progress(by: response.completed - @current)
26
29
  @current = response.completed
30
+ infobar.update(
31
+ message: message(response.completed, response.total),
32
+ force: true
33
+ )
27
34
  end
28
- if status
29
- infobar.label = status
30
- infobar.update(message: '%l %c/%t in %te, ETA %e @%E', force: true)
31
- elsif error = response.error
35
+ if error = response.error
32
36
  infobar.puts bold { "Error: " } + red { error }
33
37
  end
34
38
  self
35
39
  end
40
+
41
+ private
42
+
43
# Builds the infobar message template, with the completed and total
# amounts rendered by Tins::Unit.format.
def message(current, total)
  amounts = [ current, total ].map do |value|
    Tins::Unit.format(value, format: '%.2f %U')
  end
  progress = '%s/%s' % amounts
  "%l #{progress} in %te, ETA %e @%E"
end
36
49
  end
data/lib/ollama/image.rb CHANGED
@@ -5,26 +5,35 @@ class Ollama::Image
5
5
  @data = data
6
6
  end
7
7
 
8
+ attr_accessor :path
9
+
8
10
  class << self
9
- def for_base64(data)
10
- new(data)
11
+ def for_base64(data, path: nil)
12
+ obj = new(data)
13
+ obj.path = path
14
+ obj
11
15
  end
12
16
 
13
- def for_string(string)
14
- for_base64(Base64.encode64(string))
17
+ def for_string(string, path: nil)
18
+ for_base64(Base64.encode64(string), path:)
15
19
  end
16
20
 
17
- def for_io(io)
18
- for_string(io.read)
21
+ def for_io(io, path: nil)
22
+ path ||= io.path
23
+ for_string(io.read, path:)
19
24
  end
20
25
 
21
26
  def for_filename(path)
22
- File.open(path, 'rb') { |io| for_io(io) }
27
+ File.open(path, 'rb') { |io| for_io(io, path:) }
23
28
  end
24
29
 
25
30
  private :new
26
31
  end
27
32
 
33
# Two images are equal if they carry the same data.
#
# The comparison uses #to_s, which returns the image data, and is limited
# to objects of this class so that a plain string is never equal to an
# image.
def ==(other)
  other.is_a?(self.class) && @data == other.to_s
end
36
+
28
37
  def to_s
29
38
  @data
30
39
  end
@@ -0,0 +1,30 @@
1
+ require 'amatch'
2
+ require 'search_ui'
3
+
4
# Interactive chooser: lets the user pick one entry from a list with a
# fuzzy search UI.
module Ollama::Utils::Chooser
  include SearchUI

  module_function

  # @param entries [Array] the entries to choose from
  # @return [Object, nil] the chosen entry, or nil if nothing was chosen
  def choose(entries)
    # Ranks the entries by pair distance similarity to the typed answer;
    # shows all entries if none is similar.
    rank = -> answer {
      matcher = Amatch::PairDistance.new(answer.downcase)
      scored  = entries.map { |n| [ n, -matcher.similar(n.to_s.downcase) ] }
      ranked  = scored.select { |_, s| s < 0 }.sort_by(&:last).map(&:first)
      ranked  = entries if ranked.empty?
      ranked.first(Tins::Terminal.lines - 1)
    }
    # Renders the match list, highlighting the selected line.
    render = -> _answer, matches, selector {
      matches.each_with_index.map { |m, i|
        i == selector ? "#{Term::ANSIColor.blue{?⮕}} #{m.on_blue}" : " #{m}"
      } * ?\n
    }
    pick = -> _answer, matches, selector { matches[selector] }
    chosen = Search.new(
      match:  rank,
      query:  render,
      found:  pick,
      output: STDOUT
    ).start
    chosen if chosen
  end
end
@@ -0,0 +1,42 @@
1
# Renders a list of texts with a different background colour per text,
# each followed by its size.
class Ollama::Utils::ColorizeTexts
  include Math
  include Term::ANSIColor
  include Ollama::Utils::Width

  # @param texts [Array] the texts; nested collections are flattened
  def initialize(*texts)
    @texts = Array(texts.map(&:to_a).flatten)
  end

  # @return [String] the wrapped and coloured texts
  def to_s
    @texts.each_with_index.each_with_object(+'') do |(text, index), output|
      background = colors[(text.hash ^ index.hash) % colors.size]
      foreground = text_color(background)
      wrap(text, percentage: 90).each_line do |line|
        output << on_color(background) { color(foreground) { line } }
      end
      output << "\n##{bold{text.size.to_s}} \n\n"
    end
  end

  private

  # Picks whichever of the colours nearest to black and white is farther
  # away from +color+.
  def text_color(color)
    target     = Term::ANSIColor::Attribute[color]
    candidates = %w[ #000 #fff ].map { |hex| Attribute.nearest_rgb_color(hex) }
    candidates.max_by { |candidate| candidate.distance_to(target) }
  end

  # 256 RGB triples; each channel follows a sine wave with its own period.
  def colors
    @colors ||= (0..255).map { |i|
      [ 32.0, 64.0, 128.0 ].map { |period|
        (128 + 128 * sin(PI * i / period)).clamp(0, 255).round
      }
    }
  end
end