ollama-ruby 0.12.1 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +1 -0
  3. data/CHANGES.md +39 -0
  4. data/README.md +70 -144
  5. data/Rakefile +5 -17
  6. data/bin/ollama_cli +37 -6
  7. data/lib/ollama/client/command.rb +2 -2
  8. data/lib/ollama/dto.rb +4 -0
  9. data/lib/ollama/version.rb +1 -1
  10. data/lib/ollama.rb +0 -11
  11. data/ollama-ruby.gemspec +11 -22
  12. data/spec/ollama/message_spec.rb +9 -0
  13. metadata +25 -255
  14. data/bin/ollama_chat +0 -1248
  15. data/config/redis.conf +0 -5
  16. data/docker-compose.yml +0 -10
  17. data/lib/ollama/documents/cache/common.rb +0 -36
  18. data/lib/ollama/documents/cache/memory_cache.rb +0 -44
  19. data/lib/ollama/documents/cache/records.rb +0 -87
  20. data/lib/ollama/documents/cache/redis_backed_memory_cache.rb +0 -39
  21. data/lib/ollama/documents/cache/redis_cache.rb +0 -68
  22. data/lib/ollama/documents/cache/sqlite_cache.rb +0 -215
  23. data/lib/ollama/documents/splitters/character.rb +0 -72
  24. data/lib/ollama/documents/splitters/semantic.rb +0 -91
  25. data/lib/ollama/documents.rb +0 -184
  26. data/lib/ollama/utils/cache_fetcher.rb +0 -38
  27. data/lib/ollama/utils/chooser.rb +0 -52
  28. data/lib/ollama/utils/colorize_texts.rb +0 -65
  29. data/lib/ollama/utils/fetcher.rb +0 -175
  30. data/lib/ollama/utils/file_argument.rb +0 -34
  31. data/lib/ollama/utils/math.rb +0 -48
  32. data/lib/ollama/utils/tags.rb +0 -67
  33. data/spec/assets/embeddings.json +0 -1
  34. data/spec/assets/prompt.txt +0 -1
  35. data/spec/ollama/documents/cache/memory_cache_spec.rb +0 -97
  36. data/spec/ollama/documents/cache/redis_backed_memory_cache_spec.rb +0 -118
  37. data/spec/ollama/documents/cache/redis_cache_spec.rb +0 -121
  38. data/spec/ollama/documents/cache/sqlite_cache_spec.rb +0 -141
  39. data/spec/ollama/documents/splitters/character_spec.rb +0 -110
  40. data/spec/ollama/documents/splitters/semantic_spec.rb +0 -56
  41. data/spec/ollama/documents_spec.rb +0 -162
  42. data/spec/ollama/utils/cache_fetcher_spec.rb +0 -43
  43. data/spec/ollama/utils/colorize_texts_spec.rb +0 -13
  44. data/spec/ollama/utils/fetcher_spec.rb +0 -137
  45. data/spec/ollama/utils/file_argument_spec.rb +0 -17
  46. data/spec/ollama/utils/tags_spec.rb +0 -53
data/bin/ollama_chat DELETED
@@ -1,1248 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'ollama'
4
- include Ollama
5
- include Tins::GO
6
- require 'term/ansicolor'
7
- include Term::ANSIColor
8
- require 'reline'
9
- require 'reverse_markdown'
10
- require 'complex_config'
11
- require 'fileutils'
12
- require 'uri'
13
- require 'nokogiri'
14
- require 'rss'
15
- require 'pdf/reader'
16
- require 'csv'
17
- require 'xdg'
18
-
19
- class OllamaChatConfig
20
- include ComplexConfig
21
- include FileUtils
22
-
23
- DEFAULT_CONFIG = <<~EOT
24
- ---
25
- url: <%= ENV['OLLAMA_URL'] || 'http://%s' % ENV.fetch('OLLAMA_HOST') %>
26
- proxy: null # http://localhost:8080
27
- model:
28
- name: <%= ENV.fetch('OLLAMA_CHAT_MODEL', 'llama3.1') %>
29
- options:
30
- num_ctx: 8192
31
- location:
32
- enabled: false
33
- name: Berlin
34
- decimal_degrees: [ 52.514127, 13.475211 ]
35
- units: SI (International System of Units) # or USCS (United States Customary System)
36
- prompts:
37
- embed: "This source was now embedded: %{source}"
38
- summarize: |
39
- Generate an abstract summary of the content in this document using
40
- %{words} words:
41
-
42
- %{source_content}
43
- web: |
44
- Answer the the query %{query} using these sources and summaries:
45
-
46
- %{results}
47
- system_prompts:
48
- default: <%= ENV.fetch('OLLAMA_CHAT_SYSTEM', 'null') %>
49
- voice:
50
- enabled: false
51
- default: Samantha
52
- list: <%= `say -v ? 2>/dev/null`.lines.map { _1[/^(.+?)\s+[a-z]{2}_[a-zA-Z0-9]{2,}/, 1] }.uniq.sort.to_s.force_encoding('ASCII-8BIT') %>
53
- markdown: true
54
- stream: true
55
- document_policy: importing
56
- embedding:
57
- enabled: true
58
- model:
59
- name: mxbai-embed-large
60
- embedding_length: 1024
61
- options: {}
62
- # Retrieval prompt template:
63
- prompt: 'Represent this sentence for searching relevant passages: %s'
64
- batch_size: 10
65
- database_filename: null # ':memory:'
66
- collection: <%= ENV['OLLAMA_CHAT_COLLECTION'] %>
67
- found_texts_size: 4096
68
- found_texts_count: 10
69
- splitter:
70
- name: RecursiveCharacter
71
- chunk_size: 1024
72
- cache: Ollama::Documents::RedisBackedMemoryCache
73
- redis:
74
- documents:
75
- url: <%= ENV.fetch('REDIS_URL', 'null') %>
76
- expiring:
77
- url: <%= ENV.fetch('REDIS_EXPIRING_URL', 'null') %>
78
- ex: 86400
79
- debug: <%= ENV['OLLAMA_CHAT_DEBUG'].to_i == 1 ? true : false %>
80
- ssl_no_verify: []
81
- copy: pbcopy
82
- EOT
83
-
84
- def initialize(filename = nil)
85
- @filename = filename || default_path
86
- unless File.directory?(cache_dir_path)
87
- mkdir_p cache_dir_path.to_s
88
- end
89
- @config = Provider.config(@filename, '⚙️')
90
- retried = false
91
- rescue ConfigurationFileMissing
92
- if @filename == default_path && !retried
93
- retried = true
94
- mkdir_p config_dir_path.to_s
95
- File.secure_write(default_path, DEFAULT_CONFIG)
96
- retry
97
- else
98
- raise
99
- end
100
- end
101
-
102
- attr_reader :filename
103
-
104
- attr_reader :config
105
-
106
- def default_path
107
- config_dir_path + 'config.yml'
108
- end
109
-
110
- def config_dir_path
111
- XDG.new.config_home + 'ollama_chat'
112
- end
113
-
114
- def cache_dir_path
115
- XDG.new.cache_home + 'ollama_chat'
116
- end
117
-
118
- def database_path
119
- cache_dir_path + 'documents.db'
120
- end
121
- end
122
-
123
- class FollowChat
124
- include Handlers::Concern
125
- include Term::ANSIColor
126
-
127
- def initialize(messages:, markdown: false, voice: nil, output: $stdout)
128
- super(output:)
129
- @output.sync = true
130
- @markdown = markdown
131
- @say = voice ? Handlers::Say.new(voice:) : NOP
132
- @messages = messages
133
- @user = nil
134
- end
135
-
136
- def call(response)
137
- $config.debug and jj response
138
- if response&.message&.role == 'assistant'
139
- if @messages.last.role != 'assistant'
140
- @messages << Message.new(role: 'assistant', content: '')
141
- @user = message_type(@messages.last.images) + " " +
142
- bold { color(111) { 'assistant:' } }
143
- puts @user unless @markdown
144
- end
145
- content = response.message&.content
146
- @messages.last.content << content
147
- if @markdown and content = @messages.last.content.full?
148
- markdown_content = Kramdown::ANSI.parse(content)
149
- @output.print clear_screen, move_home, @user, ?\n, markdown_content
150
- else
151
- @output.print content
152
- end
153
- @say.call(response)
154
- end
155
- if response.done
156
- @output.puts "", eval_stats(response)
157
- end
158
- self
159
- end
160
-
161
- def eval_stats(response)
162
- eval_duration = response.eval_duration / 1e9
163
- prompt_eval_duration = response.prompt_eval_duration / 1e9
164
- stats_text = {
165
- eval_duration: Tins::Duration.new(eval_duration),
166
- eval_count: response.eval_count.to_i,
167
- eval_rate: bold { "%.2f c/s" % (response.eval_count.to_i / eval_duration) } + color(111),
168
- prompt_eval_duration: Tins::Duration.new(prompt_eval_duration),
169
- prompt_eval_count: response.prompt_eval_count.to_i,
170
- prompt_eval_rate: bold { "%.2f c/s" % (response.prompt_eval_count.to_i / prompt_eval_duration) } + color(111),
171
- total_duration: Tins::Duration.new(response.total_duration / 1e9),
172
- load_duration: Tins::Duration.new(response.load_duration / 1e9),
173
- }.map { _1 * '=' } * ' '
174
- '📊 ' + color(111) {
175
- Kramdown::ANSI::Width.wrap(stats_text, percentage: 90).gsub(/(?<!\A)^/, ' ')
176
- }
177
- end
178
- end
179
-
180
- module Switches
181
- module CheckSwitch
182
- extend Tins::Concern
183
-
184
- included do
185
- alias_method :on?, :value
186
- end
187
-
188
- def off?
189
- !on?
190
- end
191
-
192
- def show
193
- puts @msg[value]
194
- end
195
- end
196
-
197
- class Switch
198
- def initialize(name, msg:, config: $config)
199
- @value = [ false, true ].include?(config) ? config : !!config.send("#{name}?")
200
- @msg = msg
201
- end
202
-
203
- attr_reader :value
204
-
205
- def set(value, show: false)
206
- @value = !!value
207
- show && self.show
208
- end
209
-
210
- def toggle(show: true)
211
- @value = !@value
212
- show && self.show
213
- end
214
-
215
- include CheckSwitch
216
- end
217
-
218
- class CombinedSwitch
219
- def initialize(value:, msg:)
220
- @value = value
221
- @msg = msg
222
- end
223
-
224
- def value
225
- @value.()
226
- end
227
-
228
- include CheckSwitch
229
- end
230
-
231
- def setup_switches
232
- $markdown = Switch.new(
233
- :markdown,
234
- msg: {
235
- true => "Using #{italic{'ANSI'}} markdown to output content.",
236
- false => "Using plaintext for outputting content.",
237
- }
238
- )
239
-
240
- $stream = Switch.new(
241
- :stream,
242
- msg: {
243
- true => "Streaming enabled.",
244
- false => "Streaming disabled.",
245
- }
246
- )
247
-
248
- $voice = Switch.new(
249
- :stream,
250
- msg: {
251
- true => "Voice output enabled.",
252
- false => "Voice output disabled.",
253
- },
254
- config: $config.voice
255
- )
256
-
257
- $embedding_enabled = Switch.new(
258
- :embedding_enabled,
259
- msg: {
260
- true => "Embedding enabled.",
261
- false => "Embedding disabled.",
262
- }
263
- )
264
-
265
- $embedding_paused = Switch.new(
266
- :embedding_paused,
267
- msg: {
268
- true => "Embedding paused.",
269
- false => "Embedding resumed.",
270
- }
271
- )
272
-
273
- $embedding = CombinedSwitch.new(
274
- value: -> { $embedding_enabled.on? && $embedding_paused.off? },
275
- msg: {
276
- true => "Embedding is currently performed.",
277
- false => "Embedding is currently not performed.",
278
- }
279
- )
280
-
281
- $location = Switch.new(
282
- :location,
283
- msg: {
284
- true => "Location and localtime enabled.",
285
- false => "Location and localtime disabled.",
286
- },
287
- config: $config.location.enabled
288
- )
289
- end
290
- end
291
- include Switches
292
-
293
- def pull_model_unless_present(model, options, retried = false)
294
- ollama.show(name: model) { |response|
295
- puts "Model #{bold{model}} with architecture "\
296
- "#{response.model_info['general.architecture']} found."
297
- if system = response.system
298
- puts "Configured model system prompt is:\n#{italic { system }}"
299
- return system
300
- else
301
- return
302
- end
303
- }
304
- rescue Errors::NotFoundError
305
- puts "Model #{bold{model}} not found locally, attempting to pull it from remote now…"
306
- ollama.pull(name: model)
307
- if retried
308
- exit 1
309
- else
310
- retried = true
311
- retry
312
- end
313
- rescue Errors::Error => e
314
- warn "Caught #{e.class} while pulling model: #{e} => Exiting."
315
- exit 1
316
- end
317
-
318
- def search_web(query, n = nil)
319
- if l = at_location
320
- query += " #{at_location}"
321
- end
322
- n = n.to_i
323
- n < 1 and n = 1
324
- query = URI.encode_uri_component(query)
325
- url = "https://www.duckduckgo.com/html/?q=#{query}"
326
- Utils::Fetcher.get(url, debug: $config.debug) do |tmp|
327
- result = []
328
- doc = Nokogiri::HTML(tmp)
329
- doc.css('.results_links').each do |link|
330
- if n > 0
331
- url = link.css('.result__a').first&.[]('href')
332
- url.sub!(%r(\A(//duckduckgo\.com)?/l/\?uddg=), '')
333
- url.sub!(%r(&rut=.*), '')
334
- url = URI.decode_uri_component(url)
335
- url = URI.parse(url)
336
- url.host =~ /duckduckgo\.com/ and next
337
- $links.add(url.to_s)
338
- result << url
339
- n -= 1
340
- else
341
- break
342
- end
343
- end
344
- result
345
- end
346
- end
347
-
348
- def load_conversation(filename)
349
- unless File.exist?(filename)
350
- puts "File #{filename} doesn't exist. Choose another filename."
351
- return
352
- end
353
- File.open(filename, 'r') do |output|
354
- return JSON(output.read).map { Message.from_hash(_1) }
355
- end
356
- end
357
-
358
- def save_conversation(filename, messages)
359
- if File.exist?(filename)
360
- puts "File #{filename} already exists. Choose another filename."
361
- return
362
- end
363
- File.open(filename, 'w') do |output|
364
- output.puts JSON(messages)
365
- end
366
- end
367
-
368
- def message_type(images)
369
- images.present? ? ?📸 : ?📨
370
- end
371
-
372
- def list_conversation(messages, last = nil)
373
- last = (last || messages.size).clamp(0, messages.size)
374
- messages[-last..-1].to_a.each do |m|
375
- role_color = case m.role
376
- when 'user' then 172
377
- when 'assistant' then 111
378
- when 'system' then 213
379
- else 210
380
- end
381
- content = m.content.full? { $markdown.on? ? Kramdown::ANSI.parse(_1) : _1 }
382
- message_text = message_type(m.images) + " "
383
- message_text += bold { color(role_color) { m.role } }
384
- message_text += ":\n#{content}"
385
- m.images.full? { |images|
386
- message_text += "\nImages: " + italic { images.map(&:path) * ', ' }
387
- }
388
- puts message_text
389
- end
390
- end
391
-
392
- module SourceParsing
393
- def parse_source(source_io)
394
- case source_io&.content_type
395
- when 'text/html'
396
- reverse_markdown(source_io.read)
397
- when 'text/xml'
398
- if source_io.readline =~ %r(^\s*<rss\s)
399
- source_io.rewind
400
- return parse_rss(source_io)
401
- end
402
- source_io.rewind
403
- source_io.read
404
- when 'text/csv'
405
- parse_csv(source_io)
406
- when 'application/rss+xml'
407
- parse_rss(source_io)
408
- when 'application/atom+xml'
409
- parse_atom(source_io)
410
- when 'application/postscript'
411
- ps_read(source_io)
412
- when 'application/pdf'
413
- pdf_read(source_io)
414
- when %r(\Aapplication/(json|ld\+json|x-ruby|x-perl|x-gawk|x-python|x-javascript|x-c?sh|x-dosexec|x-shellscript|x-tex|x-latex|x-lyx|x-bibtex)), %r(\Atext/), nil
415
- source_io.read
416
- else
417
- STDERR.puts "Cannot embed #{source_io&.content_type} document."
418
- return
419
- end
420
- end
421
-
422
- def parse_csv(source_io)
423
- result = +''
424
- CSV.table(File.new(source_io), col_sep: ?,).each do |row|
425
- next if row.fields.select(&:present?).size == 0
426
- result << row.map { |pair|
427
- pair.compact.map { _1.to_s.strip } * ': ' if pair.last.present?
428
- }.select(&:present?).map { _1.prepend(' ') } * ?\n
429
- result << "\n\n"
430
- end
431
- result
432
- end
433
-
434
- def parse_rss(source_io)
435
- feed = RSS::Parser.parse(source_io, false, false)
436
- title = <<~EOT
437
- # #{feed&.channel&.title}
438
-
439
- EOT
440
- feed.items.inject(title) do |text, item|
441
- text << <<~EOT
442
- ## [#{item&.title}](#{item&.link})
443
-
444
- updated on #{item&.pubDate}
445
-
446
- #{reverse_markdown(item&.description)}
447
-
448
- EOT
449
- end
450
- end
451
-
452
- def parse_atom(source_io)
453
- feed = RSS::Parser.parse(source_io, false, false)
454
- title = <<~EOT
455
- # #{feed.title.content}
456
-
457
- EOT
458
- feed.items.inject(title) do |text, item|
459
- text << <<~EOT
460
- ## [#{item&.title&.content}](#{item&.link&.href})
461
-
462
- updated on #{item&.updated&.content}
463
-
464
- #{reverse_markdown(item&.content&.content)}
465
-
466
- EOT
467
- end
468
- end
469
-
470
- def pdf_read(io)
471
- reader = PDF::Reader.new(io)
472
- reader.pages.inject(+'') { |result, page| result << page.text }
473
- end
474
-
475
- def ps_read(io)
476
- gs = `which gs`.chomp
477
- if gs.present?
478
- Tempfile.create do |tmp|
479
- IO.popen("#{gs} -q -sDEVICE=pdfwrite -sOutputFile=#{tmp.path} -", 'wb') do |gs_io|
480
- until io.eof?
481
- buffer = io.read(1 << 17)
482
- IO.select(nil, [ gs_io ], nil)
483
- gs_io.write buffer
484
- end
485
- gs_io.close
486
- File.open(tmp.path, 'rb') do |pdf|
487
- pdf_read(pdf)
488
- end
489
- end
490
- end
491
- else
492
- STDERR.puts "Cannot convert #{io&.content_type} whith ghostscript, gs not in path."
493
- end
494
- end
495
-
496
- def reverse_markdown(html)
497
- ReverseMarkdown.convert(
498
- html,
499
- unknown_tags: :bypass,
500
- github_flavored: true,
501
- tag_border: ''
502
- )
503
- end
504
- end
505
- include SourceParsing
506
-
507
- def http_options(url)
508
- options = {}
509
- if ssl_no_verify = $config.ssl_no_verify?
510
- hostname = URI.parse(url).hostname
511
- options |= { ssl_verify_peer: !ssl_no_verify.include?(hostname) }
512
- end
513
- if proxy = $config.proxy?
514
- options |= { proxy: }
515
- end
516
- options
517
- end
518
-
519
- def fetch_source(source, &block)
520
- case source
521
- when %r(\A!(.*))
522
- command = $1
523
- Utils::Fetcher.execute(command) do |tmp|
524
- block.(tmp)
525
- end
526
- when %r(\Ahttps?://\S+)
527
- $links.add(source.to_s)
528
- Utils::Fetcher.get(
529
- source,
530
- cache: $cache,
531
- debug: $config.debug,
532
- http_options: http_options(Utils::Fetcher.normalize_url(source))
533
- ) do |tmp|
534
- block.(tmp)
535
- end
536
- when %r(\Afile://(/\S*)|\A((?:\.\.|[~.]?)/\S*))
537
- filename = $~.captures.compact.first
538
- filename = File.expand_path(filename)
539
- Utils::Fetcher.read(filename) do |tmp|
540
- block.(tmp)
541
- end
542
- else
543
- raise "invalid source"
544
- end
545
- rescue => e
546
- STDERR.puts "Cannot fetch source #{source.to_s.inspect}: #{e.class} #{e}\n#{e.backtrace * ?\n}"
547
- end
548
-
549
- def add_image(images, source_io, source)
550
- STDERR.puts "Adding #{source_io&.content_type} image #{source.to_s.inspect}."
551
- image = Image.for_io(source_io, path: source.to_s)
552
- (images << image).uniq!
553
- end
554
-
555
- def import_source(source_io, source)
556
- source = source.to_s
557
- puts "Importing #{italic { source_io&.content_type }} document #{source.to_s.inspect} now."
558
- source_content = parse_source(source_io)
559
- "Imported #{source.inspect}:\n#{source_content}\n\n"
560
- end
561
-
562
- def import(source)
563
- fetch_source(source) do |source_io|
564
- content = import_source(source_io, source) or return
565
- source_io.rewind
566
- content
567
- end
568
- end
569
-
570
- def summarize_source(source_io, source, words: nil)
571
- puts "Summarizing #{italic { source_io&.content_type }} document #{source.to_s.inspect} now."
572
- words = words.to_i
573
- words < 1 and words = 100
574
- source_content = parse_source(source_io)
575
- source_content.present? or return
576
- $config.prompts.summarize % { source_content:, words: }
577
- end
578
-
579
- def summarize(source, words: nil)
580
- fetch_source(source) do |source_io|
581
- content = summarize_source(source_io, source, words:) or return
582
- source_io.rewind
583
- content
584
- end
585
- end
586
-
587
- def embed_source(source_io, source, count: nil)
588
- $embedding.on? or return parse_source(source_io)
589
- m = "Embedding #{italic { source_io&.content_type }} document #{source.to_s.inspect}."
590
- if count
591
- puts '%u. %s' % [ count, m ]
592
- else
593
- puts m
594
- end
595
- text = parse_source(source_io) or return
596
- text.downcase!
597
- splitter_config = $config.embedding.splitter
598
- inputs = nil
599
- case splitter_config.name
600
- when 'Character'
601
- splitter = Documents::Splitters::Character.new(
602
- chunk_size: splitter_config.chunk_size,
603
- )
604
- inputs = splitter.split(text)
605
- when 'RecursiveCharacter'
606
- splitter = Documents::Splitters::RecursiveCharacter.new(
607
- chunk_size: splitter_config.chunk_size,
608
- )
609
- inputs = splitter.split(text)
610
- when 'Semantic'
611
- splitter = Documents::Splitters::Semantic.new(
612
- ollama:, model: $config.embedding.model.name,
613
- chunk_size: splitter_config.chunk_size,
614
- )
615
- inputs = splitter.split(
616
- text,
617
- breakpoint: splitter_config.breakpoint.to_sym,
618
- percentage: splitter_config.percentage?,
619
- percentile: splitter_config.percentile?,
620
- )
621
- end
622
- inputs or return
623
- source = source.to_s
624
- if source.start_with?(?!)
625
- source = Kramdown::ANSI::Width.truncate(
626
- source[1..-1].gsub(/\W+/, ?_),
627
- length: 10
628
- )
629
- end
630
- $documents.add(inputs, source:, batch_size: $config.embedding.batch_size?)
631
- end
632
-
633
- def embed(source)
634
- if $embedding.on?
635
- puts "Now embedding #{source.to_s.inspect}."
636
- fetch_source(source) do |source_io|
637
- content = parse_source(source_io)
638
- content.present? or return
639
- source_io.rewind
640
- embed_source(source_io, source)
641
- end
642
- $config.prompts.embed % { source: }
643
- else
644
- puts "Embedding is off, so I will just give a small summary of this source."
645
- summarize(source)
646
- end
647
- end
648
-
649
- def parse_content(content, images)
650
- images.clear
651
- tags = Utils::Tags.new
652
-
653
- contents = [ content ]
654
- content.scan(%r((https?://\S+)|(#\S+)|(?:file://)?(\S*\/\S+))).each do |url, tag, file|
655
- case
656
- when tag
657
- tags.add(tag)
658
- next
659
- when file
660
- file = file.sub(/#.*/, '')
661
- file =~ %r(\A[~./]) or file.prepend('./')
662
- File.exist?(file) or next
663
- source = file
664
- when url
665
- $links.add(url.to_s)
666
- source = url
667
- end
668
- fetch_source(source) do |source_io|
669
- case source_io&.content_type&.media_type
670
- when 'image'
671
- add_image(images, source_io, source)
672
- when 'text', 'application', nil
673
- case $document_policy
674
- when 'ignoring'
675
- nil
676
- when 'importing'
677
- contents << import_source(source_io, source)
678
- when 'embedding'
679
- embed_source(source_io, source)
680
- when 'summarizing'
681
- contents << summarize_source(source_io, source)
682
- end
683
- else
684
- STDERR.puts(
685
- "Cannot fetch #{source.to_s.inspect} with content type "\
686
- "#{source_io&.content_type.inspect}"
687
- )
688
- end
689
- end
690
- end
691
- new_content = contents.select { _1.present? rescue nil }.compact * "\n\n"
692
- return new_content, (tags unless tags.empty?)
693
- end
694
-
695
- def choose_model(cli_model, current_model)
696
- models = ollama.tags.models.map(&:name).sort
697
- model = if cli_model == ''
698
- Utils::Chooser.choose(models) || current_model
699
- else
700
- cli_model || current_model
701
- end
702
- ensure
703
- puts green { "Connecting to #{model}@#{ollama.base_url} now…" }
704
- end
705
-
706
- def ask?(prompt:)
707
- print prompt
708
- STDIN.gets.chomp
709
- end
710
-
711
- def choose_collection(current_collection)
712
- collections = [ current_collection ] + $documents.collections
713
- collections = collections.compact.map(&:to_s).uniq.sort
714
- collections.unshift('[EXIT]').unshift('[NEW]')
715
- collection = Utils::Chooser.choose(collections) || current_collection
716
- case collection
717
- when '[NEW]'
718
- $documents.collection = ask?(prompt: "Enter name of the new collection: ")
719
- when nil, '[EXIT]'
720
- puts "Exiting chooser."
721
- when /./
722
- $documents.collection = collection
723
- end
724
- ensure
725
- puts "Using collection #{bold{$documents.collection}}."
726
- info
727
- end
728
-
729
- def choose_document_policy
730
- policies = %w[ importing embedding summarizing ignoring ].sort
731
- current = if policies.index($document_policy)
732
- $document_policy
733
- elsif policies.index($config.document_policy)
734
- $config.document_policy
735
- else
736
- policies.first
737
- end
738
- policies.unshift('[EXIT]')
739
- policy = Utils::Chooser.choose(policies)
740
- case policy
741
- when nil, '[EXIT]'
742
- puts "Exiting chooser."
743
- policy = current
744
- end
745
- $document_policy = policy
746
- ensure
747
- puts "Using document policy #{bold{$document_policy}}."
748
- info
749
- end
750
-
751
- def collection_stats
752
- puts <<~EOT
753
- Current Collection
754
- Name: #{bold{$documents.collection}}
755
- #Embeddings: #{$documents.size}
756
- #Tags: #{$documents.tags.size}
757
- Tags: #{$documents.tags}
758
- EOT
759
- end
760
-
761
- def configure_cache
762
- if $opts[?M]
763
- Documents::MemoryCache
764
- else
765
- Object.const_get($config.cache)
766
- end
767
- rescue => e
768
- STDERR.puts "Caught #{e.class}: #{e} => Falling back to MemoryCache."
769
- Documents::MemoryCache
770
- end
771
-
772
- def show_system_prompt
773
- puts <<~EOT
774
- Configured system prompt is:
775
- #{Kramdown::ANSI.parse($system.to_s).gsub(/\n+\z/, '').full? || 'n/a'}
776
- EOT
777
- end
778
-
779
- def at_location
780
- if $location.on?
781
- location_name = $config.location.name
782
- location_decimal_degrees = $config.location.decimal_degrees * ', '
783
- localtime = Time.now.iso8601
784
- units = $config.location.units
785
- $config.prompts.location % {
786
- location_name:, location_decimal_degrees:, localtime:, units:,
787
- }
788
- end.to_s
789
- end
790
-
791
- def set_system_prompt(messages, system)
792
- $system = system
793
- messages.clear
794
- messages << Message.new(role: 'system', content: system)
795
- end
796
-
797
- def change_system_prompt(messages, default, system: nil)
798
- selector = Regexp.new(system.to_s[1..-1].to_s)
799
- prompts = $config.system_prompts.attribute_names.compact.grep(selector)
800
- chosen = Utils::Chooser.choose(prompts, return_immediately: true)
801
- system = if chosen
802
- $config.system_prompts.send(chosen)
803
- else
804
- default
805
- end
806
- set_system_prompt(messages, system)
807
- end
808
-
809
- def change_voice
810
- chosen = Utils::Chooser.choose($config.voice.list)
811
- $current_voice = chosen.full? || $config.voice.default
812
- end
813
-
814
- def info
815
- puts "Current model is #{bold{$model}}."
816
- if $model_options.present?
817
- puts " Options: #{JSON.pretty_generate($model_options).gsub(/(?<!\A)^/, ' ')}"
818
- end
819
- $embedding.show
820
- if $embedding.on?
821
- puts "Embedding model is #{bold{$embedding_model}}"
822
- if $embedding_model_options.present?
823
- puts " Options: #{JSON.pretty_generate($embedding_model_options).gsub(/(?<!\A)^/, ' ')}"
824
- end
825
- puts "Text splitter is #{bold{$config.embedding.splitter.name}}."
826
- collection_stats
827
- end
828
- puts "Documents database cache is #{$documents.nil? ? 'n/a' : bold{$documents.cache.class}}"
829
- $markdown.show
830
- $stream.show
831
- $location.show
832
- puts "Document policy for references in user text: #{bold{$document_policy}}"
833
- if $voice.on?
834
- puts "Using voice #{bold{$current_voice}} to speak."
835
- end
836
- show_system_prompt
837
- end
838
-
839
- def clear_messages(messages)
840
- messages.delete_if { _1.role != 'system' }
841
- end
842
-
843
- def copy_to_clipboard(messages)
844
- if message = messages.last and message.role == 'assistant'
845
- copy = `which #{$config.copy}`.chomp
846
- if copy.present?
847
- IO.popen(copy, 'w') do |clipboard|
848
- clipboard.write(message.content)
849
- end
850
- STDOUT.puts "The last response has been copied to the system clipboard."
851
- else
852
- STDERR.puts "#{$config.copy.inspect} command not found in system's path!"
853
- end
854
- else
855
- STDERR.puts "No response available to copy to the system clipboard."
856
- end
857
- end
858
-
859
- def display_chat_help
860
- puts <<~EOT
861
- /copy to copy last response to clipboard
862
- /paste to paste content
863
- /markdown toggle markdown output
864
- /stream toggle stream output
865
- /location toggle location submission
866
- /voice( change) toggle voice output or change the voice
867
- /list [n] list the last n / all conversation exchanges
868
- /clear clear the whole conversation
869
- /clobber clear the conversation and collection
870
- /pop [n] pop the last n exchanges, defaults to 1
871
- /model change the model
872
- /system change system prompt (clears conversation)
873
- /regenerate the last answer message
874
- /collection( clear|change) change (default) collection or clear
875
- /info show information for current session
876
- /document_policy pick a scan policy for document references
877
- /import source import the source's content
878
- /summarize [n] source summarize the source's content in n words
879
- /embedding toggle embedding paused or not
880
- /embed source embed the source's content
881
- /web [n] query query web search & return n or 1 results
882
- /links( clear) display (or clear) links used in the chat
883
- /save filename store conversation messages
884
- /load filename load conversation messages
885
- /quit to quit
886
- /help to view this help
887
- EOT
888
- end
889
-
890
- def usage
891
- puts <<~EOT
892
- Usage: #{File.basename($0)} [OPTIONS]
893
-
894
- -f CONFIG config file to read
895
- -u URL the ollama base url, OLLAMA_URL
896
- -m MODEL the ollama model to chat with, OLLAMA_CHAT_MODEL
897
- -s SYSTEM the system prompt to use as a file, OLLAMA_CHAT_SYSTEM
898
- -c CHAT a saved chat conversation to load
899
- -C COLLECTION name of the collection used in this conversation
900
- -D DOCUMENT load document and add to embeddings collection (multiple)
901
- -M use (empty) MemoryCache for this chat session
902
- -E disable embeddings for this chat session
903
- -V display the current version number and quit
904
- -h this help
905
-
906
- EOT
907
- exit 0
908
- end
909
-
910
- def version
911
- puts "%s %s" % [ File.basename($0), Ollama::VERSION ]
912
- exit 0
913
- end
914
-
915
- def ollama
916
- $ollama
917
- end
918
-
919
- $opts = go 'f:u:m:s:c:C:D:MEVh'
920
-
921
- $ollama_chat_config = OllamaChatConfig.new($opts[?f])
922
- $config = $ollama_chat_config.config
923
-
924
- setup_switches
925
-
926
- $opts[?h] and usage
927
- $opts[?V] and version
928
-
929
- base_url = $opts[?u] || $config.url
930
- user_agent = [ File.basename($0), Ollama::VERSION ] * ?/
931
- $ollama = Client.new(base_url:, debug: $config.debug, user_agent:)
932
-
933
- $document_policy = $config.document_policy
934
- $model = choose_model($opts[?m], $config.model.name)
935
- $model_options = Options[$config.model.options]
936
- model_system = pull_model_unless_present($model, $model_options)
937
- messages = []
938
- $embedding_enabled.set($config.embedding.enabled && !$opts[?E])
939
-
940
- if $opts[?c]
941
- messages.concat load_conversation($opts[?c])
942
- else
943
- default = $config.system_prompts.default? || model_system
944
- if $opts[?s] =~ /\A\?/
945
- change_system_prompt(messages, default, system: $opts[?s])
946
- else
947
- system = Utils::FileArgument.get_file_argument($opts[?s], default:)
948
- system.present? and set_system_prompt(messages, system)
949
- end
950
- end
951
-
952
- if $embedding.on?
953
- $embedding_model = $config.embedding.model.name
954
- $embedding_model_options = Options[$config.embedding.model.options]
955
- pull_model_unless_present($embedding_model, $embedding_model_options)
956
- collection = $opts[?C] || $config.embedding.collection
957
- $documents = Documents.new(
958
- ollama:,
959
- model: $embedding_model,
960
- model_options: $config.embedding.model.options,
961
- database_filename: $config.embedding.database_filename || $ollama_chat_config.database_path,
962
- collection: ,
963
- cache: configure_cache,
964
- redis_url: $config.redis.documents.url?,
965
- debug: $config.debug
966
- )
967
-
968
- document_list = $opts[?D].to_a
969
- if document_list.any?(&:empty?)
970
- puts "Clearing collection #{bold{collection}}."
971
- $documents.clear
972
- document_list.reject!(&:empty?)
973
- end
974
- unless document_list.empty?
975
- document_list.map! do |doc|
976
- if doc =~ %r(\Ahttps?://)
977
- doc
978
- else
979
- File.expand_path(doc)
980
- end
981
- end
982
- puts "Collection #{bold{collection}}: Adding #{document_list.size} documents…"
983
- count = 1
984
- document_list.each_slice(25) do |docs|
985
- docs.each do |doc|
986
- fetch_source(doc) do |doc_io|
987
- embed_source(doc_io, doc, count:)
988
- end
989
- count += 1
990
- end
991
- end
992
- end
993
- else
994
- $documents = Tins::NULL
995
- end
996
-
997
# Optional expiring cache: only created when a Redis URL is configured for it.
expiring_url = $config.redis.expiring.url?
if expiring_url
  $cache = Documents::RedisCache.new(
    prefix: 'Expiring-',
    url:    expiring_url,
    ex:     $config.redis.expiring.ex,
  )
end

$current_voice = $config.voice.default

# Start-up banner: the effective configuration, the info summary and a hint.
puts "Configuration read from #{$ollama_chat_config.filename.inspect} is:", $config
info
puts "\nType /help to display the chat help."

# Session state read by the chat loop below.
$links = Set.new
images = []
1013
# Interactive chat loop.
#
# Each iteration reads one line from the terminal and then either
#   * executes a slash command and starts over (`next`), or
#   * falls through with `content` holding the text for the model.
# On fall-through the text is optionally augmented with matching document
# chunks and the current location, appended to `messages` as a user message
# and sent to the model. Pressing Ctrl-C anywhere inside an iteration is
# caught at the bottom and only prints a hint; the loop keeps running.
loop do
  parse_content = true
  # Start with an empty result so the source listing after the chat call can
  # never dereference nil when the retrieval step below was skipped.
  records       = []
  input_prompt  = bold { color(172) { message_type(images) + " user" } } + bold { "> " }
  content       = Reline.readline(input_prompt, true)&.chomp

  case content
  when %r(^/copy$)
    copy_to_clipboard(messages)
    next
  when %r(^/paste$)
    puts bold { "Paste your content and then press C-d!" }
    content = STDIN.read
  when %r(^/markdown$)
    $markdown.toggle
    next
  when %r(^/stream$)
    $stream.toggle
    next
  when %r(^/location$)
    $location.toggle
    next
  when %r(^/voice(?:\s+(change))?$)
    if $1 == 'change'
      change_voice
    else
      $voice.toggle
    end
    next
  when %r(^/list(?:\s+(\d*))?$)
    # Two messages per exchange; nil (no argument given) is passed through.
    last = $1 && 2 * $1.to_i
    list_conversation(messages, last)
    next
  when %r(^/clear$)
    clear_messages(messages)
    puts "Cleared messages."
    next
  when %r(^/clobber$)
    if ask?(prompt: 'Are you sure to clear messages and collection? (y/n) ') =~ /\Ay/i
      clear_messages(messages)
      $documents.clear
      puts "Cleared messages and collection #{bold{$documents.collection}}."
    else
      puts 'Cancelled.'
    end
    next
  when %r(^/pop(?:\s+(\d*))?$)
    if messages.size > 1
      # A missing or zero argument pops a single exchange.
      n = $1.to_i.clamp(1, Float::INFINITY)
      r = messages.pop(2 * n)
      m = r.size / 2
      puts "Popped the last #{m} exchanges."
    else
      puts "No more exchanges you can pop."
    end
    list_conversation(messages, 2)
    next
  when %r(^/model$)
    $model = choose_model('', $model)
    next
  when %r(^/system$)
    change_system_prompt(messages, $system)
    info
    next
  when %r(^/regenerate$)
    previous = messages[-2]&.content
    unless previous
      puts "Not enough messages in this conversation."
      next
    end
    # Remove the retrieval chunks appended on the previous run. The appended
    # text spans several lines, so the pattern needs /m for `.` to cross the
    # newlines; without it nothing was ever stripped and the chunks were sent
    # (and appended) a second time.
    content = previous.sub(/\nConsider these chunks for your answer.*\z/m, '')
    messages.pop(2)
    parse_content = false
  when %r(^/collection(?:\s+(clear|change))?$)
    case $1 || 'change'
    when 'clear'
      loop do
        tags = $documents.tags.add('[EXIT]').add('[ALL]')
        tag  = Utils::Chooser.choose(tags, prompt: 'Clear? %s')
        case tag
        when nil, '[EXIT]'
          puts "Exiting chooser."
          break
        when '[ALL]'
          if ask?(prompt: 'Are you sure? (y/n) ') =~ /\Ay/i
            $documents.clear
            puts "Cleared collection #{bold{$documents.collection}}."
            break
          else
            puts 'Cancelled.'
            sleep 3
          end
        when /./
          $documents.clear(tags: [ tag ])
          puts "Cleared tag #{tag} from collection #{bold{$documents.collection}}."
          sleep 3
        end
      end
    when 'change'
      choose_collection($documents.collection)
    end
    next
  when %r(^/info$)
    info
    next
  when %r(^/document_policy$)
    choose_document_policy
    next
  when %r(^/import\s+(.+))
    parse_content = false
    content = import($1) or next
  when %r(^/summarize\s+(?:(\d+)\s+)?(.+))
    parse_content = false
    content = summarize($2, words: $1) or next
  when %r(^/embedding$)
    $embedding_paused.toggle(show: false)
    $embedding.show
    next
  when %r(^/embed\s+(.+))
    parse_content = false
    content = embed($1) or next
  when %r(^/web\s+(?:(\d+)\s+)?(.+))
    parse_content = false
    # Capture the match groups once instead of re-reading $1/$2 after the
    # calls below.
    max_results, query = $1.to_i, $2
    urls = search_web(query, max_results)
    urls.each do |url|
      fetch_source(url) { |url_io| embed_source(url_io, url) }
    end
    urls_summarized = urls.map { summarize(_1) }
    results = urls.zip(urls_summarized).
      map { |u, s| "%s as \n:%s" % [ u, s ] } * "\n\n"
    content = $config.prompts.web % { query: query.inspect, results: }
  when %r(^/save\s+(.+)$)
    save_conversation($1, messages)
    puts "Saved conversation to #$1."
    next
  when %r(^/links(?:\s+(clear))?$)
    case $1
    when 'clear'
      loop do
        links = $links.dup.add('[EXIT]').add('[ALL]')
        link  = Utils::Chooser.choose(links, prompt: 'Clear? %s')
        case link
        when nil, '[EXIT]'
          puts "Exiting chooser."
          break
        when '[ALL]'
          if ask?(prompt: 'Are you sure? (y/n) ') =~ /\Ay/i
            $links.clear
            puts "Cleared all links in list."
            break
          else
            puts 'Cancelled.'
            sleep 3
          end
        when /./
          $links.delete(link)
          puts "Cleared link from links in list."
          sleep 3
        end
      end
    when nil
      if $links.empty?
        puts "List is empty."
      else
        # Right-align the running number to the width of the largest index.
        # The width used to be computed and thrown away, leaving an empty
        # interpolation in the format string.
        width   = $links.size.to_s.size
        format  = "%#{width}d. %s"
        connect = -> link { hyperlink(link) { link } }
        puts $links.each_with_index.map { |x, i| format % [ i + 1, connect.(x) ] }
      end
    end
    next
  when %r(^/load\s+(.+)$)
    messages = load_conversation($1)
    puts "Loaded conversation from #$1."
    next
  when %r(^/quit$)
    puts "Goodbye."
    exit 0
  when %r(^/)
    # Any other slash command: show the help.
    display_chat_help
    next
  when ''
    puts "Type /quit to quit."
    next
  when nil
    # Reline returned nil, i.e. end of input.
    puts "Goodbye."
    exit 0
  end

  # Commands that already produced the final text skip content parsing and
  # use an empty tag set. The call with arguments resolves to the
  # parse_content method, not to the local flag of the same name.
  content, tags = if parse_content
                    parse_content(content, images)
                  else
                    [ content, Utils::Tags.new ]
                  end

  # Retrieval: look up matching chunks and append them to the user text.
  if $embedding.on? && content
    records = $documents.find_where(
      content.downcase,
      tags:,
      prompt:     $config.embedding.model.prompt?,
      text_size:  $config.embedding.found_texts_size?,
      text_count: $config.embedding.found_texts_count?,
    )
    unless records.empty?
      content += "\nConsider these chunks for your answer:\n\n"\
        "#{records.map { [ _1.text, _1.tags_set ] * ?\n }.join("\n\n---\n\n")}"
    end
  end

  if location = at_location.full?
    content += " [#{location} – do not comment on this the time and location, "\
      "just consider it for eventual queries]"
  end

  messages << Message.new(role: 'user', content:, images: images.dup)
  images.clear
  handler = FollowChat.new(messages:, markdown: $markdown.on?, voice: ($current_voice if $voice.on?))
  ollama.chat(model: $model, messages:, options: $model_options, stream: $stream.on?, &handler)

  # After the answer, list the sources of the chunks that were supplied.
  if $embedding.on? && !records.empty?
    puts "", records.map { |record|
      link = if record.source =~ %r(\Ahttps?://)
               record.source
             else
               'file://%s' % File.expand_path(record.source)
             end
      [ link, record.tags.first ]
    }.uniq.map { |l, t| hyperlink(l, t) }.join(' ')
    $config.debug and jj messages
  end
rescue Interrupt
  puts "Type /quit to quit."
end