documentrix 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdd19b49af74153cf419d6ace3a2d05879c6ceb57d90e5651b777d18ca96f2b7
4
- data.tar.gz: 4936bad8197cb4243a0cef32154fc11aba073b41da1e58709767e5d4e58aac96
3
+ metadata.gz: c76bf8eb224b18de775f6c7652cde2dcf4409423f79b2006a7e2c50f158d2769
4
+ data.tar.gz: c59dac8defeea81cae51ada232c3778d8ba95a0cc6c3afc19595309cc33d5a83
5
5
  SHA512:
6
- metadata.gz: f334e64d609f86d32aa0524b535c2649e14f6e7a6793973fa231e1ae6d81148dd957e9095423494555591a36471b574c696f2e69882742110191e7e99b4254bf
7
- data.tar.gz: 0ef7210bcdb325d19ab5415d319a5ea45c3449a9a0a892b1eac2c34006ad55ef545b5b89d98d5a57ce5afe0f18cbc9c707adf6c21ddfb3d8e20061439c60cb53
6
+ metadata.gz: 27758319636b90ff0772ed866187bd2cef9673e203e79fc6ff72bd53ad2b43906029b011a738eb5e695f744e4c2cf733cef1ba46640488df91b23f314f3c6d80
7
+ data.tar.gz: 9c115acc0ec7e43eb7330f0422a69d4f0d8afea592947b994382d40d73f54502b143d43b939b924dcfdd053ea824847f044052aef89176f646ab9e1108c592e2
data/.utilsrc ADDED
@@ -0,0 +1,107 @@
1
+ # vim: set ft=ruby:
2
+
3
+ search do
4
+ prune_dirs /\A(\.svn|\.git|\.terraform|CVS|tmp|coverage|corpus|pkg|\.yardoc|doc)\z/
5
+ skip_files /(\A\.|\.sw[pon]\z|\.(log|fnm|jpg|jpeg|png|pdf|svg)\z|\A(tags|cscope\.out)\z|~\z)/i
6
+ end
7
+
8
+ discover do
9
+ prune_dirs /\A(\.svn|\.git|\.terraform|\.yardoc|CVS|tmp|coverage|corpus|pkg|\.yardoc|doc)\z/
10
+ skip_files /(\A\.|\.sw[pon]\z|\.log\z|~\z)/
11
+ index_expire_after 3_600
12
+ end
13
+
14
+ strip_spaces do
15
+ prune_dirs /\A(\..*|CVS|pkg|\.yardoc)\z/
16
+ skip_files /(\A\.|\.sw[pon]\z|\.log\z|~\z)/
17
+ end
18
+
19
+ probe do
20
+ test_framework :rspec
21
+ end
22
+
23
+ ssh_tunnel do
24
+ terminal_multiplexer :tmux
25
+ login_session "/home/#{ENV['USER']}"
26
+ end
27
+
28
+ classify do
29
+ shift_path_by_default 1
30
+ end
31
+
32
+ code_indexer do
33
+ verbose false
34
+
35
+ gems = %w[
36
+ all_images
37
+ amatch
38
+ base64
39
+ bigdecimal
40
+ complex_config
41
+ connection_pool
42
+ date
43
+ debug
44
+ diff-lcs
45
+ docile
46
+ documentrix
47
+ erb
48
+ excon
49
+ fileutils
50
+ gem_hadar
51
+ infobar
52
+ io-console
53
+ irb
54
+ json
55
+ kramdown
56
+ kramdown-ansi
57
+ kramdown-parser-gfm
58
+ logger
59
+ mize
60
+ more_math
61
+ net-http
62
+ nokogiri
63
+ numo-narray-alt
64
+ ollama-ruby
65
+ openssl
66
+ ostruct
67
+ pp
68
+ prettyprint
69
+ prism
70
+ psych
71
+ racc
72
+ rake
73
+ rdoc
74
+ readline
75
+ redis
76
+ redis-client
77
+ reline
78
+ rexml
79
+ rspec
80
+ rspec-core
81
+ rspec-expectations
82
+ rspec-mocks
83
+ rspec-support
84
+ search_ui
85
+ shellwords
86
+ simplecov
87
+ simplecov-html
88
+ simplecov_json_formatter
89
+ sqlite-vec
90
+ sqlite3
91
+ stringio
92
+ sync
93
+ term-ansicolor
94
+ terminal-table
95
+ tins
96
+ tsort
97
+ unicode-display_width
98
+ unicode-emoji
99
+ uri
100
+ yaml
101
+ yard
102
+ ]
103
+
104
+ paths {
105
+ %w[ lib ] + gems.map { `bundle show #{it}` }.map(&:chomp)
106
+ }
107
+ end
data/CHANGES.md CHANGED
@@ -1,5 +1,33 @@
1
1
  # Changes
2
2
 
3
+ ## 2026-03-29 v0.1.0
4
+
5
+ - Added `Documentrix::Documents#rename_collection` to rename a collection and
6
+ delegate key moving to the cache.
7
+ - Extended `Documentrix::Documents::Cache::Common#pre` and `#unpre` to accept
8
+ an optional `prefix` argument.
9
+ - Implemented `Documentrix::Documents::Cache::MemoryCache#move_prefix`,
10
+ `RedisCache#move_prefix`, and `SQLiteCache#move_prefix` for atomic key
11
+ mass‑move operations.
12
+ - Updated unit tests to cover the new rename and prefix‑moving functionality.
13
+ - Switched to `GemHadar::SimpleCov` for test coverage.
14
+ - Removed the `redis_backed_memory_cache` implementation and its spec.
15
+ - Added a `.utilsrc` configuration file defining blocks for `search`,
16
+ `discover`, `strip_spaces`, `probe`, `ssh_tunnel`, `classify`, and
17
+ `code_indexer`.
18
+ - Reordered Docker `apk add` commands and added friendly echo messages during
19
+ image build and test phases.
20
+ - Added `fail_fast: true` flag to the Docker file definition.
21
+ - Added interface spec for cache implementations (`memory`, `Redis`, `SQLite`).
22
+ - Included `Set` requirement for older Ruby versions.
23
+ - Removed unused expiry functionality from `RedisCache`.
24
+ - Refactored cache system to use explicit inheritance, moving common methods
25
+ into `Documentrix::Documents::Cache::Common` and renaming `clear` to
26
+ `clear_all_with_prefix`.
27
+ - Updated documentation with detailed RDoc comments.
28
+ - Updated CI configuration to use `bundle exec` and cleaned up `Gemfile.lock`.
29
+ - Added `changelog` configuration in `Rakefile` to support `CHANGES.md`.
30
+
3
31
  ## 2025-12-20 v0.0.4
4
32
 
5
33
  - Added `openssl-dev` to the package list in `.all_images.yml` for Docker
data/Rakefile CHANGED
@@ -24,6 +24,10 @@ GemHadar do
24
24
  '.rspec'
25
25
  readme 'README.md'
26
26
 
27
+ changelog do
28
+ filename 'CHANGES.md'
29
+ end
30
+
27
31
  required_ruby_version '>= 3.1'
28
32
 
29
33
  dependency 'infobar', '~> 0.9'
@@ -36,7 +40,7 @@ GemHadar do
36
40
  dependency 'redis', '~> 5.0'
37
41
  dependency 'more_math', '~> 1.1'
38
42
 
39
- development_dependency 'all_images', '~> 0.9'
43
+ development_dependency 'all_images', '~> 0.12'
40
44
  development_dependency 'rspec', '~> 3.2'
41
45
  development_dependency 'kramdown', '~> 2.0'
42
46
  development_dependency 'debug'
data/docker-compose.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  services:
2
2
  redis:
3
- image: valkey/valkey:8.1.1-alpine
3
+ image: valkey/valkey:9.0.1-alpine
4
4
  restart: unless-stopped
5
5
  ports: [ "127.0.0.1:9736:6379" ]
6
6
  volumes:
data/documentrix.gemspec CHANGED
@@ -1,9 +1,9 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.0.4 ruby lib
2
+ # stub: documentrix 0.1.0 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.0.4".freeze
6
+ s.version = "0.1.0".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
@@ -11,20 +11,20 @@ Gem::Specification.new do |s|
11
11
  s.date = "1980-01-02"
12
12
  s.description = "The Ruby library, Documentrix, is designed to provide a way to build and\nquery vector databases for applications in natural language processing\n(NLP) and large language models (LLMs). It allows users to store and\nretrieve dense vector embeddings for text strings.\n".freeze
13
13
  s.email = "flori@ping.de".freeze
14
- s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
15
- s.files = [".envrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
14
+ s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
15
+ s.files = [".envrc".freeze, ".utilsrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/interface_spec.rb".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
16
16
  s.homepage = "https://github.com/flori/documentrix".freeze
17
17
  s.licenses = ["MIT".freeze]
18
18
  s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
19
19
  s.required_ruby_version = Gem::Requirement.new(">= 3.1".freeze)
20
- s.rubygems_version = "4.0.2".freeze
20
+ s.rubygems_version = "4.0.8".freeze
21
21
  s.summary = "Ruby library for embedding vector database".freeze
22
- s.test_files = ["spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
22
+ s.test_files = ["spec/documentrix/documents/cache/interface_spec.rb".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
23
23
 
24
24
  s.specification_version = 4
25
25
 
26
- s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 2.10".freeze])
27
- s.add_development_dependency(%q<all_images>.freeze, ["~> 0.9".freeze])
26
+ s.add_development_dependency(%q<gem_hadar>.freeze, [">= 2.17.0".freeze])
27
+ s.add_development_dependency(%q<all_images>.freeze, ["~> 0.12".freeze])
28
28
  s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
29
29
  s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
30
30
  s.add_development_dependency(%q<debug>.freeze, [">= 0".freeze])
@@ -1,5 +1,18 @@
1
+ # Common interface for document caches
2
+ #
3
+ # This module defines the standard interface that all document cache
4
+ # implementations must adhere to. It provides shared functionality for managing
5
+ # cached document embeddings, including methods for setting, retrieving, and
6
+ # deleting cache entries, as well as querying and filtering cached data based
7
+ # on tags and similarity searches.
8
+ #
9
+ # The module includes methods for prefix management, collection enumeration,
10
+ # tag extraction, and cache clearing operations, ensuring consistent behavior
11
+ # across different cache backends such as
12
+ # memory, Redis, and SQLite.
1
13
  module Documentrix::Documents::Cache::Common
2
14
  include Documentrix::Utils::Math
15
+ include Enumerable
3
16
 
4
17
  # The initialize method sets up the Documentrix::Documents::Cache instance
5
18
  # by setting its prefix attribute to the given value.
@@ -27,17 +40,100 @@ module Documentrix::Documents::Cache::Common
27
40
  # Returns a string representing the given `key` prefixed with the defined
28
41
  # prefix.
29
42
  #
30
- # @param key [String] the key to join with the prefix
31
- # @return [String] the joined string of prefix and key
32
- def pre(key)
33
- [ @prefix, key ].join
43
+ # @param key [String] the key to prefix
44
+ # @param prefix [String] the prefix to use (defaults to the cache's prefix)
45
+ # @return [String] the prefixed key
46
+ def pre(key, prefix: @prefix)
47
+ [ prefix, key ].join
34
48
  end
35
49
 
36
50
  # Returns a string with the prefix removed from the given `key`.
37
51
  #
38
52
  # @param key [String] the input string containing the prefix.
53
+ # @param prefix [String] the prefix to use (defaults to the cache's prefix)
39
54
  # @return [String] the input string without the prefix.
40
- def unpre(key)
41
- key.sub(/\A#@prefix/, '')
55
+ def unpre(key, prefix: @prefix)
56
+ key.sub(/\A#{prefix}/, '')
57
+ end
58
+
59
+ # The find_records method finds records that match the given needle and
60
+ # tags.
61
+ #
62
+ # @param needle [ Array ] an array containing the embedding vector
63
+ # @param tags [ String, Array ] a string or array of strings representing the tags to search for
64
+ # @param max_records [ Integer ] the maximum number of records to return (accepted but currently not applied by this method)
65
+ #
66
+ # @yield [ record ]
67
+ #
68
+ # @return [ Array<Documentrix::Documents::Records> ] an array containing the matching records
69
+ def find_records(needle, tags: nil, max_records: nil)
70
+ tags = Documentrix::Utils::Tags.new(Array(tags)).to_a
71
+ records = self
72
+ if tags.present?
73
+ records = records.select { |_key, record| (tags & record.tags).size >= 1 }
74
+ end
75
+ needle_norm = norm(needle)
76
+ records = records.sort_by { |key, record|
77
+ record.key = key
78
+ record.similarity = cosine_similarity(
79
+ a: needle,
80
+ b: record.embedding,
81
+ a_norm: needle_norm,
82
+ b_norm: record.norm,
83
+ )
84
+ }
85
+ records.transpose.last&.reverse.to_a
86
+ end
87
+
88
+ # Returns a set of unique tags found in the cache records.
89
+ #
90
+ # This method iterates through all records in the cache and collects unique
91
+ # tags from each record's tags collection. It constructs a new
92
+ # Documentrix::Utils::Tags object containing all the unique tags encountered.
93
+ #
94
+ # @return [Documentrix::Utils::Tags] a set of unique tags from all records in
95
+ # the cache
96
+ def tags
97
+ each_with_object(Documentrix::Utils::Tags.new) do |(_, record), t|
98
+ record.tags.each do |tag|
99
+ t.add(tag, source: record.source)
100
+ end
101
+ end
102
+ end
103
+
104
+ # The clear_for_tags method removes all records from the cache that have tags
105
+ # matching any of the provided tags.
106
+ #
107
+ # @param tags [Array<String>] an array of tag names to filter records by
108
+ #
109
+ # @return [self] self
110
+ def clear_for_tags(tags)
111
+ each do |key, record|
112
+ if (tags & record.tags.to_a).size >= 1
113
+ delete(unpre(key))
114
+ end
115
+ end
116
+ self
117
+ end
118
+
119
+ # The clear method removes cached records based on the provided tags or
120
+ # clears all records with the current prefix.
121
+ #
122
+ # When tags are provided, it removes only the records that have matching
123
+ # tags. If no tags are provided, it removes all records that have keys
124
+ # starting with the current prefix.
125
+ #
126
+ # @param tags [NilClass, Array<String>] an array of tag names to filter
127
+ # records by, or nil to clear all records
128
+ #
129
+ # @return [self] returns the cache instance for method chaining
130
+ def clear(tags: nil)
131
+ tags = Documentrix::Utils::Tags.new(tags).to_a
132
+ if tags.present?
133
+ clear_for_tags(tags)
134
+ else
135
+ clear_all_with_prefix
136
+ end
137
+ self
42
138
  end
43
139
  end
@@ -1,5 +1,17 @@
1
1
  require 'documentrix/documents/cache/common'
2
2
 
3
+ # MemoryCache is an in-memory cache implementation for document embeddings.
4
+ #
5
+ # This class provides a cache store for document embeddings using a hash-based
6
+ # in-memory storage mechanism. It implements the common cache interface
7
+ # defined in Documentrix::Documents::Cache::Common and supports operations
8
+ # such as setting, retrieving, and deleting cached entries, as well as
9
+ # iterating over cached items.
10
+ #
11
+ # The cache uses a prefix to namespace keys and supports clearing entries
12
+ # based on prefixes or specific tags. It is designed to be used as a
13
+ # temporary storage mechanism during processing and is not persistent
14
+ # across application restarts.
3
15
  class Documentrix::Documents::MemoryCache
4
16
  include Documentrix::Documents::Cache::Common
5
17
 
@@ -59,32 +71,48 @@ class Documentrix::Documents::MemoryCache
59
71
  count
60
72
  end
61
73
 
62
- # The clear method removes all records from the cache that have keys starting
63
- # with the prefix `prefix`.
74
+ # The clear_all_with_prefix method removes all records from the cache that
75
+ # have keys starting with the prefix `prefix`.
64
76
  #
65
77
  # @return [ Documentrix::Documents::MemoryCache ] self
66
- def clear
78
+ def clear_all_with_prefix
67
79
  @data.delete_if { |key, _| key.start_with?(@prefix) }
68
80
  self
69
81
  end
70
82
 
83
+ # Move all keys that start with +old_prefix+ into a new prefix.
84
+ #
85
+ # This helper is used when a collection is renamed. It iterates over all
86
+ # cached entries, selects those whose keys start with *old_prefix*, strips
87
+ # that prefix, then re‑inserts the entry with the *new_prefix* instead.
88
+ #
89
+ # @param old_prefix [String] The prefix to look for on existing keys.
90
+ # @param new_prefix [String] The prefix that will replace *old_prefix*.
91
+ # @return [Documentrix::Documents::MemoryCache] Returns `self` to allow
92
+ # chaining.
93
+ def move_prefix(old_prefix, new_prefix)
94
+ new_data = @data.dup
95
+ full_each do |key, value|
96
+ key.start_with?(old_prefix) or next
97
+ unpre_key = unpre(key, prefix: old_prefix)
98
+ new_data[pre(unpre_key, prefix: new_prefix)] = new_data.delete(key)
99
+ end
100
+ @data.replace(new_data)
101
+ self
102
+ end
103
+
71
104
  # The each method iterates over the cache's keys and values under a given
72
105
  # prefix `prefix`.
73
106
  #
74
107
  # @yield [key, value] Each key-value pair in the cache
75
- #
76
- # @return [void]
77
108
  def each(&block)
78
109
  @data.select { |key,| key.start_with?(@prefix) }.each(&block)
79
110
  end
80
- include Enumerable
81
111
 
82
112
  # The full_each method iterates over the data hash and yields each key-value
83
113
  # pair to the given block regardless of the prefix `prefix`.
84
114
  #
85
115
  # @yield [key, value] Each key-value pair in the data hash
86
- #
87
- # @return [void]
88
116
  def full_each(&block)
89
117
  @data.each(&block)
90
118
  end
@@ -1,4 +1,18 @@
1
+ # Module for cache record definitions used in Documentrix document caching.
2
+ #
3
+ # This module provides the Record class for managing
4
+ # cached document embeddings and their associated metadata in the Documentrix
5
+ # library's caching system.
1
6
  module Documentrix::Documents::Cache::Records
7
+ # A record class for caching document embeddings and their associated
8
+ # metadata.
9
+ #
10
+ # This class extends JSON::GenericObject and is used to represent cached
11
+ # document entries in the Documentrix library. It stores text content,
12
+ # embedding vectors, normalization values, source information, and tags
13
+ # associated with each document record. The class provides methods for string
14
+ # representation, tag handling, and equality comparison based on text
15
+ # content.
2
16
  class Record < JSON::GenericObject
3
17
  # The initialize method sets default values for the text and norm
4
18
  # attributes.
@@ -41,94 +55,4 @@ module Documentrix::Documents::Cache::Records
41
55
 
42
56
  alias inspect to_s
43
57
  end
44
-
45
- module RedisFullEach
46
- # The full_each method iterates over all records in the cache and yields
47
- # them to the block.
48
- #
49
- # @yield [ key, value ] where key is the record's key and value is the record itself
50
- def full_each(&block)
51
- redis.scan_each(match: [ Documentrix::Documents, ?* ] * ?-) do |key|
52
- value = redis.get(key) or next
53
- value = JSON(value, object_class: Documentrix::Documents::Record)
54
- block.(key, value)
55
- end
56
- end
57
- end
58
-
59
- module FindRecords
60
- # The find_records method finds records that match the given needle and
61
- # tags.
62
- #
63
- # @param needle [ Array ] an array containing the embedding vector
64
- # @param tags [ String, Array ] a string or array of strings representing the tags to search for
65
- # @param max_records [ Integer ] the maximum number of records to return
66
- #
67
- # @yield [ record ]
68
- #
69
- # @return [ Array<Documentrix::Documents::Records> ] an array containing the matching records
70
- def find_records(needle, tags: nil, max_records: nil)
71
- tags = Documentrix::Utils::Tags.new(Array(tags)).to_a
72
- records = self
73
- if tags.present?
74
- records = records.select { |_key, record| (tags & record.tags).size >= 1 }
75
- end
76
- needle_norm = norm(needle)
77
- records = records.sort_by { |key, record|
78
- record.key = key
79
- record.similarity = cosine_similarity(
80
- a: needle,
81
- b: record.embedding,
82
- a_norm: needle_norm,
83
- b_norm: record.norm,
84
- )
85
- }
86
- records.transpose.last&.reverse.to_a
87
- end
88
- end
89
-
90
- module Tags
91
- # The clear method removes all records that match the given tags from the
92
- # cache.
93
- #
94
- # @param tags [ Array<String> ] an array of tag names
95
- #
96
- # @example
97
- # documents.clear(tags: %w[ foo bar ])
98
- #
99
- # @return [ self ]
100
- def clear(tags: nil)
101
- tags = Documentrix::Utils::Tags.new(tags).to_a
102
- if tags.present?
103
- if respond_to?(:clear_for_tags)
104
- clear_for_tags(tags)
105
- else
106
- each do |key, record|
107
- if (tags & record.tags.to_a).size >= 1
108
- delete(unpre(key))
109
- end
110
- end
111
- end
112
- else
113
- super()
114
- end
115
- self
116
- end
117
-
118
- # The tags method returns an array of unique tags from all records.
119
- #
120
- # @return [Documentrix::Utils::Tags] An instance of
121
- # Documentrix::Utils::Tags containing the unique tags.
122
- def tags
123
- if defined? super
124
- super
125
- else
126
- each_with_object(Documentrix::Utils::Tags.new) do |(_, record), t|
127
- record.tags.each do |tag|
128
- t.add(tag, source: record.source)
129
- end
130
- end
131
- end
132
- end
133
- end
134
58
  end
@@ -1,6 +1,18 @@
1
1
  require 'documentrix/documents/cache/common'
2
2
  require 'redis'
3
3
 
4
+ # RedisCache is a cache implementation that uses Redis for storing document
5
+ # embeddings and related metadata.
6
+ #
7
+ # This class provides a persistent cache storage solution for document
8
+ # embeddings, leveraging Redis's capabilities to store both the embedding
9
+ # vectors and associated text data, tags, and source information. It supports
10
+ # efficient vector similarity searches through Redis-based operations.
11
+ #
12
+ # @example
13
+ # cache = Documentrix::Documents::RedisCache.new(prefix: 'docs-', url: 'redis://localhost:6379')
14
+ # cache['key'] = { text: 'example', embedding: [0.1, 0.2, 0.3] }
15
+ # value = cache['key']
4
16
  class Documentrix::Documents::RedisCache
5
17
  include Documentrix::Documents::Cache::Common
6
18
 
@@ -11,11 +23,10 @@ class Documentrix::Documents::RedisCache
11
23
  # @param [String] prefix the string to be used as the prefix for this cache
12
24
  # @param [String] url the URL of the Redis server (default: ENV['REDIS_URL'])
13
25
  # @param [Class] object_class the class of objects stored in Redis (default: nil)
14
- # @param [Integer] ex the expiration time in seconds (default: nil)
15
- def initialize(prefix:, url: ENV['REDIS_URL'], object_class: nil, ex: nil)
26
+ def initialize(prefix:, url: ENV['REDIS_URL'], object_class: nil)
16
27
  super(prefix:)
17
28
  url or raise ArgumentError, 'require redis url'
18
- @url, @object_class, @ex = url, object_class, ex
29
+ @url, @object_class = url, object_class
19
30
  end
20
31
 
21
32
  attr_reader :object_class # the class of objects stored in the cache
@@ -35,7 +46,7 @@ class Documentrix::Documents::RedisCache
35
46
  def [](key)
36
47
  value = redis.get(pre(key))
37
48
  unless value.nil?
38
- object_class ? JSON(value, object_class:) : JSON(value)
49
+ object_class ? JSON.parse(value, object_class:) : JSON.parse(value)
39
50
  end
40
51
  end
41
52
 
@@ -53,28 +64,13 @@ class Documentrix::Documents::RedisCache
53
64
  #
54
65
  # @param [String] key the string representation of the key
55
66
  # @param [Object] value the object to be stored under the given key
56
- # @option ex [Integer] ex the expiration time in seconds (default: nil)
57
67
  #
58
68
  # @return [Object] self
59
- def set(key, value, ex: nil)
60
- ex ||= @ex
61
- if !ex.nil? && ex < 1
62
- redis.del(pre(key))
63
- else
64
- redis.set(pre(key), JSON.generate(value), ex:)
65
- end
69
+ def set(key, value)
70
+ redis.set(pre(key), JSON.generate(value))
66
71
  value
67
72
  end
68
73
 
69
- # The ttl method returns the time-to-live (TTL) value for the given key
70
- #
71
- # @param [String] key the string representation of the key
72
- #
73
- # @return [Integer, nil] the TTL value if it exists in Redis, or nil otherwise
74
- def ttl(key)
75
- redis.ttl(pre(key))
76
- end
77
-
78
74
  # The key? method checks if the given key exists in Redis by calling the
79
75
  # redis.exists? method
80
76
  #
@@ -105,12 +101,33 @@ class Documentrix::Documents::RedisCache
105
101
  s
106
102
  end
107
103
 
108
- # The clear method removes all key-value pairs associated with the given
109
- # prefix from this cache instance.
104
+ # The clear_all_with_prefix method removes all key-value pairs associated
105
+ # with this cache instance's prefix.
110
106
  #
111
107
  # @return [Documentrix::Documents::RedisCache] self
112
- def clear
108
+ def clear_all_with_prefix
113
109
  redis.scan_each(match: "#@prefix*") { |key| redis.del(key) }
110
+ defined? super and super
111
+ self
112
+ end
113
+
114
+ # Renames all keys that start with <tt>old_prefix</tt> to use
115
+ # <tt>new_prefix</tt>. The method iterates over every affected key,
116
+ # reconstructs the new key name (preserving the part of the key that follows
117
+ # the old prefix), writes the value under the new name, and deletes the old
118
+ # key.
119
+ #
120
+ # @param old_prefix [String] The prefix that currently identifies the target keys.
121
+ # @param new_prefix [String] The prefix that should replace <tt>old_prefix</tt>.
122
+ #
123
+ # @return [self] The cache instance, facilitating method chaining.
124
+ def move_prefix(old_prefix, new_prefix)
125
+ full_each(prefix: '') do |key, value|
126
+ key.start_with?(old_prefix) or next
127
+ unpre_key = unpre(key, prefix: old_prefix)
128
+ redis.set(pre(unpre_key, prefix: new_prefix), JSON.generate(value))
129
+ redis.del(key)
130
+ end
114
131
  self
115
132
  end
116
133
 
@@ -121,8 +138,23 @@ class Documentrix::Documents::RedisCache
121
138
  #
122
139
  # @return [self] self
123
140
  def each(&block)
141
+ block or return enum_for(__method__)
142
+
124
143
  redis.scan_each(match: "#@prefix*") { |key| block.(key, self[unpre(key)]) }
125
144
  self
126
145
  end
127
- include Enumerable
146
+
147
+ # The full_each method iterates over all records in the cache and yields
148
+ # them to the block.
149
+ #
150
+ # @yield [ key, value ] where key is the record's key and value is the record itself
151
+ def full_each(prefix: 'Documents-', &block)
152
+ block or return enum_for(__method__, prefix:)
153
+
154
+ redis.scan_each(match: prefix + ?*) do |key|
155
+ value = redis.get(key) or next
156
+ value = object_class ? JSON.parse(value, object_class:) : JSON.parse(value)
157
+ block.(key, value)
158
+ end
159
+ end
128
160
  end