documentrix 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7987ac709860e4747aec91c9d99b766e1b959b40142ebb17a3516aa80a0f6b85
4
- data.tar.gz: a9ea8a8f360bef62ad687d1ecbcf04e6a978dc7450a76f26d97c6f04ee1bdf62
3
+ metadata.gz: 3adb452ed3a325dfd48b90e46fb3d75fc060f00e98c4faaa1e1e5646e3166651
4
+ data.tar.gz: 6328ec106db80374061e3ec96b8ece3b85da6c353d937178561518c4e50b42a8
5
5
  SHA512:
6
- metadata.gz: ba03da2c50fffb014489ee3281765e1ad0712da263dae1a6f45d5f449aba345113f28c093c7d2df14986ad283aee6cbd88c8545c6a7dceaacadff405ad27f457
7
- data.tar.gz: 3a10470a23349509a575e79ab1ca18b0969827ef7f720f745537de83924348109cb3fcbd4b7a4a9703daebb471489948ec26ba704362838ad41fa546bb0040f2
6
+ metadata.gz: 7cc0997fcaf8aeab5d3da01a6b6aecd7d90a89e869a4b5dd2f9e6874b7f5e94f62028a708b223eb7f4c69c0cab23ab5cd7aed416dbbe52e5c659ca13d67b3a19
7
+ data.tar.gz: 31b958b1bcd08ccfdfd178b826a05c28b8b3c86818ee29acb03021a1d8c5dfe865b48692e9024bd83ceb62292793baa748c3dcfb56d26bd10968113015c477fa
data/.envrc ADDED
@@ -0,0 +1 @@
1
+ export REDIS_URL=redis://localhost:9736
data/CHANGES.md ADDED
@@ -0,0 +1,29 @@
1
+ # Changes
2
+
3
+ ## 2025-05-26 v0.0.2
4
+
5
+ * Documentrix::Utils::Tags enhancements for improved tagging functionality:
6
+ * Added `valid_tag` parameter to `initialize` method with default value.
7
+ * Introduced `DEFAULT_VALID_TAG` regular expression constant.
8
+ * Updated `initialize` and `add` methods to use new `valid_tag` parameter.
9
+ * Added `attr_reader` for `valid_tag`.
10
+ * Added `describe` block for `Tag` in `tags_spec.rb`.
11
+ * Added three `it` blocks to test instantiation, default tag value trimming, and custom regex usage.
12
+ * Added `.envrc` file with Redis connection settings:
13
+ * Added `REDIS_URL` environment variable with value `redis://localhost:9736`
14
+ * Created new `.envrc` file in the root directory.
15
+
16
+ ## 2025-01-29 v0.0.1
17
+
18
+ * Added docker-compose redis
19
+ * Added a `services` section to `docker-compose.yml`
20
+ * Created Redis service with image `valkey/valkey:7.2.8-alpine` and specified ports
21
+ * Configured Redis volumes, including mounting a Redis config file (`./redis/redis.conf`)
22
+ * Created new file `redis/redis.conf` with Redis configuration settings
23
+ * Added support for **Ruby 3.4** to the Docker image
24
+ * Added copyright notice and permissions to `LICENSE` file
25
+ * Removed stray double quote from `summary` field
26
+
27
+ ## 2024-12-06 v0.0.0
28
+
29
+ * Start
data/LICENSE ADDED
@@ -0,0 +1,19 @@
1
+ Copyright Florian Frank
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
4
+ this software and associated documentation files (the “Software”), to deal in
5
+ the Software without restriction, including without limitation the rights to
6
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7
+ of the Software, and to permit persons to whom the Software is furnished to do
8
+ so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
data/Rakefile CHANGED
@@ -8,7 +8,7 @@ GemHadar do
8
8
  author 'Florian Frank'
9
9
  email 'flori@ping.de'
10
10
  homepage "https://github.com/flori/#{name}"
11
- summary '"Ruby library for embedding vector database'
11
+ summary 'Ruby library for embedding vector database'
12
12
  description <<~EOT
13
13
  The Ruby library, Documentrix, is designed to provide a way to build and
14
14
  query vector databases for applications in natural language processing
@@ -26,7 +26,7 @@ GemHadar do
26
26
 
27
27
  required_ruby_version '~> 3.1'
28
28
 
29
- dependency 'infobar', '~> 0.8'
29
+ dependency 'infobar', '~> 0.9'
30
30
  dependency 'json', '~> 2.0'
31
31
  dependency 'tins', '~> 1.34'
32
32
  dependency 'sqlite-vec', '~> 0.0'
@@ -0,0 +1,10 @@
1
+ services:
2
+ redis:
3
+ image: valkey/valkey:7.2.8-alpine
4
+ restart: unless-stopped
5
+ ports: [ "127.0.0.1:9736:6379" ]
6
+ volumes:
7
+ - "redis-data:/data:delegated"
8
+ - "./redis/redis.conf:/etc/redis.conf"
9
+ volumes:
10
+ redis-data:
data/documentrix.gemspec CHANGED
@@ -1,35 +1,35 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.0.0 ruby lib
2
+ # stub: documentrix 0.0.2 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.0.0".freeze
6
+ s.version = "0.0.2".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
10
10
  s.authors = ["Florian Frank".freeze]
11
- s.date = "2024-12-06"
11
+ s.date = "1980-01-02"
12
12
  s.description = "The Ruby library, Documentrix, is designed to provide a way to build and\nquery vector databases for applications in natural language processing\n(NLP) and large language models (LLMs). It allows users to store and\nretrieve dense vector embeddings for text strings.\n".freeze
13
13
  s.email = "flori@ping.de".freeze
14
14
  s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
15
- s.files = [".yardopts".freeze, "Gemfile".freeze, "README.md".freeze, "Rakefile".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
15
+ s.files = [".envrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
16
16
  s.homepage = "https://github.com/flori/documentrix".freeze
17
17
  s.licenses = ["MIT".freeze]
18
- s.rdoc_options = ["--title".freeze, "Documentrix - \"Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
18
+ s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
19
19
  s.required_ruby_version = Gem::Requirement.new("~> 3.1".freeze)
20
- s.rubygems_version = "3.5.23".freeze
21
- s.summary = "\"Ruby library for embedding vector database".freeze
20
+ s.rubygems_version = "3.6.7".freeze
21
+ s.summary = "Ruby library for embedding vector database".freeze
22
22
  s.test_files = ["spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
23
23
 
24
24
  s.specification_version = 4
25
25
 
26
- s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.19".freeze])
26
+ s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.20".freeze])
27
27
  s.add_development_dependency(%q<all_images>.freeze, ["~> 0.6".freeze])
28
28
  s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
29
29
  s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
30
30
  s.add_development_dependency(%q<debug>.freeze, [">= 0".freeze])
31
31
  s.add_development_dependency(%q<simplecov>.freeze, [">= 0".freeze])
32
- s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.8".freeze])
32
+ s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
33
33
  s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
34
34
  s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
35
35
  s.add_runtime_dependency(%q<sqlite-vec>.freeze, ["~> 0.0".freeze])
@@ -1,6 +1,10 @@
1
1
  require 'term/ansicolor'
2
2
 
3
3
  class Documentrix::Utils::Tags
4
+ # Matches tags with optional leading # characters and at least one non-space
5
+ # character by default:
6
+ DEFAULT_VALID_TAG = /\A#*(\S+)/
7
+
4
8
  class Tag < String
5
9
  include Term::ANSIColor
6
10
 
@@ -9,13 +13,15 @@ class Documentrix::Utils::Tags
9
13
  #
10
14
  # @param tag [String] the string representation of the tag
11
15
  # @param source [String, nil] the source URL for the tag (default: nil)
12
- def initialize(tag, source: nil)
13
- super(tag.to_s.gsub(/\A#+/, ''))
16
+ def initialize(tag, valid_tag: DEFAULT_VALID_TAG, source: nil)
17
+ super(tag.to_s[valid_tag, 1])
14
18
  self.source = source
15
19
  end
16
20
 
17
21
  attr_accessor :source # the source URL for the tag
18
22
 
23
+ attr_reader :valid_tag # the regular expression capturing a valid tag's content
24
+
19
25
  # The to_s method formats the tag string for output, including source URL
20
26
  # if requested.
21
27
  #
@@ -46,15 +52,18 @@ class Documentrix::Utils::Tags
46
52
  # Documentrix::Utils::Tags.new(%w[ foo bar ])
47
53
  #
48
54
  # @return [Documentrix::Utils::Tags] an instance of Documentrix::Utils::Tags
49
- def initialize(tags = [], source: nil)
50
- tags = Array(tags)
51
- @set = []
55
+ def initialize(tags = [], valid_tag: DEFAULT_VALID_TAG, source: nil)
56
+ tags = Array(tags)
57
+ @valid_tag = valid_tag
58
+ @set = []
52
59
  tags.each { |tag| add(tag, source:) }
53
60
  end
54
61
 
62
+ attr_reader :valid_tag # the regular expression capturing a valid tag's content
63
+
55
64
  def add(tag, source: nil)
56
65
  unless tag.is_a?(Tag)
57
- tag = Tag.new(tag, source:)
66
+ tag = Tag.new(tag, valid_tag:, source:)
58
67
  end
59
68
  index = @set.bsearch_index { _1 >= tag }
60
69
  if index == nil
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.0.0'
3
+ VERSION = '0.0.2'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
data/redis/redis.conf ADDED
@@ -0,0 +1,5 @@
1
+ save 60 1000
2
+ dbfilename dump.rdb
3
+ appendonly yes
4
+ appendfilename "appendonly.aof"
5
+ appendfsync always
@@ -50,4 +50,19 @@ RSpec.describe Documentrix::Utils::Tags do
50
50
  "\e]8;;file:///path/to/bar.html\e\\#bar\e]8;;\e\\ \e]8;;https://foo.example.com\e\\#foo\e]8;;\e\\"
51
51
  )
52
52
  end
53
+
54
+ describe described_class::Tag do
55
+ it 'can be instantiated' do
56
+ expect(described_class.new('#foo')).to be_a described_class
57
+ end
58
+
59
+ it 'can cannot contain a space by default' do
60
+ expect(described_class.new('#foo bar')).to eq 'foo'
61
+ end
62
+
63
+ it 'can cannot use a custom regular expression for capturing the tag value' do
64
+ valid_tag = /\A#*([\w\]\[]+)/
65
+ expect(described_class.new('#foo[bar]!!!!', valid_tag:)).to eq 'foo[bar]'
66
+ end
67
+ end
53
68
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-12-06 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: gem_hadar
@@ -16,14 +15,14 @@ dependencies:
16
15
  requirements:
17
16
  - - "~>"
18
17
  - !ruby/object:Gem::Version
19
- version: '1.19'
18
+ version: '1.20'
20
19
  type: :development
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - "~>"
25
24
  - !ruby/object:Gem::Version
26
- version: '1.19'
25
+ version: '1.20'
27
26
  - !ruby/object:Gem::Dependency
28
27
  name: all_images
29
28
  requirement: !ruby/object:Gem::Requirement
@@ -100,14 +99,14 @@ dependencies:
100
99
  requirements:
101
100
  - - "~>"
102
101
  - !ruby/object:Gem::Version
103
- version: '0.8'
102
+ version: '0.9'
104
103
  type: :runtime
105
104
  prerelease: false
106
105
  version_requirements: !ruby/object:Gem::Requirement
107
106
  requirements:
108
107
  - - "~>"
109
108
  - !ruby/object:Gem::Version
110
- version: '0.8'
109
+ version: '0.9'
111
110
  - !ruby/object:Gem::Dependency
112
111
  name: json
113
112
  requirement: !ruby/object:Gem::Requirement
@@ -258,10 +257,14 @@ extra_rdoc_files:
258
257
  - lib/documentrix/utils/tags.rb
259
258
  - lib/documentrix/version.rb
260
259
  files:
260
+ - ".envrc"
261
261
  - ".yardopts"
262
+ - CHANGES.md
262
263
  - Gemfile
264
+ - LICENSE
263
265
  - README.md
264
266
  - Rakefile
267
+ - docker-compose.yml
265
268
  - documentrix.gemspec
266
269
  - lib/documentrix.rb
267
270
  - lib/documentrix/documents.rb
@@ -278,6 +281,7 @@ files:
278
281
  - lib/documentrix/utils/math.rb
279
282
  - lib/documentrix/utils/tags.rb
280
283
  - lib/documentrix/version.rb
284
+ - redis/redis.conf
281
285
  - spec/assets/embeddings.json
282
286
  - spec/documentrix/documents/cache/memory_cache_spec.rb
283
287
  - spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb
@@ -293,10 +297,9 @@ homepage: https://github.com/flori/documentrix
293
297
  licenses:
294
298
  - MIT
295
299
  metadata: {}
296
- post_install_message:
297
300
  rdoc_options:
298
301
  - "--title"
299
- - Documentrix - "Ruby library for embedding vector database
302
+ - Documentrix - Ruby library for embedding vector database
300
303
  - "--main"
301
304
  - README.md
302
305
  require_paths:
@@ -312,10 +315,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
312
315
  - !ruby/object:Gem::Version
313
316
  version: '0'
314
317
  requirements: []
315
- rubygems_version: 3.5.23
316
- signing_key:
318
+ rubygems_version: 3.6.7
317
319
  specification_version: 4
318
- summary: "\"Ruby library for embedding vector database"
320
+ summary: Ruby library for embedding vector database
319
321
  test_files:
320
322
  - spec/documentrix/documents/cache/memory_cache_spec.rb
321
323
  - spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb