documentrix 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 681b65f2c422db0a749e2ec33fba7eadabfaa1d2edc3c47d88ee6dfa0ac9cdd0
4
- data.tar.gz: ba48587f3434dbd91b074781712ea4117f6f52874cc03890f2a1e79fb36b1891
3
+ metadata.gz: 3adb452ed3a325dfd48b90e46fb3d75fc060f00e98c4faaa1e1e5646e3166651
4
+ data.tar.gz: 6328ec106db80374061e3ec96b8ece3b85da6c353d937178561518c4e50b42a8
5
5
  SHA512:
6
- metadata.gz: 4a4028ee84733d6e5df6300d60c2ede2ae8e003ac893f3583e1cb3288ca6e85451085d0fd8c3e26654eea490b51849d47c0ea027f5663a12a38002c96ef2117c
7
- data.tar.gz: ca0193dea4d6c751a69e001e18676a3ec9afe4cc02d30175fb74a4ae21a7fc39e2978ee8af7d2a50e65a2275ff86cc51bf5e72a22aa0252f3ac13634d247590b
6
+ metadata.gz: 7cc0997fcaf8aeab5d3da01a6b6aecd7d90a89e869a4b5dd2f9e6874b7f5e94f62028a708b223eb7f4c69c0cab23ab5cd7aed416dbbe52e5c659ca13d67b3a19
7
+ data.tar.gz: 31b958b1bcd08ccfdfd178b826a05c28b8b3c86818ee29acb03021a1d8c5dfe865b48692e9024bd83ceb62292793baa748c3dcfb56d26bd10968113015c477fa
data/.envrc ADDED
@@ -0,0 +1 @@
1
+ export REDIS_URL=redis://localhost:9736
data/CHANGES.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changes
2
2
 
3
+ ## 2025-05-26 v0.0.2
4
+
5
+ * Documentrix::Utils::Tags enhancements for improved tagging functionality:
6
+ * Added `valid_tag` parameter to `initialize` method with default value.
7
+ * Introduced `DEFAULT_VALID_TAG` regular expression constant.
8
+ * Updated `initialize` and `add` methods to use new `valid_tag` parameter.
9
+ * Added `attr_reader` for `valid_tag`.
10
+ * Added `describe` block for `Tag` in `tags_spec.rb`.
11
+ * Added three `it` blocks to test instantiation, default tag value trimming, and custom regex usage.
12
+ * Add `.envrc` file with Redis connection settings:
13
+ * Added `REDIS_URL` environment variable with value `redis://localhost:9736`
14
+ * Created new `.envrc` file in the root directory.
15
+
3
16
  ## 2025-01-29 v0.0.1
4
17
 
5
18
  * Added docker-compose redis
data/Rakefile CHANGED
@@ -26,7 +26,7 @@ GemHadar do
26
26
 
27
27
  required_ruby_version '~> 3.1'
28
28
 
29
- dependency 'infobar', '~> 0.8'
29
+ dependency 'infobar', '~> 0.9'
30
30
  dependency 'json', '~> 2.0'
31
31
  dependency 'tins', '~> 1.34'
32
32
  dependency 'sqlite-vec', '~> 0.0'
data/documentrix.gemspec CHANGED
@@ -1,35 +1,35 @@
1
1
  # -*- encoding: utf-8 -*-
2
- # stub: documentrix 0.0.1 ruby lib
2
+ # stub: documentrix 0.0.2 ruby lib
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "documentrix".freeze
6
- s.version = "0.0.1".freeze
6
+ s.version = "0.0.2".freeze
7
7
 
8
8
  s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
9
9
  s.require_paths = ["lib".freeze]
10
10
  s.authors = ["Florian Frank".freeze]
11
- s.date = "2025-01-29"
11
+ s.date = "1980-01-02"
12
12
  s.description = "The Ruby library, Documentrix, is designed to provide a way to build and\nquery vector databases for applications in natural language processing\n(NLP) and large language models (LLMs). It allows users to store and\nretrieve dense vector embeddings for text strings.\n".freeze
13
13
  s.email = "flori@ping.de".freeze
14
14
  s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
15
- s.files = [".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
15
+ s.files = [".envrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
16
16
  s.homepage = "https://github.com/flori/documentrix".freeze
17
17
  s.licenses = ["MIT".freeze]
18
18
  s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
19
19
  s.required_ruby_version = Gem::Requirement.new("~> 3.1".freeze)
20
- s.rubygems_version = "3.6.2".freeze
20
+ s.rubygems_version = "3.6.7".freeze
21
21
  s.summary = "Ruby library for embedding vector database".freeze
22
22
  s.test_files = ["spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
23
23
 
24
24
  s.specification_version = 4
25
25
 
26
- s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.19".freeze])
26
+ s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.20".freeze])
27
27
  s.add_development_dependency(%q<all_images>.freeze, ["~> 0.6".freeze])
28
28
  s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
29
29
  s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
30
30
  s.add_development_dependency(%q<debug>.freeze, [">= 0".freeze])
31
31
  s.add_development_dependency(%q<simplecov>.freeze, [">= 0".freeze])
32
- s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.8".freeze])
32
+ s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
33
33
  s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
34
34
  s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
35
35
  s.add_runtime_dependency(%q<sqlite-vec>.freeze, ["~> 0.0".freeze])
@@ -1,6 +1,10 @@
1
1
  require 'term/ansicolor'
2
2
 
3
3
  class Documentrix::Utils::Tags
4
+ # Matches tags with optional leading # characters and at least one non-space
5
+ # character by default:
6
+ DEFAULT_VALID_TAG = /\A#*(\S+)/
7
+
4
8
  class Tag < String
5
9
  include Term::ANSIColor
6
10
 
@@ -9,13 +13,15 @@ class Documentrix::Utils::Tags
9
13
  #
10
14
  # @param tag [String] the string representation of the tag
11
15
  # @param source [String, nil] the source URL for the tag (default: nil)
12
- def initialize(tag, source: nil)
13
- super(tag.to_s.gsub(/\A#+/, ''))
16
+ def initialize(tag, valid_tag: DEFAULT_VALID_TAG, source: nil)
17
+ super(tag.to_s[valid_tag, 1])
14
18
  self.source = source
15
19
  end
16
20
 
17
21
  attr_accessor :source # the source URL for the tag
18
22
 
23
+ attr_reader :valid_tag # the regular expression capturing a valid tag's content
24
+
19
25
  # The to_s method formats the tag string for output, including source URL
20
26
  # if requested.
21
27
  #
@@ -46,15 +52,18 @@ class Documentrix::Utils::Tags
46
52
  # Documentrix::Utils::Tags.new(%w[ foo bar ])
47
53
  #
48
54
  # @return [Documentrix::Utils::Tags] an instance of Documentrix::Utils::Tags
49
- def initialize(tags = [], source: nil)
50
- tags = Array(tags)
51
- @set = []
55
+ def initialize(tags = [], valid_tag: DEFAULT_VALID_TAG, source: nil)
56
+ tags = Array(tags)
57
+ @valid_tag = valid_tag
58
+ @set = []
52
59
  tags.each { |tag| add(tag, source:) }
53
60
  end
54
61
 
62
+ attr_reader :valid_tag # the regular expression capturing a valid tag's content
63
+
55
64
  def add(tag, source: nil)
56
65
  unless tag.is_a?(Tag)
57
- tag = Tag.new(tag, source:)
66
+ tag = Tag.new(tag, valid_tag:, source:)
58
67
  end
59
68
  index = @set.bsearch_index { _1 >= tag }
60
69
  if index == nil
@@ -1,6 +1,6 @@
1
1
  module Documentrix
2
2
  # Documentrix version
3
- VERSION = '0.0.1'
3
+ VERSION = '0.0.2'
4
4
  VERSION_ARRAY = VERSION.split('.').map(&:to_i) # :nodoc:
5
5
  VERSION_MAJOR = VERSION_ARRAY[0] # :nodoc:
6
6
  VERSION_MINOR = VERSION_ARRAY[1] # :nodoc:
@@ -50,4 +50,19 @@ RSpec.describe Documentrix::Utils::Tags do
50
50
  "\e]8;;file:///path/to/bar.html\e\\#bar\e]8;;\e\\ \e]8;;https://foo.example.com\e\\#foo\e]8;;\e\\"
51
51
  )
52
52
  end
53
+
54
+ describe described_class::Tag do
55
+ it 'can be instantiated' do
56
+ expect(described_class.new('#foo')).to be_a described_class
57
+ end
58
+
59
+ it 'cannot contain a space by default' do
60
+ expect(described_class.new('#foo bar')).to eq 'foo'
61
+ end
62
+
63
+ it 'can use a custom regular expression for capturing the tag value' do
64
+ valid_tag = /\A#*([\w\]\[]+)/
65
+ expect(described_class.new('#foo[bar]!!!!', valid_tag:)).to eq 'foo[bar]'
66
+ end
67
+ end
53
68
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: documentrix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Frank
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 2025-01-29 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: gem_hadar
@@ -15,14 +15,14 @@ dependencies:
15
15
  requirements:
16
16
  - - "~>"
17
17
  - !ruby/object:Gem::Version
18
- version: '1.19'
18
+ version: '1.20'
19
19
  type: :development
20
20
  prerelease: false
21
21
  version_requirements: !ruby/object:Gem::Requirement
22
22
  requirements:
23
23
  - - "~>"
24
24
  - !ruby/object:Gem::Version
25
- version: '1.19'
25
+ version: '1.20'
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: all_images
28
28
  requirement: !ruby/object:Gem::Requirement
@@ -99,14 +99,14 @@ dependencies:
99
99
  requirements:
100
100
  - - "~>"
101
101
  - !ruby/object:Gem::Version
102
- version: '0.8'
102
+ version: '0.9'
103
103
  type: :runtime
104
104
  prerelease: false
105
105
  version_requirements: !ruby/object:Gem::Requirement
106
106
  requirements:
107
107
  - - "~>"
108
108
  - !ruby/object:Gem::Version
109
- version: '0.8'
109
+ version: '0.9'
110
110
  - !ruby/object:Gem::Dependency
111
111
  name: json
112
112
  requirement: !ruby/object:Gem::Requirement
@@ -257,6 +257,7 @@ extra_rdoc_files:
257
257
  - lib/documentrix/utils/tags.rb
258
258
  - lib/documentrix/version.rb
259
259
  files:
260
+ - ".envrc"
260
261
  - ".yardopts"
261
262
  - CHANGES.md
262
263
  - Gemfile
@@ -314,7 +315,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
314
315
  - !ruby/object:Gem::Version
315
316
  version: '0'
316
317
  requirements: []
317
- rubygems_version: 3.6.2
318
+ rubygems_version: 3.6.7
318
319
  specification_version: 4
319
320
  summary: Ruby library for embedding vector database
320
321
  test_files: