documentrix 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +1 -0
- data/CHANGES.md +29 -0
- data/LICENSE +19 -0
- data/Rakefile +2 -2
- data/docker-compose.yml +10 -0
- data/documentrix.gemspec +9 -9
- data/lib/documentrix/utils/tags.rb +15 -6
- data/lib/documentrix/version.rb +1 -1
- data/redis/redis.conf +5 -0
- data/spec/utils/tags_spec.rb +15 -0
- metadata +14 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3adb452ed3a325dfd48b90e46fb3d75fc060f00e98c4faaa1e1e5646e3166651
|
4
|
+
data.tar.gz: 6328ec106db80374061e3ec96b8ece3b85da6c353d937178561518c4e50b42a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7cc0997fcaf8aeab5d3da01a6b6aecd7d90a89e869a4b5dd2f9e6874b7f5e94f62028a708b223eb7f4c69c0cab23ab5cd7aed416dbbe52e5c659ca13d67b3a19
|
7
|
+
data.tar.gz: 31b958b1bcd08ccfdfd178b826a05c28b8b3c86818ee29acb03021a1d8c5dfe865b48692e9024bd83ceb62292793baa748c3dcfb56d26bd10968113015c477fa
|
data/.envrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
export REDIS_URL=redis://localhost:9736
|
data/CHANGES.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Changes
|
2
|
+
|
3
|
+
## 2025-05-26 v0.0.2
|
4
|
+
|
5
|
+
* Documentrix::Utils::Tags enhancements for improved tagging functionality:
|
6
|
+
* Added `valid_tag` parameter to `initialize` method with default value.
|
7
|
+
* Introduced `DEFAULT_VALID_TAG` regular expression constant.
|
8
|
+
* Updated `initialize` and `add` methods to use new `valid_tag` parameter.
|
9
|
+
* Added `attr_reader` for `valid_tag`.
|
10
|
+
* Added `describe` block for `Tag` in `tags_spec.rb`.
|
11
|
+
* Added three `it` blocks to test instantiation, default tag value trimming, and custom regex usage.
|
12
|
+
* Updated `.envrc` file to include Redis connection settings:
|
13
|
+
* Added `REDIS_URL` environment variable with value `redis://localhost:9736`
|
14
|
+
* Created new `.envrc` file in the root directory.
|
15
|
+
|
16
|
+
## 2025-01-29 v0.0.1
|
17
|
+
|
18
|
+
* Added docker-compose redis
|
19
|
+
* Added a `services` section to `docker-compose.yml`
|
20
|
+
* Created Redis service with image `valkey/valkey:7.2.8-alpine` and specified ports
|
21
|
+
* Configured Redis volumes, including mounting a Redis config file (`./redis/redis.conf`)
|
22
|
+
* Created new file `redis/redis.conf` with Redis configuration settings
|
23
|
+
* Added support for **Ruby 3.4** to the Docker image
|
24
|
+
* Added copyright notice and permissions to `LICENSE` file
|
25
|
+
* Removed double quotes from `summary` field
|
26
|
+
|
27
|
+
## 2024-12-06 v0.0.0
|
28
|
+
|
29
|
+
* Start
|
data/LICENSE
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
Copyright Florian Frank
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
4
|
+
this software and associated documentation files (the “Software”), to deal in
|
5
|
+
the Software without restriction, including without limitation the rights to
|
6
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
7
|
+
of the Software, and to permit persons to whom the Software is furnished to do
|
8
|
+
so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ GemHadar do
|
|
8
8
|
author 'Florian Frank'
|
9
9
|
email 'flori@ping.de'
|
10
10
|
homepage "https://github.com/flori/#{name}"
|
11
|
-
summary '
|
11
|
+
summary 'Ruby library for embedding vector database'
|
12
12
|
description <<~EOT
|
13
13
|
The Ruby library, Documentrix, is designed to provide a way to build and
|
14
14
|
query vector databases for applications in natural language processing
|
@@ -26,7 +26,7 @@ GemHadar do
|
|
26
26
|
|
27
27
|
required_ruby_version '~> 3.1'
|
28
28
|
|
29
|
-
dependency 'infobar', '~> 0.
|
29
|
+
dependency 'infobar', '~> 0.9'
|
30
30
|
dependency 'json', '~> 2.0'
|
31
31
|
dependency 'tins', '~> 1.34'
|
32
32
|
dependency 'sqlite-vec', '~> 0.0'
|
data/docker-compose.yml
ADDED
data/documentrix.gemspec
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
# stub: documentrix 0.0.
|
2
|
+
# stub: documentrix 0.0.2 ruby lib
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "documentrix".freeze
|
6
|
-
s.version = "0.0.
|
6
|
+
s.version = "0.0.2".freeze
|
7
7
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
9
9
|
s.require_paths = ["lib".freeze]
|
10
10
|
s.authors = ["Florian Frank".freeze]
|
11
|
-
s.date = "
|
11
|
+
s.date = "1980-01-02"
|
12
12
|
s.description = "The Ruby library, Documentrix, is designed to provide a way to build and\nquery vector databases for applications in natural language processing\n(NLP) and large language models (LLMs). It allows users to store and\nretrieve dense vector embeddings for text strings.\n".freeze
|
13
13
|
s.email = "flori@ping.de".freeze
|
14
14
|
s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
|
15
|
-
s.files = [".yardopts".freeze, "Gemfile".freeze, "README.md".freeze, "Rakefile".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
15
|
+
s.files = [".envrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
16
16
|
s.homepage = "https://github.com/flori/documentrix".freeze
|
17
17
|
s.licenses = ["MIT".freeze]
|
18
|
-
s.rdoc_options = ["--title".freeze, "Documentrix -
|
18
|
+
s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
|
19
19
|
s.required_ruby_version = Gem::Requirement.new("~> 3.1".freeze)
|
20
|
-
s.rubygems_version = "3.
|
21
|
-
s.summary = "
|
20
|
+
s.rubygems_version = "3.6.7".freeze
|
21
|
+
s.summary = "Ruby library for embedding vector database".freeze
|
22
22
|
s.test_files = ["spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
23
23
|
|
24
24
|
s.specification_version = 4
|
25
25
|
|
26
|
-
s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.
|
26
|
+
s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.20".freeze])
|
27
27
|
s.add_development_dependency(%q<all_images>.freeze, ["~> 0.6".freeze])
|
28
28
|
s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
|
29
29
|
s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
|
30
30
|
s.add_development_dependency(%q<debug>.freeze, [">= 0".freeze])
|
31
31
|
s.add_development_dependency(%q<simplecov>.freeze, [">= 0".freeze])
|
32
|
-
s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.
|
32
|
+
s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
|
33
33
|
s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
|
34
34
|
s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
|
35
35
|
s.add_runtime_dependency(%q<sqlite-vec>.freeze, ["~> 0.0".freeze])
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'term/ansicolor'
|
2
2
|
|
3
3
|
class Documentrix::Utils::Tags
|
4
|
+
# Matches tags with optional leading # characters and at least one non-space
|
5
|
+
# character by default:
|
6
|
+
DEFAULT_VALID_TAG = /\A#*(\S+)/
|
7
|
+
|
4
8
|
class Tag < String
|
5
9
|
include Term::ANSIColor
|
6
10
|
|
@@ -9,13 +13,15 @@ class Documentrix::Utils::Tags
|
|
9
13
|
#
|
10
14
|
# @param tag [String] the string representation of the tag
|
11
15
|
# @param source [String, nil] the source URL for the tag (default: nil)
|
12
|
-
def initialize(tag, source: nil)
|
13
|
-
super(tag.to_s
|
16
|
+
def initialize(tag, valid_tag: DEFAULT_VALID_TAG, source: nil)
|
17
|
+
super(tag.to_s[valid_tag, 1])
|
14
18
|
self.source = source
|
15
19
|
end
|
16
20
|
|
17
21
|
attr_accessor :source # the source URL for the tag
|
18
22
|
|
23
|
+
attr_reader :valid_tag # the regular expression capturing a valid tag's content
|
24
|
+
|
19
25
|
# The to_s method formats the tag string for output, including source URL
|
20
26
|
# if requested.
|
21
27
|
#
|
@@ -46,15 +52,18 @@ class Documentrix::Utils::Tags
|
|
46
52
|
# Documentrix::Utils::Tags.new(%w[ foo bar ])
|
47
53
|
#
|
48
54
|
# @return [Documentrix::Utils::Tags] an instance of Documentrix::Utils::Tags
|
49
|
-
def initialize(tags = [], source: nil)
|
50
|
-
tags
|
51
|
-
@
|
55
|
+
def initialize(tags = [], valid_tag: DEFAULT_VALID_TAG, source: nil)
|
56
|
+
tags = Array(tags)
|
57
|
+
@valid_tag = valid_tag
|
58
|
+
@set = []
|
52
59
|
tags.each { |tag| add(tag, source:) }
|
53
60
|
end
|
54
61
|
|
62
|
+
attr_reader :valid_tag # the regular expression capturing a valid tag's content
|
63
|
+
|
55
64
|
def add(tag, source: nil)
|
56
65
|
unless tag.is_a?(Tag)
|
57
|
-
tag = Tag.new(tag, source:)
|
66
|
+
tag = Tag.new(tag, valid_tag:, source:)
|
58
67
|
end
|
59
68
|
index = @set.bsearch_index { _1 >= tag }
|
60
69
|
if index == nil
|
data/lib/documentrix/version.rb
CHANGED
data/redis/redis.conf
ADDED
data/spec/utils/tags_spec.rb
CHANGED
@@ -50,4 +50,19 @@ RSpec.describe Documentrix::Utils::Tags do
|
|
50
50
|
"\e]8;;file:///path/to/bar.html\e\\#bar\e]8;;\e\\ \e]8;;https://foo.example.com\e\\#foo\e]8;;\e\\"
|
51
51
|
)
|
52
52
|
end
|
53
|
+
|
54
|
+
describe described_class::Tag do
|
55
|
+
it 'can be instantiated' do
|
56
|
+
expect(described_class.new('#foo')).to be_a described_class
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'can cannot contain a space by default' do
|
60
|
+
expect(described_class.new('#foo bar')).to eq 'foo'
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'can cannot use a custom regular expression for capturing the tag value' do
|
64
|
+
valid_tag = /\A#*([\w\]\[]+)/
|
65
|
+
expect(described_class.new('#foo[bar]!!!!', valid_tag:)).to eq 'foo[bar]'
|
66
|
+
end
|
67
|
+
end
|
53
68
|
end
|
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Frank
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: gem_hadar
|
@@ -16,14 +15,14 @@ dependencies:
|
|
16
15
|
requirements:
|
17
16
|
- - "~>"
|
18
17
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
18
|
+
version: '1.20'
|
20
19
|
type: :development
|
21
20
|
prerelease: false
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
23
22
|
requirements:
|
24
23
|
- - "~>"
|
25
24
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
25
|
+
version: '1.20'
|
27
26
|
- !ruby/object:Gem::Dependency
|
28
27
|
name: all_images
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,14 +99,14 @@ dependencies:
|
|
100
99
|
requirements:
|
101
100
|
- - "~>"
|
102
101
|
- !ruby/object:Gem::Version
|
103
|
-
version: '0.
|
102
|
+
version: '0.9'
|
104
103
|
type: :runtime
|
105
104
|
prerelease: false
|
106
105
|
version_requirements: !ruby/object:Gem::Requirement
|
107
106
|
requirements:
|
108
107
|
- - "~>"
|
109
108
|
- !ruby/object:Gem::Version
|
110
|
-
version: '0.
|
109
|
+
version: '0.9'
|
111
110
|
- !ruby/object:Gem::Dependency
|
112
111
|
name: json
|
113
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -258,10 +257,14 @@ extra_rdoc_files:
|
|
258
257
|
- lib/documentrix/utils/tags.rb
|
259
258
|
- lib/documentrix/version.rb
|
260
259
|
files:
|
260
|
+
- ".envrc"
|
261
261
|
- ".yardopts"
|
262
|
+
- CHANGES.md
|
262
263
|
- Gemfile
|
264
|
+
- LICENSE
|
263
265
|
- README.md
|
264
266
|
- Rakefile
|
267
|
+
- docker-compose.yml
|
265
268
|
- documentrix.gemspec
|
266
269
|
- lib/documentrix.rb
|
267
270
|
- lib/documentrix/documents.rb
|
@@ -278,6 +281,7 @@ files:
|
|
278
281
|
- lib/documentrix/utils/math.rb
|
279
282
|
- lib/documentrix/utils/tags.rb
|
280
283
|
- lib/documentrix/version.rb
|
284
|
+
- redis/redis.conf
|
281
285
|
- spec/assets/embeddings.json
|
282
286
|
- spec/documentrix/documents/cache/memory_cache_spec.rb
|
283
287
|
- spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb
|
@@ -293,10 +297,9 @@ homepage: https://github.com/flori/documentrix
|
|
293
297
|
licenses:
|
294
298
|
- MIT
|
295
299
|
metadata: {}
|
296
|
-
post_install_message:
|
297
300
|
rdoc_options:
|
298
301
|
- "--title"
|
299
|
-
- Documentrix -
|
302
|
+
- Documentrix - Ruby library for embedding vector database
|
300
303
|
- "--main"
|
301
304
|
- README.md
|
302
305
|
require_paths:
|
@@ -312,10 +315,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
312
315
|
- !ruby/object:Gem::Version
|
313
316
|
version: '0'
|
314
317
|
requirements: []
|
315
|
-
rubygems_version: 3.
|
316
|
-
signing_key:
|
318
|
+
rubygems_version: 3.6.7
|
317
319
|
specification_version: 4
|
318
|
-
summary:
|
320
|
+
summary: Ruby library for embedding vector database
|
319
321
|
test_files:
|
320
322
|
- spec/documentrix/documents/cache/memory_cache_spec.rb
|
321
323
|
- spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb
|