documentrix 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.envrc +1 -0
- data/CHANGES.md +13 -0
- data/Rakefile +1 -1
- data/documentrix.gemspec +7 -7
- data/lib/documentrix/utils/tags.rb +15 -6
- data/lib/documentrix/version.rb +1 -1
- data/spec/utils/tags_spec.rb +15 -0
- metadata +8 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3adb452ed3a325dfd48b90e46fb3d75fc060f00e98c4faaa1e1e5646e3166651
|
4
|
+
data.tar.gz: 6328ec106db80374061e3ec96b8ece3b85da6c353d937178561518c4e50b42a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7cc0997fcaf8aeab5d3da01a6b6aecd7d90a89e869a4b5dd2f9e6874b7f5e94f62028a708b223eb7f4c69c0cab23ab5cd7aed416dbbe52e5c659ca13d67b3a19
|
7
|
+
data.tar.gz: 31b958b1bcd08ccfdfd178b826a05c28b8b3c86818ee29acb03021a1d8c5dfe865b48692e9024bd83ceb62292793baa748c3dcfb56d26bd10968113015c477fa
|
data/.envrc
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
export REDIS_URL=redis://localhost:9736
|
data/CHANGES.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
# Changes
|
2
2
|
|
3
|
+
## 2025-05-26 v0.0.2
|
4
|
+
|
5
|
+
* Documentrix::Utils::Tags enhancements for improved tagging functionality:
|
6
|
+
* Added `valid_tag` parameter to `initialize` method with default value.
|
7
|
+
* Introduced `DEFAULT_VALID_TAG` regular expression constant.
|
8
|
+
* Updated `initialize` and `add` methods to use new `valid_tag` parameter.
|
9
|
+
* Added `attr_reader` for `valid_tag`.
|
10
|
+
* Added `describe` block for `Tag` in `tags_spec.rb`.
|
11
|
+
* Added three `it` blocks to test instantiation, default tag value trimming, and custom regex usage.
|
12
|
+
* Update `.envrc` file to include Redis connection settings:
|
13
|
+
* Added `REDIS_URL` environment variable with value `redis://localhost:9736`
|
14
|
+
* Created new `.envrc` file in the root directory.
|
15
|
+
|
3
16
|
## 2025-01-29 v0.0.1
|
4
17
|
|
5
18
|
* Added docker-compose redis
|
data/Rakefile
CHANGED
data/documentrix.gemspec
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
# -*- encoding: utf-8 -*-
|
2
|
-
# stub: documentrix 0.0.1 ruby lib
|
2
|
+
# stub: documentrix 0.0.2 ruby lib
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "documentrix".freeze
|
6
|
-
s.version = "0.0.1".freeze
|
6
|
+
s.version = "0.0.2".freeze
|
7
7
|
|
8
8
|
s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
|
9
9
|
s.require_paths = ["lib".freeze]
|
10
10
|
s.authors = ["Florian Frank".freeze]
|
11
|
-
s.date = "
|
11
|
+
s.date = "1980-01-02"
|
12
12
|
s.description = "The Ruby library, Documentrix, is designed to provide a way to build and\nquery vector databases for applications in natural language processing\n(NLP) and large language models (LLMs). It allows users to store and\nretrieve dense vector embeddings for text strings.\n".freeze
|
13
13
|
s.email = "flori@ping.de".freeze
|
14
14
|
s.extra_rdoc_files = ["README.md".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze]
|
15
|
-
s.files = [".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
15
|
+
s.files = [".envrc".freeze, ".yardopts".freeze, "CHANGES.md".freeze, "Gemfile".freeze, "LICENSE".freeze, "README.md".freeze, "Rakefile".freeze, "docker-compose.yml".freeze, "documentrix.gemspec".freeze, "lib/documentrix.rb".freeze, "lib/documentrix/documents.rb".freeze, "lib/documentrix/documents/cache/common.rb".freeze, "lib/documentrix/documents/cache/memory_cache.rb".freeze, "lib/documentrix/documents/cache/records.rb".freeze, "lib/documentrix/documents/cache/redis_backed_memory_cache.rb".freeze, "lib/documentrix/documents/cache/redis_cache.rb".freeze, "lib/documentrix/documents/cache/sqlite_cache.rb".freeze, "lib/documentrix/documents/splitters/character.rb".freeze, "lib/documentrix/documents/splitters/semantic.rb".freeze, "lib/documentrix/utils.rb".freeze, "lib/documentrix/utils/colorize_texts.rb".freeze, "lib/documentrix/utils/math.rb".freeze, "lib/documentrix/utils/tags.rb".freeze, "lib/documentrix/version.rb".freeze, "redis/redis.conf".freeze, "spec/assets/embeddings.json".freeze, "spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
16
16
|
s.homepage = "https://github.com/flori/documentrix".freeze
|
17
17
|
s.licenses = ["MIT".freeze]
|
18
18
|
s.rdoc_options = ["--title".freeze, "Documentrix - Ruby library for embedding vector database".freeze, "--main".freeze, "README.md".freeze]
|
19
19
|
s.required_ruby_version = Gem::Requirement.new("~> 3.1".freeze)
|
20
|
-
s.rubygems_version = "3.6.
|
20
|
+
s.rubygems_version = "3.6.7".freeze
|
21
21
|
s.summary = "Ruby library for embedding vector database".freeze
|
22
22
|
s.test_files = ["spec/documentrix/documents/cache/memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_backed_memory_cache_spec.rb".freeze, "spec/documentrix/documents/cache/redis_cache_spec.rb".freeze, "spec/documentrix/documents/cache/sqlite_cache_spec.rb".freeze, "spec/documentrix/documents/splitters/character_spec.rb".freeze, "spec/documentrix/documents/splitters/semantic_spec.rb".freeze, "spec/documents_spec.rb".freeze, "spec/spec_helper.rb".freeze, "spec/utils/colorize_texts_spec.rb".freeze, "spec/utils/tags_spec.rb".freeze]
|
23
23
|
|
24
24
|
s.specification_version = 4
|
25
25
|
|
26
|
-
s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.
|
26
|
+
s.add_development_dependency(%q<gem_hadar>.freeze, ["~> 1.20".freeze])
|
27
27
|
s.add_development_dependency(%q<all_images>.freeze, ["~> 0.6".freeze])
|
28
28
|
s.add_development_dependency(%q<rspec>.freeze, ["~> 3.2".freeze])
|
29
29
|
s.add_development_dependency(%q<kramdown>.freeze, ["~> 2.0".freeze])
|
30
30
|
s.add_development_dependency(%q<debug>.freeze, [">= 0".freeze])
|
31
31
|
s.add_development_dependency(%q<simplecov>.freeze, [">= 0".freeze])
|
32
|
-
s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.
|
32
|
+
s.add_runtime_dependency(%q<infobar>.freeze, ["~> 0.9".freeze])
|
33
33
|
s.add_runtime_dependency(%q<json>.freeze, ["~> 2.0".freeze])
|
34
34
|
s.add_runtime_dependency(%q<tins>.freeze, ["~> 1.34".freeze])
|
35
35
|
s.add_runtime_dependency(%q<sqlite-vec>.freeze, ["~> 0.0".freeze])
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'term/ansicolor'
|
2
2
|
|
3
3
|
class Documentrix::Utils::Tags
|
4
|
+
# Matches tags with optional leading # characters and at least one non-space
|
5
|
+
# character by default:
|
6
|
+
DEFAULT_VALID_TAG = /\A#*(\S+)/
|
7
|
+
|
4
8
|
class Tag < String
|
5
9
|
include Term::ANSIColor
|
6
10
|
|
@@ -9,13 +13,15 @@ class Documentrix::Utils::Tags
|
|
9
13
|
#
|
10
14
|
# @param tag [String] the string representation of the tag
|
11
15
|
# @param source [String, nil] the source URL for the tag (default: nil)
|
12
|
-
def initialize(tag, source: nil)
|
13
|
-
super(tag.to_s
|
16
|
+
def initialize(tag, valid_tag: DEFAULT_VALID_TAG, source: nil)
|
17
|
+
super(tag.to_s[valid_tag, 1])
|
14
18
|
self.source = source
|
15
19
|
end
|
16
20
|
|
17
21
|
attr_accessor :source # the source URL for the tag
|
18
22
|
|
23
|
+
attr_reader :valid_tag # the regular expression capturing a valid tag's content
|
24
|
+
|
19
25
|
# The to_s method formats the tag string for output, including source URL
|
20
26
|
# if requested.
|
21
27
|
#
|
@@ -46,15 +52,18 @@ class Documentrix::Utils::Tags
|
|
46
52
|
# Documentrix::Utils::Tags.new(%w[ foo bar ])
|
47
53
|
#
|
48
54
|
# @return [Documentrix::Utils::Tags] an instance of Documentrix::Utils::Tags
|
49
|
-
def initialize(tags = [], source: nil)
|
50
|
-
tags
|
51
|
-
@
|
55
|
+
def initialize(tags = [], valid_tag: DEFAULT_VALID_TAG, source: nil)
|
56
|
+
tags = Array(tags)
|
57
|
+
@valid_tag = valid_tag
|
58
|
+
@set = []
|
52
59
|
tags.each { |tag| add(tag, source:) }
|
53
60
|
end
|
54
61
|
|
62
|
+
attr_reader :valid_tag # the regular expression capturing a valid tag's content
|
63
|
+
|
55
64
|
def add(tag, source: nil)
|
56
65
|
unless tag.is_a?(Tag)
|
57
|
-
tag = Tag.new(tag, source:)
|
66
|
+
tag = Tag.new(tag, valid_tag:, source:)
|
58
67
|
end
|
59
68
|
index = @set.bsearch_index { _1 >= tag }
|
60
69
|
if index == nil
|
data/lib/documentrix/version.rb
CHANGED
data/spec/utils/tags_spec.rb
CHANGED
@@ -50,4 +50,19 @@ RSpec.describe Documentrix::Utils::Tags do
|
|
50
50
|
"\e]8;;file:///path/to/bar.html\e\\#bar\e]8;;\e\\ \e]8;;https://foo.example.com\e\\#foo\e]8;;\e\\"
|
51
51
|
)
|
52
52
|
end
|
53
|
+
|
54
|
+
describe described_class::Tag do
|
55
|
+
it 'can be instantiated' do
|
56
|
+
expect(described_class.new('#foo')).to be_a described_class
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'cannot contain a space by default' do
|
60
|
+
expect(described_class.new('#foo bar')).to eq 'foo'
|
61
|
+
end
|
62
|
+
|
63
|
+
it 'can use a custom regular expression for capturing the tag value' do
|
64
|
+
valid_tag = /\A#*([\w\]\[]+)/
|
65
|
+
expect(described_class.new('#foo[bar]!!!!', valid_tag:)).to eq 'foo[bar]'
|
66
|
+
end
|
67
|
+
end
|
53
68
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: documentrix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Frank
|
8
8
|
bindir: bin
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: gem_hadar
|
@@ -15,14 +15,14 @@ dependencies:
|
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version: '1.
|
18
|
+
version: '1.20'
|
19
19
|
type: :development
|
20
20
|
prerelease: false
|
21
21
|
version_requirements: !ruby/object:Gem::Requirement
|
22
22
|
requirements:
|
23
23
|
- - "~>"
|
24
24
|
- !ruby/object:Gem::Version
|
25
|
-
version: '1.
|
25
|
+
version: '1.20'
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: all_images
|
28
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -99,14 +99,14 @@ dependencies:
|
|
99
99
|
requirements:
|
100
100
|
- - "~>"
|
101
101
|
- !ruby/object:Gem::Version
|
102
|
-
version: '0.
|
102
|
+
version: '0.9'
|
103
103
|
type: :runtime
|
104
104
|
prerelease: false
|
105
105
|
version_requirements: !ruby/object:Gem::Requirement
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: '0.
|
109
|
+
version: '0.9'
|
110
110
|
- !ruby/object:Gem::Dependency
|
111
111
|
name: json
|
112
112
|
requirement: !ruby/object:Gem::Requirement
|
@@ -257,6 +257,7 @@ extra_rdoc_files:
|
|
257
257
|
- lib/documentrix/utils/tags.rb
|
258
258
|
- lib/documentrix/version.rb
|
259
259
|
files:
|
260
|
+
- ".envrc"
|
260
261
|
- ".yardopts"
|
261
262
|
- CHANGES.md
|
262
263
|
- Gemfile
|
@@ -314,7 +315,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
314
315
|
- !ruby/object:Gem::Version
|
315
316
|
version: '0'
|
316
317
|
requirements: []
|
317
|
-
rubygems_version: 3.6.
|
318
|
+
rubygems_version: 3.6.7
|
318
319
|
specification_version: 4
|
319
320
|
summary: Ruby library for embedding vector database
|
320
321
|
test_files:
|