documentrix 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGES.md +80 -0
- data/documentrix.gemspec +5 -5
- data/lib/documentrix/documents/cache/common.rb +63 -11
- data/lib/documentrix/documents/cache/records.rb +1 -1
- data/lib/documentrix/documents/cache/redis_cache.rb +3 -3
- data/lib/documentrix/documents/cache/sqlite_cache.rb +132 -33
- data/lib/documentrix/documents/splitters/character.rb +56 -4
- data/lib/documentrix/documents/splitters/common.rb +38 -0
- data/lib/documentrix/documents/splitters/semantic.rb +67 -8
- data/lib/documentrix/documents.rb +133 -29
- data/lib/documentrix/utils/colorize_texts.rb +25 -21
- data/lib/documentrix/utils/digests.rb +78 -0
- data/lib/documentrix/utils.rb +1 -0
- data/lib/documentrix/version.rb +1 -1
- data/spec/documentrix/documents/cache/interface_spec.rb +16 -3
- data/spec/documentrix/documents/cache/memory_cache_spec.rb +64 -2
- data/spec/documentrix/documents/cache/redis_cache_spec.rb +68 -19
- data/spec/documentrix/documents/cache/sqlite_cache_spec.rb +169 -2
- data/spec/documentrix/documents/splitters/character_spec.rb +20 -2
- data/spec/documentrix/documents/splitters/semantic_spec.rb +17 -5
- data/spec/documents_spec.rb +59 -3
- data/spec/utils/colorize_texts_spec.rb +0 -2
- data/spec/utils/digests_spec.rb +97 -0
- data/spec/utils/tags_spec.rb +0 -2
- metadata +7 -1
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
require 'tempfile'
|
|
2
|
+
|
|
3
|
+
# Specs for the Documentrix::Utils::Digests mixin: string digests, file
# digests, and the file digest cache.
describe Documentrix::Utils::Digests do
  # Anonymous host class, so the mixin is exercised in isolation.
  let(:test_class) do
    Class.new do
      include Documentrix::Utils::Digests
    end
  end

  # NOTE(review): `expose` is not defined in this file; presumably it makes
  # the mixin's methods callable from the spec — confirm against the helper
  # that provides it.
  let(:subject) { test_class.new.expose }

  describe '#compute_digest' do
    it 'computes a valid SHA256 digest of a string' do
      text = 'hello world'
      expected = Digest::SHA256.hexdigest(text)
      expect(subject.compute_digest(text)).to eq expected
    end
  end

  describe '#compute_file_digest' do
    it 'returns nil for an empty filename' do
      expect(subject.compute_file_digest(nil)).to be_nil
      expect(subject.compute_file_digest('')).to be_nil
    end

    it 'returns nil for an absolute URL' do
      expect(subject.compute_file_digest('https://example.com/file.txt')).to be_nil
    end

    it 'returns nil for a non-existent file' do
      expect(subject.compute_file_digest('/tmp/non_existent_file_12345')).to be_nil
    end

    it 'computes the digest of a local file' do
      content = 'file content'

      # The block form removes the temporary file once the block returns;
      # Tempfile.create without a block would leave it on disk.
      Tempfile.create('documentrix_test') do |file|
        file.write(content)
        file.close

        expected = Digest::SHA256.hexdigest(content)
        expect(subject.compute_file_digest(file.path)).to eq expected
      end
    end

    context 'with caching' do
      let(:file) { Tempfile.create('documentrix_cache_test') }
      let(:content) { 'initial content' }

      before do
        file.write(content)
        file.close
        subject.file_digest_cache_clear
      end

      # Tempfile.create without a block does not delete the file, so clean up
      # explicitly after every example.
      after do
        File.delete(file.path) if File.exist?(file.path)
      end

      it 'returns the same digest on subsequent calls' do
        digest1 = subject.compute_file_digest(file.path)
        digest2 = subject.compute_file_digest(file.path)

        expect(digest1).to eq Digest::SHA256.hexdigest(content)
        expect(digest2).to eq digest1
      end

      it 'recomputes the digest when the file is modified' do
        digest1 = subject.compute_file_digest(file.path)

        # Update file content and force an mtime change (some file systems
        # have low timestamp precision).
        updated = 'updated content'
        File.write(file.path, updated)
        later = Time.now + 1
        File.utime(later, later, file.path)

        digest2 = subject.compute_file_digest(file.path)
        expect(digest2).not_to eq digest1
        expect(digest2).to eq Digest::SHA256.hexdigest(updated)
      end

      it 'recomputes the digest after cache clear' do
        digest1 = subject.compute_file_digest(file.path)
        subject.file_digest_cache_clear

        # The file has not changed, so re-reading it must yield the same value.
        digest2 = subject.compute_file_digest(file.path)
        expect(digest1).to eq digest2
      end
    end
  end

  describe '#file_digest_cache_clear' do
    it 'clears the internal cache' do
      Tempfile.create('documentrix_clear_test') do |file|
        file.write('test')
        file.close

        # Prime the cache with the digest of the original content.
        subject.compute_file_digest(file.path)

        # Swap in same-sized content and restore the previous mtime, so that a
        # leftover cache entry would be hard to tell apart from the file on
        # disk (presumably the cache is keyed on mtime — verify against the
        # implementation). After clearing, the digest must reflect the new
        # content regardless of how the cache is keyed.
        mtime = File.mtime(file.path)
        File.write(file.path, 'tset')
        File.utime(mtime, mtime, file.path)

        subject.file_digest_cache_clear

        expect(subject.compute_file_digest(file.path)).
          to eq Digest::SHA256.hexdigest('tset')
      end
    end
  end
end
|
data/spec/utils/tags_spec.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: documentrix
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Florian Frank
|
|
@@ -249,9 +249,11 @@ extra_rdoc_files:
|
|
|
249
249
|
- lib/documentrix/documents/cache/redis_cache.rb
|
|
250
250
|
- lib/documentrix/documents/cache/sqlite_cache.rb
|
|
251
251
|
- lib/documentrix/documents/splitters/character.rb
|
|
252
|
+
- lib/documentrix/documents/splitters/common.rb
|
|
252
253
|
- lib/documentrix/documents/splitters/semantic.rb
|
|
253
254
|
- lib/documentrix/utils.rb
|
|
254
255
|
- lib/documentrix/utils/colorize_texts.rb
|
|
256
|
+
- lib/documentrix/utils/digests.rb
|
|
255
257
|
- lib/documentrix/utils/math.rb
|
|
256
258
|
- lib/documentrix/utils/tags.rb
|
|
257
259
|
- lib/documentrix/version.rb
|
|
@@ -274,9 +276,11 @@ files:
|
|
|
274
276
|
- lib/documentrix/documents/cache/redis_cache.rb
|
|
275
277
|
- lib/documentrix/documents/cache/sqlite_cache.rb
|
|
276
278
|
- lib/documentrix/documents/splitters/character.rb
|
|
279
|
+
- lib/documentrix/documents/splitters/common.rb
|
|
277
280
|
- lib/documentrix/documents/splitters/semantic.rb
|
|
278
281
|
- lib/documentrix/utils.rb
|
|
279
282
|
- lib/documentrix/utils/colorize_texts.rb
|
|
283
|
+
- lib/documentrix/utils/digests.rb
|
|
280
284
|
- lib/documentrix/utils/math.rb
|
|
281
285
|
- lib/documentrix/utils/tags.rb
|
|
282
286
|
- lib/documentrix/version.rb
|
|
@@ -291,6 +295,7 @@ files:
|
|
|
291
295
|
- spec/documents_spec.rb
|
|
292
296
|
- spec/spec_helper.rb
|
|
293
297
|
- spec/utils/colorize_texts_spec.rb
|
|
298
|
+
- spec/utils/digests_spec.rb
|
|
294
299
|
- spec/utils/tags_spec.rb
|
|
295
300
|
homepage: https://github.com/flori/documentrix
|
|
296
301
|
licenses:
|
|
@@ -327,4 +332,5 @@ test_files:
|
|
|
327
332
|
- spec/documents_spec.rb
|
|
328
333
|
- spec/spec_helper.rb
|
|
329
334
|
- spec/utils/colorize_texts_spec.rb
|
|
335
|
+
- spec/utils/digests_spec.rb
|
|
330
336
|
- spec/utils/tags_spec.rb
|