picky 4.27.1 → 4.28.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/picky/backends/google_hash.rb +4 -1
- data/lib/picky/backends/memory/basic.rb +13 -7
- data/lib/picky/extensions/string.rb +11 -5
- data/lib/picky/extensions/symbol.rb +8 -2
- data/lib/picky/index.rb +8 -0
- data/lib/picky/indexes.rb +12 -0
- data/lib/picky/loader.rb +3 -1
- data/lib/picky/optimizers/memory/array_deduplicator.rb +26 -0
- data/lib/picky/optimizers.rb +7 -0
- data/lib/picky/query/tokens.rb +1 -1
- data/lib/picky.rb +2 -4
- data/spec/functional/backends/file_spec.rb +11 -11
- data/spec/functional/no_dump_hint_spec.rb +3 -3
- data/spec/functional/optimize_memory_spec.rb +44 -0
- data/spec/lib/extensions/string_spec.rb +3 -0
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/picky_spec.rb +6 -6
- data/spec/lib/query/tokens_spec.rb +15 -1
- metadata +7 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1070a597a2893ecafe13461959d34eb72b774fb1
|
4
|
+
data.tar.gz: 187eba56c158b4a3301360c5321423a37e3410af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 399f943b00ce22fffd54c7cb961155fa6386bd898a3d5566f9778c5e883cdb380da9c873eda6644bf1a5d7629e7fe3916e6db64f7049df75f196880e381d0d95
|
7
|
+
data.tar.gz: 7eaa2a8156eb638476fa08edf92617e67ff687096dc631470402f63b7ce91cc0813ee936badd31d4caf7f41bc16e6431e09eeded924ef3b112189224d37df464
|
@@ -15,9 +15,9 @@ module Picky
|
|
15
15
|
|
16
16
|
include Helpers::File
|
17
17
|
|
18
|
-
# This file's
|
18
|
+
# This file's cache file without extensions.
|
19
19
|
#
|
20
|
-
attr_reader :
|
20
|
+
attr_reader :cache_file_path
|
21
21
|
|
22
22
|
# What hash type to use. Default: ::Hash
|
23
23
|
#
|
@@ -26,11 +26,11 @@ module Picky
|
|
26
26
|
# An index cache takes a path, without file extension,
|
27
27
|
# which will be provided by the subclasses.
|
28
28
|
#
|
29
|
-
def initialize
|
30
|
-
@
|
31
|
-
@hash_type
|
32
|
-
@empty
|
33
|
-
@initial
|
29
|
+
def initialize cache_file_path, hash_type = Hash, options = {}
|
30
|
+
@cache_file_path = cache_file_path
|
31
|
+
@hash_type = hash_type
|
32
|
+
@empty = options[:empty]
|
33
|
+
@initial = options[:initial]
|
34
34
|
end
|
35
35
|
|
36
36
|
# The default extension for index files is "index".
|
@@ -38,6 +38,12 @@ module Picky
|
|
38
38
|
def extension
|
39
39
|
:index
|
40
40
|
end
|
41
|
+
def type
|
42
|
+
:memory
|
43
|
+
end
|
44
|
+
def cache_path
|
45
|
+
[cache_file_path, type, extension].join(?.)
|
46
|
+
end
|
41
47
|
|
42
48
|
# The empty index that is used for putting the index
|
43
49
|
# together before it is dumped into the files.
|
@@ -27,17 +27,23 @@ class String
|
|
27
27
|
#
|
28
28
|
def each_subtoken from_length = 1, range = nil
|
29
29
|
sub = self
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
|
31
|
+
if range
|
32
|
+
unless (range.first.zero? && range.last == -1)
|
33
|
+
sub = sub[range].freeze
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
33
37
|
yield sub
|
34
|
-
|
38
|
+
|
35
39
|
size = sub.size
|
36
40
|
from_length = size + from_length + 1 if from_length < 0
|
37
41
|
from_length = size if size < from_length
|
38
42
|
from_length = 1 if from_length < 1
|
39
43
|
|
40
|
-
size.downto(from_length + 1) { yield sub = sub.chop }
|
44
|
+
size.downto(from_length + 1) { yield sub = sub.chop.freeze }
|
45
|
+
|
46
|
+
sub = nil
|
41
47
|
end
|
42
48
|
|
43
49
|
# 'keys'.each_intoken # => yields each of ['keys', 'key', 'eys', 'ke', 'ey', 'ys', 'k', 'e', 'y', 's']
|
@@ -30,7 +30,11 @@ class Symbol
|
|
30
30
|
def each_subtoken from_length = 1, range = nil
|
31
31
|
sub = self.id2name
|
32
32
|
|
33
|
-
|
33
|
+
if range
|
34
|
+
unless (range.first.zero? && range.last == -1)
|
35
|
+
sub = sub[range]
|
36
|
+
end
|
37
|
+
end
|
34
38
|
|
35
39
|
yield sub.intern
|
36
40
|
|
@@ -39,7 +43,9 @@ class Symbol
|
|
39
43
|
from_length = size if size < from_length
|
40
44
|
from_length = 1 if from_length < 1
|
41
45
|
|
42
|
-
size.downto(from_length + 1)
|
46
|
+
size.downto(from_length + 1) do
|
47
|
+
yield sub.chop!.intern
|
48
|
+
end
|
43
49
|
end
|
44
50
|
|
45
51
|
# :keys.each_intoken # => yields each of [:keys, :key, :eys, :ke, :ey, :ys, :k, :e, :y, :s]
|
data/lib/picky/index.rb
CHANGED
@@ -134,6 +134,14 @@ module Picky
|
|
134
134
|
require_relative 'index/hints'
|
135
135
|
@hints = Hints.new hints
|
136
136
|
end
|
137
|
+
|
138
|
+
# Explicitly trigger memory optimization.
|
139
|
+
#
|
140
|
+
def optimize_memory array_references = Hash.new
|
141
|
+
dedup = Picky::Optimizers::Memory::ArrayDeduplicator.new
|
142
|
+
dedup.deduplicate categories.map(&:exact).map(&:inverted), array_references
|
143
|
+
dedup.deduplicate categories.map(&:partial).map(&:inverted), array_references
|
144
|
+
end
|
137
145
|
|
138
146
|
# TODO Doc.
|
139
147
|
#
|
data/lib/picky/indexes.rb
CHANGED
@@ -43,6 +43,18 @@ module Picky
|
|
43
43
|
@indexes = []
|
44
44
|
@index_mapping = Hash.new
|
45
45
|
end
|
46
|
+
|
47
|
+
# Tries to optimize the memory usage of the indexes.
|
48
|
+
#
|
49
|
+
def optimize_memory array_references = Hash.new
|
50
|
+
dedup = Picky::Optimizers::Memory::ArrayDeduplicator.new
|
51
|
+
@indexes.each do |index|
|
52
|
+
index.optimize_memory array_references
|
53
|
+
end
|
54
|
+
end
|
55
|
+
def self.optimize_memory array_references = Hash.new
|
56
|
+
self.instance.optimize_memory array_references
|
57
|
+
end
|
46
58
|
|
47
59
|
# Registers an index with the indexes.
|
48
60
|
#
|
data/lib/picky/loader.rb
CHANGED
@@ -69,7 +69,9 @@ module Picky
|
|
69
69
|
load_relative 'helpers/measuring',
|
70
70
|
'helpers/indexing',
|
71
71
|
'helpers/identification',
|
72
|
-
'splitter'
|
72
|
+
'splitter',
|
73
|
+
'optimizers',
|
74
|
+
'optimizers/memory/array_deduplicator'
|
73
75
|
end
|
74
76
|
def load_index_generation_strategies
|
75
77
|
load_relative 'indexers/base',
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky::Optimizers::Memory
|
2
|
+
|
3
|
+
# Straightforward implementation of an array deduplicator.
|
4
|
+
# Tries to find duplicate instances of Array values in a hash
|
5
|
+
# and points references that point to a duplicate to one of the
|
6
|
+
# Array instances.
|
7
|
+
#
|
8
|
+
# TODO Could we have C-Ruby point to parts of another Array?
|
9
|
+
#
|
10
|
+
class ArrayDeduplicator
|
11
|
+
|
12
|
+
def deduplicate hashes, array_references = Hash.new
|
13
|
+
hashes.inject(array_references) do |array_references, hash|
|
14
|
+
deduplicate_hash hash, array_references
|
15
|
+
array_references
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def deduplicate_hash hash, array_references
|
20
|
+
hash.each do |k, ary|
|
21
|
+
hash[k] = (array_references[ary] ||= ary)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
26
|
+
end
|
data/lib/picky/query/tokens.rb
CHANGED
data/lib/picky.rb
CHANGED
@@ -11,10 +11,8 @@ module Picky
|
|
11
11
|
# Set default encoding.
|
12
12
|
# (Note: Rails does this already as well)
|
13
13
|
#
|
14
|
-
|
15
|
-
|
16
|
-
# Encoding.default_external = Encoding::UTF_8
|
17
|
-
# Encoding.default_internal = Encoding::UTF_8
|
14
|
+
Encoding.default_external = Encoding::UTF_8
|
15
|
+
Encoding.default_internal = Encoding::UTF_8
|
18
16
|
|
19
17
|
# External libraries.
|
20
18
|
#
|
@@ -7,7 +7,7 @@ require 'spec_helper'
|
|
7
7
|
#
|
8
8
|
describe Picky::Backends::File do
|
9
9
|
|
10
|
-
class
|
10
|
+
class PoetryBook
|
11
11
|
attr_reader :id, :title, :author
|
12
12
|
def initialize id, title, author
|
13
13
|
@id, @title, @author = id, title, author
|
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
|
|
44
44
|
books.search('title').ids.should == []
|
45
45
|
end
|
46
46
|
it 'handles removing with more than one entry' do
|
47
|
-
data.add
|
47
|
+
data.add PoetryBook.new(2, 'title', 'author')
|
48
48
|
|
49
49
|
books.search('title').ids.should == ['2', '1']
|
50
50
|
|
@@ -53,8 +53,8 @@ describe Picky::Backends::File do
|
|
53
53
|
books.search('title').ids.should == ['2']
|
54
54
|
end
|
55
55
|
it 'handles removing with three entries' do
|
56
|
-
data.add
|
57
|
-
data.add
|
56
|
+
data.add PoetryBook.new(2, 'title', 'author')
|
57
|
+
data.add PoetryBook.new(3, 'title', 'author')
|
58
58
|
|
59
59
|
books.search('title').ids.should == ['3', '2', '1']
|
60
60
|
|
@@ -63,7 +63,7 @@ describe Picky::Backends::File do
|
|
63
63
|
books.search('title').ids.should == ['3', '2']
|
64
64
|
end
|
65
65
|
it 'handles replacing' do
|
66
|
-
data.replace
|
66
|
+
data.replace PoetryBook.new(1, 'toitle', 'oithor')
|
67
67
|
|
68
68
|
books.search('title').ids.should == []
|
69
69
|
books.search('toitle').ids.should == ['1']
|
@@ -100,7 +100,7 @@ describe Picky::Backends::File do
|
|
100
100
|
books.search('title').ids.should == []
|
101
101
|
end
|
102
102
|
it 'handles removing with more than one entry' do
|
103
|
-
data.add
|
103
|
+
data.add PoetryBook.new(2, 'title', 'author')
|
104
104
|
|
105
105
|
books.search('title').ids.should == [2, 1]
|
106
106
|
|
@@ -109,8 +109,8 @@ describe Picky::Backends::File do
|
|
109
109
|
books.search('title').ids.should == [2]
|
110
110
|
end
|
111
111
|
it 'handles removing with three entries' do
|
112
|
-
data.add
|
113
|
-
data.add
|
112
|
+
data.add PoetryBook.new(2, 'title', 'author')
|
113
|
+
data.add PoetryBook.new(3, 'title', 'author')
|
114
114
|
|
115
115
|
books.search('title').ids.should == [3, 2, 1]
|
116
116
|
|
@@ -119,7 +119,7 @@ describe Picky::Backends::File do
|
|
119
119
|
books.search('title').ids.should == [3, 2]
|
120
120
|
end
|
121
121
|
it 'handles replacing' do
|
122
|
-
data.replace
|
122
|
+
data.replace PoetryBook.new(1, 'toitle', 'oithor')
|
123
123
|
|
124
124
|
books.search('title').ids.should == []
|
125
125
|
books.search('toitle').ids.should == [1]
|
@@ -144,7 +144,7 @@ describe Picky::Backends::File do
|
|
144
144
|
data.backend described_class.new
|
145
145
|
data.clear
|
146
146
|
|
147
|
-
data.add
|
147
|
+
data.add PoetryBook.new(1, 'title', 'author')
|
148
148
|
end
|
149
149
|
|
150
150
|
instance_eval &its_to_s
|
@@ -157,7 +157,7 @@ describe Picky::Backends::File do
|
|
157
157
|
data.backend described_class.new
|
158
158
|
data.clear
|
159
159
|
|
160
|
-
data.add
|
160
|
+
data.add PoetryBook.new(1, 'title', 'author')
|
161
161
|
end
|
162
162
|
|
163
163
|
instance_eval &its_to_i
|
@@ -5,7 +5,7 @@ require 'ostruct'
|
|
5
5
|
|
6
6
|
describe "Hint: no_dump" do
|
7
7
|
|
8
|
-
|
8
|
+
ComicBook = Struct.new(:id, :title, :author)
|
9
9
|
|
10
10
|
let(:index) do
|
11
11
|
Picky::Index.new :no_dump do
|
@@ -18,14 +18,14 @@ describe "Hint: no_dump" do
|
|
18
18
|
let(:try) { Picky::Search.new index }
|
19
19
|
|
20
20
|
it 'can index and search' do
|
21
|
-
index.replace
|
21
|
+
index.replace ComicBook.new(2, "Title", "Author")
|
22
22
|
|
23
23
|
try.search("title:title").ids.should == [2]
|
24
24
|
end
|
25
25
|
|
26
26
|
context 'dumping and loading' do
|
27
27
|
it "raises" do
|
28
|
-
index.replace
|
28
|
+
index.replace ComicBook.new(2, "Title New", "Author New")
|
29
29
|
|
30
30
|
expect {
|
31
31
|
index.dump
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "Memory optimization" do
|
6
|
+
|
7
|
+
it 'saves memory' do
|
8
|
+
# Remove all indexes.
|
9
|
+
Picky::Indexes.clear_indexes
|
10
|
+
|
11
|
+
index = Picky::Index.new :memory_optimization do
|
12
|
+
category :text1
|
13
|
+
category :text2
|
14
|
+
category :text3
|
15
|
+
category :text4
|
16
|
+
end
|
17
|
+
try = Picky::Search.new index
|
18
|
+
|
19
|
+
thing = Struct.new(:id, :text1, :text2, :text3, :text4)
|
20
|
+
|
21
|
+
require 'objspace'
|
22
|
+
|
23
|
+
GC.start
|
24
|
+
memsize_without_added_thing = ObjectSpace.memsize_of_all(Array)
|
25
|
+
GC.start
|
26
|
+
|
27
|
+
index.add thing.new(1, 'one', 'two', 'three', 'four')
|
28
|
+
|
29
|
+
GC.start
|
30
|
+
memsize_with_added_thing = ObjectSpace.memsize_of_all(Array)
|
31
|
+
GC.start
|
32
|
+
|
33
|
+
memsize_with_added_thing.should > memsize_without_added_thing
|
34
|
+
|
35
|
+
Picky::Indexes.optimize_memory
|
36
|
+
|
37
|
+
GC.start
|
38
|
+
memsize_with_optimized_memory = ObjectSpace.memsize_of_all(Array)
|
39
|
+
GC.start
|
40
|
+
|
41
|
+
memsize_with_optimized_memory.should < memsize_with_added_thing
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
@@ -45,7 +45,7 @@ describe Picky::Indexes do
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
def self.it_forwards_each name
|
48
|
-
describe name do
|
48
|
+
describe "forwarding ##{name}" do
|
49
49
|
it "calls #{name} on each in order" do
|
50
50
|
@index1.should_receive(name).once.with(no_args).ordered
|
51
51
|
@index2.should_receive(name).once.with(no_args).ordered
|
data/spec/lib/picky_spec.rb
CHANGED
@@ -2,17 +2,17 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Picky do
|
4
4
|
|
5
|
-
it 'sets the right
|
5
|
+
it 'sets the right external encoding' do
|
6
6
|
Encoding.default_external.should == Encoding::UTF_8
|
7
7
|
end
|
8
|
-
# THINK What to set default
|
8
|
+
# THINK What to set default internal encoding to?
|
9
9
|
#
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
it 'sets the right internal encoding' do
|
11
|
+
Encoding.default_internal.should == Encoding::UTF_8
|
12
|
+
end
|
13
13
|
|
14
14
|
it 'loads in a simple ruby environment with the defined requirements' do
|
15
|
-
#TODO Picky.root is set to /spec/temp in spec_helper, so is this the "best" way?
|
15
|
+
# TODO Picky.root is set to /spec/temp in spec_helper, so is this the "best" way?
|
16
16
|
load_path = File.expand_path('../../../lib', __FILE__)
|
17
17
|
ruby = File.join(RbConfig::CONFIG['bindir'], RbConfig::CONFIG['ruby_install_name']).sub(/.*\s.*/m, '"\&"')
|
18
18
|
|
@@ -224,10 +224,24 @@ describe Picky::Query::Tokens do
|
|
224
224
|
it 'should work correctly' do
|
225
225
|
(@tokens + @other).to_s.should == 'Hello~ I~ Am A* Token~'
|
226
226
|
end
|
227
|
+
it 'should work correctly' do
|
228
|
+
(@tokens + @other).texts.should == ['hello', 'i', 'am', 'a', 'token']
|
229
|
+
end
|
230
|
+
it 'should work correctly' do
|
231
|
+
(@tokens + @other).originals.should == ['Hello~', 'I~', 'Am', 'A*', 'Token~']
|
232
|
+
end
|
233
|
+
it 'correctly handles ignore_unassigned' do
|
234
|
+
(@tokens + @other).ignore_unassigned.should == false
|
235
|
+
end
|
236
|
+
it 'correctly handles ignore_unassigned' do
|
237
|
+
yet_another = described_class.new [], true
|
238
|
+
(@tokens + yet_another).ignore_unassigned.should == true
|
239
|
+
(yet_another + @tokens).ignore_unassigned.should == true
|
240
|
+
end
|
227
241
|
end
|
228
242
|
|
229
243
|
def self.it_should_forward name
|
230
|
-
describe name do
|
244
|
+
describe "forwarding ##{name}" do
|
231
245
|
before(:each) do
|
232
246
|
@internal_tokens = double :internal_tokens
|
233
247
|
@tokens = described_class.new @internal_tokens
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.
|
4
|
+
version: 4.28.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -52,20 +52,6 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '2009.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: google_hash
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0.8'
|
62
|
-
type: :runtime
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0.8'
|
69
55
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|
70
56
|
email: florian.hanke+picky@gmail.com
|
71
57
|
executables:
|
@@ -182,6 +168,8 @@ files:
|
|
182
168
|
- lib/picky/loggers/default.rb
|
183
169
|
- lib/picky/loggers/silent.rb
|
184
170
|
- lib/picky/loggers/verbose.rb
|
171
|
+
- lib/picky/optimizers.rb
|
172
|
+
- lib/picky/optimizers/memory/array_deduplicator.rb
|
185
173
|
- lib/picky/performant.rb
|
186
174
|
- lib/picky/platforms/macruby.rb
|
187
175
|
- lib/picky/pool.rb
|
@@ -255,6 +243,7 @@ files:
|
|
255
243
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
256
244
|
- spec/functional/object_use_spec.rb
|
257
245
|
- spec/functional/only_spec.rb
|
246
|
+
- spec/functional/optimize_memory_spec.rb
|
258
247
|
- spec/functional/or_spec.rb
|
259
248
|
- spec/functional/pool_spec.rb
|
260
249
|
- spec/functional/range_queries_spec.rb
|
@@ -405,7 +394,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
405
394
|
version: '0'
|
406
395
|
requirements: []
|
407
396
|
rubyforge_project: http://rubyforge.org/projects/picky
|
408
|
-
rubygems_version: 2.4.
|
397
|
+
rubygems_version: 2.4.6
|
409
398
|
signing_key:
|
410
399
|
specification_version: 4
|
411
400
|
summary: 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
|
@@ -439,6 +428,7 @@ test_files:
|
|
439
428
|
- spec/functional/non_specific_ids_larger_than_20_spec.rb
|
440
429
|
- spec/functional/object_use_spec.rb
|
441
430
|
- spec/functional/only_spec.rb
|
431
|
+
- spec/functional/optimize_memory_spec.rb
|
442
432
|
- spec/functional/or_spec.rb
|
443
433
|
- spec/functional/pool_spec.rb
|
444
434
|
- spec/functional/range_queries_spec.rb
|