picky 4.27.1 → 4.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3a32ef9ec55047df769e8a264eee585bab738f5d
4
- data.tar.gz: 6574e4c5d634313b160d0ff291a4c68e8ee9f553
3
+ metadata.gz: 1070a597a2893ecafe13461959d34eb72b774fb1
4
+ data.tar.gz: 187eba56c158b4a3301360c5321423a37e3410af
5
5
  SHA512:
6
- metadata.gz: ecbb19fb08c6e69ad40bc9a9d6b7416db8ad5b5fc1ac6d340070714cc1e02d70c01ed68fd0de8bd79628af5cb847126983b92c1f847b9bedbd5ffaa2134d85aa
7
- data.tar.gz: 9fe9fbcd9109b245777bd48246d466078646a602260e278f73e9c0ba0233e2b69e852903af046412acdebc169072544435d8b2b0eef4d5a9ffdb8f0b400ea693
6
+ metadata.gz: 399f943b00ce22fffd54c7cb961155fa6386bd898a3d5566f9778c5e883cdb380da9c873eda6644bf1a5d7629e7fe3916e6db64f7049df75f196880e381d0d95
7
+ data.tar.gz: 7eaa2a8156eb638476fa08edf92617e67ff687096dc631470402f63b7ce91cc0813ee936badd31d4caf7f41bc16e6431e09eeded924ef3b112189224d37df464
@@ -1,4 +1,5 @@
1
- if require 'google_hash'
1
+ begin
2
+ require 'google_hash'
2
3
 
3
4
  class ::GoogleHashSparseRubyToRuby
4
5
 
@@ -47,4 +48,6 @@ if require 'google_hash'
47
48
 
48
49
  end
49
50
 
51
+ rescue LoadError
52
+ # Welp. Don't do anything.
50
53
  end
@@ -15,9 +15,9 @@ module Picky
15
15
 
16
16
  include Helpers::File
17
17
 
18
- # This file's location.
18
+ # This file's cache file without extensions.
19
19
  #
20
- attr_reader :cache_path
20
+ attr_reader :cache_file_path
21
21
 
22
22
  # What hash type to use. Default: ::Hash
23
23
  #
@@ -26,11 +26,11 @@ module Picky
26
26
  # An index cache takes a path, without file extension,
27
27
  # which will be provided by the subclasses.
28
28
  #
29
- def initialize cache_path, hash_type = Hash, options = {}
30
- @cache_path = "#{cache_path}.memory.#{extension}"
31
- @hash_type = hash_type
32
- @empty = options[:empty]
33
- @initial = options[:initial]
29
+ def initialize cache_file_path, hash_type = Hash, options = {}
30
+ @cache_file_path = cache_file_path
31
+ @hash_type = hash_type
32
+ @empty = options[:empty]
33
+ @initial = options[:initial]
34
34
  end
35
35
 
36
36
  # The default extension for index files is "index".
@@ -38,6 +38,12 @@ module Picky
38
38
  def extension
39
39
  :index
40
40
  end
41
+ def type
42
+ :memory
43
+ end
44
+ def cache_path
45
+ [cache_file_path, type, extension].join(?.)
46
+ end
41
47
 
42
48
  # The empty index that is used for putting the index
43
49
  # together before it is dumped into the files.
@@ -27,17 +27,23 @@ class String
27
27
  #
28
28
  def each_subtoken from_length = 1, range = nil
29
29
  sub = self
30
-
31
- sub = sub[range] if range
32
-
30
+
31
+ if range
32
+ unless (range.first.zero? && range.last == -1)
33
+ sub = sub[range].freeze
34
+ end
35
+ end
36
+
33
37
  yield sub
34
-
38
+
35
39
  size = sub.size
36
40
  from_length = size + from_length + 1 if from_length < 0
37
41
  from_length = size if size < from_length
38
42
  from_length = 1 if from_length < 1
39
43
 
40
- size.downto(from_length + 1) { yield sub = sub.chop }
44
+ size.downto(from_length + 1) { yield sub = sub.chop.freeze }
45
+
46
+ sub = nil
41
47
  end
42
48
 
43
49
  # 'keys'.each_intoken # => yields each of ['keys', 'key', 'eys', 'ke', 'ey', 'ys', 'k', 'e', 'y', 's']
@@ -30,7 +30,11 @@ class Symbol
30
30
  def each_subtoken from_length = 1, range = nil
31
31
  sub = self.id2name
32
32
 
33
- sub = sub[range] if range
33
+ if range
34
+ unless (range.first.zero? && range.last == -1)
35
+ sub = sub[range]
36
+ end
37
+ end
34
38
 
35
39
  yield sub.intern
36
40
 
@@ -39,7 +43,9 @@ class Symbol
39
43
  from_length = size if size < from_length
40
44
  from_length = 1 if from_length < 1
41
45
 
42
- size.downto(from_length + 1) { yield sub.chop!.intern }
46
+ size.downto(from_length + 1) do
47
+ yield sub.chop!.intern
48
+ end
43
49
  end
44
50
 
45
51
  # :keys.each_intoken # => yields each of [:keys, :key, :eys, :ke, :ey, :ys, :k, :e, :y, :s]
data/lib/picky/index.rb CHANGED
@@ -134,6 +134,14 @@ module Picky
134
134
  require_relative 'index/hints'
135
135
  @hints = Hints.new hints
136
136
  end
137
+
138
+ # Explicitly trigger memory optimization.
139
+ #
140
+ def optimize_memory array_references = Hash.new
141
+ dedup = Picky::Optimizers::Memory::ArrayDeduplicator.new
142
+ dedup.deduplicate categories.map(&:exact).map(&:inverted), array_references
143
+ dedup.deduplicate categories.map(&:partial).map(&:inverted), array_references
144
+ end
137
145
 
138
146
  # TODO Doc.
139
147
  #
data/lib/picky/indexes.rb CHANGED
@@ -43,6 +43,18 @@ module Picky
43
43
  @indexes = []
44
44
  @index_mapping = Hash.new
45
45
  end
46
+
47
+ # Tries to optimize the memory usage of the indexes.
48
+ #
49
+ def optimize_memory array_references = Hash.new
50
+ dedup = Picky::Optimizers::Memory::ArrayDeduplicator.new
51
+ @indexes.each do |index|
52
+ index.optimize_memory array_references
53
+ end
54
+ end
55
+ def self.optimize_memory array_references = Hash.new
56
+ self.instance.optimize_memory array_references
57
+ end
46
58
 
47
59
  # Registers an index with the indexes.
48
60
  #
data/lib/picky/loader.rb CHANGED
@@ -69,7 +69,9 @@ module Picky
69
69
  load_relative 'helpers/measuring',
70
70
  'helpers/indexing',
71
71
  'helpers/identification',
72
- 'splitter'
72
+ 'splitter',
73
+ 'optimizers',
74
+ 'optimizers/memory/array_deduplicator'
73
75
  end
74
76
  def load_index_generation_strategies
75
77
  load_relative 'indexers/base',
@@ -0,0 +1,26 @@
1
+ module Picky::Optimizers::Memory
2
+
3
+ # Straightforward implementation of an array deduplicator.
4
+ # Tries to find duplicate instances of Array values in a hash
5
+ # and points references that point to a duplicate to one of the
6
+ # Array instances.
7
+ #
8
+ # TODO Could we have C-Ruby point to parts of another Array?
9
+ #
10
+ class ArrayDeduplicator
11
+
12
+ def deduplicate hashes, array_references = Hash.new
13
+ hashes.inject(array_references) do |array_references, hash|
14
+ deduplicate_hash hash, array_references
15
+ array_references
16
+ end
17
+ end
18
+
19
+ def deduplicate_hash hash, array_references
20
+ hash.each do |k, ary|
21
+ hash[k] = (array_references[ary] ||= ary)
22
+ end
23
+ end
24
+
25
+ end
26
+ end
@@ -0,0 +1,7 @@
1
+ module Picky
2
+ module Optimizers
3
+ module Memory
4
+
5
+ end
6
+ end
7
+ end
@@ -80,7 +80,7 @@ module Picky
80
80
  @tokens.map(&:original)
81
81
  end
82
82
  def original
83
- @tokens.map(&:original)
83
+ originals
84
84
  end
85
85
  # TODO
86
86
  #
data/lib/picky.rb CHANGED
@@ -11,10 +11,8 @@ module Picky
11
11
  # Set default encoding.
12
12
  # (Note: Rails does this already as well)
13
13
  #
14
- # THINK Set default encoding?
15
- #
16
- # Encoding.default_external = Encoding::UTF_8
17
- # Encoding.default_internal = Encoding::UTF_8
14
+ Encoding.default_external = Encoding::UTF_8
15
+ Encoding.default_internal = Encoding::UTF_8
18
16
 
19
17
  # External libraries.
20
18
  #
@@ -7,7 +7,7 @@ require 'spec_helper'
7
7
  #
8
8
  describe Picky::Backends::File do
9
9
 
10
- class Book
10
+ class PoetryBook
11
11
  attr_reader :id, :title, :author
12
12
  def initialize id, title, author
13
13
  @id, @title, @author = id, title, author
@@ -44,7 +44,7 @@ describe Picky::Backends::File do
44
44
  books.search('title').ids.should == []
45
45
  end
46
46
  it 'handles removing with more than one entry' do
47
- data.add Book.new(2, 'title', 'author')
47
+ data.add PoetryBook.new(2, 'title', 'author')
48
48
 
49
49
  books.search('title').ids.should == ['2', '1']
50
50
 
@@ -53,8 +53,8 @@ describe Picky::Backends::File do
53
53
  books.search('title').ids.should == ['2']
54
54
  end
55
55
  it 'handles removing with three entries' do
56
- data.add Book.new(2, 'title', 'author')
57
- data.add Book.new(3, 'title', 'author')
56
+ data.add PoetryBook.new(2, 'title', 'author')
57
+ data.add PoetryBook.new(3, 'title', 'author')
58
58
 
59
59
  books.search('title').ids.should == ['3', '2', '1']
60
60
 
@@ -63,7 +63,7 @@ describe Picky::Backends::File do
63
63
  books.search('title').ids.should == ['3', '2']
64
64
  end
65
65
  it 'handles replacing' do
66
- data.replace Book.new(1, 'toitle', 'oithor')
66
+ data.replace PoetryBook.new(1, 'toitle', 'oithor')
67
67
 
68
68
  books.search('title').ids.should == []
69
69
  books.search('toitle').ids.should == ['1']
@@ -100,7 +100,7 @@ describe Picky::Backends::File do
100
100
  books.search('title').ids.should == []
101
101
  end
102
102
  it 'handles removing with more than one entry' do
103
- data.add Book.new(2, 'title', 'author')
103
+ data.add PoetryBook.new(2, 'title', 'author')
104
104
 
105
105
  books.search('title').ids.should == [2, 1]
106
106
 
@@ -109,8 +109,8 @@ describe Picky::Backends::File do
109
109
  books.search('title').ids.should == [2]
110
110
  end
111
111
  it 'handles removing with three entries' do
112
- data.add Book.new(2, 'title', 'author')
113
- data.add Book.new(3, 'title', 'author')
112
+ data.add PoetryBook.new(2, 'title', 'author')
113
+ data.add PoetryBook.new(3, 'title', 'author')
114
114
 
115
115
  books.search('title').ids.should == [3, 2, 1]
116
116
 
@@ -119,7 +119,7 @@ describe Picky::Backends::File do
119
119
  books.search('title').ids.should == [3, 2]
120
120
  end
121
121
  it 'handles replacing' do
122
- data.replace Book.new(1, 'toitle', 'oithor')
122
+ data.replace PoetryBook.new(1, 'toitle', 'oithor')
123
123
 
124
124
  books.search('title').ids.should == []
125
125
  books.search('toitle').ids.should == [1]
@@ -144,7 +144,7 @@ describe Picky::Backends::File do
144
144
  data.backend described_class.new
145
145
  data.clear
146
146
 
147
- data.add Book.new(1, 'title', 'author')
147
+ data.add PoetryBook.new(1, 'title', 'author')
148
148
  end
149
149
 
150
150
  instance_eval &its_to_s
@@ -157,7 +157,7 @@ describe Picky::Backends::File do
157
157
  data.backend described_class.new
158
158
  data.clear
159
159
 
160
- data.add Book.new(1, 'title', 'author')
160
+ data.add PoetryBook.new(1, 'title', 'author')
161
161
  end
162
162
 
163
163
  instance_eval &its_to_i
@@ -5,7 +5,7 @@ require 'ostruct'
5
5
 
6
6
  describe "Hint: no_dump" do
7
7
 
8
- Book = Struct.new(:id, :title, :author)
8
+ ComicBook = Struct.new(:id, :title, :author)
9
9
 
10
10
  let(:index) do
11
11
  Picky::Index.new :no_dump do
@@ -18,14 +18,14 @@ describe "Hint: no_dump" do
18
18
  let(:try) { Picky::Search.new index }
19
19
 
20
20
  it 'can index and search' do
21
- index.replace Book.new(2, "Title", "Author")
21
+ index.replace ComicBook.new(2, "Title", "Author")
22
22
 
23
23
  try.search("title:title").ids.should == [2]
24
24
  end
25
25
 
26
26
  context 'dumping and loading' do
27
27
  it "raises" do
28
- index.replace Book.new(2, "Title New", "Author New")
28
+ index.replace ComicBook.new(2, "Title New", "Author New")
29
29
 
30
30
  expect {
31
31
  index.dump
@@ -0,0 +1,44 @@
1
+ # encoding: utf-8
2
+ #
3
+ require 'spec_helper'
4
+
5
+ describe "Memory optimization" do
6
+
7
+ it 'saves memory' do
8
+ # Remove all indexes.
9
+ Picky::Indexes.clear_indexes
10
+
11
+ index = Picky::Index.new :memory_optimization do
12
+ category :text1
13
+ category :text2
14
+ category :text3
15
+ category :text4
16
+ end
17
+ try = Picky::Search.new index
18
+
19
+ thing = Struct.new(:id, :text1, :text2, :text3, :text4)
20
+
21
+ require 'objspace'
22
+
23
+ GC.start
24
+ memsize_without_added_thing = ObjectSpace.memsize_of_all(Array)
25
+ GC.start
26
+
27
+ index.add thing.new(1, 'one', 'two', 'three', 'four')
28
+
29
+ GC.start
30
+ memsize_with_added_thing = ObjectSpace.memsize_of_all(Array)
31
+ GC.start
32
+
33
+ memsize_with_added_thing.should > memsize_without_added_thing
34
+
35
+ Picky::Indexes.optimize_memory
36
+
37
+ GC.start
38
+ memsize_with_optimized_memory = ObjectSpace.memsize_of_all(Array)
39
+ GC.start
40
+
41
+ memsize_with_optimized_memory.should < memsize_with_added_thing
42
+ end
43
+
44
+ end
@@ -2,6 +2,9 @@
2
2
  #
3
3
  require 'spec_helper'
4
4
 
5
+ # Requiring text here as it's optional in Picky.
6
+ require 'text'
7
+
5
8
  describe String do
6
9
 
7
10
  context 'performance' do
@@ -45,7 +45,7 @@ describe Picky::Indexes do
45
45
  end
46
46
  end
47
47
  def self.it_forwards_each name
48
- describe name do
48
+ describe "forwarding ##{name}" do
49
49
  it "calls #{name} on each in order" do
50
50
  @index1.should_receive(name).once.with(no_args).ordered
51
51
  @index2.should_receive(name).once.with(no_args).ordered
@@ -2,17 +2,17 @@ require 'spec_helper'
2
2
 
3
3
  describe Picky do
4
4
 
5
- it 'sets the right internal encoding' do
5
+ it 'sets the right external encoding' do
6
6
  Encoding.default_external.should == Encoding::UTF_8
7
7
  end
8
- # THINK What to set default external encoding to?
8
+ # THINK What to set default internal encoding to?
9
9
  #
10
- # it 'sets the right external encoding' do
11
- # Encoding.default_internal.should == Encoding::UTF_8
12
- # end
10
+ it 'sets the right internal encoding' do
11
+ Encoding.default_internal.should == Encoding::UTF_8
12
+ end
13
13
 
14
14
  it 'loads in a simple ruby environment with the defined requirements' do
15
- #TODO Picky.root is set to /spec/temp in spec_helper, so is this the "best" way?
15
+ # TODO Picky.root is set to /spec/temp in spec_helper, so is this the "best" way?
16
16
  load_path = File.expand_path('../../../lib', __FILE__)
17
17
  ruby = File.join(RbConfig::CONFIG['bindir'], RbConfig::CONFIG['ruby_install_name']).sub(/.*\s.*/m, '"\&"')
18
18
 
@@ -224,10 +224,24 @@ describe Picky::Query::Tokens do
224
224
  it 'should work correctly' do
225
225
  (@tokens + @other).to_s.should == 'Hello~ I~ Am A* Token~'
226
226
  end
227
+ it 'should work correctly' do
228
+ (@tokens + @other).texts.should == ['hello', 'i', 'am', 'a', 'token']
229
+ end
230
+ it 'should work correctly' do
231
+ (@tokens + @other).originals.should == ['Hello~', 'I~', 'Am', 'A*', 'Token~']
232
+ end
233
+ it 'correctly handles ignore_unassigned' do
234
+ (@tokens + @other).ignore_unassigned.should == false
235
+ end
236
+ it 'correctly handles ignore_unassigned' do
237
+ yet_another = described_class.new [], true
238
+ (@tokens + yet_another).ignore_unassigned.should == true
239
+ (yet_another + @tokens).ignore_unassigned.should == true
240
+ end
227
241
  end
228
242
 
229
243
  def self.it_should_forward name
230
- describe name do
244
+ describe "forwarding ##{name}" do
231
245
  before(:each) do
232
246
  @internal_tokens = double :internal_tokens
233
247
  @tokens = described_class.new @internal_tokens
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.27.1
4
+ version: 4.28.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Hanke
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-03 00:00:00.000000000 Z
11
+ date: 2015-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -52,20 +52,6 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2009.0'
55
- - !ruby/object:Gem::Dependency
56
- name: google_hash
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '0.8'
62
- type: :runtime
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '0.8'
69
55
  description: Fast Ruby semantic text search engine with comfortable single field interface.
70
56
  email: florian.hanke+picky@gmail.com
71
57
  executables:
@@ -182,6 +168,8 @@ files:
182
168
  - lib/picky/loggers/default.rb
183
169
  - lib/picky/loggers/silent.rb
184
170
  - lib/picky/loggers/verbose.rb
171
+ - lib/picky/optimizers.rb
172
+ - lib/picky/optimizers/memory/array_deduplicator.rb
185
173
  - lib/picky/performant.rb
186
174
  - lib/picky/platforms/macruby.rb
187
175
  - lib/picky/pool.rb
@@ -255,6 +243,7 @@ files:
255
243
  - spec/functional/non_specific_ids_larger_than_20_spec.rb
256
244
  - spec/functional/object_use_spec.rb
257
245
  - spec/functional/only_spec.rb
246
+ - spec/functional/optimize_memory_spec.rb
258
247
  - spec/functional/or_spec.rb
259
248
  - spec/functional/pool_spec.rb
260
249
  - spec/functional/range_queries_spec.rb
@@ -405,7 +394,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
405
394
  version: '0'
406
395
  requirements: []
407
396
  rubyforge_project: http://rubyforge.org/projects/picky
408
- rubygems_version: 2.4.1
397
+ rubygems_version: 2.4.6
409
398
  signing_key:
410
399
  specification_version: 4
411
400
  summary: 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
@@ -439,6 +428,7 @@ test_files:
439
428
  - spec/functional/non_specific_ids_larger_than_20_spec.rb
440
429
  - spec/functional/object_use_spec.rb
441
430
  - spec/functional/only_spec.rb
431
+ - spec/functional/optimize_memory_spec.rb
442
432
  - spec/functional/or_spec.rb
443
433
  - spec/functional/pool_spec.rb
444
434
  - spec/functional/range_queries_spec.rb