picky 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/auxiliary/terminal.rb +147 -0
- data/lib/picky/indexing/indexes.rb +0 -2
- data/lib/picky/internals/ext/ruby19/performant.c +22 -17
- data/lib/picky/internals/index/redis/list_hash.rb +0 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +1 -1
- data/lib/picky/internals/indexed/category.rb +0 -2
- data/lib/picky/internals/indexers/solr.rb +10 -8
- data/lib/picky/internals/indexing/bundle/base.rb +0 -3
- data/lib/picky/internals/indexing/bundle/super_base.rb +13 -13
- data/lib/picky/internals/indexing/category.rb +4 -3
- data/lib/picky/internals/query/allocations.rb +0 -2
- data/lib/picky/internals/query/combinations/memory.rb +5 -11
- data/lib/picky/internals/query/indexes.rb +27 -2
- data/lib/picky/internals/query/tokens.rb +7 -1
- data/lib/picky/search.rb +5 -53
- data/lib/tasks/search.rake +7 -0
- data/spec/ext/performant_spec.rb +27 -28
- data/spec/lib/auxiliary/terminal_spec.rb +56 -0
- data/spec/lib/generators/aliases_spec.rb +15 -1
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +42 -0
- data/spec/lib/query/indexes_spec.rb +21 -0
- data/spec/lib/search_spec.rb +5 -73
- metadata +8 -2
@@ -0,0 +1,147 @@
|
|
1
|
+
# Interactive terminal front end for a running Picky server.
#
# Reads single key presses, sends the text typed so far to the server
# through a Picky::Client and shows the total result count (and, on
# enter, the result ids) on the current terminal line.
#
# Line layout used by the methods below:
#   columns 0..8   the result count, right aligned ("%9d")
#   column  10..   the text typed so far
#   two columns after the text: "=> [ids]"
#
# @cursor_offset tracks the column the cursor is believed to be in, so
# that all moves can be issued as relative cursor movements.
#
class Terminal

  attr_reader :client

  # Sets up the client for the given server location.
  #
  # given_uri is either just a path ("/books") or host, port and path
  # ("localhost:8080/books"). Host defaults to localhost, port to 8080.
  #
  # Exits the process (status 1) if the highline or the picky-client gem
  # cannot be loaded.
  #
  def initialize given_uri
    check_highline_gem
    check_picky_client_gem

    require 'uri'
    uri = URI.parse given_uri
    # If no path was recognized (for example when host:port/path is given
    # without a scheme), parse again with an explicit scheme.
    #
    unless uri.path
      uri = URI.parse "http://#{given_uri}"
    end
    # The client is always handed an absolute path.
    #
    unless uri.path =~ /^\//
      uri.path = "/#{uri.path}"
    end

    @searches  = 0
    @durations = 0
    @client    = Picky::Client.new :host => (uri.host || 'localhost'), :port => (uri.port || 8080), :path => uri.path

    install_trap
  end

  # Loads HighLine's system extensions and extends this instance with them
  # (presumably the source of get_character, which #run relies on).
  #
  # Note: warn_gem_missing is not defined in this class and needs to be
  # provided by the environment this file is loaded into.
  #
  def check_highline_gem # :nodoc:
    require "highline/system_extensions"
    extend HighLine::SystemExtensions
  rescue LoadError
    warn_gem_missing 'highline', 'the terminal interface'
    exit 1
  end

  # Loads the picky-client gem, which provides Picky::Client and
  # Picky::Convenience.
  #
  def check_picky_client_gem # :nodoc:
    require 'picky-client'
  rescue LoadError
    warn_gem_missing 'picky-client', 'the terminal interface'
    exit 1
  end

  # On Ctrl-C: return the cursor to the line start, print a short
  # summary of the session and exit.
  #
  def install_trap
    Signal.trap('INT') do
      print "\e[100D"
      flush
      puts "\n"
      puts "You performed #{@searches} searches, totalling #{"%.3f" % @durations} seconds."
      print "\e[100D"
      flush
      exit
    end
  end

  # Flushes STDOUT so that escape sequences take effect immediately.
  #
  def flush
    STDOUT.flush
  end

  # Moves the cursor left by amount columns.
  #
  # Note: Does not update @cursor_offset, use #move_to for tracked moves.
  #
  def left amount = 1
    print "\e[#{amount}D"
    flush
  end

  # Moves the cursor right by amount columns.
  #
  # Note: Does not update @cursor_offset, use #move_to for tracked moves.
  #
  def right amount = 1
    print "\e[#{amount}C"
    flush
  end

  # Moves the cursor to the given absolute column, relative to the
  # tracked @cursor_offset.
  #
  def move_to position
    relative = position - @cursor_offset
    if relative > 0
      right relative
    elsif relative < 0
      # The escape sequence takes a positive column count, so we
      # pass the distance, not the (negative) difference.
      #
      left relative.abs
    end
    # If relative is 0, nothing is emitted: a count of 0 is usually
    # interpreted as 1 by terminals and would misplace the cursor.
    #
    @cursor_offset = position
    flush
  end

  # Removes the last typed character from the text and from the screen.
  #
  # Does nothing if there is no text, as the cursor would otherwise
  # wander into the result count column.
  #
  def backspace
    return if @current_text.empty?

    @current_text.chop!
    print "\e[1D"
    print " "
    print "\e[1D"
    # Back one, forward one (the blank), back one: The cursor ends up
    # one column to the left, so the tracked offset has to follow.
    #
    @cursor_offset -= 1
    flush
  end

  # Prints the text at the cursor and advances the tracked offset by
  # the size of the text.
  #
  def write text
    print text
    @cursor_offset += text.size
    flush
  end

  # Appends the typed character to the search text and echoes it.
  #
  def type_search character
    @current_text << character
    write character
  end

  # Writes the total (0 if there are no results) into the count column
  # and puts the cursor back at the end of the typed text.
  #
  def write_results results
    move_to 0
    write "%9d" % (results && results.total || 0)
    move_to 10 + @current_text.size
  end

  # Moves the cursor to where the ids are displayed, two columns after
  # the typed text.
  #
  def move_to_ids
    move_to 10 + @current_text.size + 2
  end

  # Displays the ids of the results after the typed text.
  #
  def write_ids results
    move_to_ids
    write "=> #{results.total ? results.ids : []}"
  end

  # Blanks out previously displayed ids by overwriting them with spaces.
  #
  def clear_ids
    move_to_ids
    write " "*200
  end

  # Adds the search to the statistics reported when quitting.
  #
  def log results
    @searches  += 1
    @durations += (results[:duration] || 0)
  end

  # Searches the server for the current text.
  #
  # If full is true, up to 20 ids are requested, else none (count only).
  #
  def search full = false
    client.search @current_text, :ids => (full ? 20 : 0)
  end

  # Runs a search, logs it, and updates ids (full) and result count.
  #
  def search_and_write full = false
    results = search full
    results.extend Picky::Convenience

    log results

    full ? write_ids(results) : clear_ids

    write_results results
  end

  # Runs the interactive loop until interrupted with Ctrl-C
  # (see #install_trap).
  #
  def run
    puts "Type and see the result count update. Press enter for the first 20 result ids."
    puts "Break with Ctrl-C."

    @current_text  = ''
    @cursor_offset = 0
    @last_ids      = ''
    move_to 10
    search_and_write

    loop do
      input = get_character

      case input
      when 127 # Backspace/delete key.
        backspace
        search_and_write
      when 13  # Enter key.
        search_and_write true
      else
        type_search input.chr
        search_and_write
      end
    end
  end

end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Copying internal ruby methods.
|
4
4
|
//
|
5
|
-
|
5
|
+
inline VALUE rb_ary_elt(ary, offset)
|
6
6
|
VALUE ary;
|
7
7
|
long offset;
|
8
8
|
{
|
@@ -12,7 +12,7 @@ static inline VALUE rb_ary_elt(ary, offset)
|
|
12
12
|
}
|
13
13
|
return RARRAY_PTR(ary)[offset];
|
14
14
|
}
|
15
|
-
|
15
|
+
inline VALUE ary_make_hash(ary1, ary2)
|
16
16
|
VALUE ary1, ary2;
|
17
17
|
{
|
18
18
|
VALUE hash = rb_hash_new();
|
@@ -28,48 +28,53 @@ static VALUE ary_make_hash(ary1, ary2)
|
|
28
28
|
}
|
29
29
|
return hash;
|
30
30
|
}
|
31
|
+
inline VALUE rb_ary_length(VALUE ary) {
|
32
|
+
long length = RARRAY_LEN(ary);
|
33
|
+
return LONG2NUM(length);
|
34
|
+
}
|
31
35
|
|
32
|
-
// This version
|
33
|
-
//
|
34
|
-
//
|
35
|
-
//
|
36
|
-
inline VALUE memory_efficient_intersect(VALUE self, VALUE
|
36
|
+
// This version:
|
37
|
+
// * orders the arrays by ascending size, small to large.
|
38
|
+
// * calls the & consecutively for all arrays.
|
39
|
+
//
|
40
|
+
inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_of_arrays) {
|
37
41
|
// Counters.
|
38
42
|
//
|
39
43
|
long i, j;
|
40
|
-
|
44
|
+
|
41
45
|
// Vars.
|
42
46
|
//
|
43
47
|
struct RArray *rb_array_of_arrays;
|
44
48
|
VALUE smallest_array;
|
45
49
|
VALUE current_array;
|
46
50
|
VALUE hash;
|
47
|
-
|
51
|
+
|
48
52
|
// Temps.
|
49
53
|
//
|
50
54
|
VALUE v, vv;
|
51
|
-
|
52
|
-
// Conversions.
|
55
|
+
|
56
|
+
// Conversions & presorting.
|
53
57
|
//
|
54
|
-
rb_array_of_arrays =
|
58
|
+
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
55
59
|
smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
56
|
-
|
60
|
+
|
57
61
|
// Iterate through all arrays.
|
58
62
|
//
|
59
63
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
60
64
|
// Break if the smallest array is empty
|
65
|
+
//
|
61
66
|
if (RARRAY_LEN(smallest_array) == 0) {
|
62
67
|
break;
|
63
68
|
}
|
64
|
-
|
69
|
+
|
65
70
|
// Make a hash from the currently smallest version.
|
66
71
|
//
|
67
72
|
hash = ary_make_hash(smallest_array, 0);
|
68
|
-
|
73
|
+
|
69
74
|
// Clear for use as temp array.
|
70
75
|
//
|
71
76
|
rb_ary_clear(smallest_array);
|
72
|
-
|
77
|
+
|
73
78
|
// Iterate through all array elements.
|
74
79
|
//
|
75
80
|
current_array = RARRAY_PTR(rb_array_of_arrays)[i];
|
@@ -80,7 +85,7 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
|
|
80
85
|
}
|
81
86
|
}
|
82
87
|
}
|
83
|
-
|
88
|
+
|
84
89
|
return smallest_array;
|
85
90
|
}
|
86
91
|
|
@@ -1,8 +1,10 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
require 'rsolr'
|
4
|
+
|
4
5
|
module Indexers
|
5
|
-
|
6
|
+
|
7
|
+
# Deprecated. Only here as an example.
|
6
8
|
#
|
7
9
|
class Solr
|
8
10
|
|
@@ -19,19 +21,19 @@ module Indexers
|
|
19
21
|
def index
|
20
22
|
timed_exclaim "Indexing solr for #{type.name}:#{fields.join(', ')}"
|
21
23
|
statement = "SELECT indexed_id, #{fields.join(',')} FROM #{type.snapshot_table_name}"
|
22
|
-
|
24
|
+
|
23
25
|
DB.connect
|
24
26
|
results = DB.connection.execute statement
|
25
|
-
|
26
|
-
return unless results
|
27
|
-
|
27
|
+
|
28
|
+
return unless results
|
29
|
+
|
28
30
|
type_name = @type.name.to_s
|
29
|
-
|
31
|
+
|
30
32
|
solr.delete_by_query "type:#{type_name}"
|
31
33
|
solr.commit
|
32
|
-
|
34
|
+
|
33
35
|
documents = []
|
34
|
-
|
36
|
+
|
35
37
|
results.each do |indexed_id, *values|
|
36
38
|
values.each &:downcase!
|
37
39
|
documents << hashed(values).merge(id: indexed_id, type: type_name)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
|
-
#
|
3
|
+
# TODO Merge into Base, extract common with Indexed::Base.
|
4
4
|
#
|
5
5
|
module Indexing # :nodoc:all
|
6
6
|
# A Bundle is a number of indexes
|
@@ -10,7 +10,7 @@ module Internals
|
|
10
10
|
# * *core* index (always used)
|
11
11
|
# * *weights* index (always used)
|
12
12
|
# * *similarity* index (used with similarity)
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# In Picky, indexing is separated from the index
|
15
15
|
# handling itself through a parallel structure.
|
16
16
|
#
|
@@ -24,27 +24,27 @@ module Internals
|
|
24
24
|
# memory and looking up search data as fast as possible.
|
25
25
|
#
|
26
26
|
module Bundle
|
27
|
-
|
27
|
+
|
28
28
|
class SuperBase
|
29
|
-
|
29
|
+
|
30
30
|
attr_reader :identifier, :files
|
31
31
|
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
32
|
-
|
32
|
+
|
33
33
|
delegate :clear, :to => :index
|
34
34
|
delegate :[], :[]=, :to => :configuration
|
35
|
-
|
35
|
+
|
36
36
|
def initialize name, configuration, similarity_strategy
|
37
37
|
@identifier = "#{configuration.identifier}:#{name}"
|
38
38
|
@files = Internals::Index::Files.new name, configuration
|
39
|
-
|
39
|
+
|
40
40
|
@index = {}
|
41
41
|
@weights = {}
|
42
42
|
@similarity = {}
|
43
43
|
@configuration = {} # A hash with config options.
|
44
|
-
|
44
|
+
|
45
45
|
@similarity_strategy = similarity_strategy
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
# Get a list of similar texts.
|
49
49
|
#
|
50
50
|
# Note: Does not return itself.
|
@@ -55,11 +55,11 @@ module Internals
|
|
55
55
|
similar_codes.delete text if similar_codes
|
56
56
|
similar_codes || []
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
end
|
@@ -12,16 +12,17 @@ module Internals
|
|
12
12
|
# Mandatory params:
|
13
13
|
# * name: Category name to use as identifier and file names.
|
14
14
|
# * index: Index to which this category is attached to.
|
15
|
+
#
|
15
16
|
# Options:
|
16
17
|
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
17
18
|
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
18
19
|
# * source: Use if the category should use a different source.
|
19
20
|
# * from: The source category identifier to take the data from.
|
20
21
|
#
|
21
|
-
# Advanced Options
|
22
|
+
# Advanced Options:
|
22
23
|
#
|
23
|
-
# * weights:
|
24
|
-
# * tokenizer:
|
24
|
+
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
25
|
+
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
25
26
|
#
|
26
27
|
# TODO Should source be not optional, or taken from the index?
|
27
28
|
#
|
@@ -21,31 +21,25 @@ module Internals
|
|
21
21
|
# 1. [2, 30, 400, 100_000]
|
22
22
|
# 2. (100_000 & (400 & (30 & 2))) # => result
|
23
23
|
#
|
24
|
-
# Note: Uses a C-optimized intersection routine
|
24
|
+
# Note: Uses a C-optimized intersection routine (in performant.c)
|
25
|
+
# for speed and memory efficiency.
|
25
26
|
#
|
26
27
|
# Note: In the memory based version we ignore the (amount) needed hint.
|
27
|
-
# We
|
28
|
+
# We cannot use the information to speed up the algorithm, unfortunately.
|
28
29
|
#
|
29
30
|
def ids _, _
|
30
31
|
return [] if @combinations.empty?
|
31
32
|
|
32
33
|
# Get the ids for each combination.
|
33
34
|
#
|
34
|
-
# TODO For combinations with Redis
|
35
|
-
#
|
36
35
|
id_arrays = @combinations.inject([]) do |total, combination|
|
37
36
|
total << combination.ids
|
38
37
|
end
|
39
38
|
|
40
|
-
# Order by smallest size first such that the intersect can be performed faster.
|
41
|
-
#
|
42
|
-
# TODO Move into the memory_efficient_intersect such that
|
43
|
-
# this precondition for a fast algorithm is always given.
|
44
|
-
#
|
45
|
-
id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
|
46
|
-
|
47
39
|
# Call the optimized C algorithm.
|
48
40
|
#
|
41
|
+
# Note: It orders the passed arrays by size.
|
42
|
+
#
|
49
43
|
Performant::Array.memory_efficient_intersect id_arrays
|
50
44
|
end
|
51
45
|
|
@@ -25,11 +25,36 @@ module Internals
|
|
25
25
|
@indexes = index_definitions.map &:indexed
|
26
26
|
end
|
27
27
|
|
28
|
-
# Returns a number of
|
28
|
+
# Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
|
29
29
|
#
|
30
|
-
def
|
30
|
+
def prepared_allocations_for tokens, weights = {}
|
31
|
+
allocations = allocations_for tokens
|
32
|
+
|
33
|
+
# Remove double allocations.
|
34
|
+
#
|
35
|
+
allocations.uniq
|
36
|
+
|
37
|
+
# Score the allocations using weights as bias.
|
38
|
+
#
|
39
|
+
allocations.calculate_score weights
|
40
|
+
|
41
|
+
# Sort the allocations.
|
42
|
+
# (allocations are sorted according to score, highest to lowest)
|
43
|
+
#
|
44
|
+
allocations.sort!
|
31
45
|
|
46
|
+
# Reduce the amount of allocations.
|
47
|
+
#
|
48
|
+
# allocations.reduce_to some_amount
|
49
|
+
|
50
|
+
# Remove identifiers from allocations.
|
51
|
+
#
|
52
|
+
# allocations.remove some_array_of_identifiers_to_remove
|
53
|
+
|
54
|
+
allocations
|
32
55
|
end
|
56
|
+
# Returns a number of possible allocations for the given tokens.
|
57
|
+
#
|
33
58
|
def allocations_for tokens
|
34
59
|
Allocations.new allocations_ary_for(tokens)
|
35
60
|
end
|
@@ -14,11 +14,17 @@ module Internals
|
|
14
14
|
#
|
15
15
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
16
16
|
|
17
|
-
#
|
17
|
+
# Create a new Tokens object with the array of tokens passed in.
|
18
18
|
#
|
19
19
|
def initialize tokens = []
|
20
20
|
@tokens = tokens
|
21
21
|
end
|
22
|
+
|
23
|
+
# Creates a new Tokens object from a number of Strings.
|
24
|
+
#
|
25
|
+
# Options:
|
26
|
+
# * downcase: Whether to downcase the passed strings (default is true)
|
27
|
+
#
|
22
28
|
def self.processed words, downcase = true
|
23
29
|
new words.collect! { |word| Token.processed word, downcase }
|
24
30
|
end
|
data/lib/picky/search.rb
CHANGED
@@ -14,8 +14,8 @@ class Search
|
|
14
14
|
include Helpers::Measuring
|
15
15
|
|
16
16
|
attr_reader :indexes
|
17
|
-
attr_writer :tokenizer
|
18
|
-
attr_accessor :
|
17
|
+
attr_writer :tokenizer
|
18
|
+
attr_accessor :weights
|
19
19
|
|
20
20
|
# Takes:
|
21
21
|
# * A number of indexes
|
@@ -23,6 +23,8 @@ class Search
|
|
23
23
|
# * tokenizer: Tokenizers::Query.default by default.
|
24
24
|
# * weights: A hash of weights, or a Query::Weights object.
|
25
25
|
#
|
26
|
+
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
|
+
#
|
26
28
|
def initialize *index_definitions
|
27
29
|
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
28
30
|
|
@@ -113,58 +115,8 @@ class Search
|
|
113
115
|
|
114
116
|
# Gets sorted allocations for the tokens.
|
115
117
|
#
|
116
|
-
# This generates the possible allocations, sorted.
|
117
|
-
#
|
118
|
-
# TODO Smallify.
|
119
|
-
#
|
120
|
-
# TODO Rename: allocations
|
121
|
-
#
|
122
118
|
def sorted_allocations tokens # :nodoc:
|
123
|
-
|
124
|
-
#
|
125
|
-
# TODO Pass in reduce_to_amount (aka max_allocations)
|
126
|
-
#
|
127
|
-
# TODO uniq, score, sort in there
|
128
|
-
#
|
129
|
-
allocations = @indexes.allocations_for tokens
|
130
|
-
|
131
|
-
# Callbacks.
|
132
|
-
#
|
133
|
-
# TODO Reduce before sort?
|
134
|
-
#
|
135
|
-
reduce allocations
|
136
|
-
remove_from allocations
|
137
|
-
|
138
|
-
# Remove double allocations.
|
139
|
-
#
|
140
|
-
allocations.uniq
|
141
|
-
|
142
|
-
# Score the allocations using weights as bias.
|
143
|
-
#
|
144
|
-
allocations.calculate_score weights
|
145
|
-
|
146
|
-
# Sort the allocations.
|
147
|
-
# (allocations are sorted according to score, highest to lowest)
|
148
|
-
#
|
149
|
-
allocations.sort!
|
150
|
-
|
151
|
-
# Return the allocations.
|
152
|
-
#
|
153
|
-
allocations
|
154
|
-
end
|
155
|
-
def reduce allocations # :nodoc:
|
156
|
-
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
157
|
-
end
|
158
|
-
|
159
|
-
#
|
160
|
-
#
|
161
|
-
def remove_from allocations # :nodoc:
|
162
|
-
allocations.remove identifiers_to_remove
|
163
|
-
end
|
164
|
-
#
|
165
|
-
#
|
166
|
-
def identifiers_to_remove # :nodoc:
|
167
|
-
@identifiers_to_remove ||= []
|
119
|
+
@indexes.prepared_allocations_for tokens, weights
|
168
120
|
end
|
169
121
|
|
170
122
|
# Display some nice information for the user.
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Tasks for testing your engine configuration in the terminal.
|
2
|
+
#
|
3
|
+
task :search do
|
4
|
+
load File.expand_path '../../picky/auxiliary/terminal.rb', __FILE__
|
5
|
+
terminal = Terminal.new ARGV[1] || raise("Usage:\n rake search <URL>\n E.g. rake search /books\n rake search localhost:8080/books")
|
6
|
+
terminal.run
|
7
|
+
end
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -5,44 +5,43 @@ describe Performant::Array do
|
|
5
5
|
describe "memory_efficient_intersect" do
|
6
6
|
it "should intersect empty arrays correctly" do
|
7
7
|
arys = [[3,4], [1,2,3], []]
|
8
|
-
|
9
|
-
Performant::Array.memory_efficient_intersect(arys
|
8
|
+
|
9
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
10
10
|
end
|
11
11
|
it "should handle intermediate empty results correctly" do
|
12
12
|
arys = [[5,4], [1,2,3], [3,4,5,8,9]]
|
13
|
-
|
14
|
-
Performant::Array.memory_efficient_intersect(arys
|
13
|
+
|
14
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
15
15
|
end
|
16
16
|
it "should intersect correctly" do
|
17
17
|
arys = [[3,4], [1,2,3], [3,4,5,8,9]]
|
18
|
-
|
19
|
-
Performant::Array.memory_efficient_intersect(arys
|
18
|
+
|
19
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3]
|
20
20
|
end
|
21
21
|
it "should intersect correctly again" do
|
22
|
-
arys = [[3,
|
23
|
-
|
24
|
-
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [3,5,6,7]
|
22
|
+
arys = [[1,2,3,5,6,7], [3,4,5,6,7,8,9], [3,4,5,6,7]]
|
23
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3,5,6,7]
|
25
24
|
end
|
26
25
|
it "should intersect many arrays" do
|
27
26
|
arys = [[3,4,5,6,7], [1,2,3,5,6,7], [3,4,5,6,7,8,9], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19]]
|
28
|
-
|
29
|
-
Performant::Array.memory_efficient_intersect(arys
|
27
|
+
|
28
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3,5,6,7]
|
30
29
|
end
|
31
30
|
it "should handle random arrays" do
|
32
31
|
proto = Array.new(100, 3_500_000)
|
33
32
|
arys = [proto.map { |e| rand e }, proto.map { |e| rand e }, proto.map { |e| rand e }]
|
34
|
-
|
35
|
-
Performant::Array.memory_efficient_intersect(arys
|
33
|
+
|
34
|
+
Performant::Array.memory_efficient_intersect(arys).should == arys.inject(arys.shift.dup) { |total, ary| total & arys }
|
36
35
|
end
|
37
36
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
38
37
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
39
|
-
|
38
|
+
|
40
39
|
# brute force
|
41
|
-
performance_of { Performant::Array.memory_efficient_intersect(arys
|
40
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
42
41
|
end
|
43
42
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
44
43
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
45
|
-
|
44
|
+
|
46
45
|
# &
|
47
46
|
performance_of do
|
48
47
|
arys.inject(arys.shift.dup) do |total, ary|
|
@@ -55,33 +54,33 @@ describe Performant::Array do
|
|
55
54
|
describe "memory_efficient_intersect with symbols" do
|
56
55
|
it "should intersect empty arrays correctly" do
|
57
56
|
arys = [[:c,:d], [:a,:b,:c], []]
|
58
|
-
|
59
|
-
Performant::Array.memory_efficient_intersect(arys
|
57
|
+
|
58
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
60
59
|
end
|
61
60
|
it "should handle intermediate empty results correctly" do
|
62
61
|
arys = [[:e,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
63
|
-
|
64
|
-
Performant::Array.memory_efficient_intersect(arys
|
62
|
+
|
63
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
65
64
|
end
|
66
65
|
it "should intersect correctly" do
|
67
66
|
arys = [[:c,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
68
|
-
|
69
|
-
Performant::Array.memory_efficient_intersect(arys
|
67
|
+
|
68
|
+
Performant::Array.memory_efficient_intersect(arys).should == [:c]
|
70
69
|
end
|
71
70
|
it "should intersect many arrays" do
|
72
71
|
arys = [[:c,:d,:e,:f,:g], [:a,:b,:c,:e,:f,:g], [:c,:d,:e,:f,:g,:h,:i], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s]]
|
73
|
-
|
74
|
-
Performant::Array.memory_efficient_intersect(arys
|
72
|
+
|
73
|
+
Performant::Array.memory_efficient_intersect(arys).should == [:c,:e,:f,:g]
|
75
74
|
end
|
76
75
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
77
|
-
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a
|
78
|
-
|
76
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
|
77
|
+
|
79
78
|
# brute force
|
80
|
-
performance_of { Performant::Array.memory_efficient_intersect(arys
|
79
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
81
80
|
end
|
82
81
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
83
82
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
84
|
-
|
83
|
+
|
85
84
|
# &
|
86
85
|
performance_of do
|
87
86
|
arys.inject(arys.shift.dup) do |total, ary|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# We need to load the Terminal file explicitly as the auxiliary classes
|
6
|
+
# are not loaded with the Loader (not needed in the server, only for script runs).
|
7
|
+
#
|
8
|
+
require File.expand_path '../../../../lib/picky/auxiliary/terminal', __FILE__
|
9
|
+
|
10
|
+
describe Terminal do
|
11
|
+
|
12
|
+
let(:terminal) { described_class.new('/some/url') }
|
13
|
+
|
14
|
+
before(:each) do
|
15
|
+
terminal.stub! :search => { :total => 0, :duration => 0.01 }
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'left' do
|
19
|
+
it 'moves by amount' do
|
20
|
+
terminal.should_receive(:print).once.ordered.with "\e[13D"
|
21
|
+
terminal.should_receive(:flush).once.ordered
|
22
|
+
|
23
|
+
terminal.left 13
|
24
|
+
end
|
25
|
+
it 'default is 1' do
|
26
|
+
terminal.should_receive(:print).once.ordered.with "\e[1D"
|
27
|
+
terminal.should_receive(:flush).once.ordered
|
28
|
+
|
29
|
+
terminal.left
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'right' do
|
34
|
+
it 'moves by amount' do
|
35
|
+
terminal.should_receive(:print).once.ordered.with "\e[13C"
|
36
|
+
terminal.should_receive(:flush).once.ordered
|
37
|
+
|
38
|
+
terminal.right 13
|
39
|
+
end
|
40
|
+
it 'default is 1' do
|
41
|
+
terminal.should_receive(:print).once.ordered.with "\e[1C"
|
42
|
+
terminal.should_receive(:flush).once.ordered
|
43
|
+
|
44
|
+
terminal.right
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
describe 'flush' do
|
49
|
+
it 'flushes STDOUT' do
|
50
|
+
STDOUT.should_receive(:flush).once.with()
|
51
|
+
|
52
|
+
terminal.flush
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
@@ -1 +1,15 @@
|
|
1
|
-
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe 'aliases' do
|
4
|
+
|
5
|
+
it 'aliases correctly' do
|
6
|
+
Partial.should == Internals::Generators::Partial
|
7
|
+
end
|
8
|
+
it 'aliases correctly' do
|
9
|
+
Similarity.should == Internals::Generators::Similarity
|
10
|
+
end
|
11
|
+
it 'aliases correctly' do
|
12
|
+
Weights.should == Internals::Generators::Weights
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Internals::Indexed::Bundle::Redis do
|
4
|
+
|
5
|
+
before(:each) do
|
6
|
+
@backend = stub :backend
|
7
|
+
|
8
|
+
Internals::Index::Redis.stub! :new => @backend
|
9
|
+
|
10
|
+
@category = stub :category, :name => :some_category
|
11
|
+
@index = stub :index, :name => :some_index
|
12
|
+
@configuration = Configuration::Index.new @index, @category
|
13
|
+
|
14
|
+
@similarity = stub :similarity
|
15
|
+
@bundle = described_class.new :some_name, @configuration, @similarity
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'ids' do
|
19
|
+
it 'delegates to the backend' do
|
20
|
+
@backend.should_receive(:ids).once.with :some_sym
|
21
|
+
|
22
|
+
@bundle.ids :some_sym
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
describe 'weight' do
|
27
|
+
it 'delegates to the backend' do
|
28
|
+
@backend.should_receive(:weight).once.with :some_sym
|
29
|
+
|
30
|
+
@bundle.weight :some_sym
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '[]' do
|
35
|
+
it 'delegates to the backend' do
|
36
|
+
@backend.should_receive(:setting).once.with :some_sym
|
37
|
+
|
38
|
+
@bundle[:some_sym]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
@@ -77,5 +77,26 @@ describe Internals::Query::Indexes do
|
|
77
77
|
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
|
78
78
|
end
|
79
79
|
end
|
80
|
+
|
81
|
+
describe 'prepared_allocations_for' do
|
82
|
+
before(:each) do
|
83
|
+
@allocations = stub :allocations
|
84
|
+
indexes.stub! :allocations_for => @allocations
|
85
|
+
end
|
86
|
+
it 'calls the right method in order' do
|
87
|
+
@allocations.should_receive(:uniq).once.ordered.with()
|
88
|
+
@allocations.should_receive(:calculate_score).once.ordered.with(:some_weights)
|
89
|
+
@allocations.should_receive(:sort!).once.ordered.with()
|
90
|
+
|
91
|
+
indexes.prepared_allocations_for :some_tokens, :some_weights
|
92
|
+
end
|
93
|
+
it 'calls the right method in order' do
|
94
|
+
@allocations.should_receive(:uniq).once.ordered.with()
|
95
|
+
@allocations.should_receive(:calculate_score).once.ordered.with({})
|
96
|
+
@allocations.should_receive(:sort!).once.ordered.with()
|
97
|
+
|
98
|
+
indexes.prepared_allocations_for :some_tokens
|
99
|
+
end
|
100
|
+
end
|
80
101
|
|
81
102
|
end
|
data/spec/lib/search_spec.rb
CHANGED
@@ -4,6 +4,11 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Search do
|
6
6
|
|
7
|
+
before(:each) do
|
8
|
+
@type = stub :type
|
9
|
+
@index = stub :some_index, :indexed => @type
|
10
|
+
end
|
11
|
+
|
7
12
|
describe 'combinations_type_for' do
|
8
13
|
let(:search) { described_class.new }
|
9
14
|
it 'returns a specific Combination for a specific input' do
|
@@ -46,25 +51,6 @@ describe Search do
|
|
46
51
|
end
|
47
52
|
end
|
48
53
|
|
49
|
-
# describe "empty_results" do
|
50
|
-
# before(:each) do
|
51
|
-
# @search = search::Full.new
|
52
|
-
#
|
53
|
-
# @result_type = stub :result_type
|
54
|
-
# @search.stub! :result_type => @result_type
|
55
|
-
# end
|
56
|
-
# it "returns a new result type" do
|
57
|
-
# @result_type.should_receive(:new).once.with :some_offset
|
58
|
-
#
|
59
|
-
# @search.empty_results :some_offset
|
60
|
-
# end
|
61
|
-
# it "returns a new result type with default offset" do
|
62
|
-
# @result_type.should_receive(:new).once.with 0
|
63
|
-
#
|
64
|
-
# @search.empty_results
|
65
|
-
# end
|
66
|
-
# end
|
67
|
-
|
68
54
|
describe "search_with_text" do
|
69
55
|
before(:each) do
|
70
56
|
@search = Search.new
|
@@ -83,60 +69,6 @@ describe Search do
|
|
83
69
|
end
|
84
70
|
end
|
85
71
|
|
86
|
-
describe 'reduce' do
|
87
|
-
context 'real' do
|
88
|
-
before(:each) do
|
89
|
-
@allocations = stub :allocations
|
90
|
-
@search = Search.new
|
91
|
-
end
|
92
|
-
context 'reduce_to_amount not set' do
|
93
|
-
it 'should not call anything on the allocations' do
|
94
|
-
@allocations.should_receive(:reduce_to).never
|
95
|
-
|
96
|
-
@search.reduce @allocations
|
97
|
-
end
|
98
|
-
end
|
99
|
-
context 'reduce_to_amount set' do
|
100
|
-
before(:each) do
|
101
|
-
@search.reduce_to_amount = :some_amount
|
102
|
-
end
|
103
|
-
it 'should call reduce_to on the allocations' do
|
104
|
-
@allocations.should_receive(:reduce_to).once.with :some_amount
|
105
|
-
|
106
|
-
@search.reduce @allocations
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
context 'stubbed' do
|
111
|
-
before(:each) do
|
112
|
-
@allocations = stub :allocations
|
113
|
-
@search = Search.new
|
114
|
-
end
|
115
|
-
context 'reduce_to_amount not set' do
|
116
|
-
it 'should not call anything on the allocations' do
|
117
|
-
@allocations.should_receive(:reduce_to).never
|
118
|
-
|
119
|
-
@search.reduce @allocations
|
120
|
-
end
|
121
|
-
end
|
122
|
-
context 'reduce_to_amount set' do
|
123
|
-
before(:each) do
|
124
|
-
@search.stub! :reduce_to_amount => :some_amount
|
125
|
-
end
|
126
|
-
it 'should call reduce_to on the allocations' do
|
127
|
-
@allocations.should_receive(:reduce_to).once.with :some_amount
|
128
|
-
|
129
|
-
@search.reduce @allocations
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
before(:each) do
|
136
|
-
@type = stub :type
|
137
|
-
@index = stub :some_index, :indexed => @type
|
138
|
-
end
|
139
|
-
|
140
72
|
describe 'initializer' do
|
141
73
|
context 'with tokenizer' do
|
142
74
|
before(:each) do
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.1.
|
5
|
+
version: 2.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-04-
|
13
|
+
date: 2011-04-11 00:00:00 +10:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/picky/aliases.rb
|
40
40
|
- lib/picky/analyzer.rb
|
41
41
|
- lib/picky/application.rb
|
42
|
+
- lib/picky/auxiliary/terminal.rb
|
42
43
|
- lib/picky/character_substituters/west_european.rb
|
43
44
|
- lib/picky/cli.rb
|
44
45
|
- lib/picky/cores.rb
|
@@ -154,6 +155,7 @@ files:
|
|
154
155
|
- lib/tasks/framework.rake
|
155
156
|
- lib/tasks/index.rake
|
156
157
|
- lib/tasks/routes.rake
|
158
|
+
- lib/tasks/search.rake
|
157
159
|
- lib/tasks/server.rake
|
158
160
|
- lib/tasks/shortcuts.rake
|
159
161
|
- lib/tasks/solr.rake
|
@@ -166,6 +168,7 @@ files:
|
|
166
168
|
- spec/lib/aliases_spec.rb
|
167
169
|
- spec/lib/analyzer_spec.rb
|
168
170
|
- spec/lib/application_spec.rb
|
171
|
+
- spec/lib/auxiliary/terminal_spec.rb
|
169
172
|
- spec/lib/bundling_spec.rb
|
170
173
|
- spec/lib/character_substituters/west_european_spec.rb
|
171
174
|
- spec/lib/cli_spec.rb
|
@@ -211,6 +214,7 @@ files:
|
|
211
214
|
- spec/lib/internals/index/redis/string_hash_spec.rb
|
212
215
|
- spec/lib/internals/index/redis_spec.rb
|
213
216
|
- spec/lib/internals/indexed/bundle/memory_spec.rb
|
217
|
+
- spec/lib/internals/indexed/bundle/redis_spec.rb
|
214
218
|
- spec/lib/internals/indexed/categories_spec.rb
|
215
219
|
- spec/lib/internals/indexed/category_spec.rb
|
216
220
|
- spec/lib/internals/indexed/index_spec.rb
|
@@ -290,6 +294,7 @@ test_files:
|
|
290
294
|
- spec/lib/aliases_spec.rb
|
291
295
|
- spec/lib/analyzer_spec.rb
|
292
296
|
- spec/lib/application_spec.rb
|
297
|
+
- spec/lib/auxiliary/terminal_spec.rb
|
293
298
|
- spec/lib/bundling_spec.rb
|
294
299
|
- spec/lib/character_substituters/west_european_spec.rb
|
295
300
|
- spec/lib/cli_spec.rb
|
@@ -335,6 +340,7 @@ test_files:
|
|
335
340
|
- spec/lib/internals/index/redis/string_hash_spec.rb
|
336
341
|
- spec/lib/internals/index/redis_spec.rb
|
337
342
|
- spec/lib/internals/indexed/bundle/memory_spec.rb
|
343
|
+
- spec/lib/internals/indexed/bundle/redis_spec.rb
|
338
344
|
- spec/lib/internals/indexed/categories_spec.rb
|
339
345
|
- spec/lib/internals/indexed/category_spec.rb
|
340
346
|
- spec/lib/internals/indexed/index_spec.rb
|