picky 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/auxiliary/terminal.rb +147 -0
- data/lib/picky/indexing/indexes.rb +0 -2
- data/lib/picky/internals/ext/ruby19/performant.c +22 -17
- data/lib/picky/internals/index/redis/list_hash.rb +0 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +1 -1
- data/lib/picky/internals/indexed/category.rb +0 -2
- data/lib/picky/internals/indexers/solr.rb +10 -8
- data/lib/picky/internals/indexing/bundle/base.rb +0 -3
- data/lib/picky/internals/indexing/bundle/super_base.rb +13 -13
- data/lib/picky/internals/indexing/category.rb +4 -3
- data/lib/picky/internals/query/allocations.rb +0 -2
- data/lib/picky/internals/query/combinations/memory.rb +5 -11
- data/lib/picky/internals/query/indexes.rb +27 -2
- data/lib/picky/internals/query/tokens.rb +7 -1
- data/lib/picky/search.rb +5 -53
- data/lib/tasks/search.rake +7 -0
- data/spec/ext/performant_spec.rb +27 -28
- data/spec/lib/auxiliary/terminal_spec.rb +56 -0
- data/spec/lib/generators/aliases_spec.rb +15 -1
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +42 -0
- data/spec/lib/query/indexes_spec.rb +21 -0
- data/spec/lib/search_spec.rb +5 -73
- metadata +8 -2
@@ -0,0 +1,147 @@
|
|
1
|
+
# A small interactive terminal client for a running Picky server.
#
# Each typed character triggers a search and updates the total result
# count at the start of the line. Pressing enter additionally shows the
# ids of the first 20 results. Ctrl-C prints a summary and exits.
#
# Line layout: columns 0..8 hold the right-aligned result count, the
# typed text starts at column 10, and the ids follow two columns after
# the end of the text.
#
class Terminal

  attr_reader :client

  # Creates a terminal that searches against the given uri.
  #
  # Examples for given_uri:
  #  * "/books"
  #  * "localhost:8080/books"
  #
  # A missing host defaults to localhost, a missing port to 8080.
  #
  # Note: Exits the process with status 1 if the highline or the
  # picky-client gem cannot be loaded.
  #
  def initialize given_uri
    check_highline_gem
    check_picky_client_gem

    require 'uri'
    uri = URI.parse given_uri
    # E.g. "localhost:8080/books" yields no path when parsed as is,
    # so it is parsed again with an explicit scheme.
    #
    unless uri.path
      uri = URI.parse "http://#{given_uri}"
    end
    unless uri.path =~ /^\//
      uri.path = "/#{uri.path}"
    end

    @searches  = 0
    @durations = 0
    @client    = Picky::Client.new :host => (uri.host || 'localhost'), :port => (uri.port || 8080), :path => uri.path

    install_trap
  end

  # Loads highline's system extensions and extends this instance with them.
  #
  # Note: get_character (used in #run) is presumably provided by
  # HighLine::SystemExtensions - it is not defined in this class.
  #
  def check_highline_gem # :nodoc:
    require "highline/system_extensions"
    extend HighLine::SystemExtensions
  rescue LoadError
    warn_gem_missing 'highline', 'the terminal interface'
    exit 1
  end
  # Loads the picky-client gem, which provides Picky::Client.
  #
  def check_picky_client_gem # :nodoc:
    require 'picky-client'
  rescue LoadError
    warn_gem_missing 'picky-client', 'the terminal interface'
    exit 1
  end

  # On Ctrl-C (INT), prints the number of searches and their total
  # duration, then exits.
  #
  def install_trap
    Signal.trap('INT') do
      print "\e[100D"
      flush
      puts "\n"
      puts "You performed #{@searches} searches, totalling #{"%.3f" % @durations} seconds."
      print "\e[100D"
      flush
      exit
    end
  end

  # Flushes STDOUT so that escape sequences take effect immediately.
  #
  def flush
    STDOUT.flush
  end
  # Moves the cursor left by the given amount of columns.
  #
  # Note: Does not update the tracked cursor offset. Use #move_to for that.
  #
  def left amount = 1
    print "\e[#{amount}D"
    flush
  end
  # Moves the cursor right by the given amount of columns.
  #
  # Note: Does not update the tracked cursor offset. Use #move_to for that.
  #
  def right amount = 1
    print "\e[#{amount}C"
    flush
  end
  # Moves the cursor to the given absolute column and remembers it.
  #
  # The escape sequences only take a positive count, so the distance
  # is passed on as an absolute value, and nothing is printed if the
  # cursor is already at the right position.
  #
  def move_to position
    relative = position - @cursor_offset
    if relative > 0
      right relative
    elsif relative < 0
      left relative.abs
    end
    @cursor_offset = position
    flush
  end
  # Removes the last character from the current text and from the screen.
  #
  # Does nothing if there is no text to remove. Keeps the tracked cursor
  # offset in sync with the cursor, which ends up one column to the left.
  #
  def backspace
    return if @current_text.empty?

    @current_text.chop!
    print "\e[1D"
    print " "
    print "\e[1D"
    @cursor_offset -= 1
    flush
  end
  # Prints the text at the current position and advances the tracked
  # cursor offset by its size.
  #
  def write text
    print text
    @cursor_offset += text.size
    flush
  end
  # Appends the character to the current search text and echoes it.
  #
  def type_search character
    @current_text << character
    write character
  end
  # Writes the total (or 0 if there is none), right-aligned, at the start
  # of the line, then puts the cursor back after the current text.
  #
  def write_results results
    move_to 0
    write "%9d" % (results && results.total || 0)
    move_to 10 + @current_text.size
  end
  # Moves the cursor to where the ids are displayed.
  #
  def move_to_ids
    move_to 10 + @current_text.size + 2
  end
  # Writes the ids of the results after the current text.
  #
  def write_ids results
    move_to_ids
    write "=> #{results.total ? results.ids : []}"
  end
  # Overwrites the ids area with blanks.
  #
  def clear_ids
    move_to_ids
    write " "*200
  end
  # Records a search and its duration for the summary shown on exit.
  #
  def log results
    @searches  += 1
    @durations += (results[:duration] || 0)
  end
  # Searches for the current text.
  #
  # Options:
  #  * full: Whether to ask for the first 20 ids (default is false, no ids).
  #
  def search full = false
    client.search @current_text, :ids => (full ? 20 : 0)
  end
  # Searches, logs the search, and updates the display.
  #
  # Note: Picky::Convenience presumably provides #total and #ids on the results.
  #
  def search_and_write full = false
    results = search full
    results.extend Picky::Convenience

    log results

    full ? write_ids(results) : clear_ids

    write_results results
  end

  # Runs the interactive loop until interrupted with Ctrl-C.
  #
  def run
    puts "Type and see the result count update. Press enter for the first 20 result ids."
    puts "Break with Ctrl-C."

    @current_text  = ''
    @cursor_offset = 0
    @last_ids      = ''
    move_to 10
    search_and_write

    loop do
      input = get_character

      case input
      when 127 # DEL, sent by the backspace key.
        backspace
        search_and_write
      when 13  # Carriage return, sent by the enter key.
        search_and_write true
      else
        type_search input.chr
        search_and_write
      end
    end
  end

end
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
// Copying internal ruby methods.
|
4
4
|
//
|
5
|
-
|
5
|
+
inline VALUE rb_ary_elt(ary, offset)
|
6
6
|
VALUE ary;
|
7
7
|
long offset;
|
8
8
|
{
|
@@ -12,7 +12,7 @@ static inline VALUE rb_ary_elt(ary, offset)
|
|
12
12
|
}
|
13
13
|
return RARRAY_PTR(ary)[offset];
|
14
14
|
}
|
15
|
-
|
15
|
+
inline VALUE ary_make_hash(ary1, ary2)
|
16
16
|
VALUE ary1, ary2;
|
17
17
|
{
|
18
18
|
VALUE hash = rb_hash_new();
|
@@ -28,48 +28,53 @@ static VALUE ary_make_hash(ary1, ary2)
|
|
28
28
|
}
|
29
29
|
return hash;
|
30
30
|
}
|
31
|
+
// Returns the length of the given array as a Ruby number.
// Passed as the block function to sort_by! in memory_efficient_intersect.
//
inline VALUE rb_ary_length(VALUE ary) {
  return LONG2NUM(RARRAY_LEN(ary));
}
|
31
35
|
|
32
|
-
// This version
|
33
|
-
//
|
34
|
-
//
|
35
|
-
//
|
36
|
-
inline VALUE memory_efficient_intersect(VALUE self, VALUE
|
36
|
+
// This version:
|
37
|
+
// * orders the arrays by ascending size, small to large.
|
38
|
+
// * calls the & consecutively for all arrays.
|
39
|
+
//
|
40
|
+
inline VALUE memory_efficient_intersect(VALUE self, VALUE unsorted_array_of_arrays) {
|
37
41
|
// Counters.
|
38
42
|
//
|
39
43
|
long i, j;
|
40
|
-
|
44
|
+
|
41
45
|
// Vars.
|
42
46
|
//
|
43
47
|
struct RArray *rb_array_of_arrays;
|
44
48
|
VALUE smallest_array;
|
45
49
|
VALUE current_array;
|
46
50
|
VALUE hash;
|
47
|
-
|
51
|
+
|
48
52
|
// Temps.
|
49
53
|
//
|
50
54
|
VALUE v, vv;
|
51
|
-
|
52
|
-
// Conversions.
|
55
|
+
|
56
|
+
// Conversions & presorting.
|
53
57
|
//
|
54
|
-
rb_array_of_arrays =
|
58
|
+
rb_array_of_arrays = rb_block_call(unsorted_array_of_arrays, rb_intern("sort_by!"), 0, 0, rb_ary_length, 0);
|
55
59
|
smallest_array = (VALUE) RARRAY(rb_ary_dup(RARRAY_PTR(rb_array_of_arrays)[0]));
|
56
|
-
|
60
|
+
|
57
61
|
// Iterate through all arrays.
|
58
62
|
//
|
59
63
|
for (i = 1; i < RARRAY_LEN(rb_array_of_arrays); i++) {
|
60
64
|
// Break if the smallest array is empty
|
65
|
+
//
|
61
66
|
if (RARRAY_LEN(smallest_array) == 0) {
|
62
67
|
break;
|
63
68
|
}
|
64
|
-
|
69
|
+
|
65
70
|
// Make a hash from the currently smallest version.
|
66
71
|
//
|
67
72
|
hash = ary_make_hash(smallest_array, 0);
|
68
|
-
|
73
|
+
|
69
74
|
// Clear for use as temp array.
|
70
75
|
//
|
71
76
|
rb_ary_clear(smallest_array);
|
72
|
-
|
77
|
+
|
73
78
|
// Iterate through all array elements.
|
74
79
|
//
|
75
80
|
current_array = RARRAY_PTR(rb_array_of_arrays)[i];
|
@@ -80,7 +85,7 @@ inline VALUE memory_efficient_intersect(VALUE self, VALUE length_sorted_array_of
|
|
80
85
|
}
|
81
86
|
}
|
82
87
|
}
|
83
|
-
|
88
|
+
|
84
89
|
return smallest_array;
|
85
90
|
}
|
86
91
|
|
@@ -1,8 +1,10 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
require 'rsolr'
|
4
|
+
|
4
5
|
module Indexers
|
5
|
-
|
6
|
+
|
7
|
+
# Deprecated. Only here as an example.
|
6
8
|
#
|
7
9
|
class Solr
|
8
10
|
|
@@ -19,19 +21,19 @@ module Indexers
|
|
19
21
|
def index
|
20
22
|
timed_exclaim "Indexing solr for #{type.name}:#{fields.join(', ')}"
|
21
23
|
statement = "SELECT indexed_id, #{fields.join(',')} FROM #{type.snapshot_table_name}"
|
22
|
-
|
24
|
+
|
23
25
|
DB.connect
|
24
26
|
results = DB.connection.execute statement
|
25
|
-
|
26
|
-
return unless results
|
27
|
-
|
27
|
+
|
28
|
+
return unless results
|
29
|
+
|
28
30
|
type_name = @type.name.to_s
|
29
|
-
|
31
|
+
|
30
32
|
solr.delete_by_query "type:#{type_name}"
|
31
33
|
solr.commit
|
32
|
-
|
34
|
+
|
33
35
|
documents = []
|
34
|
-
|
36
|
+
|
35
37
|
results.each do |indexed_id, *values|
|
36
38
|
values.each &:downcase!
|
37
39
|
documents << hashed(values).merge(id: indexed_id, type: type_name)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
|
-
#
|
3
|
+
# TODO Merge into Base, extract common with Indexed::Base.
|
4
4
|
#
|
5
5
|
module Indexing # :nodoc:all
|
6
6
|
# A Bundle is a number of indexes
|
@@ -10,7 +10,7 @@ module Internals
|
|
10
10
|
# * *core* index (always used)
|
11
11
|
# * *weights* index (always used)
|
12
12
|
# * *similarity* index (used with similarity)
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# In Picky, indexing is separated from the index
|
15
15
|
# handling itself through a parallel structure.
|
16
16
|
#
|
@@ -24,27 +24,27 @@ module Internals
|
|
24
24
|
# memory and looking up search data as fast as possible.
|
25
25
|
#
|
26
26
|
module Bundle
|
27
|
-
|
27
|
+
|
28
28
|
class SuperBase
|
29
|
-
|
29
|
+
|
30
30
|
attr_reader :identifier, :files
|
31
31
|
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
32
|
-
|
32
|
+
|
33
33
|
delegate :clear, :to => :index
|
34
34
|
delegate :[], :[]=, :to => :configuration
|
35
|
-
|
35
|
+
|
36
36
|
def initialize name, configuration, similarity_strategy
|
37
37
|
@identifier = "#{configuration.identifier}:#{name}"
|
38
38
|
@files = Internals::Index::Files.new name, configuration
|
39
|
-
|
39
|
+
|
40
40
|
@index = {}
|
41
41
|
@weights = {}
|
42
42
|
@similarity = {}
|
43
43
|
@configuration = {} # A hash with config options.
|
44
|
-
|
44
|
+
|
45
45
|
@similarity_strategy = similarity_strategy
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
# Get a list of similar texts.
|
49
49
|
#
|
50
50
|
# Note: Does not return itself.
|
@@ -55,11 +55,11 @@ module Internals
|
|
55
55
|
similar_codes.delete text if similar_codes
|
56
56
|
similar_codes || []
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
end
|
60
|
-
|
60
|
+
|
61
61
|
end
|
62
|
-
|
62
|
+
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
end
|
@@ -12,16 +12,17 @@ module Internals
|
|
12
12
|
# Mandatory params:
|
13
13
|
# * name: Category name to use as identifier and file names.
|
14
14
|
# * index: Index to which this category is attached to.
|
15
|
+
#
|
15
16
|
# Options:
|
16
17
|
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
17
18
|
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
18
19
|
# * source: Use if the category should use a different source.
|
19
20
|
# * from: The source category identifier to take the data from.
|
20
21
|
#
|
21
|
-
# Advanced Options
|
22
|
+
# Advanced Options:
|
22
23
|
#
|
23
|
-
# * weights:
|
24
|
-
# * tokenizer:
|
24
|
+
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
25
|
+
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
25
26
|
#
|
26
27
|
# TODO Should source be not optional, or taken from the index?
|
27
28
|
#
|
@@ -21,31 +21,25 @@ module Internals
|
|
21
21
|
# 1. [2, 30, 400, 100_000]
|
22
22
|
# 2. (100_000 & (400 & (30 & 2))) # => result
|
23
23
|
#
|
24
|
-
# Note: Uses a C-optimized intersection routine
|
24
|
+
# Note: Uses a C-optimized intersection routine (in performant.c)
|
25
|
+
# for speed and memory efficiency.
|
25
26
|
#
|
26
27
|
# Note: In the memory based version we ignore the (amount) needed hint.
|
27
|
-
# We
|
28
|
+
# We cannot use the information to speed up the algorithm, unfortunately.
|
28
29
|
#
|
29
30
|
def ids _, _
|
30
31
|
return [] if @combinations.empty?
|
31
32
|
|
32
33
|
# Get the ids for each combination.
|
33
34
|
#
|
34
|
-
# TODO For combinations with Redis
|
35
|
-
#
|
36
35
|
id_arrays = @combinations.inject([]) do |total, combination|
|
37
36
|
total << combination.ids
|
38
37
|
end
|
39
38
|
|
40
|
-
# Order by smallest size first such that the intersect can be performed faster.
|
41
|
-
#
|
42
|
-
# TODO Move into the memory_efficient_intersect such that
|
43
|
-
# this precondition for a fast algorithm is always given.
|
44
|
-
#
|
45
|
-
id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
|
46
|
-
|
47
39
|
# Call the optimized C algorithm.
|
48
40
|
#
|
41
|
+
# Note: It orders the passed arrays by size.
|
42
|
+
#
|
49
43
|
Performant::Array.memory_efficient_intersect id_arrays
|
50
44
|
end
|
51
45
|
|
@@ -25,11 +25,36 @@ module Internals
|
|
25
25
|
@indexes = index_definitions.map &:indexed
|
26
26
|
end
|
27
27
|
|
28
|
-
# Returns a number of
|
28
|
+
# Returns the allocations for the given tokens, prepared for use.
#
# Preparation steps, in this order:
#  1. Double allocations are removed.
#  2. The allocations are scored, using the weights as bias.
#  3. The allocations are sorted according to score, highest to lowest.
#
# Not done here (yet):
#  * Reducing the amount of allocations:
#      allocations.reduce_to some_amount
#  * Removing identifiers from allocations:
#      allocations.remove some_array_of_identifiers_to_remove
#
# Parameters:
#  * tokens:  The tokens to get the allocations for.
#  * weights: Passed on to the scoring (default is an empty hash).
#
def prepared_allocations_for tokens, weights = {}
  allocations_for(tokens).tap do |prepared|
    prepared.uniq
    prepared.calculate_score weights
    prepared.sort!
  end
end
|
56
|
+
# Returns a number of possible allocations for the given tokens.
|
57
|
+
#
|
33
58
|
def allocations_for tokens
|
34
59
|
Allocations.new allocations_ary_for(tokens)
|
35
60
|
end
|
@@ -14,11 +14,17 @@ module Internals
|
|
14
14
|
#
|
15
15
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
16
16
|
|
17
|
-
#
|
17
|
+
# Create a new Tokens object with the array of tokens passed in.
|
18
18
|
#
|
19
19
|
def initialize tokens = []
|
20
20
|
@tokens = tokens
|
21
21
|
end
|
22
|
+
|
23
|
+
# Creates a new Tokens object from a number of Strings.
|
24
|
+
#
|
25
|
+
# Options:
|
26
|
+
# * downcase: Whether to downcase the passed strings (default is true)
|
27
|
+
#
|
22
28
|
def self.processed words, downcase = true
|
23
29
|
new words.collect! { |word| Token.processed word, downcase }
|
24
30
|
end
|
data/lib/picky/search.rb
CHANGED
@@ -14,8 +14,8 @@ class Search
|
|
14
14
|
include Helpers::Measuring
|
15
15
|
|
16
16
|
attr_reader :indexes
|
17
|
-
attr_writer :tokenizer
|
18
|
-
attr_accessor :
|
17
|
+
attr_writer :tokenizer
|
18
|
+
attr_accessor :weights
|
19
19
|
|
20
20
|
# Takes:
|
21
21
|
# * A number of indexes
|
@@ -23,6 +23,8 @@ class Search
|
|
23
23
|
# * tokenizer: Tokenizers::Query.default by default.
|
24
24
|
# * weights: A hash of weights, or a Query::Weights object.
|
25
25
|
#
|
26
|
+
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
|
+
#
|
26
28
|
def initialize *index_definitions
|
27
29
|
options = Hash === index_definitions.last ? index_definitions.pop : {}
|
28
30
|
|
@@ -113,58 +115,8 @@ class Search
|
|
113
115
|
|
114
116
|
# Gets sorted allocations for the tokens.
|
115
117
|
#
|
116
|
-
# This generates the possible allocations, sorted.
|
117
|
-
#
|
118
|
-
# TODO Smallify.
|
119
|
-
#
|
120
|
-
# TODO Rename: allocations
|
121
|
-
#
|
122
118
|
def sorted_allocations tokens # :nodoc:
|
123
|
-
|
124
|
-
#
|
125
|
-
# TODO Pass in reduce_to_amount (aka max_allocations)
|
126
|
-
#
|
127
|
-
# TODO uniq, score, sort in there
|
128
|
-
#
|
129
|
-
allocations = @indexes.allocations_for tokens
|
130
|
-
|
131
|
-
# Callbacks.
|
132
|
-
#
|
133
|
-
# TODO Reduce before sort?
|
134
|
-
#
|
135
|
-
reduce allocations
|
136
|
-
remove_from allocations
|
137
|
-
|
138
|
-
# Remove double allocations.
|
139
|
-
#
|
140
|
-
allocations.uniq
|
141
|
-
|
142
|
-
# Score the allocations using weights as bias.
|
143
|
-
#
|
144
|
-
allocations.calculate_score weights
|
145
|
-
|
146
|
-
# Sort the allocations.
|
147
|
-
# (allocations are sorted according to score, highest to lowest)
|
148
|
-
#
|
149
|
-
allocations.sort!
|
150
|
-
|
151
|
-
# Return the allocations.
|
152
|
-
#
|
153
|
-
allocations
|
154
|
-
end
|
155
|
-
def reduce allocations # :nodoc:
|
156
|
-
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
157
|
-
end
|
158
|
-
|
159
|
-
#
|
160
|
-
#
|
161
|
-
def remove_from allocations # :nodoc:
|
162
|
-
allocations.remove identifiers_to_remove
|
163
|
-
end
|
164
|
-
#
|
165
|
-
#
|
166
|
-
def identifiers_to_remove # :nodoc:
|
167
|
-
@identifiers_to_remove ||= []
|
119
|
+
@indexes.prepared_allocations_for tokens, weights
|
168
120
|
end
|
169
121
|
|
170
122
|
# Display some nice information for the user.
|
@@ -0,0 +1,7 @@
|
|
1
|
+
# Tasks for testing your engine configuration in the terminal.
|
2
|
+
#
|
3
|
+
task :search do
  # Load the terminal class relative to this file.
  #
  terminal_file = File.expand_path '../../picky/auxiliary/terminal.rb', __FILE__
  load terminal_file

  # The URL is expected right after the task name, see the usage message.
  #
  url = ARGV[1]
  raise "Usage:\n rake search <URL>\n E.g. rake search /books\n rake search localhost:8080/books" unless url

  Terminal.new(url).run
end
|
data/spec/ext/performant_spec.rb
CHANGED
@@ -5,44 +5,43 @@ describe Performant::Array do
|
|
5
5
|
describe "memory_efficient_intersect" do
|
6
6
|
it "should intersect empty arrays correctly" do
|
7
7
|
arys = [[3,4], [1,2,3], []]
|
8
|
-
|
9
|
-
Performant::Array.memory_efficient_intersect(arys
|
8
|
+
|
9
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
10
10
|
end
|
11
11
|
it "should handle intermediate empty results correctly" do
|
12
12
|
arys = [[5,4], [1,2,3], [3,4,5,8,9]]
|
13
|
-
|
14
|
-
Performant::Array.memory_efficient_intersect(arys
|
13
|
+
|
14
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
15
15
|
end
|
16
16
|
it "should intersect correctly" do
|
17
17
|
arys = [[3,4], [1,2,3], [3,4,5,8,9]]
|
18
|
-
|
19
|
-
Performant::Array.memory_efficient_intersect(arys
|
18
|
+
|
19
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3]
|
20
20
|
end
|
21
21
|
it "should intersect correctly again" do
|
22
|
-
arys = [[3,
|
23
|
-
|
24
|
-
Performant::Array.memory_efficient_intersect(arys.sort_by(&:size)).should == [3,5,6,7]
|
22
|
+
arys = [[1,2,3,5,6,7], [3,4,5,6,7,8,9], [3,4,5,6,7]]
|
23
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3,5,6,7]
|
25
24
|
end
|
26
25
|
it "should intersect many arrays" do
|
27
26
|
arys = [[3,4,5,6,7], [1,2,3,5,6,7], [3,4,5,6,7,8,9], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19], [1,2,3,4,5,6,7,8,9,10], [2,3,5,6,7,19]]
|
28
|
-
|
29
|
-
Performant::Array.memory_efficient_intersect(arys
|
27
|
+
|
28
|
+
Performant::Array.memory_efficient_intersect(arys).should == [3,5,6,7]
|
30
29
|
end
|
31
30
|
it "should handle random arrays" do
|
32
31
|
proto = Array.new(100, 3_500_000)
|
33
32
|
arys = [proto.map { |e| rand e }, proto.map { |e| rand e }, proto.map { |e| rand e }]
|
34
|
-
|
35
|
-
Performant::Array.memory_efficient_intersect(arys
|
33
|
+
|
34
|
+
# Compare against Ruby's own & applied to each single array (ary), not to the array of arrays.
Performant::Array.memory_efficient_intersect(arys).should == arys.inject(arys.shift.dup) { |total, ary| total & ary }
|
36
35
|
end
|
37
36
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
38
37
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
39
|
-
|
38
|
+
|
40
39
|
# brute force
|
41
|
-
performance_of { Performant::Array.memory_efficient_intersect(arys
|
40
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
42
41
|
end
|
43
42
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
44
43
|
arys = [(1..50).to_a, (10_000..20_000).to_a << 7]
|
45
|
-
|
44
|
+
|
46
45
|
# &
|
47
46
|
performance_of do
|
48
47
|
arys.inject(arys.shift.dup) do |total, ary|
|
@@ -55,33 +54,33 @@ describe Performant::Array do
|
|
55
54
|
describe "memory_efficient_intersect with symbols" do
|
56
55
|
it "should intersect empty arrays correctly" do
|
57
56
|
arys = [[:c,:d], [:a,:b,:c], []]
|
58
|
-
|
59
|
-
Performant::Array.memory_efficient_intersect(arys
|
57
|
+
|
58
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
60
59
|
end
|
61
60
|
it "should handle intermediate empty results correctly" do
|
62
61
|
arys = [[:e,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
63
|
-
|
64
|
-
Performant::Array.memory_efficient_intersect(arys
|
62
|
+
|
63
|
+
Performant::Array.memory_efficient_intersect(arys).should == []
|
65
64
|
end
|
66
65
|
it "should intersect correctly" do
|
67
66
|
arys = [[:c,:d], [:a,:b,:c], [:c,:d,:e,:h,:i]]
|
68
|
-
|
69
|
-
Performant::Array.memory_efficient_intersect(arys
|
67
|
+
|
68
|
+
Performant::Array.memory_efficient_intersect(arys).should == [:c]
|
70
69
|
end
|
71
70
|
it "should intersect many arrays" do
|
72
71
|
arys = [[:c,:d,:e,:f,:g], [:a,:b,:c,:e,:f,:g], [:c,:d,:e,:f,:g,:h,:i], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s], [:a,:b,:c,:d,:e,:f,:g,:h,:i,:j], [:b,:c,:e,:f,:g,:s]]
|
73
|
-
|
74
|
-
Performant::Array.memory_efficient_intersect(arys
|
72
|
+
|
73
|
+
Performant::Array.memory_efficient_intersect(arys).should == [:c,:e,:f,:g]
|
75
74
|
end
|
76
75
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
77
|
-
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a
|
78
|
-
|
76
|
+
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a]
|
77
|
+
|
79
78
|
# brute force
|
80
|
-
performance_of { Performant::Array.memory_efficient_intersect(arys
|
79
|
+
performance_of { Performant::Array.memory_efficient_intersect(arys) }.should < 0.001
|
81
80
|
end
|
82
81
|
it "should be optimal for 2 small arrays of 50/10_000" do
|
83
82
|
arys = [(:'1'..:'50').to_a, (:'10_000'..:'20_000').to_a << 7]
|
84
|
-
|
83
|
+
|
85
84
|
# &
|
86
85
|
performance_of do
|
87
86
|
arys.inject(arys.shift.dup) do |total, ary|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
# We need to load the Terminal file explicitly as the Terminal
|
6
|
+
# is not loaded with the Loader (not needed in the server, only for script runs).
|
7
|
+
#
|
8
|
+
require File.expand_path '../../../../lib/picky/auxiliary/terminal', __FILE__
|
9
|
+
|
10
|
+
describe Terminal do

  let(:terminal) { described_class.new('/some/url') }

  # No example should perform a real search.
  #
  before(:each) do
    terminal.stub! :search => { :total => 0, :duration => 0.01 }
  end

  # left and right only differ in the final letter of the escape sequence.
  #
  { 'left' => 'D', 'right' => 'C' }.each do |direction, code|
    describe direction do
      it 'moves by amount' do
        terminal.should_receive(:print).once.ordered.with "\e[13#{code}"
        terminal.should_receive(:flush).once.ordered

        terminal.send direction, 13
      end
      it 'default is 1' do
        terminal.should_receive(:print).once.ordered.with "\e[1#{code}"
        terminal.should_receive(:flush).once.ordered

        terminal.send direction
      end
    end
  end

  describe 'flush' do
    it 'flushes STDOUT' do
      STDOUT.should_receive(:flush).once.with()

      terminal.flush
    end
  end

end
|
@@ -1 +1,15 @@
|
|
1
|
-
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Checks that the top-level shortcut constants refer to
# the generators in the Internals::Generators namespace.
#
# Each example names the constant it checks so that a failure
# can be attributed without looking at the line number.
#
describe 'aliases' do

  it 'aliases Partial correctly' do
    Partial.should == Internals::Generators::Partial
  end
  it 'aliases Similarity correctly' do
    Similarity.should == Internals::Generators::Similarity
  end
  it 'aliases Weights correctly' do
    Weights.should == Internals::Generators::Weights
  end

end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Internals::Indexed::Bundle::Redis do

  # The Redis backend is replaced by a stub, so that the
  # examples can set expectations on what the bundle calls.
  #
  before(:each) do
    @backend = stub :backend
    Internals::Index::Redis.stub! :new => @backend

    @category      = stub :category, :name => :some_category
    @index         = stub :index,    :name => :some_index
    @configuration = Configuration::Index.new @index, @category

    @similarity = stub :similarity
    @bundle     = described_class.new :some_name, @configuration, @similarity
  end

  # Bundle method => backend method it is expected to call.
  #
  { :ids => :ids, :weight => :weight, :[] => :setting }.each do |bundle_method, backend_method|
    describe bundle_method.to_s do
      it 'delegates to the backend' do
        @backend.should_receive(backend_method).once.with :some_sym

        @bundle.send bundle_method, :some_sym
      end
    end
  end

end
|
@@ -77,5 +77,26 @@ describe Internals::Query::Indexes do
|
|
77
77
|
performance_of { indexes.expand_combinations_from(combinations) }.should < 0.00045
|
78
78
|
end
|
79
79
|
end
|
80
|
+
|
81
|
+
describe 'prepared_allocations_for' do
  # The allocations are stubbed so that only the calls made on them
  # (and their order) are checked.
  #
  before(:each) do
    @allocations = stub :allocations
    indexes.stub! :allocations_for => @allocations
  end
  it 'calls the right methods in order, passing on the given weights' do
    @allocations.should_receive(:uniq).once.ordered.with()
    @allocations.should_receive(:calculate_score).once.ordered.with(:some_weights)
    @allocations.should_receive(:sort!).once.ordered.with()

    indexes.prepared_allocations_for :some_tokens, :some_weights
  end
  it 'calls the right methods in order, using empty weights by default' do
    @allocations.should_receive(:uniq).once.ordered.with()
    @allocations.should_receive(:calculate_score).once.ordered.with({})
    @allocations.should_receive(:sort!).once.ordered.with()

    indexes.prepared_allocations_for :some_tokens
  end
end
|
80
101
|
|
81
102
|
end
|
data/spec/lib/search_spec.rb
CHANGED
@@ -4,6 +4,11 @@ require 'spec_helper'
|
|
4
4
|
|
5
5
|
describe Search do
|
6
6
|
|
7
|
+
before(:each) do
|
8
|
+
@type = stub :type
|
9
|
+
@index = stub :some_index, :indexed => @type
|
10
|
+
end
|
11
|
+
|
7
12
|
describe 'combinations_type_for' do
|
8
13
|
let(:search) { described_class.new }
|
9
14
|
it 'returns a specific Combination for a specific input' do
|
@@ -46,25 +51,6 @@ describe Search do
|
|
46
51
|
end
|
47
52
|
end
|
48
53
|
|
49
|
-
# describe "empty_results" do
|
50
|
-
# before(:each) do
|
51
|
-
# @search = search::Full.new
|
52
|
-
#
|
53
|
-
# @result_type = stub :result_type
|
54
|
-
# @search.stub! :result_type => @result_type
|
55
|
-
# end
|
56
|
-
# it "returns a new result type" do
|
57
|
-
# @result_type.should_receive(:new).once.with :some_offset
|
58
|
-
#
|
59
|
-
# @search.empty_results :some_offset
|
60
|
-
# end
|
61
|
-
# it "returns a new result type with default offset" do
|
62
|
-
# @result_type.should_receive(:new).once.with 0
|
63
|
-
#
|
64
|
-
# @search.empty_results
|
65
|
-
# end
|
66
|
-
# end
|
67
|
-
|
68
54
|
describe "search_with_text" do
|
69
55
|
before(:each) do
|
70
56
|
@search = Search.new
|
@@ -83,60 +69,6 @@ describe Search do
|
|
83
69
|
end
|
84
70
|
end
|
85
71
|
|
86
|
-
describe 'reduce' do
|
87
|
-
context 'real' do
|
88
|
-
before(:each) do
|
89
|
-
@allocations = stub :allocations
|
90
|
-
@search = Search.new
|
91
|
-
end
|
92
|
-
context 'reduce_to_amount not set' do
|
93
|
-
it 'should not call anything on the allocations' do
|
94
|
-
@allocations.should_receive(:reduce_to).never
|
95
|
-
|
96
|
-
@search.reduce @allocations
|
97
|
-
end
|
98
|
-
end
|
99
|
-
context 'reduce_to_amount set' do
|
100
|
-
before(:each) do
|
101
|
-
@search.reduce_to_amount = :some_amount
|
102
|
-
end
|
103
|
-
it 'should call reduce_to on the allocations' do
|
104
|
-
@allocations.should_receive(:reduce_to).once.with :some_amount
|
105
|
-
|
106
|
-
@search.reduce @allocations
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
context 'stubbed' do
|
111
|
-
before(:each) do
|
112
|
-
@allocations = stub :allocations
|
113
|
-
@search = Search.new
|
114
|
-
end
|
115
|
-
context 'reduce_to_amount not set' do
|
116
|
-
it 'should not call anything on the allocations' do
|
117
|
-
@allocations.should_receive(:reduce_to).never
|
118
|
-
|
119
|
-
@search.reduce @allocations
|
120
|
-
end
|
121
|
-
end
|
122
|
-
context 'reduce_to_amount set' do
|
123
|
-
before(:each) do
|
124
|
-
@search.stub! :reduce_to_amount => :some_amount
|
125
|
-
end
|
126
|
-
it 'should call reduce_to on the allocations' do
|
127
|
-
@allocations.should_receive(:reduce_to).once.with :some_amount
|
128
|
-
|
129
|
-
@search.reduce @allocations
|
130
|
-
end
|
131
|
-
end
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
before(:each) do
|
136
|
-
@type = stub :type
|
137
|
-
@index = stub :some_index, :indexed => @type
|
138
|
-
end
|
139
|
-
|
140
72
|
describe 'initializer' do
|
141
73
|
context 'with tokenizer' do
|
142
74
|
before(:each) do
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 2.1.0
|
5
|
+
version: 2.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-04-
|
13
|
+
date: 2011-04-11 00:00:00 +10:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -39,6 +39,7 @@ files:
|
|
39
39
|
- lib/picky/aliases.rb
|
40
40
|
- lib/picky/analyzer.rb
|
41
41
|
- lib/picky/application.rb
|
42
|
+
- lib/picky/auxiliary/terminal.rb
|
42
43
|
- lib/picky/character_substituters/west_european.rb
|
43
44
|
- lib/picky/cli.rb
|
44
45
|
- lib/picky/cores.rb
|
@@ -154,6 +155,7 @@ files:
|
|
154
155
|
- lib/tasks/framework.rake
|
155
156
|
- lib/tasks/index.rake
|
156
157
|
- lib/tasks/routes.rake
|
158
|
+
- lib/tasks/search.rake
|
157
159
|
- lib/tasks/server.rake
|
158
160
|
- lib/tasks/shortcuts.rake
|
159
161
|
- lib/tasks/solr.rake
|
@@ -166,6 +168,7 @@ files:
|
|
166
168
|
- spec/lib/aliases_spec.rb
|
167
169
|
- spec/lib/analyzer_spec.rb
|
168
170
|
- spec/lib/application_spec.rb
|
171
|
+
- spec/lib/auxiliary/terminal_spec.rb
|
169
172
|
- spec/lib/bundling_spec.rb
|
170
173
|
- spec/lib/character_substituters/west_european_spec.rb
|
171
174
|
- spec/lib/cli_spec.rb
|
@@ -211,6 +214,7 @@ files:
|
|
211
214
|
- spec/lib/internals/index/redis/string_hash_spec.rb
|
212
215
|
- spec/lib/internals/index/redis_spec.rb
|
213
216
|
- spec/lib/internals/indexed/bundle/memory_spec.rb
|
217
|
+
- spec/lib/internals/indexed/bundle/redis_spec.rb
|
214
218
|
- spec/lib/internals/indexed/categories_spec.rb
|
215
219
|
- spec/lib/internals/indexed/category_spec.rb
|
216
220
|
- spec/lib/internals/indexed/index_spec.rb
|
@@ -290,6 +294,7 @@ test_files:
|
|
290
294
|
- spec/lib/aliases_spec.rb
|
291
295
|
- spec/lib/analyzer_spec.rb
|
292
296
|
- spec/lib/application_spec.rb
|
297
|
+
- spec/lib/auxiliary/terminal_spec.rb
|
293
298
|
- spec/lib/bundling_spec.rb
|
294
299
|
- spec/lib/character_substituters/west_european_spec.rb
|
295
300
|
- spec/lib/cli_spec.rb
|
@@ -335,6 +340,7 @@ test_files:
|
|
335
340
|
- spec/lib/internals/index/redis/string_hash_spec.rb
|
336
341
|
- spec/lib/internals/index/redis_spec.rb
|
337
342
|
- spec/lib/internals/indexed/bundle/memory_spec.rb
|
343
|
+
- spec/lib/internals/indexed/bundle/redis_spec.rb
|
338
344
|
- spec/lib/internals/indexed/categories_spec.rb
|
339
345
|
- spec/lib/internals/indexed/category_spec.rb
|
340
346
|
- spec/lib/internals/indexed/index_spec.rb
|