picky 4.15.0 → 4.15.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/picky/analyzer.rb +55 -38
- data/lib/picky/category_realtime.rb +11 -2
- data/lib/picky/index_indexing.rb +9 -3
- data/spec/functional/arrays_as_ids_spec.rb +57 -0
- data/spec/functional/from_spec.rb +25 -0
- metadata +54 -30
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
M2MxMTQxZjI1MDcwYzA1N2UzNWY2MTQ5ZGNkMTllYWFjYmI1MDk0Nw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NjA3MTc5MTEwMWUzZWQ1NGZjNWU0ZjRhZDJmMTJhZjhhYjdiZWIzNA==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NjdhYmYyYTkwOGJlZmUzZjljYTA2MjIxNzg0NWEwYzZkZWE2ZmE3OTdmZGEz
|
10
|
+
Mjc4YjYwMWZlMTU4Zjk0NGQyNGE4NzM2MjA0ZWY2MGRkODlhM2Y5Y2ZhYWZk
|
11
|
+
MzFjOWM2ZTQ4MzQzYTFiYTA3ZjUyZmQyZTU1NGU0ZjExMGQyYTA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MzlhODU0NmZlNGRkOGM4YzlkZDY3MTQxYmJlYTA2MWJkYWEwMGI4MjEwNzA1
|
14
|
+
M2VhYjExNzQ2YzdhODcyZjE0ZWVlN2QzMDRmNTU0YTI0YjVjMjFkNzc1ODk2
|
15
|
+
ZjQ3OTQyNTVhNmY3ZjhiN2ZhZWY4MWM0OTcyMDYxYjIzMTQ1YTY=
|
data/lib/picky/analyzer.rb
CHANGED
@@ -1,6 +1,17 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
|
4
|
+
class Range
|
5
|
+
def expand_with thing
|
6
|
+
return (thing..thing) unless min
|
7
|
+
if max < thing
|
8
|
+
(min..thing)
|
9
|
+
else
|
10
|
+
thing < min ? (thing..max) : self
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
4
15
|
# Analyzes indexes (index bundles, actually).
|
5
16
|
#
|
6
17
|
class Analyzer
|
@@ -38,46 +49,39 @@ class Analyzer
|
|
38
49
|
|
39
50
|
self
|
40
51
|
end
|
52
|
+
|
41
53
|
def cardinality identifier, index
|
42
|
-
return
|
43
|
-
return unless index.respond_to?(:each_pair)
|
44
|
-
|
45
|
-
key_length_average = 0
|
46
|
-
ids_length_average = 0
|
54
|
+
return unless can_calculate_cardinality? index
|
47
55
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
56
|
+
key_length_sum = 0
|
57
|
+
ids_length_sum = 0
|
58
|
+
|
59
|
+
key_length = (1.0/0..0)
|
60
|
+
ids_length = (1.0/0..0)
|
52
61
|
|
53
|
-
key_size, ids_size = 0, 0
|
54
62
|
index.each_pair do |key, ids|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
max_key_length = key_size if key_size > max_key_length
|
60
|
-
end
|
61
|
-
key_length_average += key_size
|
62
|
-
|
63
|
-
ids_size = ids.size
|
64
|
-
if ids_size < min_ids_length
|
65
|
-
min_ids_length = ids_size
|
66
|
-
else
|
67
|
-
max_ids_length = ids_size if ids_size > max_ids_length
|
68
|
-
end
|
69
|
-
ids_length_average += ids_size
|
63
|
+
key_length = key_length.expand_with key.size
|
64
|
+
key_length_sum += key.size
|
65
|
+
ids_length = ids_length.expand_with ids.size
|
66
|
+
ids_length_sum += ids.size
|
70
67
|
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
analysis[identifier] ||= {}
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
68
|
+
|
69
|
+
report_cardinality identifier, index, key_length, ids_length, key_length_sum, ids_length_sum
|
70
|
+
end
|
71
|
+
def report_cardinality identifier, index, key_length, ids_length, key_length_sum, ids_length_sum
|
72
|
+
analysis_identifier = analysis[identifier] ||= {}
|
73
|
+
analysis_identifier[:key_length] = key_length
|
74
|
+
analysis_identifier[:ids_length] = ids_length
|
75
|
+
analysis_identifier[:key_length_average] = key_length_sum.to_f / index.size
|
76
|
+
analysis_identifier[:ids_length_average] = ids_length_sum.to_f / index.size
|
77
|
+
end
|
78
|
+
|
79
|
+
def can_calculate_cardinality? index
|
80
|
+
return if index.size.zero?
|
81
|
+
return unless index.respond_to? :each_pair
|
82
|
+
true
|
80
83
|
end
|
84
|
+
|
81
85
|
def index_analysis
|
82
86
|
return unless analysis[:index]
|
83
87
|
|
@@ -91,6 +95,7 @@ class Analyzer
|
|
91
95
|
comments << "\033[33mIndex matches single characters.\033[m"
|
92
96
|
end
|
93
97
|
end
|
98
|
+
|
94
99
|
def weights index
|
95
100
|
return if !index.respond_to?(:size) || index.size.zero?
|
96
101
|
return unless index.respond_to?(:each_pair)
|
@@ -115,6 +120,7 @@ class Analyzer
|
|
115
120
|
analysis[:weights][:weight_range] = (min_weight..max_weight)
|
116
121
|
analysis[:weights][:weight_average] = weight_average
|
117
122
|
end
|
123
|
+
|
118
124
|
def weights_analysis
|
119
125
|
return unless analysis[:weights]
|
120
126
|
|
@@ -131,25 +137,36 @@ class Analyzer
|
|
131
137
|
def to_s
|
132
138
|
[*comments, index_to_s, weights_to_s, similarity_to_s, configuration_to_s].compact.join "\n"
|
133
139
|
end
|
140
|
+
|
134
141
|
def index_to_s
|
135
142
|
return if analysis[:__keys].zero?
|
136
|
-
ary = ["index key cardinality: #{"%
|
143
|
+
ary = ["index key cardinality: #{"%9d" % analysis[:__keys]}"]
|
137
144
|
return ary.join "\n" unless analysis[:index]
|
138
|
-
ary << formatted(
|
145
|
+
ary << formatted(nil, :key_length)
|
139
146
|
ary << formatted('ids per', :ids_length)
|
140
147
|
ary.join "\n"
|
141
148
|
end
|
149
|
+
|
142
150
|
def formatted description, key, index = :index
|
143
|
-
"index
|
151
|
+
what = "%-40s" % ["index", description, "key length range (avg):"].compact.join(' ')
|
152
|
+
range = "%7s" % analysis[index][key]
|
153
|
+
average = "%8s" % "(#{analysis[index][:"#{key}_average"].round(2)})"
|
154
|
+
what + range + average
|
144
155
|
end
|
156
|
+
|
145
157
|
def weights_to_s
|
146
158
|
return unless analysis[:weights]
|
147
|
-
%
|
159
|
+
what = "%-30s" % "weights range (avg):"
|
160
|
+
range = "%17s" % analysis[:weights][:weight_range]
|
161
|
+
average = "%8s" % "(#{analysis[:weights][:weight_average].round(2)})"
|
162
|
+
what + range + average
|
148
163
|
end
|
164
|
+
|
149
165
|
def similarity_to_s
|
150
166
|
return unless analysis[:similarity]
|
151
167
|
formatted('similarity', :key_length, :similarity)
|
152
168
|
end
|
169
|
+
|
153
170
|
def configuration_to_s
|
154
171
|
# analysis[:configuration]
|
155
172
|
end
|
@@ -17,7 +17,11 @@ module Picky
|
|
17
17
|
# given object.
|
18
18
|
#
|
19
19
|
def add object, where = :unshift
|
20
|
-
|
20
|
+
if from.respond_to? :call
|
21
|
+
add_text object.id, from.call(object), where
|
22
|
+
else
|
23
|
+
add_text object.id, object.send(from), where
|
24
|
+
end
|
21
25
|
end
|
22
26
|
|
23
27
|
# Removes the object's id, and then
|
@@ -65,8 +69,13 @@ module Picky
|
|
65
69
|
else
|
66
70
|
tokens = text_or_tokens
|
67
71
|
end
|
72
|
+
|
73
|
+
# TODO Have an "as is" key_format?
|
74
|
+
#
|
68
75
|
tokens.each { |text| add_tokenized_token id.send(key_format), text, where, false }
|
69
76
|
rescue NoMethodError
|
77
|
+
# TODO This is also raised on a wrong key_format.
|
78
|
+
# TODO Improve error message by pointing out what exactly goes wrong: thing xy does not have an #each method.
|
70
79
|
raise %Q{You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
|
71
80
|
end
|
72
81
|
|
@@ -98,4 +107,4 @@ module Picky
|
|
98
107
|
|
99
108
|
end
|
100
109
|
|
101
|
-
end
|
110
|
+
end
|
data/lib/picky/index_indexing.rb
CHANGED
@@ -5,7 +5,9 @@ module Picky
|
|
5
5
|
class Index
|
6
6
|
include Helpers::Indexing
|
7
7
|
|
8
|
-
forward :cache,
|
8
|
+
forward :cache,
|
9
|
+
:clear,
|
10
|
+
:to => :categories
|
9
11
|
|
10
12
|
# Define an index tokenizer on the index.
|
11
13
|
#
|
@@ -21,6 +23,8 @@ module Picky
|
|
21
23
|
# Decides whether to use a parallel indexer or whether to
|
22
24
|
# forward to each category to prepare themselves.
|
23
25
|
#
|
26
|
+
# TODO Do a critical reading of this on the blog.
|
27
|
+
#
|
24
28
|
def prepare scheduler = Scheduler.new
|
25
29
|
if source.respond_to?(:each)
|
26
30
|
check_source_empty
|
@@ -79,7 +83,7 @@ module Picky
|
|
79
83
|
some_source ? (@source = Source.from(some_source, false, name)) : unblock_source
|
80
84
|
end
|
81
85
|
# Get the actual source if it is wrapped in a time
|
82
|
-
# capsule,
|
86
|
+
# capsule, i.e. a block/lambda.
|
83
87
|
#
|
84
88
|
def unblock_source
|
85
89
|
@source.respond_to?(:call) ? @source.call : @source
|
@@ -87,7 +91,9 @@ module Picky
|
|
87
91
|
|
88
92
|
# Define a key_format on the index.
|
89
93
|
#
|
90
|
-
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
94
|
+
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip, :split).
|
95
|
+
#
|
96
|
+
# TODO Rename id_format?
|
91
97
|
#
|
92
98
|
def key_format key_format = nil
|
93
99
|
key_format ? (@key_format = key_format) : @key_format
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "Array IDs" do
|
6
|
+
|
7
|
+
# This tests using Arrays as IDs (key_format :to_a).
|
8
|
+
#
|
9
|
+
it 'can use Arrays as IDs' do
|
10
|
+
index = Picky::Index.new :arrays do
|
11
|
+
key_format :to_a
|
12
|
+
|
13
|
+
category :text1
|
14
|
+
end
|
15
|
+
|
16
|
+
require 'ostruct'
|
17
|
+
|
18
|
+
thing = OpenStruct.new id: ['id1', 'thing1'], text1: "ohai"
|
19
|
+
other = OpenStruct.new id: ['id2', 'thing2'], text1: "ohai kthxbye"
|
20
|
+
|
21
|
+
index.add thing
|
22
|
+
index.add other
|
23
|
+
|
24
|
+
try = Picky::Search.new index
|
25
|
+
|
26
|
+
try.search("text1:ohai").ids.should == [
|
27
|
+
["id2", "thing2"],
|
28
|
+
["id1", "thing1"]
|
29
|
+
] # WAT
|
30
|
+
end
|
31
|
+
|
32
|
+
# This tests using :split as the key_format, turning String IDs into Arrays.
|
33
|
+
#
|
34
|
+
it 'can use split as key_format' do
|
35
|
+
index = Picky::Index.new :arrays do
|
36
|
+
key_format :split
|
37
|
+
|
38
|
+
category :text1
|
39
|
+
end
|
40
|
+
|
41
|
+
require 'ostruct'
|
42
|
+
|
43
|
+
thing = OpenStruct.new id: "id1 thing1", text1: "ohai"
|
44
|
+
other = OpenStruct.new id: "id2 thing2", text1: "ohai kthxbye"
|
45
|
+
|
46
|
+
index.add thing
|
47
|
+
index.add other
|
48
|
+
|
49
|
+
try = Picky::Search.new index
|
50
|
+
|
51
|
+
try.search("text1:ohai").ids.should == [
|
52
|
+
["id2", "thing2"],
|
53
|
+
["id1", "thing1"]
|
54
|
+
] # WAT
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe "From option" do
|
6
|
+
|
7
|
+
it 'can be given a lambda' do
|
8
|
+
index = Picky::Index.new :lambda do
|
9
|
+
category :text, from: ->(thing){ thing.some_text * 2 } # Anything, really.
|
10
|
+
end
|
11
|
+
|
12
|
+
require 'ostruct'
|
13
|
+
|
14
|
+
thing = OpenStruct.new id: 1, some_text: "ohai"
|
15
|
+
other = OpenStruct.new id: 2, some_text: "ohai kthxbye"
|
16
|
+
|
17
|
+
index.add thing
|
18
|
+
index.add other
|
19
|
+
|
20
|
+
try = Picky::Search.new index
|
21
|
+
|
22
|
+
try.search("text:ohaiohai").ids.should == [1]
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
metadata
CHANGED
@@ -1,93 +1,113 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.15.
|
5
|
-
prerelease:
|
4
|
+
version: 4.15.1
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Florian Hanke
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-
|
11
|
+
date: 2013-06-13 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
|
-
requirement:
|
17
|
-
none: false
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
|
-
version_requirements:
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ! '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
25
27
|
- !ruby/object:Gem::Dependency
|
26
28
|
name: rake-compiler
|
27
|
-
requirement:
|
28
|
-
none: false
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
29
30
|
requirements:
|
30
31
|
- - ! '>='
|
31
32
|
- !ruby/object:Gem::Version
|
32
33
|
version: '0'
|
33
34
|
type: :development
|
34
35
|
prerelease: false
|
35
|
-
version_requirements:
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
36
41
|
- !ruby/object:Gem::Dependency
|
37
42
|
name: picky-client
|
38
|
-
requirement:
|
39
|
-
none: false
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
40
44
|
requirements:
|
41
45
|
- - ~>
|
42
46
|
- !ruby/object:Gem::Version
|
43
|
-
version: 4.15.
|
47
|
+
version: 4.15.1
|
44
48
|
type: :development
|
45
49
|
prerelease: false
|
46
|
-
version_requirements:
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 4.15.1
|
47
55
|
- !ruby/object:Gem::Dependency
|
48
56
|
name: text
|
49
|
-
requirement:
|
50
|
-
none: false
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
51
58
|
requirements:
|
52
59
|
- - ! '>='
|
53
60
|
- !ruby/object:Gem::Version
|
54
61
|
version: '0'
|
55
62
|
type: :runtime
|
56
63
|
prerelease: false
|
57
|
-
version_requirements:
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: multi_json
|
60
|
-
requirement:
|
61
|
-
none: false
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
62
72
|
requirements:
|
63
73
|
- - ! '>='
|
64
74
|
- !ruby/object:Gem::Version
|
65
75
|
version: '0'
|
66
76
|
type: :runtime
|
67
77
|
prerelease: false
|
68
|
-
version_requirements:
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ! '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: activesupport
|
71
|
-
requirement:
|
72
|
-
none: false
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
73
86
|
requirements:
|
74
87
|
- - ! '>='
|
75
88
|
- !ruby/object:Gem::Version
|
76
89
|
version: '3.0'
|
77
90
|
type: :runtime
|
78
91
|
prerelease: false
|
79
|
-
version_requirements:
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ! '>='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.0'
|
80
97
|
- !ruby/object:Gem::Dependency
|
81
98
|
name: rack_fast_escape
|
82
|
-
requirement:
|
83
|
-
none: false
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
84
100
|
requirements:
|
85
101
|
- - ! '>='
|
86
102
|
- !ruby/object:Gem::Version
|
87
103
|
version: '0'
|
88
104
|
type: :runtime
|
89
105
|
prerelease: false
|
90
|
-
version_requirements:
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ! '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
91
111
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|
92
112
|
email: florian.hanke+picky@gmail.com
|
93
113
|
executables:
|
@@ -244,6 +264,7 @@ files:
|
|
244
264
|
- ext/picky/picky.c
|
245
265
|
- spec/aux/picky/cli_spec.rb
|
246
266
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
267
|
+
- spec/functional/arrays_as_ids_spec.rb
|
247
268
|
- spec/functional/automatic_segmentation_spec.rb
|
248
269
|
- spec/functional/backends/file_spec.rb
|
249
270
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
@@ -259,6 +280,7 @@ files:
|
|
259
280
|
- spec/functional/error_messages_spec.rb
|
260
281
|
- spec/functional/exact_first_spec.rb
|
261
282
|
- spec/functional/facets_spec.rb
|
283
|
+
- spec/functional/from_spec.rb
|
262
284
|
- spec/functional/ignore_allocations_spec.rb
|
263
285
|
- spec/functional/ignore_spec.rb
|
264
286
|
- spec/functional/max_allocations_spec.rb
|
@@ -385,35 +407,36 @@ files:
|
|
385
407
|
- spec/lib/tasks/try_spec.rb
|
386
408
|
- spec/lib/tokenizer_spec.rb
|
387
409
|
- spec/performant_spec.rb
|
388
|
-
-
|
410
|
+
- !binary |-
|
411
|
+
YmluL3BpY2t5
|
389
412
|
- ext/picky/extconf.rb
|
390
413
|
homepage: http://florianhanke.com/picky
|
391
414
|
licenses: []
|
415
|
+
metadata: {}
|
392
416
|
post_install_message:
|
393
417
|
rdoc_options: []
|
394
418
|
require_paths:
|
395
419
|
- lib
|
396
420
|
required_ruby_version: !ruby/object:Gem::Requirement
|
397
|
-
none: false
|
398
421
|
requirements:
|
399
422
|
- - ! '>='
|
400
423
|
- !ruby/object:Gem::Version
|
401
424
|
version: '0'
|
402
425
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
403
|
-
none: false
|
404
426
|
requirements:
|
405
427
|
- - ! '>='
|
406
428
|
- !ruby/object:Gem::Version
|
407
429
|
version: '0'
|
408
430
|
requirements: []
|
409
431
|
rubyforge_project: http://rubyforge.org/projects/picky
|
410
|
-
rubygems_version:
|
432
|
+
rubygems_version: 2.0.3
|
411
433
|
signing_key:
|
412
|
-
specification_version:
|
434
|
+
specification_version: 4
|
413
435
|
summary: ! 'Picky: Semantic Search Engine. Clever Interface. Good Tools.'
|
414
436
|
test_files:
|
415
437
|
- spec/aux/picky/cli_spec.rb
|
416
438
|
- spec/functional/allocations_uniq_by_definition_spec.rb
|
439
|
+
- spec/functional/arrays_as_ids_spec.rb
|
417
440
|
- spec/functional/automatic_segmentation_spec.rb
|
418
441
|
- spec/functional/backends/file_spec.rb
|
419
442
|
- spec/functional/backends/memory_bundle_realtime_spec.rb
|
@@ -429,6 +452,7 @@ test_files:
|
|
429
452
|
- spec/functional/error_messages_spec.rb
|
430
453
|
- spec/functional/exact_first_spec.rb
|
431
454
|
- spec/functional/facets_spec.rb
|
455
|
+
- spec/functional/from_spec.rb
|
432
456
|
- spec/functional/ignore_allocations_spec.rb
|
433
457
|
- spec/functional/ignore_spec.rb
|
434
458
|
- spec/functional/max_allocations_spec.rb
|