picky 2.1.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib → aux}/picky/cli.rb +50 -38
- data/bin/picky +1 -1
- data/lib/picky/application.rb +5 -2
- data/lib/picky/index/base.rb +88 -25
- data/lib/picky/index/memory.rb +8 -8
- data/lib/picky/index/redis.rb +8 -8
- data/lib/picky/index_bundle.rb +2 -2
- data/lib/picky/indexing/indexes.rb +6 -6
- data/lib/picky/internals/calculations/location.rb +54 -42
- data/lib/picky/internals/index/backend.rb +21 -21
- data/lib/picky/internals/index/file/text.rb +11 -11
- data/lib/picky/internals/index/files.rb +6 -6
- data/lib/picky/internals/index/redis.rb +14 -14
- data/lib/picky/internals/indexed/bundle/base.rb +2 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +3 -3
- data/lib/picky/internals/indexed/category.rb +8 -9
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +25 -23
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +36 -34
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +35 -33
- data/lib/picky/internals/indexed/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/indexers/base.rb +28 -0
- data/lib/picky/internals/indexers/parallel.rb +64 -0
- data/lib/picky/internals/indexers/serial.rb +20 -29
- data/lib/picky/internals/indexing/bundle/base.rb +2 -2
- data/lib/picky/internals/indexing/bundle/super_base.rb +3 -3
- data/lib/picky/internals/indexing/category.rb +30 -27
- data/lib/picky/internals/indexing/index.rb +82 -27
- data/lib/picky/internals/indexing/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/query/indexes.rb +1 -1
- data/lib/picky/internals/query/qualifiers.rb +7 -6
- data/lib/picky/internals/query/weights.rb +6 -0
- data/lib/picky/internals/shared/category.rb +52 -0
- data/lib/picky/internals/tokenizers/base.rb +1 -1
- data/lib/picky/internals/tokenizers/location.rb +54 -0
- data/lib/picky/loader.rb +16 -3
- data/lib/picky/no_source_specified_exception.rb +3 -0
- data/lib/picky/search.rb +44 -5
- data/lib/picky/sources/base.rb +2 -2
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/db.rb +9 -9
- data/lib/picky/sources/delicious.rb +1 -1
- data/lib/picky/sources/wrappers/base.rb +12 -13
- data/lib/picky/sources/wrappers/location.rb +24 -54
- data/lib/tasks/search.rake +4 -5
- data/lib/tasks/todo.rake +1 -1
- data/spec/{lib → aux/picky}/cli_spec.rb +13 -8
- data/spec/lib/application_spec.rb +21 -16
- data/spec/lib/index/base_spec.rb +74 -27
- data/spec/lib/index/redis_spec.rb +1 -1
- data/spec/lib/index_bundle_spec.rb +1 -1
- data/spec/lib/indexing/indexes_spec.rb +5 -5
- data/spec/lib/internals/calculations/location_spec.rb +14 -3
- data/spec/lib/internals/index/files_spec.rb +2 -3
- data/spec/lib/internals/index/redis_spec.rb +122 -49
- data/spec/lib/internals/indexed/bundle/memory_spec.rb +4 -6
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +2 -3
- data/spec/lib/internals/indexed/wrappers/bundle/calculation_spec.rb +3 -3
- data/spec/lib/internals/indexed/wrappers/bundle/wrapper_spec.rb +3 -3
- data/spec/lib/internals/indexers/parallel_spec.rb +36 -0
- data/spec/lib/internals/indexers/serial_spec.rb +6 -14
- data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb +2 -3
- data/spec/lib/internals/indexing/bundle/memory_spec.rb +5 -6
- data/spec/lib/internals/indexing/bundle/redis_spec.rb +5 -6
- data/spec/lib/internals/indexing/category_spec.rb +21 -6
- data/spec/lib/internals/indexing/index_spec.rb +43 -7
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/search_spec.rb +51 -2
- data/spec/lib/sources/couch_spec.rb +6 -6
- data/spec/lib/sources/csv_spec.rb +4 -4
- data/spec/lib/sources/db_spec.rb +13 -14
- data/spec/lib/sources/delicious_spec.rb +3 -3
- data/spec/lib/sources/wrappers/base_spec.rb +9 -10
- data/spec/lib/sources/wrappers/location_spec.rb +11 -23
- metadata +14 -15
- data/lib/picky/auxiliary/terminal.rb +0 -219
- data/lib/picky/internals/configuration/index.rb +0 -67
- data/lib/picky/internals/indexers/no_source_specified_error.rb +0 -7
- data/lib/picky/internals/indexing/categories.rb +0 -46
- data/spec/lib/auxiliary/terminal_spec.rb +0 -150
- data/spec/lib/internals/configuration/index_spec.rb +0 -80
- data/spec/lib/internals/indexing/categories_spec.rb +0 -49
@@ -1,13 +1,13 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Index
|
4
|
-
|
4
|
+
|
5
5
|
module File
|
6
|
-
|
6
|
+
|
7
7
|
# Index data dumped in the text format.
|
8
8
|
#
|
9
9
|
class Text < Basic
|
10
|
-
|
10
|
+
|
11
11
|
# Uses the extension "txt".
|
12
12
|
#
|
13
13
|
def extension
|
@@ -20,12 +20,12 @@ module Internals
|
|
20
20
|
raise "Can't load from text file. Use JSON or Marshal."
|
21
21
|
end
|
22
22
|
# Text files are used exclusively for
|
23
|
-
# prepared data files.
|
23
|
+
# prepared data files.
|
24
24
|
#
|
25
25
|
def dump hash
|
26
26
|
raise "Can't dump to text file. Use JSON or Marshal."
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
# Retrieves prepared index data in the form
|
30
30
|
# * id,data\n
|
31
31
|
# * id,data\n
|
@@ -43,18 +43,18 @@ module Internals
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
#
|
48
48
|
#
|
49
49
|
def open_for_indexing &block
|
50
50
|
::File.open cache_path, 'w:binary', &block
|
51
51
|
end
|
52
|
-
|
53
|
-
|
52
|
+
|
53
|
+
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
end
|
@@ -4,17 +4,17 @@ module Internals
|
|
4
4
|
|
5
5
|
class Files < Backend
|
6
6
|
|
7
|
-
def initialize bundle_name,
|
8
|
-
super bundle_name,
|
7
|
+
def initialize bundle_name, category
|
8
|
+
super bundle_name, category
|
9
9
|
|
10
10
|
# Note: We marshal the similarity, as the
|
11
11
|
# Yajl json lib cannot load symbolized
|
12
12
|
# values, just keys.
|
13
13
|
#
|
14
|
-
@index = File::JSON.new
|
15
|
-
@weights = File::JSON.new
|
16
|
-
@similarity = File::Marshal.new
|
17
|
-
@configuration = File::JSON.new
|
14
|
+
@index = File::JSON.new category.index_path(bundle_name, :index)
|
15
|
+
@weights = File::JSON.new category.index_path(bundle_name, :weights)
|
16
|
+
@similarity = File::Marshal.new category.index_path(bundle_name, :similarity)
|
17
|
+
@configuration = File::JSON.new category.index_path(bundle_name, :configuration)
|
18
18
|
end
|
19
19
|
|
20
20
|
def to_s
|
@@ -1,28 +1,28 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Index
|
4
|
-
|
4
|
+
|
5
5
|
# TODO Needs a reconnect to be run after forking.
|
6
6
|
#
|
7
7
|
class Redis < Backend
|
8
|
-
|
9
|
-
def initialize bundle_name,
|
10
|
-
super bundle_name,
|
8
|
+
|
9
|
+
def initialize bundle_name, category
|
10
|
+
super bundle_name, category
|
11
11
|
|
12
12
|
# Refine a few Redis "types".
|
13
13
|
#
|
14
|
-
@index = Redis::ListHash.new "#{
|
15
|
-
@weights = Redis::StringHash.new "#{
|
16
|
-
@similarity = Redis::ListHash.new "#{
|
17
|
-
@configuration = Redis::StringHash.new "#{
|
14
|
+
@index = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:index"
|
15
|
+
@weights = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
|
16
|
+
@similarity = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:similarity"
|
17
|
+
@configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
# Delegate to the right collection.
|
21
21
|
#
|
22
22
|
def ids sym
|
23
23
|
index.collection sym
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
# Delegate to the right member value.
|
27
27
|
#
|
28
28
|
# Note: Converts to float.
|
@@ -30,15 +30,15 @@ module Internals
|
|
30
30
|
def weight sym
|
31
31
|
weights.member(sym).to_f
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
# Delegate to a member value.
|
35
35
|
#
|
36
36
|
def setting sym
|
37
37
|
configuration.member sym
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
end
|
@@ -35,8 +35,8 @@ module Internals
|
|
35
35
|
delegate :[], :to => :configuration
|
36
36
|
delegate :size, :to => :index
|
37
37
|
|
38
|
-
def initialize name,
|
39
|
-
@identifier = "#{
|
38
|
+
def initialize name, category, similarity_strategy
|
39
|
+
@identifier = "#{category.identifier}:#{name}"
|
40
40
|
|
41
41
|
@index = {}
|
42
42
|
@weights = {}
|
@@ -14,10 +14,10 @@ module Internals
|
|
14
14
|
#
|
15
15
|
class Redis < Base
|
16
16
|
|
17
|
-
def initialize name,
|
18
|
-
super name,
|
17
|
+
def initialize name, category, *args
|
18
|
+
super name, category, *args
|
19
19
|
|
20
|
-
@backend = Internals::Index::Redis.new name,
|
20
|
+
@backend = Internals::Index::Redis.new name, category
|
21
21
|
end
|
22
22
|
|
23
23
|
# Get the ids for the given symbol.
|
@@ -9,18 +9,17 @@ module Internals
|
|
9
9
|
#
|
10
10
|
class Category
|
11
11
|
|
12
|
+
include Internals::Shared::Category
|
13
|
+
|
12
14
|
attr_accessor :exact
|
13
|
-
attr_reader :
|
15
|
+
attr_reader :name, :index
|
14
16
|
attr_writer :partial
|
15
17
|
|
16
18
|
#
|
17
19
|
#
|
18
20
|
def initialize name, index, options = {}
|
19
|
-
@name
|
20
|
-
|
21
|
-
configuration = Configuration::Index.new index, self
|
22
|
-
|
23
|
-
@identifier = configuration.identifier
|
21
|
+
@name = name
|
22
|
+
@index = index
|
24
23
|
|
25
24
|
# TODO Push the defaults out into the index.
|
26
25
|
#
|
@@ -28,15 +27,15 @@ module Internals
|
|
28
27
|
similarity = options[:similarity] || Internals::Generators::Similarity::Default
|
29
28
|
|
30
29
|
bundle_class = options[:indexed_bundle_class] || Bundle::Memory
|
31
|
-
@exact = bundle_class.new :exact,
|
32
|
-
@partial = bundle_class.new :partial,
|
30
|
+
@exact = bundle_class.new :exact, self, similarity
|
31
|
+
@partial = bundle_class.new :partial, self, similarity
|
33
32
|
|
34
33
|
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
35
34
|
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
36
35
|
|
37
36
|
# TODO Extract?
|
38
37
|
#
|
39
|
-
Query::Qualifiers.add(
|
38
|
+
Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
|
40
39
|
end
|
41
40
|
|
42
41
|
def to_s
|
@@ -1,35 +1,37 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
# A calculation rewrites the symbol into a float.
|
7
|
-
#
|
8
|
-
# TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
|
9
|
-
#
|
10
|
-
class Calculation < Wrapper
|
5
|
+
module Bundle
|
11
6
|
|
7
|
+
# A calculation rewrites the symbol into a float.
|
12
8
|
#
|
9
|
+
# TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
|
13
10
|
#
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
class Calculation < Wrapper
|
12
|
+
|
13
|
+
#
|
14
|
+
#
|
15
|
+
def recalculate float
|
16
|
+
float
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
#
|
21
|
+
def ids sym
|
22
|
+
@bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
#
|
27
|
+
def weight sym
|
28
|
+
@bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
|
29
|
+
end
|
17
30
|
|
18
|
-
#
|
19
|
-
#
|
20
|
-
def ids sym
|
21
|
-
@bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
|
22
|
-
end
|
23
|
-
|
24
|
-
#
|
25
|
-
#
|
26
|
-
def weight sym
|
27
|
-
@bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
|
28
31
|
end
|
29
32
|
|
30
33
|
end
|
31
34
|
|
32
35
|
end
|
33
|
-
|
34
36
|
end
|
35
37
|
end
|
@@ -1,42 +1,44 @@
|
|
1
|
-
module
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
class Location < Calculation
|
9
|
-
|
10
|
-
def initialize bundle, options = {}
|
11
|
-
super bundle
|
12
|
-
|
13
|
-
precision = options[:precision] || 1
|
14
|
-
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
15
|
-
|
16
|
-
@calculation = Calculations::Location.new user_grid, precision
|
17
|
-
end
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
def recalculate float
|
22
|
-
@calculation.recalculate float
|
23
|
-
end
|
24
|
-
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
4
|
+
|
5
|
+
module Bundle
|
6
|
+
|
7
|
+
# A location calculation recalculates a location to the Picky internal location.
|
25
8
|
#
|
26
|
-
|
27
|
-
|
28
|
-
|
9
|
+
class Location < Calculation
|
10
|
+
|
11
|
+
def initialize bundle, options = {}
|
12
|
+
super bundle
|
13
|
+
|
14
|
+
precision = options[:precision] || 1
|
15
|
+
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
16
|
+
|
17
|
+
@calculation = Internals::Calculations::Location.new user_grid, precision
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
#
|
22
|
+
def recalculate float
|
23
|
+
@calculation.recalculate float
|
24
|
+
end
|
25
|
+
|
29
26
|
#
|
30
|
-
bundle.load
|
31
|
-
# TODO Move the to_f to the backend.
|
32
27
|
#
|
33
|
-
|
34
|
-
|
28
|
+
def load
|
29
|
+
# Load first the bundle, then extract the config.
|
30
|
+
#
|
31
|
+
bundle.load
|
32
|
+
# TODO Move the to_f to the backend.
|
33
|
+
#
|
34
|
+
minimum = bundle[:location_minimum] && bundle[:location_minimum].to_f || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
35
|
+
@calculation.minimum = minimum
|
36
|
+
end
|
37
|
+
|
35
38
|
end
|
36
|
-
|
39
|
+
|
37
40
|
end
|
38
|
-
|
41
|
+
|
39
42
|
end
|
40
|
-
|
41
43
|
end
|
42
44
|
end
|
@@ -1,43 +1,45 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
3
4
|
|
4
|
-
|
5
|
-
#
|
6
|
-
module Bundle
|
7
|
-
|
8
|
-
# Base wrapper. Just delegates all methods to the bundle.
|
5
|
+
# Per Bundle wrappers.
|
9
6
|
#
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# Base wrapper. Just delegates all methods to the bundle.
|
10
|
+
#
|
11
|
+
class Wrapper
|
12
|
+
|
13
|
+
attr_reader :bundle
|
14
|
+
|
15
|
+
def initialize bundle
|
16
|
+
@bundle = bundle
|
17
|
+
end
|
18
|
+
|
19
|
+
delegate :load,
|
20
|
+
:load_index,
|
21
|
+
:load_weights,
|
22
|
+
:load_similarity,
|
23
|
+
:load_configuration,
|
24
|
+
:clear_index,
|
25
|
+
:clear_weights,
|
26
|
+
:clear_similarity,
|
27
|
+
:clear_configuration,
|
28
|
+
:ids,
|
29
|
+
:weight,
|
30
|
+
:identifier,
|
31
|
+
:analyze,
|
32
|
+
:size,
|
33
|
+
:index,
|
34
|
+
:weights,
|
35
|
+
:similarity,
|
36
|
+
:configuration,
|
37
|
+
:to => :@bundle
|
13
38
|
|
14
|
-
def initialize bundle
|
15
|
-
@bundle = bundle
|
16
39
|
end
|
17
40
|
|
18
|
-
delegate :load,
|
19
|
-
:load_index,
|
20
|
-
:load_weights,
|
21
|
-
:load_similarity,
|
22
|
-
:load_configuration,
|
23
|
-
:clear_index,
|
24
|
-
:clear_weights,
|
25
|
-
:clear_similarity,
|
26
|
-
:clear_configuration,
|
27
|
-
:ids,
|
28
|
-
:weight,
|
29
|
-
:identifier,
|
30
|
-
:analyze,
|
31
|
-
:size,
|
32
|
-
:index,
|
33
|
-
:weights,
|
34
|
-
:similarity,
|
35
|
-
:configuration,
|
36
|
-
:to => :@bundle
|
37
|
-
|
38
41
|
end
|
39
42
|
|
40
43
|
end
|
41
|
-
|
42
44
|
end
|
43
45
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
4
|
+
module Category
|
5
|
+
|
6
|
+
module Location
|
7
|
+
|
8
|
+
def self.install_on category, grid, precision = 1
|
9
|
+
wrapped_exact = Internals::Indexed::Wrappers::Bundle::Location.new category.exact, grid: grid, precision: precision
|
10
|
+
|
11
|
+
category.class_eval do
|
12
|
+
define_method :exact do
|
13
|
+
wrapped_exact
|
14
|
+
end
|
15
|
+
define_method :partial do
|
16
|
+
wrapped_exact
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Base
|
8
|
+
|
9
|
+
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
10
|
+
#
|
11
|
+
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
12
|
+
#
|
13
|
+
def index
|
14
|
+
indexing_message
|
15
|
+
process
|
16
|
+
end
|
17
|
+
|
18
|
+
# Delegates the key format to the source.
|
19
|
+
#
|
20
|
+
# Default is to_i.
|
21
|
+
#
|
22
|
+
def key_format
|
23
|
+
source.respond_to?(:key_format) && source.key_format || :to_i
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
# Uses a number of categories, a source, and a tokenizer to index data.
|
6
|
+
#
|
7
|
+
# The tokenizer is taken from each category if specified, from the index, if not.
|
8
|
+
#
|
9
|
+
class Parallel < Base
|
10
|
+
|
11
|
+
delegate :categories, :source, :to => :@index
|
12
|
+
|
13
|
+
def initialize index
|
14
|
+
@index = index
|
15
|
+
end
|
16
|
+
|
17
|
+
def process
|
18
|
+
comma = ?,
|
19
|
+
newline = ?\n
|
20
|
+
|
21
|
+
# Prepare a combined object - array.
|
22
|
+
#
|
23
|
+
combined = categories.map { |category| [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)] }
|
24
|
+
|
25
|
+
# Index.
|
26
|
+
#
|
27
|
+
i = 0
|
28
|
+
source.each do |object|
|
29
|
+
id = object.id
|
30
|
+
|
31
|
+
# This needs to be rewritten.
|
32
|
+
#
|
33
|
+
# Is it a good idea that not the tokenizer has control over when he gets the next text?
|
34
|
+
#
|
35
|
+
combined.each do |category, cache, _, tokenizer|
|
36
|
+
tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
|
37
|
+
next unless token_text
|
38
|
+
cache << id << comma << token_text << newline
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
if i >= 100_000
|
43
|
+
flush combined
|
44
|
+
i = 0
|
45
|
+
end
|
46
|
+
i += 1
|
47
|
+
end
|
48
|
+
flush combined
|
49
|
+
combined.each { |_, _, file, _| file.close }
|
50
|
+
end
|
51
|
+
def flush combined
|
52
|
+
combined.each do |_, cache, file, _|
|
53
|
+
file.write(cache.join) && cache.clear
|
54
|
+
end
|
55
|
+
end
|
56
|
+
#
|
57
|
+
#
|
58
|
+
def indexing_message
|
59
|
+
timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
@@ -2,40 +2,28 @@
|
|
2
2
|
#
|
3
3
|
module Indexers
|
4
4
|
|
5
|
-
#
|
5
|
+
# Uses a category to index its data.
|
6
6
|
#
|
7
|
-
|
7
|
+
# Note: It is called serial since it indexes each
|
8
|
+
#
|
9
|
+
# FIXME Giving the serial a category would be enough, since it already contains an index!
|
10
|
+
#
|
11
|
+
class Serial < Base
|
8
12
|
|
9
|
-
|
13
|
+
attr_reader :category
|
10
14
|
|
11
|
-
|
12
|
-
@configuration = configuration
|
13
|
-
@source = source || raise_no_source
|
14
|
-
@tokenizer = tokenizer
|
15
|
-
end
|
15
|
+
delegate :source, :to => :category
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
def raise_no_source
|
20
|
-
raise NoSourceSpecifiedException.new("No source given for #{@configuration}.")
|
17
|
+
def initialize category
|
18
|
+
@category = category
|
21
19
|
end
|
22
20
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
# Default is to_i.
|
21
|
+
# The tokenizer used is a cached tokenizer from the category.
|
26
22
|
#
|
27
|
-
def
|
28
|
-
@
|
23
|
+
def tokenizer
|
24
|
+
@tokenizer ||= category.tokenizer
|
29
25
|
end
|
30
26
|
|
31
|
-
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
32
|
-
#
|
33
|
-
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
34
|
-
#
|
35
|
-
def index
|
36
|
-
indexing_message
|
37
|
-
process
|
38
|
-
end
|
39
27
|
def process
|
40
28
|
comma = ?,
|
41
29
|
newline = ?\n
|
@@ -52,10 +40,11 @@ module Indexers
|
|
52
40
|
# end
|
53
41
|
# end
|
54
42
|
#
|
55
|
-
|
43
|
+
local_tokenizer = tokenizer
|
44
|
+
category.prepared_index_file do |file|
|
56
45
|
result = []
|
57
|
-
source.harvest(
|
58
|
-
|
46
|
+
source.harvest(category) do |indexed_id, text|
|
47
|
+
local_tokenizer.tokenize(text).each do |token_text|
|
59
48
|
next unless token_text
|
60
49
|
result << indexed_id << comma << token_text << newline
|
61
50
|
end
|
@@ -64,8 +53,10 @@ module Indexers
|
|
64
53
|
file.write result.join
|
65
54
|
end
|
66
55
|
end
|
56
|
+
#
|
57
|
+
#
|
67
58
|
def indexing_message
|
68
|
-
timed_exclaim %Q{"#{@
|
59
|
+
timed_exclaim %Q{"#{@category.identifier}": Starting serial indexing.}
|
69
60
|
end
|
70
61
|
|
71
62
|
end
|
@@ -16,8 +16,8 @@ module Internals
|
|
16
16
|
|
17
17
|
# Path is in which directory the cache is located.
|
18
18
|
#
|
19
|
-
def initialize name,
|
20
|
-
super name,
|
19
|
+
def initialize name, category, similarity_strategy, partial_strategy, weights_strategy
|
20
|
+
super name, category, similarity_strategy
|
21
21
|
|
22
22
|
@partial_strategy = partial_strategy
|
23
23
|
@weights_strategy = weights_strategy
|
@@ -33,9 +33,9 @@ module Internals
|
|
33
33
|
delegate :clear, :to => :index
|
34
34
|
delegate :[], :[]=, :to => :configuration
|
35
35
|
|
36
|
-
def initialize name,
|
37
|
-
@identifier
|
38
|
-
@files
|
36
|
+
def initialize name, category, similarity_strategy
|
37
|
+
@identifier = "#{category.identifier}:#{name}"
|
38
|
+
@files = Internals::Index::Files.new name, category
|
39
39
|
|
40
40
|
@index = {}
|
41
41
|
@weights = {}
|