picky 2.1.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/{lib → aux}/picky/cli.rb +50 -38
- data/bin/picky +1 -1
- data/lib/picky/application.rb +5 -2
- data/lib/picky/index/base.rb +88 -25
- data/lib/picky/index/memory.rb +8 -8
- data/lib/picky/index/redis.rb +8 -8
- data/lib/picky/index_bundle.rb +2 -2
- data/lib/picky/indexing/indexes.rb +6 -6
- data/lib/picky/internals/calculations/location.rb +54 -42
- data/lib/picky/internals/index/backend.rb +21 -21
- data/lib/picky/internals/index/file/text.rb +11 -11
- data/lib/picky/internals/index/files.rb +6 -6
- data/lib/picky/internals/index/redis.rb +14 -14
- data/lib/picky/internals/indexed/bundle/base.rb +2 -2
- data/lib/picky/internals/indexed/bundle/redis.rb +3 -3
- data/lib/picky/internals/indexed/category.rb +8 -9
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +25 -23
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +36 -34
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +35 -33
- data/lib/picky/internals/indexed/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/indexers/base.rb +28 -0
- data/lib/picky/internals/indexers/parallel.rb +64 -0
- data/lib/picky/internals/indexers/serial.rb +20 -29
- data/lib/picky/internals/indexing/bundle/base.rb +2 -2
- data/lib/picky/internals/indexing/bundle/super_base.rb +3 -3
- data/lib/picky/internals/indexing/category.rb +30 -27
- data/lib/picky/internals/indexing/index.rb +82 -27
- data/lib/picky/internals/indexing/wrappers/category/location.rb +27 -0
- data/lib/picky/internals/query/indexes.rb +1 -1
- data/lib/picky/internals/query/qualifiers.rb +7 -6
- data/lib/picky/internals/query/weights.rb +6 -0
- data/lib/picky/internals/shared/category.rb +52 -0
- data/lib/picky/internals/tokenizers/base.rb +1 -1
- data/lib/picky/internals/tokenizers/location.rb +54 -0
- data/lib/picky/loader.rb +16 -3
- data/lib/picky/no_source_specified_exception.rb +3 -0
- data/lib/picky/search.rb +44 -5
- data/lib/picky/sources/base.rb +2 -2
- data/lib/picky/sources/couch.rb +1 -1
- data/lib/picky/sources/csv.rb +1 -1
- data/lib/picky/sources/db.rb +9 -9
- data/lib/picky/sources/delicious.rb +1 -1
- data/lib/picky/sources/wrappers/base.rb +12 -13
- data/lib/picky/sources/wrappers/location.rb +24 -54
- data/lib/tasks/search.rake +4 -5
- data/lib/tasks/todo.rake +1 -1
- data/spec/{lib → aux/picky}/cli_spec.rb +13 -8
- data/spec/lib/application_spec.rb +21 -16
- data/spec/lib/index/base_spec.rb +74 -27
- data/spec/lib/index/redis_spec.rb +1 -1
- data/spec/lib/index_bundle_spec.rb +1 -1
- data/spec/lib/indexing/indexes_spec.rb +5 -5
- data/spec/lib/internals/calculations/location_spec.rb +14 -3
- data/spec/lib/internals/index/files_spec.rb +2 -3
- data/spec/lib/internals/index/redis_spec.rb +122 -49
- data/spec/lib/internals/indexed/bundle/memory_spec.rb +4 -6
- data/spec/lib/internals/indexed/bundle/redis_spec.rb +2 -3
- data/spec/lib/internals/indexed/wrappers/bundle/calculation_spec.rb +3 -3
- data/spec/lib/internals/indexed/wrappers/bundle/wrapper_spec.rb +3 -3
- data/spec/lib/internals/indexers/parallel_spec.rb +36 -0
- data/spec/lib/internals/indexers/serial_spec.rb +6 -14
- data/spec/lib/internals/indexing/bundle/memory_partial_generation_speed_spec.rb +2 -3
- data/spec/lib/internals/indexing/bundle/memory_spec.rb +5 -6
- data/spec/lib/internals/indexing/bundle/redis_spec.rb +5 -6
- data/spec/lib/internals/indexing/category_spec.rb +21 -6
- data/spec/lib/internals/indexing/index_spec.rb +43 -7
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/search_spec.rb +51 -2
- data/spec/lib/sources/couch_spec.rb +6 -6
- data/spec/lib/sources/csv_spec.rb +4 -4
- data/spec/lib/sources/db_spec.rb +13 -14
- data/spec/lib/sources/delicious_spec.rb +3 -3
- data/spec/lib/sources/wrappers/base_spec.rb +9 -10
- data/spec/lib/sources/wrappers/location_spec.rb +11 -23
- metadata +14 -15
- data/lib/picky/auxiliary/terminal.rb +0 -219
- data/lib/picky/internals/configuration/index.rb +0 -67
- data/lib/picky/internals/indexers/no_source_specified_error.rb +0 -7
- data/lib/picky/internals/indexing/categories.rb +0 -46
- data/spec/lib/auxiliary/terminal_spec.rb +0 -150
- data/spec/lib/internals/configuration/index_spec.rb +0 -80
- data/spec/lib/internals/indexing/categories_spec.rb +0 -49
@@ -1,13 +1,13 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Index
|
4
|
-
|
4
|
+
|
5
5
|
module File
|
6
|
-
|
6
|
+
|
7
7
|
# Index data dumped in the text format.
|
8
8
|
#
|
9
9
|
class Text < Basic
|
10
|
-
|
10
|
+
|
11
11
|
# Uses the extension "txt".
|
12
12
|
#
|
13
13
|
def extension
|
@@ -20,12 +20,12 @@ module Internals
|
|
20
20
|
raise "Can't load from text file. Use JSON or Marshal."
|
21
21
|
end
|
22
22
|
# Text files are used exclusively for
|
23
|
-
# prepared data files.
|
23
|
+
# prepared data files.
|
24
24
|
#
|
25
25
|
def dump hash
|
26
26
|
raise "Can't dump to text file. Use JSON or Marshal."
|
27
27
|
end
|
28
|
-
|
28
|
+
|
29
29
|
# Retrieves prepared index data in the form
|
30
30
|
# * id,data\n
|
31
31
|
# * id,data\n
|
@@ -43,18 +43,18 @@ module Internals
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
#
|
48
48
|
#
|
49
49
|
def open_for_indexing &block
|
50
50
|
::File.open cache_path, 'w:binary', &block
|
51
51
|
end
|
52
|
-
|
53
|
-
|
52
|
+
|
53
|
+
|
54
54
|
end
|
55
|
-
|
55
|
+
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
end
|
59
|
-
|
59
|
+
|
60
60
|
end
|
@@ -4,17 +4,17 @@ module Internals
|
|
4
4
|
|
5
5
|
class Files < Backend
|
6
6
|
|
7
|
-
def initialize bundle_name,
|
8
|
-
super bundle_name,
|
7
|
+
def initialize bundle_name, category
|
8
|
+
super bundle_name, category
|
9
9
|
|
10
10
|
# Note: We marshal the similarity, as the
|
11
11
|
# Yajl json lib cannot load symbolized
|
12
12
|
# values, just keys.
|
13
13
|
#
|
14
|
-
@index = File::JSON.new
|
15
|
-
@weights = File::JSON.new
|
16
|
-
@similarity = File::Marshal.new
|
17
|
-
@configuration = File::JSON.new
|
14
|
+
@index = File::JSON.new category.index_path(bundle_name, :index)
|
15
|
+
@weights = File::JSON.new category.index_path(bundle_name, :weights)
|
16
|
+
@similarity = File::Marshal.new category.index_path(bundle_name, :similarity)
|
17
|
+
@configuration = File::JSON.new category.index_path(bundle_name, :configuration)
|
18
18
|
end
|
19
19
|
|
20
20
|
def to_s
|
@@ -1,28 +1,28 @@
|
|
1
1
|
module Internals
|
2
2
|
|
3
3
|
module Index
|
4
|
-
|
4
|
+
|
5
5
|
# TODO Needs a reconnect to be run after forking.
|
6
6
|
#
|
7
7
|
class Redis < Backend
|
8
|
-
|
9
|
-
def initialize bundle_name,
|
10
|
-
super bundle_name,
|
8
|
+
|
9
|
+
def initialize bundle_name, category
|
10
|
+
super bundle_name, category
|
11
11
|
|
12
12
|
# Refine a few Redis "types".
|
13
13
|
#
|
14
|
-
@index = Redis::ListHash.new "#{
|
15
|
-
@weights = Redis::StringHash.new "#{
|
16
|
-
@similarity = Redis::ListHash.new "#{
|
17
|
-
@configuration = Redis::StringHash.new "#{
|
14
|
+
@index = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:index"
|
15
|
+
@weights = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
|
16
|
+
@similarity = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:similarity"
|
17
|
+
@configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
# Delegate to the right collection.
|
21
21
|
#
|
22
22
|
def ids sym
|
23
23
|
index.collection sym
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
# Delegate to the right member value.
|
27
27
|
#
|
28
28
|
# Note: Converts to float.
|
@@ -30,15 +30,15 @@ module Internals
|
|
30
30
|
def weight sym
|
31
31
|
weights.member(sym).to_f
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
# Delegate to a member value.
|
35
35
|
#
|
36
36
|
def setting sym
|
37
37
|
configuration.member sym
|
38
38
|
end
|
39
|
-
|
39
|
+
|
40
40
|
end
|
41
|
-
|
41
|
+
|
42
42
|
end
|
43
|
-
|
43
|
+
|
44
44
|
end
|
@@ -35,8 +35,8 @@ module Internals
|
|
35
35
|
delegate :[], :to => :configuration
|
36
36
|
delegate :size, :to => :index
|
37
37
|
|
38
|
-
def initialize name,
|
39
|
-
@identifier = "#{
|
38
|
+
def initialize name, category, similarity_strategy
|
39
|
+
@identifier = "#{category.identifier}:#{name}"
|
40
40
|
|
41
41
|
@index = {}
|
42
42
|
@weights = {}
|
@@ -14,10 +14,10 @@ module Internals
|
|
14
14
|
#
|
15
15
|
class Redis < Base
|
16
16
|
|
17
|
-
def initialize name,
|
18
|
-
super name,
|
17
|
+
def initialize name, category, *args
|
18
|
+
super name, category, *args
|
19
19
|
|
20
|
-
@backend = Internals::Index::Redis.new name,
|
20
|
+
@backend = Internals::Index::Redis.new name, category
|
21
21
|
end
|
22
22
|
|
23
23
|
# Get the ids for the given symbol.
|
@@ -9,18 +9,17 @@ module Internals
|
|
9
9
|
#
|
10
10
|
class Category
|
11
11
|
|
12
|
+
include Internals::Shared::Category
|
13
|
+
|
12
14
|
attr_accessor :exact
|
13
|
-
attr_reader :
|
15
|
+
attr_reader :name, :index
|
14
16
|
attr_writer :partial
|
15
17
|
|
16
18
|
#
|
17
19
|
#
|
18
20
|
def initialize name, index, options = {}
|
19
|
-
@name
|
20
|
-
|
21
|
-
configuration = Configuration::Index.new index, self
|
22
|
-
|
23
|
-
@identifier = configuration.identifier
|
21
|
+
@name = name
|
22
|
+
@index = index
|
24
23
|
|
25
24
|
# TODO Push the defaults out into the index.
|
26
25
|
#
|
@@ -28,15 +27,15 @@ module Internals
|
|
28
27
|
similarity = options[:similarity] || Internals::Generators::Similarity::Default
|
29
28
|
|
30
29
|
bundle_class = options[:indexed_bundle_class] || Bundle::Memory
|
31
|
-
@exact = bundle_class.new :exact,
|
32
|
-
@partial = bundle_class.new :partial,
|
30
|
+
@exact = bundle_class.new :exact, self, similarity
|
31
|
+
@partial = bundle_class.new :partial, self, similarity
|
33
32
|
|
34
33
|
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
35
34
|
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
36
35
|
|
37
36
|
# TODO Extract?
|
38
37
|
#
|
39
|
-
Query::Qualifiers.add(
|
38
|
+
Query::Qualifiers.add(name, generate_qualifiers_from(options) || [name])
|
40
39
|
end
|
41
40
|
|
42
41
|
def to_s
|
@@ -1,35 +1,37 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
# A calculation rewrites the symbol into a float.
|
7
|
-
#
|
8
|
-
# TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
|
9
|
-
#
|
10
|
-
class Calculation < Wrapper
|
5
|
+
module Bundle
|
11
6
|
|
7
|
+
# A calculation rewrites the symbol into a float.
|
12
8
|
#
|
9
|
+
# TODO I really need to allow integers as keys. The code below is just not up to the needed quality.
|
13
10
|
#
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
class Calculation < Wrapper
|
12
|
+
|
13
|
+
#
|
14
|
+
#
|
15
|
+
def recalculate float
|
16
|
+
float
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
#
|
21
|
+
def ids sym
|
22
|
+
@bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
|
23
|
+
end
|
24
|
+
|
25
|
+
#
|
26
|
+
#
|
27
|
+
def weight sym
|
28
|
+
@bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
|
29
|
+
end
|
17
30
|
|
18
|
-
#
|
19
|
-
#
|
20
|
-
def ids sym
|
21
|
-
@bundle.ids recalculate(sym.to_s.to_f).to_s.to_sym
|
22
|
-
end
|
23
|
-
|
24
|
-
#
|
25
|
-
#
|
26
|
-
def weight sym
|
27
|
-
@bundle.weight recalculate(sym.to_s.to_f).to_s.to_sym
|
28
31
|
end
|
29
32
|
|
30
33
|
end
|
31
34
|
|
32
35
|
end
|
33
|
-
|
34
36
|
end
|
35
37
|
end
|
@@ -1,42 +1,44 @@
|
|
1
|
-
module
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
class Location < Calculation
|
9
|
-
|
10
|
-
def initialize bundle, options = {}
|
11
|
-
super bundle
|
12
|
-
|
13
|
-
precision = options[:precision] || 1
|
14
|
-
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
15
|
-
|
16
|
-
@calculation = Calculations::Location.new user_grid, precision
|
17
|
-
end
|
18
|
-
|
19
|
-
#
|
20
|
-
#
|
21
|
-
def recalculate float
|
22
|
-
@calculation.recalculate float
|
23
|
-
end
|
24
|
-
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
4
|
+
|
5
|
+
module Bundle
|
6
|
+
|
7
|
+
# A location calculation recalculates a location to the Picky internal location.
|
25
8
|
#
|
26
|
-
|
27
|
-
|
28
|
-
|
9
|
+
class Location < Calculation
|
10
|
+
|
11
|
+
def initialize bundle, options = {}
|
12
|
+
super bundle
|
13
|
+
|
14
|
+
precision = options[:precision] || 1
|
15
|
+
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
16
|
+
|
17
|
+
@calculation = Internals::Calculations::Location.new user_grid, precision
|
18
|
+
end
|
19
|
+
|
20
|
+
#
|
21
|
+
#
|
22
|
+
def recalculate float
|
23
|
+
@calculation.recalculate float
|
24
|
+
end
|
25
|
+
|
29
26
|
#
|
30
|
-
bundle.load
|
31
|
-
# TODO Move the to_f to the backend.
|
32
27
|
#
|
33
|
-
|
34
|
-
|
28
|
+
def load
|
29
|
+
# Load first the bundle, then extract the config.
|
30
|
+
#
|
31
|
+
bundle.load
|
32
|
+
# TODO Move the to_f to the backend.
|
33
|
+
#
|
34
|
+
minimum = bundle[:location_minimum] && bundle[:location_minimum].to_f || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
35
|
+
@calculation.minimum = minimum
|
36
|
+
end
|
37
|
+
|
35
38
|
end
|
36
|
-
|
39
|
+
|
37
40
|
end
|
38
|
-
|
41
|
+
|
39
42
|
end
|
40
|
-
|
41
43
|
end
|
42
44
|
end
|
@@ -1,43 +1,45 @@
|
|
1
|
-
module
|
2
|
-
module
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
3
4
|
|
4
|
-
|
5
|
-
#
|
6
|
-
module Bundle
|
7
|
-
|
8
|
-
# Base wrapper. Just delegates all methods to the bundle.
|
5
|
+
# Per Bundle wrappers.
|
9
6
|
#
|
10
|
-
|
11
|
-
|
12
|
-
|
7
|
+
module Bundle
|
8
|
+
|
9
|
+
# Base wrapper. Just delegates all methods to the bundle.
|
10
|
+
#
|
11
|
+
class Wrapper
|
12
|
+
|
13
|
+
attr_reader :bundle
|
14
|
+
|
15
|
+
def initialize bundle
|
16
|
+
@bundle = bundle
|
17
|
+
end
|
18
|
+
|
19
|
+
delegate :load,
|
20
|
+
:load_index,
|
21
|
+
:load_weights,
|
22
|
+
:load_similarity,
|
23
|
+
:load_configuration,
|
24
|
+
:clear_index,
|
25
|
+
:clear_weights,
|
26
|
+
:clear_similarity,
|
27
|
+
:clear_configuration,
|
28
|
+
:ids,
|
29
|
+
:weight,
|
30
|
+
:identifier,
|
31
|
+
:analyze,
|
32
|
+
:size,
|
33
|
+
:index,
|
34
|
+
:weights,
|
35
|
+
:similarity,
|
36
|
+
:configuration,
|
37
|
+
:to => :@bundle
|
13
38
|
|
14
|
-
def initialize bundle
|
15
|
-
@bundle = bundle
|
16
39
|
end
|
17
40
|
|
18
|
-
delegate :load,
|
19
|
-
:load_index,
|
20
|
-
:load_weights,
|
21
|
-
:load_similarity,
|
22
|
-
:load_configuration,
|
23
|
-
:clear_index,
|
24
|
-
:clear_weights,
|
25
|
-
:clear_similarity,
|
26
|
-
:clear_configuration,
|
27
|
-
:ids,
|
28
|
-
:weight,
|
29
|
-
:identifier,
|
30
|
-
:analyze,
|
31
|
-
:size,
|
32
|
-
:index,
|
33
|
-
:weights,
|
34
|
-
:similarity,
|
35
|
-
:configuration,
|
36
|
-
:to => :@bundle
|
37
|
-
|
38
41
|
end
|
39
42
|
|
40
43
|
end
|
41
|
-
|
42
44
|
end
|
43
45
|
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Internals
|
2
|
+
module Indexed
|
3
|
+
module Wrappers
|
4
|
+
module Category
|
5
|
+
|
6
|
+
module Location
|
7
|
+
|
8
|
+
def self.install_on category, grid, precision = 1
|
9
|
+
wrapped_exact = Internals::Indexed::Wrappers::Bundle::Location.new category.exact, grid: grid, precision: precision
|
10
|
+
|
11
|
+
category.class_eval do
|
12
|
+
define_method :exact do
|
13
|
+
wrapped_exact
|
14
|
+
end
|
15
|
+
define_method :partial do
|
16
|
+
wrapped_exact
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Base
|
8
|
+
|
9
|
+
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
10
|
+
#
|
11
|
+
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
12
|
+
#
|
13
|
+
def index
|
14
|
+
indexing_message
|
15
|
+
process
|
16
|
+
end
|
17
|
+
|
18
|
+
# Delegates the key format to the source.
|
19
|
+
#
|
20
|
+
# Default is to_i.
|
21
|
+
#
|
22
|
+
def key_format
|
23
|
+
source.respond_to?(:key_format) && source.key_format || :to_i
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
# Uses a number of categories, a source, and a tokenizer to index data.
|
6
|
+
#
|
7
|
+
# The tokenizer is taken from each category if specified, from the index, if not.
|
8
|
+
#
|
9
|
+
class Parallel < Base
|
10
|
+
|
11
|
+
delegate :categories, :source, :to => :@index
|
12
|
+
|
13
|
+
def initialize index
|
14
|
+
@index = index
|
15
|
+
end
|
16
|
+
|
17
|
+
def process
|
18
|
+
comma = ?,
|
19
|
+
newline = ?\n
|
20
|
+
|
21
|
+
# Prepare a combined object - array.
|
22
|
+
#
|
23
|
+
combined = categories.map { |category| [category, [], category.prepared_index_file, (category.tokenizer || tokenizer)] }
|
24
|
+
|
25
|
+
# Index.
|
26
|
+
#
|
27
|
+
i = 0
|
28
|
+
source.each do |object|
|
29
|
+
id = object.id
|
30
|
+
|
31
|
+
# This needs to be rewritten.
|
32
|
+
#
|
33
|
+
# Is it a good idea that not the tokenizer has control over when he gets the next text?
|
34
|
+
#
|
35
|
+
combined.each do |category, cache, _, tokenizer|
|
36
|
+
tokenizer.tokenize(object.send(category.from).to_s).each do |token_text|
|
37
|
+
next unless token_text
|
38
|
+
cache << id << comma << token_text << newline
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
if i >= 100_000
|
43
|
+
flush combined
|
44
|
+
i = 0
|
45
|
+
end
|
46
|
+
i += 1
|
47
|
+
end
|
48
|
+
flush combined
|
49
|
+
combined.each { |_, _, file, _| file.close }
|
50
|
+
end
|
51
|
+
def flush combined
|
52
|
+
combined.each do |_, cache, file, _|
|
53
|
+
file.write(cache.join) && cache.clear
|
54
|
+
end
|
55
|
+
end
|
56
|
+
#
|
57
|
+
#
|
58
|
+
def indexing_message
|
59
|
+
timed_exclaim %Q{"#{@index.name}": Starting parallel indexing.}
|
60
|
+
end
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
@@ -2,40 +2,28 @@
|
|
2
2
|
#
|
3
3
|
module Indexers
|
4
4
|
|
5
|
-
#
|
5
|
+
# Uses a category to index its data.
|
6
6
|
#
|
7
|
-
|
7
|
+
# Note: It is called serial since it indexes each
|
8
|
+
#
|
9
|
+
# FIXME Giving the serial a category would be enough, since it already contains an index!
|
10
|
+
#
|
11
|
+
class Serial < Base
|
8
12
|
|
9
|
-
|
13
|
+
attr_reader :category
|
10
14
|
|
11
|
-
|
12
|
-
@configuration = configuration
|
13
|
-
@source = source || raise_no_source
|
14
|
-
@tokenizer = tokenizer
|
15
|
-
end
|
15
|
+
delegate :source, :to => :category
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
def raise_no_source
|
20
|
-
raise NoSourceSpecifiedException.new("No source given for #{@configuration}.")
|
17
|
+
def initialize category
|
18
|
+
@category = category
|
21
19
|
end
|
22
20
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
# Default is to_i.
|
21
|
+
# The tokenizer used is a cached tokenizer from the category.
|
26
22
|
#
|
27
|
-
def
|
28
|
-
@
|
23
|
+
def tokenizer
|
24
|
+
@tokenizer ||= category.tokenizer
|
29
25
|
end
|
30
26
|
|
31
|
-
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
32
|
-
#
|
33
|
-
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
34
|
-
#
|
35
|
-
def index
|
36
|
-
indexing_message
|
37
|
-
process
|
38
|
-
end
|
39
27
|
def process
|
40
28
|
comma = ?,
|
41
29
|
newline = ?\n
|
@@ -52,10 +40,11 @@ module Indexers
|
|
52
40
|
# end
|
53
41
|
# end
|
54
42
|
#
|
55
|
-
|
43
|
+
local_tokenizer = tokenizer
|
44
|
+
category.prepared_index_file do |file|
|
56
45
|
result = []
|
57
|
-
source.harvest(
|
58
|
-
|
46
|
+
source.harvest(category) do |indexed_id, text|
|
47
|
+
local_tokenizer.tokenize(text).each do |token_text|
|
59
48
|
next unless token_text
|
60
49
|
result << indexed_id << comma << token_text << newline
|
61
50
|
end
|
@@ -64,8 +53,10 @@ module Indexers
|
|
64
53
|
file.write result.join
|
65
54
|
end
|
66
55
|
end
|
56
|
+
#
|
57
|
+
#
|
67
58
|
def indexing_message
|
68
|
-
timed_exclaim %Q{"#{@
|
59
|
+
timed_exclaim %Q{"#{@category.identifier}": Starting serial indexing.}
|
69
60
|
end
|
70
61
|
|
71
62
|
end
|
@@ -16,8 +16,8 @@ module Internals
|
|
16
16
|
|
17
17
|
# Path is in which directory the cache is located.
|
18
18
|
#
|
19
|
-
def initialize name,
|
20
|
-
super name,
|
19
|
+
def initialize name, category, similarity_strategy, partial_strategy, weights_strategy
|
20
|
+
super name, category, similarity_strategy
|
21
21
|
|
22
22
|
@partial_strategy = partial_strategy
|
23
23
|
@weights_strategy = weights_strategy
|
@@ -33,9 +33,9 @@ module Internals
|
|
33
33
|
delegate :clear, :to => :index
|
34
34
|
delegate :[], :[]=, :to => :configuration
|
35
35
|
|
36
|
-
def initialize name,
|
37
|
-
@identifier
|
38
|
-
@files
|
36
|
+
def initialize name, category, similarity_strategy
|
37
|
+
@identifier = "#{category.identifier}:#{name}"
|
38
|
+
@files = Internals::Index::Files.new name, category
|
39
39
|
|
40
40
|
@index = {}
|
41
41
|
@weights = {}
|