picky 0.11.2 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/Index_api.rb +49 -0
- data/lib/picky/alias_instances.rb +4 -1
- data/lib/picky/application.rb +16 -15
- data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
- data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
- data/lib/picky/configuration/index.rb +67 -0
- data/lib/picky/cores.rb +3 -0
- data/lib/picky/index/bundle.rb +35 -51
- data/lib/picky/index/file/basic.rb +39 -5
- data/lib/picky/index/file/json.rb +10 -0
- data/lib/picky/index/file/marshal.rb +10 -0
- data/lib/picky/index/file/text.rb +22 -0
- data/lib/picky/index/files.rb +11 -36
- data/lib/picky/indexed/bundle.rb +61 -0
- data/lib/picky/{index → indexed}/categories.rb +1 -1
- data/lib/picky/{index → indexed}/category.rb +13 -16
- data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
- data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
- data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexers/serial.rb +64 -0
- data/lib/picky/indexers/solr.rb +1 -3
- data/lib/picky/indexes_api.rb +41 -0
- data/lib/picky/indexing/bundle.rb +43 -13
- data/lib/picky/indexing/category.rb +17 -64
- data/lib/picky/indexing/{type.rb → index.rb} +13 -3
- data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
- data/lib/picky/loader.rb +17 -22
- data/lib/picky/query/base.rb +1 -1
- data/lib/picky/rack/harakiri.rb +9 -2
- data/lib/picky/signals.rb +1 -1
- data/lib/picky/sources/base.rb +14 -14
- data/lib/picky/sources/couch.rb +8 -7
- data/lib/picky/sources/csv.rb +10 -10
- data/lib/picky/sources/db.rb +8 -8
- data/lib/picky/sources/delicious.rb +2 -2
- data/lib/picky/sources/wrappers/location.rb +3 -3
- data/lib/picky/tokenizers/base.rb +1 -11
- data/lib/picky/tokenizers/index.rb +0 -1
- data/lib/picky/tokenizers/query.rb +0 -1
- data/lib/tasks/index.rake +4 -4
- data/lib/tasks/shortcuts.rake +4 -4
- data/lib/tasks/try.rake +8 -8
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +13 -12
- data/spec/lib/application_spec.rb +10 -38
- data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
- data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
- data/spec/lib/configuration/index_spec.rb +80 -0
- data/spec/lib/cores_spec.rb +1 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/index/files_spec.rb +12 -32
- data/spec/lib/indexed/bundle_spec.rb +119 -0
- data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
- data/spec/lib/{index → indexed}/category_spec.rb +6 -6
- data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
- data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/serial_spec.rb +62 -0
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
- data/spec/lib/indexing/bundle_spec.rb +9 -14
- data/spec/lib/indexing/category_spec.rb +9 -125
- data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
- data/spec/lib/query/base_spec.rb +1 -1
- data/spec/lib/query/full_spec.rb +1 -1
- data/spec/lib/query/live_spec.rb +2 -4
- data/spec/lib/sources/couch_spec.rb +5 -5
- data/spec/lib/sources/db_spec.rb +6 -7
- data/spec/lib/tokenizers/base_spec.rb +1 -24
- data/spec/lib/tokenizers/query_spec.rb +0 -1
- metadata +38 -41
- data/lib/picky/bundle.rb +0 -33
- data/lib/picky/configuration/indexes.rb +0 -51
- data/lib/picky/configuration/queries.rb +0 -15
- data/lib/picky/indexers/base.rb +0 -85
- data/lib/picky/indexers/default.rb +0 -3
- data/lib/picky/type.rb +0 -46
- data/lib/picky/types.rb +0 -41
- data/lib/tasks/cache.rake +0 -46
- data/spec/lib/configuration/indexes_spec.rb +0 -28
- data/spec/lib/index/bundle_spec.rb +0 -151
- data/spec/lib/indexers/base_spec.rb +0 -89
@@ -0,0 +1,49 @@
|
|
1
|
+
# This class defines the indexing and index API.
|
2
|
+
#
|
3
|
+
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
4
|
+
#
|
5
|
+
class IndexAPI
|
6
|
+
|
7
|
+
# TODO Delegation.
|
8
|
+
#
|
9
|
+
|
10
|
+
attr_reader :name, :indexing, :indexed
|
11
|
+
|
12
|
+
def initialize name, source, options = {}
|
13
|
+
@name = name
|
14
|
+
@indexing = Indexing::Index.new name, source, options
|
15
|
+
@indexed = Indexed::Index.new name, options
|
16
|
+
|
17
|
+
# Centralized registry.
|
18
|
+
#
|
19
|
+
Indexes.register self
|
20
|
+
end
|
21
|
+
|
22
|
+
# API.
|
23
|
+
#
|
24
|
+
# TODO Spec! Doc!
|
25
|
+
#
|
26
|
+
def define_category category_name, options = {}
|
27
|
+
category_name = category_name.to_sym
|
28
|
+
|
29
|
+
indexing_category = indexing.add_category category_name, options
|
30
|
+
indexed_category = indexed.add_category category_name, options
|
31
|
+
|
32
|
+
yield indexing_category, indexed_category if block_given?
|
33
|
+
|
34
|
+
self
|
35
|
+
end
|
36
|
+
alias category define_category
|
37
|
+
|
38
|
+
def define_location name, options = {}
|
39
|
+
grid = options[:grid]
|
40
|
+
precision = options[:precision]
|
41
|
+
|
42
|
+
define_category name, options do |indexing, _|
|
43
|
+
indexing.source = Sources::Wrappers::Location.new indexing.source, grid: grid, precision: precision
|
44
|
+
indexing.tokenizer = Tokenizers::Index.new
|
45
|
+
end
|
46
|
+
end
|
47
|
+
alias location define_location
|
48
|
+
|
49
|
+
end
|
data/lib/picky/application.rb
CHANGED
@@ -4,24 +4,37 @@ class Application
|
|
4
4
|
|
5
5
|
class << self
|
6
6
|
|
7
|
+
# API
|
8
|
+
#
|
9
|
+
|
7
10
|
# Returns a configured tokenizer that
|
8
11
|
# is used for indexing by default.
|
9
12
|
#
|
10
13
|
def default_indexing options = {}
|
11
|
-
|
14
|
+
Tokenizers::Index.default = Tokenizers::Index.new(options)
|
12
15
|
end
|
13
16
|
|
14
17
|
# Returns a configured tokenizer that
|
15
18
|
# is used for querying by default.
|
16
19
|
#
|
17
20
|
def default_querying options = {}
|
18
|
-
|
21
|
+
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Returns a new index frontend for configuring the index.
|
25
|
+
#
|
26
|
+
def index *args
|
27
|
+
IndexAPI.new *args
|
19
28
|
end
|
20
29
|
|
21
30
|
# Routes.
|
22
31
|
#
|
23
32
|
delegate :route, :root, :to => :routing
|
24
33
|
|
34
|
+
#
|
35
|
+
# API
|
36
|
+
|
37
|
+
|
25
38
|
# An application simply delegates to the routing to handle a request.
|
26
39
|
#
|
27
40
|
def call env
|
@@ -30,18 +43,6 @@ class Application
|
|
30
43
|
def routing
|
31
44
|
@routing ||= Routing.new
|
32
45
|
end
|
33
|
-
def indexing
|
34
|
-
@indexing ||= Configuration::Indexes.new
|
35
|
-
end
|
36
|
-
def querying
|
37
|
-
@queries ||= Configuration::Queries.new
|
38
|
-
end
|
39
|
-
|
40
|
-
# "API".
|
41
|
-
#
|
42
|
-
def index *args
|
43
|
-
::Type.new *args
|
44
|
-
end
|
45
46
|
|
46
47
|
# Finalize the subclass as soon as it
|
47
48
|
# has finished loading.
|
@@ -64,7 +65,7 @@ class Application
|
|
64
65
|
routing.freeze
|
65
66
|
end
|
66
67
|
|
67
|
-
# TODO Add more info.
|
68
|
+
# TODO Add more info if possible.
|
68
69
|
#
|
69
70
|
def to_s
|
70
71
|
"#{self.name}:\n#{routing}"
|
@@ -2,7 +2,7 @@ module Cacher
|
|
2
2
|
|
3
3
|
module Partial
|
4
4
|
|
5
|
-
# Generates the right
|
5
|
+
# Generates the right substrings for use in the substring strategy.
|
6
6
|
#
|
7
7
|
class SubstringGenerator
|
8
8
|
|
@@ -27,38 +27,39 @@ module Cacher
|
|
27
27
|
|
28
28
|
# The substring partial strategy.
|
29
29
|
#
|
30
|
-
# If given
|
31
|
-
# "
|
32
|
-
#
|
33
|
-
# "floria"
|
34
|
-
# "flori"
|
35
|
-
# "flor"
|
36
|
-
# "flo"
|
37
|
-
# "fl"
|
38
|
-
# "f"
|
39
|
-
# Depending on what the given from value is. (Example with from == 1)
|
30
|
+
# If given "florian"
|
31
|
+
# it will index "floria", "flori", "flor", "flo", "fl", "f"
|
32
|
+
# (Depending on what the given from value is, the example is with option from: 1)
|
40
33
|
#
|
41
34
|
class Substring < Strategy
|
42
35
|
|
43
|
-
#
|
36
|
+
# The from option signifies where in the symbol it
|
37
|
+
# will start in generating the substrings.
|
44
38
|
#
|
45
39
|
# Examples:
|
46
|
-
# With :hello, and to -1
|
47
|
-
# * down to == 1: [:hello, :hell, :hel, :he, :h]
|
48
|
-
# * down to == 4: [:hello, :hell]
|
49
40
|
#
|
50
|
-
# With :hello, and to -
|
51
|
-
# *
|
52
|
-
# *
|
41
|
+
# With :hello, and to: -1 (default)
|
42
|
+
# * from: 1 # => [:hello, :hell, :hel, :he, :h]
|
43
|
+
# * from: 4 # => [:hello, :hell]
|
44
|
+
#
|
45
|
+
# With :hello, and to: -2
|
46
|
+
# * from: 1 # => [:hell, :hel, :he, :h]
|
47
|
+
# * from: 4 # => [:hell]
|
53
48
|
#
|
54
49
|
def initialize options = {}
|
55
50
|
from = options[:from] || 1
|
56
51
|
to = options[:to] || -1
|
57
52
|
@generator = SubstringGenerator.new from, to
|
58
53
|
end
|
54
|
+
|
55
|
+
# Delegator to generator#from.
|
56
|
+
#
|
59
57
|
def from
|
60
58
|
@generator.from
|
61
59
|
end
|
60
|
+
|
61
|
+
# Delegator to generator#to.
|
62
|
+
#
|
62
63
|
def to
|
63
64
|
@generator.to
|
64
65
|
end
|
data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb}
RENAMED
@@ -1,11 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
-
module
|
3
|
+
module CharacterSubstituters
|
4
4
|
# Substitutes Umlauts like
|
5
5
|
# ä, ö, ü => ae, oe, ue.
|
6
6
|
# (and more, see specs)
|
7
7
|
#
|
8
|
-
class
|
8
|
+
class WestEuropean
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
@chars = ActiveSupport::Multibyte.proxy_class
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Configuration
|
2
|
+
|
3
|
+
# Holds the configuration for an
|
4
|
+
# index/category combination.
|
5
|
+
#
|
6
|
+
# TODO Rename paths?
|
7
|
+
#
|
8
|
+
class Index
|
9
|
+
|
10
|
+
attr_reader :index, :category
|
11
|
+
|
12
|
+
def initialize index, category
|
13
|
+
@index = index
|
14
|
+
@category = category
|
15
|
+
end
|
16
|
+
|
17
|
+
def index_name
|
18
|
+
@index_name ||= index.name
|
19
|
+
end
|
20
|
+
def category_name
|
21
|
+
@category_name ||= category.name
|
22
|
+
end
|
23
|
+
|
24
|
+
#
|
25
|
+
#
|
26
|
+
def index_path bundle_name, name
|
27
|
+
"#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
|
28
|
+
end
|
29
|
+
|
30
|
+
# Was: search_index_file_name
|
31
|
+
#
|
32
|
+
def prepared_index_path
|
33
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{category_name}_index"
|
34
|
+
end
|
35
|
+
def prepared_index_file &block
|
36
|
+
@prepared_index_file ||= ::Index::File::Text.new prepared_index_path
|
37
|
+
@prepared_index_file.open_for_indexing &block
|
38
|
+
end
|
39
|
+
|
40
|
+
# def file_name
|
41
|
+
# @file_name ||= "#{@index_name}_#{@category_name}"
|
42
|
+
# end
|
43
|
+
|
44
|
+
def identifier
|
45
|
+
@identifier ||= "#{index_name} #{category_name}"
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.index_root
|
49
|
+
@index_root ||= "#{PICKY_ROOT}/index"
|
50
|
+
end
|
51
|
+
def index_root
|
52
|
+
self.class.index_root
|
53
|
+
end
|
54
|
+
# Was: cache_directory
|
55
|
+
#
|
56
|
+
def index_directory
|
57
|
+
@index_directory ||= "#{index_root}/#{PICKY_ENVIRONMENT}/#{index_name}"
|
58
|
+
end
|
59
|
+
# Was: prepare_cache_directory
|
60
|
+
#
|
61
|
+
def prepare_index_directory
|
62
|
+
FileUtils.mkdir_p index_directory
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
data/lib/picky/cores.rb
CHANGED
@@ -15,6 +15,9 @@ class Cores
|
|
15
15
|
# * max: Maximum # of processors to use. Default is all it can get.
|
16
16
|
#
|
17
17
|
def self.forked ary_or_generator, options = {}
|
18
|
+
return if ary_or_generator.empty?
|
19
|
+
raise "Block argument needed when running Cores.forked" unless block_given?
|
20
|
+
|
18
21
|
ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
|
19
22
|
generator = ary_or_generator.each
|
20
23
|
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -1,63 +1,47 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
1
|
module Index
|
4
|
-
|
5
|
-
#
|
2
|
+
# A Bundle is a number of indexes
|
3
|
+
# per [index, category] combination.
|
6
4
|
#
|
7
|
-
#
|
5
|
+
# At most, there are three indexes:
|
6
|
+
# * *core* index (always used)
|
7
|
+
# * *weights* index (always used)
|
8
|
+
# * *similarity* index (used with similarity)
|
9
|
+
#
|
10
|
+
# In Picky, indexing is separated from the index
|
11
|
+
# handling itself through a parallel structure.
|
8
12
|
#
|
9
|
-
#
|
13
|
+
# Both use methods provided by this base class, but
|
14
|
+
# have very different goals:
|
10
15
|
#
|
11
|
-
|
16
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
17
|
+
# and providing helper functions to e.g. check the indexes.
|
18
|
+
#
|
19
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
20
|
+
# memory and looking up search data as fast as possible.
|
21
|
+
#
|
22
|
+
class Bundle
|
12
23
|
|
13
|
-
|
14
|
-
|
15
|
-
def ids text
|
16
|
-
@index[text] || []
|
17
|
-
end
|
18
|
-
# Get a weight for the text.
|
19
|
-
#
|
20
|
-
def weight text
|
21
|
-
@weights[text]
|
22
|
-
end
|
24
|
+
attr_reader :identifier, :files
|
25
|
+
attr_accessor :index, :weights, :similarity, :similarity_strategy
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
#
|
36
|
-
def retrieve
|
37
|
-
files.retrieve do |id, token|
|
38
|
-
initialize_index_for token
|
39
|
-
index[token] << id
|
40
|
-
end
|
41
|
-
end
|
42
|
-
def initialize_index_for token
|
43
|
-
index[token] ||= []
|
27
|
+
delegate :[], :[]=, :clear, :to => :index
|
28
|
+
|
29
|
+
def initialize name, configuration, similarity_strategy
|
30
|
+
@identifier = "#{configuration.identifier} (#{name})"
|
31
|
+
@files = Files.new name, configuration
|
32
|
+
|
33
|
+
@index = {}
|
34
|
+
@weights = {}
|
35
|
+
@similarity = {}
|
36
|
+
|
37
|
+
@similarity_strategy = similarity_strategy
|
44
38
|
end
|
45
39
|
|
46
|
-
#
|
40
|
+
# Get a list of similar texts.
|
47
41
|
#
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
load_weights
|
52
|
-
end
|
53
|
-
def load_index
|
54
|
-
self.index = files.load_index
|
55
|
-
end
|
56
|
-
def load_similarity
|
57
|
-
self.similarity = files.load_similarity
|
58
|
-
end
|
59
|
-
def load_weights
|
60
|
-
self.weights = files.load_weights
|
42
|
+
def similar text
|
43
|
+
code = similarity_strategy.encoded text
|
44
|
+
code && @similarity[code] || []
|
61
45
|
end
|
62
46
|
|
63
47
|
end
|
@@ -1,43 +1,71 @@
|
|
1
1
|
module Index
|
2
2
|
|
3
|
+
# Handles all aspects of index files, such as dumping/loading.
|
4
|
+
#
|
3
5
|
module File
|
4
6
|
|
7
|
+
# Base class for all index files.
|
8
|
+
#
|
9
|
+
# Provides necessary helper methods for its
|
10
|
+
# subclasses.
|
11
|
+
# Not directly useable, as it does not provide
|
12
|
+
# dump/load methods.
|
13
|
+
#
|
5
14
|
class Basic
|
6
15
|
|
7
16
|
attr_reader :cache_path
|
8
17
|
|
18
|
+
# An index cache takes a path, without file extension,
|
19
|
+
# which will be provided by the subclasses.
|
20
|
+
#
|
9
21
|
def initialize cache_path
|
10
22
|
@cache_path = "#{cache_path}.#{extension}"
|
11
23
|
end
|
12
24
|
|
25
|
+
# The default extension for index files is "index".
|
26
|
+
#
|
13
27
|
def extension
|
14
28
|
:index
|
15
29
|
end
|
16
30
|
|
17
|
-
#
|
31
|
+
# Will copy the index file to a location that
|
32
|
+
# is in a directory named "backup" right under
|
33
|
+
# the directory the index file is in.
|
18
34
|
#
|
19
35
|
def backup
|
20
36
|
prepare_backup backup_path
|
21
37
|
FileUtils.cp cache_path, target, verbose: true
|
22
38
|
end
|
39
|
+
# The backup directory of this file.
|
40
|
+
# Equal to the file's dirname plus /backup
|
41
|
+
#
|
42
|
+
# TODO: Rename to backup_dir.
|
43
|
+
#
|
23
44
|
def backup_path
|
24
45
|
::File.join ::File.dirname(cache_path), 'backup'
|
25
46
|
end
|
47
|
+
# Prepares the backup directory for the file.
|
48
|
+
#
|
26
49
|
def prepare_backup target
|
27
50
|
FileUtils.mkdir target unless Dir.exists?(target)
|
28
51
|
end
|
29
52
|
|
30
|
-
#
|
53
|
+
# Copies the file from its backup location back
|
54
|
+
# to the original location.
|
31
55
|
#
|
32
56
|
def restore
|
33
57
|
FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
|
34
58
|
end
|
59
|
+
# The backup filename.
|
60
|
+
#
|
61
|
+
# TODO: Duplicate work done here?
|
62
|
+
#
|
35
63
|
def backup_file_path_of path
|
36
64
|
dir, name = ::File.split path
|
37
65
|
::File.join dir, 'backup', name
|
38
66
|
end
|
39
67
|
|
40
|
-
#
|
68
|
+
# Deletes the file.
|
41
69
|
#
|
42
70
|
def delete
|
43
71
|
`rm -Rf #{cache_path}`
|
@@ -46,16 +74,22 @@ module Index
|
|
46
74
|
# Checks.
|
47
75
|
#
|
48
76
|
|
49
|
-
# Is
|
77
|
+
# Is this cache file suspiciously small?
|
78
|
+
# (less than 8 Bytes of size)
|
50
79
|
#
|
51
80
|
def cache_small?
|
52
81
|
size_of(cache_path) < 8
|
53
82
|
end
|
54
|
-
# Is the cache ok?
|
83
|
+
# Is the cache ok? (existing and larger than
|
84
|
+
# zero Bytes in size)
|
85
|
+
#
|
86
|
+
# A small cache is still ok.
|
55
87
|
#
|
56
88
|
def cache_ok?
|
57
89
|
size_of(cache_path) > 0
|
58
90
|
end
|
91
|
+
# Extracts the size of the file in Bytes.
|
92
|
+
#
|
59
93
|
def size_of path
|
60
94
|
`ls -l #{path} | awk '{print $5}'`.to_i
|
61
95
|
end
|
@@ -2,17 +2,27 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index files dumped in the JSON format.
|
6
|
+
#
|
5
7
|
class JSON < Basic
|
6
8
|
|
9
|
+
# Uses the extension "json".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:json
|
9
13
|
end
|
14
|
+
# Loads the index hash from json format.
|
15
|
+
#
|
10
16
|
def load
|
11
17
|
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
|
12
18
|
end
|
19
|
+
# Dumps the index hash in json format.
|
20
|
+
#
|
13
21
|
def dump hash
|
14
22
|
hash.dump_json cache_path
|
15
23
|
end
|
24
|
+
# A json file does not provide retrieve functionality.
|
25
|
+
#
|
16
26
|
def retrieve
|
17
27
|
raise "Can't retrieve from JSON file. Use text file."
|
18
28
|
end
|
@@ -2,17 +2,27 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index data in the Ruby Marshal format.
|
6
|
+
#
|
5
7
|
class Marshal < Basic
|
6
8
|
|
9
|
+
# Uses the extension "dump".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:dump
|
9
13
|
end
|
14
|
+
# Loads the index hash from marshal format.
|
15
|
+
#
|
10
16
|
def load
|
11
17
|
::Marshal.load ::File.open(cache_path, 'r:binary')
|
12
18
|
end
|
19
|
+
# Dumps the index hash in marshal format.
|
20
|
+
#
|
13
21
|
def dump hash
|
14
22
|
hash.dump_marshalled cache_path
|
15
23
|
end
|
24
|
+
# A marshal file does not provide retrieve functionality.
|
25
|
+
#
|
16
26
|
def retrieve
|
17
27
|
raise "Can't retrieve from marshalled file. Use text file."
|
18
28
|
end
|
@@ -2,18 +2,33 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index data dumped in the text format.
|
6
|
+
#
|
5
7
|
class Text < Basic
|
6
8
|
|
9
|
+
# Uses the extension "txt".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:txt
|
9
13
|
end
|
14
|
+
# Text files are used exclusively for
|
15
|
+
# prepared data files.
|
16
|
+
#
|
10
17
|
def load
|
11
18
|
raise "Can't load from text file. Use JSON or Marshal."
|
12
19
|
end
|
20
|
+
# Text files are used exclusively for
|
21
|
+
# prepared data files.
|
22
|
+
#
|
13
23
|
def dump hash
|
14
24
|
raise "Can't dump to text file. Use JSON or Marshal."
|
15
25
|
end
|
16
26
|
|
27
|
+
# Retrieves prepared index data in the form
|
28
|
+
# * id,data\n
|
29
|
+
# * id,data\n
|
30
|
+
# * id,data\n
|
31
|
+
#
|
17
32
|
# Yields an id and a symbol token.
|
18
33
|
#
|
19
34
|
def retrieve
|
@@ -26,6 +41,13 @@ module Index
|
|
26
41
|
end
|
27
42
|
end
|
28
43
|
|
44
|
+
#
|
45
|
+
#
|
46
|
+
def open_for_indexing &block
|
47
|
+
::File.open cache_path, 'w:binary', &block
|
48
|
+
end
|
49
|
+
|
50
|
+
|
29
51
|
end
|
30
52
|
|
31
53
|
end
|
data/lib/picky/index/files.rb
CHANGED
@@ -2,59 +2,34 @@ module Index
|
|
2
2
|
|
3
3
|
class Files
|
4
4
|
|
5
|
-
attr_reader :bundle_name
|
5
|
+
attr_reader :bundle_name
|
6
6
|
attr_reader :prepared, :index, :similarity, :weights
|
7
7
|
|
8
|
-
|
8
|
+
delegate :index_name, :category_name, :to => :@configuration
|
9
|
+
|
10
|
+
def initialize bundle_name, configuration
|
9
11
|
@bundle_name = bundle_name
|
10
|
-
@
|
11
|
-
@type_name = type_name
|
12
|
+
@configuration = configuration
|
12
13
|
|
13
14
|
# Note: We marshal the similarity, as the
|
14
15
|
# Yajl json lib cannot load symbolized
|
15
16
|
# values, just keys.
|
16
17
|
#
|
17
|
-
@prepared = File::Text.new
|
18
|
-
@index = File::JSON.new
|
19
|
-
@similarity = File::Marshal.new
|
20
|
-
@weights = File::JSON.new
|
21
|
-
end
|
22
|
-
|
23
|
-
# Paths.
|
24
|
-
#
|
25
|
-
|
26
|
-
# Cache path, for File-s.
|
27
|
-
#
|
28
|
-
def cache_path name
|
29
|
-
::File.join cache_directory, "#{bundle_name}_#{category_name}_#{name}"
|
18
|
+
@prepared = File::Text.new configuration.prepared_index_path
|
19
|
+
@index = File::JSON.new configuration.index_path(bundle_name, :index)
|
20
|
+
@similarity = File::Marshal.new configuration.index_path(bundle_name, :similarity)
|
21
|
+
@weights = File::JSON.new configuration.index_path(bundle_name, :weights)
|
30
22
|
end
|
31
23
|
|
32
|
-
#
|
24
|
+
# Delegators.
|
33
25
|
#
|
34
|
-
def search_index_root
|
35
|
-
::File.join PICKY_ROOT, 'index'
|
36
|
-
end
|
37
26
|
|
38
|
-
#
|
27
|
+
# Retrieving data.
|
39
28
|
#
|
40
|
-
def create_directory
|
41
|
-
FileUtils.mkdir_p cache_directory
|
42
|
-
end
|
43
|
-
# TODO Move to config. Duplicate Code in field.rb.
|
44
|
-
#
|
45
|
-
def cache_directory
|
46
|
-
"#{search_index_root}/#{PICKY_ENVIRONMENT}/#{type_name}"
|
47
|
-
end
|
48
29
|
def retrieve &block
|
49
30
|
prepared.retrieve &block
|
50
31
|
end
|
51
32
|
|
52
|
-
# Single index/similarity/weights files delegation.
|
53
|
-
#
|
54
|
-
|
55
|
-
# Delegators.
|
56
|
-
#
|
57
|
-
|
58
33
|
# Dumping.
|
59
34
|
#
|
60
35
|
def dump_index index_hash
|