picky 0.11.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/Index_api.rb +49 -0
- data/lib/picky/alias_instances.rb +4 -1
- data/lib/picky/application.rb +16 -15
- data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
- data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
- data/lib/picky/configuration/index.rb +67 -0
- data/lib/picky/cores.rb +3 -0
- data/lib/picky/index/bundle.rb +35 -51
- data/lib/picky/index/file/basic.rb +39 -5
- data/lib/picky/index/file/json.rb +10 -0
- data/lib/picky/index/file/marshal.rb +10 -0
- data/lib/picky/index/file/text.rb +22 -0
- data/lib/picky/index/files.rb +11 -36
- data/lib/picky/indexed/bundle.rb +61 -0
- data/lib/picky/{index → indexed}/categories.rb +1 -1
- data/lib/picky/{index → indexed}/category.rb +13 -16
- data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
- data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
- data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexers/serial.rb +64 -0
- data/lib/picky/indexers/solr.rb +1 -3
- data/lib/picky/indexes_api.rb +41 -0
- data/lib/picky/indexing/bundle.rb +43 -13
- data/lib/picky/indexing/category.rb +17 -64
- data/lib/picky/indexing/{type.rb → index.rb} +13 -3
- data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
- data/lib/picky/loader.rb +17 -22
- data/lib/picky/query/base.rb +1 -1
- data/lib/picky/rack/harakiri.rb +9 -2
- data/lib/picky/signals.rb +1 -1
- data/lib/picky/sources/base.rb +14 -14
- data/lib/picky/sources/couch.rb +8 -7
- data/lib/picky/sources/csv.rb +10 -10
- data/lib/picky/sources/db.rb +8 -8
- data/lib/picky/sources/delicious.rb +2 -2
- data/lib/picky/sources/wrappers/location.rb +3 -3
- data/lib/picky/tokenizers/base.rb +1 -11
- data/lib/picky/tokenizers/index.rb +0 -1
- data/lib/picky/tokenizers/query.rb +0 -1
- data/lib/tasks/index.rake +4 -4
- data/lib/tasks/shortcuts.rake +4 -4
- data/lib/tasks/try.rake +8 -8
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +13 -12
- data/spec/lib/application_spec.rb +10 -38
- data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
- data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
- data/spec/lib/configuration/index_spec.rb +80 -0
- data/spec/lib/cores_spec.rb +1 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/index/files_spec.rb +12 -32
- data/spec/lib/indexed/bundle_spec.rb +119 -0
- data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
- data/spec/lib/{index → indexed}/category_spec.rb +6 -6
- data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
- data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/serial_spec.rb +62 -0
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
- data/spec/lib/indexing/bundle_spec.rb +9 -14
- data/spec/lib/indexing/category_spec.rb +9 -125
- data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
- data/spec/lib/query/base_spec.rb +1 -1
- data/spec/lib/query/full_spec.rb +1 -1
- data/spec/lib/query/live_spec.rb +2 -4
- data/spec/lib/sources/couch_spec.rb +5 -5
- data/spec/lib/sources/db_spec.rb +6 -7
- data/spec/lib/tokenizers/base_spec.rb +1 -24
- data/spec/lib/tokenizers/query_spec.rb +0 -1
- metadata +38 -41
- data/lib/picky/bundle.rb +0 -33
- data/lib/picky/configuration/indexes.rb +0 -51
- data/lib/picky/configuration/queries.rb +0 -15
- data/lib/picky/indexers/base.rb +0 -85
- data/lib/picky/indexers/default.rb +0 -3
- data/lib/picky/type.rb +0 -46
- data/lib/picky/types.rb +0 -41
- data/lib/tasks/cache.rake +0 -46
- data/spec/lib/configuration/indexes_spec.rb +0 -28
- data/spec/lib/index/bundle_spec.rb +0 -151
- data/spec/lib/indexers/base_spec.rb +0 -89
@@ -0,0 +1,49 @@
|
|
1
|
+
# This class defines the indexing and index API.
|
2
|
+
#
|
3
|
+
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
4
|
+
#
|
5
|
+
class IndexAPI
|
6
|
+
|
7
|
+
# TODO Delegation.
|
8
|
+
#
|
9
|
+
|
10
|
+
attr_reader :name, :indexing, :indexed
|
11
|
+
|
12
|
+
def initialize name, source, options = {}
|
13
|
+
@name = name
|
14
|
+
@indexing = Indexing::Index.new name, source, options
|
15
|
+
@indexed = Indexed::Index.new name, options
|
16
|
+
|
17
|
+
# Centralized registry.
|
18
|
+
#
|
19
|
+
Indexes.register self
|
20
|
+
end
|
21
|
+
|
22
|
+
# API.
|
23
|
+
#
|
24
|
+
# TODO Spec! Doc!
|
25
|
+
#
|
26
|
+
def define_category category_name, options = {}
|
27
|
+
category_name = category_name.to_sym
|
28
|
+
|
29
|
+
indexing_category = indexing.add_category category_name, options
|
30
|
+
indexed_category = indexed.add_category category_name, options
|
31
|
+
|
32
|
+
yield indexing_category, indexed_category if block_given?
|
33
|
+
|
34
|
+
self
|
35
|
+
end
|
36
|
+
alias category define_category
|
37
|
+
|
38
|
+
def define_location name, options = {}
|
39
|
+
grid = options[:grid]
|
40
|
+
precision = options[:precision]
|
41
|
+
|
42
|
+
define_category name, options do |indexing, _|
|
43
|
+
indexing.source = Sources::Wrappers::Location.new indexing.source, grid: grid, precision: precision
|
44
|
+
indexing.tokenizer = Tokenizers::Index.new
|
45
|
+
end
|
46
|
+
end
|
47
|
+
alias location define_location
|
48
|
+
|
49
|
+
end
|
data/lib/picky/application.rb
CHANGED
@@ -4,24 +4,37 @@ class Application
|
|
4
4
|
|
5
5
|
class << self
|
6
6
|
|
7
|
+
# API
|
8
|
+
#
|
9
|
+
|
7
10
|
# Returns a configured tokenizer that
|
8
11
|
# is used for indexing by default.
|
9
12
|
#
|
10
13
|
def default_indexing options = {}
|
11
|
-
|
14
|
+
Tokenizers::Index.default = Tokenizers::Index.new(options)
|
12
15
|
end
|
13
16
|
|
14
17
|
# Returns a configured tokenizer that
|
15
18
|
# is used for querying by default.
|
16
19
|
#
|
17
20
|
def default_querying options = {}
|
18
|
-
|
21
|
+
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Returns a new index frontend for configuring the index.
|
25
|
+
#
|
26
|
+
def index *args
|
27
|
+
IndexAPI.new *args
|
19
28
|
end
|
20
29
|
|
21
30
|
# Routes.
|
22
31
|
#
|
23
32
|
delegate :route, :root, :to => :routing
|
24
33
|
|
34
|
+
#
|
35
|
+
# API
|
36
|
+
|
37
|
+
|
25
38
|
# An application simply delegates to the routing to handle a request.
|
26
39
|
#
|
27
40
|
def call env
|
@@ -30,18 +43,6 @@ class Application
|
|
30
43
|
def routing
|
31
44
|
@routing ||= Routing.new
|
32
45
|
end
|
33
|
-
def indexing
|
34
|
-
@indexing ||= Configuration::Indexes.new
|
35
|
-
end
|
36
|
-
def querying
|
37
|
-
@queries ||= Configuration::Queries.new
|
38
|
-
end
|
39
|
-
|
40
|
-
# "API".
|
41
|
-
#
|
42
|
-
def index *args
|
43
|
-
::Type.new *args
|
44
|
-
end
|
45
46
|
|
46
47
|
# Finalize the subclass as soon as it
|
47
48
|
# has finished loading.
|
@@ -64,7 +65,7 @@ class Application
|
|
64
65
|
routing.freeze
|
65
66
|
end
|
66
67
|
|
67
|
-
# TODO Add more info.
|
68
|
+
# TODO Add more info if possible.
|
68
69
|
#
|
69
70
|
def to_s
|
70
71
|
"#{self.name}:\n#{routing}"
|
@@ -2,7 +2,7 @@ module Cacher
|
|
2
2
|
|
3
3
|
module Partial
|
4
4
|
|
5
|
-
# Generates the right
|
5
|
+
# Generates the right substrings for use in the substring strategy.
|
6
6
|
#
|
7
7
|
class SubstringGenerator
|
8
8
|
|
@@ -27,38 +27,39 @@ module Cacher
|
|
27
27
|
|
28
28
|
# The substring partial strategy.
|
29
29
|
#
|
30
|
-
# If given
|
31
|
-
# "
|
32
|
-
#
|
33
|
-
# "floria"
|
34
|
-
# "flori"
|
35
|
-
# "flor"
|
36
|
-
# "flo"
|
37
|
-
# "fl"
|
38
|
-
# "f"
|
39
|
-
# Depending on what the given from value is. (Example with from == 1)
|
30
|
+
# If given "florian"
|
31
|
+
# it will index "floria", "flori", "flor", "flo", "fl", "f"
|
32
|
+
# (Depending on what the given from value is, the example is with option from: 1)
|
40
33
|
#
|
41
34
|
class Substring < Strategy
|
42
35
|
|
43
|
-
#
|
36
|
+
# The from option signifies where in the symbol it
|
37
|
+
# will start generating the substrings.
|
44
38
|
#
|
45
39
|
# Examples:
|
46
|
-
# With :hello, and to -1
|
47
|
-
# * down to == 1: [:hello, :hell, :hel, :he, :h]
|
48
|
-
# * down to == 4: [:hello, :hell]
|
49
40
|
#
|
50
|
-
# With :hello, and to -
|
51
|
-
# *
|
52
|
-
# *
|
41
|
+
# With :hello, and to: -1 (default)
|
42
|
+
# * from: 1 # => [:hello, :hell, :hel, :he, :h]
|
43
|
+
# * from: 4 # => [:hello, :hell]
|
44
|
+
#
|
45
|
+
# With :hello, and to: -2
|
46
|
+
# * from: 1 # => [:hell, :hel, :he, :h]
|
47
|
+
# * from: 4 # => [:hell]
|
53
48
|
#
|
54
49
|
def initialize options = {}
|
55
50
|
from = options[:from] || 1
|
56
51
|
to = options[:to] || -1
|
57
52
|
@generator = SubstringGenerator.new from, to
|
58
53
|
end
|
54
|
+
|
55
|
+
# Delegator to generator#from.
|
56
|
+
#
|
59
57
|
def from
|
60
58
|
@generator.from
|
61
59
|
end
|
60
|
+
|
61
|
+
# Delegator to generator#to.
|
62
|
+
#
|
62
63
|
def to
|
63
64
|
@generator.to
|
64
65
|
end
|
data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb}
RENAMED
@@ -1,11 +1,11 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
-
module
|
3
|
+
module CharacterSubstituters
|
4
4
|
# Substitutes Umlauts like
|
5
5
|
# ä, ö, ü => ae, oe, ue.
|
6
6
|
# (and more, see specs)
|
7
7
|
#
|
8
|
-
class
|
8
|
+
class WestEuropean
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
@chars = ActiveSupport::Multibyte.proxy_class
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module Configuration
|
2
|
+
|
3
|
+
# Holds the configuration for an
|
4
|
+
# index/category combination.
|
5
|
+
#
|
6
|
+
# TODO Rename paths?
|
7
|
+
#
|
8
|
+
class Index
|
9
|
+
|
10
|
+
attr_reader :index, :category
|
11
|
+
|
12
|
+
def initialize index, category
|
13
|
+
@index = index
|
14
|
+
@category = category
|
15
|
+
end
|
16
|
+
|
17
|
+
def index_name
|
18
|
+
@index_name ||= index.name
|
19
|
+
end
|
20
|
+
def category_name
|
21
|
+
@category_name ||= category.name
|
22
|
+
end
|
23
|
+
|
24
|
+
#
|
25
|
+
#
|
26
|
+
def index_path bundle_name, name
|
27
|
+
"#{index_directory}/#{category_name}_#{bundle_name}_#{name}"
|
28
|
+
end
|
29
|
+
|
30
|
+
# Was: search_index_file_name
|
31
|
+
#
|
32
|
+
def prepared_index_path
|
33
|
+
@prepared_index_path ||= "#{index_directory}/prepared_#{category_name}_index"
|
34
|
+
end
|
35
|
+
def prepared_index_file &block
|
36
|
+
@prepared_index_file ||= ::Index::File::Text.new prepared_index_path
|
37
|
+
@prepared_index_file.open_for_indexing &block
|
38
|
+
end
|
39
|
+
|
40
|
+
# def file_name
|
41
|
+
# @file_name ||= "#{@index_name}_#{@category_name}"
|
42
|
+
# end
|
43
|
+
|
44
|
+
def identifier
|
45
|
+
@identifier ||= "#{index_name} #{category_name}"
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.index_root
|
49
|
+
@index_root ||= "#{PICKY_ROOT}/index"
|
50
|
+
end
|
51
|
+
def index_root
|
52
|
+
self.class.index_root
|
53
|
+
end
|
54
|
+
# Was: cache_directory
|
55
|
+
#
|
56
|
+
def index_directory
|
57
|
+
@index_directory ||= "#{index_root}/#{PICKY_ENVIRONMENT}/#{index_name}"
|
58
|
+
end
|
59
|
+
# Was: prepare_cache_directory
|
60
|
+
#
|
61
|
+
def prepare_index_directory
|
62
|
+
FileUtils.mkdir_p index_directory
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
data/lib/picky/cores.rb
CHANGED
@@ -15,6 +15,9 @@ class Cores
|
|
15
15
|
# * max: Maximum # of processors to use. Default is all it can get.
|
16
16
|
#
|
17
17
|
def self.forked ary_or_generator, options = {}
|
18
|
+
return if ary_or_generator.empty?
|
19
|
+
raise "Block argument needed when running Cores.forked" unless block_given?
|
20
|
+
|
18
21
|
ary_or_generator = ary_or_generator.sort_by { rand } if options[:randomly]
|
19
22
|
generator = ary_or_generator.each
|
20
23
|
|
data/lib/picky/index/bundle.rb
CHANGED
@@ -1,63 +1,47 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
1
|
module Index
|
4
|
-
|
5
|
-
#
|
2
|
+
# A Bundle is a number of indexes
|
3
|
+
# per [index, category] combination.
|
6
4
|
#
|
7
|
-
#
|
5
|
+
# At most, there are three indexes:
|
6
|
+
# * *core* index (always used)
|
7
|
+
# * *weights* index (always used)
|
8
|
+
# * *similarity* index (used with similarity)
|
9
|
+
#
|
10
|
+
# In Picky, indexing is separated from the index
|
11
|
+
# handling itself through a parallel structure.
|
8
12
|
#
|
9
|
-
#
|
13
|
+
# Both use methods provided by this base class, but
|
14
|
+
# have very different goals:
|
10
15
|
#
|
11
|
-
|
16
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
17
|
+
# and providing helper functions to e.g. check the indexes.
|
18
|
+
#
|
19
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
20
|
+
# memory and looking up search data as fast as possible.
|
21
|
+
#
|
22
|
+
class Bundle
|
12
23
|
|
13
|
-
|
14
|
-
|
15
|
-
def ids text
|
16
|
-
@index[text] || []
|
17
|
-
end
|
18
|
-
# Get a weight for the text.
|
19
|
-
#
|
20
|
-
def weight text
|
21
|
-
@weights[text]
|
22
|
-
end
|
24
|
+
attr_reader :identifier, :files
|
25
|
+
attr_accessor :index, :weights, :similarity, :similarity_strategy
|
23
26
|
|
24
|
-
|
25
|
-
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
#
|
36
|
-
def retrieve
|
37
|
-
files.retrieve do |id, token|
|
38
|
-
initialize_index_for token
|
39
|
-
index[token] << id
|
40
|
-
end
|
41
|
-
end
|
42
|
-
def initialize_index_for token
|
43
|
-
index[token] ||= []
|
27
|
+
delegate :[], :[]=, :clear, :to => :index
|
28
|
+
|
29
|
+
def initialize name, configuration, similarity_strategy
|
30
|
+
@identifier = "#{configuration.identifier} (#{name})"
|
31
|
+
@files = Files.new name, configuration
|
32
|
+
|
33
|
+
@index = {}
|
34
|
+
@weights = {}
|
35
|
+
@similarity = {}
|
36
|
+
|
37
|
+
@similarity_strategy = similarity_strategy
|
44
38
|
end
|
45
39
|
|
46
|
-
#
|
40
|
+
# Get a list of similar texts.
|
47
41
|
#
|
48
|
-
def
|
49
|
-
|
50
|
-
|
51
|
-
load_weights
|
52
|
-
end
|
53
|
-
def load_index
|
54
|
-
self.index = files.load_index
|
55
|
-
end
|
56
|
-
def load_similarity
|
57
|
-
self.similarity = files.load_similarity
|
58
|
-
end
|
59
|
-
def load_weights
|
60
|
-
self.weights = files.load_weights
|
42
|
+
def similar text
|
43
|
+
code = similarity_strategy.encoded text
|
44
|
+
code && @similarity[code] || []
|
61
45
|
end
|
62
46
|
|
63
47
|
end
|
@@ -1,43 +1,71 @@
|
|
1
1
|
module Index
|
2
2
|
|
3
|
+
# Handles all aspects of index files, such as dumping/loading.
|
4
|
+
#
|
3
5
|
module File
|
4
6
|
|
7
|
+
# Base class for all index files.
|
8
|
+
#
|
9
|
+
# Provides necessary helper methods for its
|
10
|
+
# subclasses.
|
11
|
+
# Not directly useable, as it does not provide
|
12
|
+
# dump/load methods.
|
13
|
+
#
|
5
14
|
class Basic
|
6
15
|
|
7
16
|
attr_reader :cache_path
|
8
17
|
|
18
|
+
# An index cache takes a path, without file extension,
|
19
|
+
# which will be provided by the subclasses.
|
20
|
+
#
|
9
21
|
def initialize cache_path
|
10
22
|
@cache_path = "#{cache_path}.#{extension}"
|
11
23
|
end
|
12
24
|
|
25
|
+
# The default extension for index files is "index".
|
26
|
+
#
|
13
27
|
def extension
|
14
28
|
:index
|
15
29
|
end
|
16
30
|
|
17
|
-
#
|
31
|
+
# Will copy the index file to a location that
|
32
|
+
# is in a directory named "backup" right under
|
33
|
+
# the directory the index file is in.
|
18
34
|
#
|
19
35
|
def backup
|
20
36
|
prepare_backup backup_path
|
21
37
|
FileUtils.cp cache_path, target, verbose: true
|
22
38
|
end
|
39
|
+
# The backup directory of this file.
|
40
|
+
# Equal to the file's dirname plus /backup
|
41
|
+
#
|
42
|
+
# TODO: Rename to backup_dir.
|
43
|
+
#
|
23
44
|
def backup_path
|
24
45
|
::File.join ::File.dirname(cache_path), 'backup'
|
25
46
|
end
|
47
|
+
# Prepares the backup directory for the file.
|
48
|
+
#
|
26
49
|
def prepare_backup target
|
27
50
|
FileUtils.mkdir target unless Dir.exists?(target)
|
28
51
|
end
|
29
52
|
|
30
|
-
#
|
53
|
+
# Copies the file from its backup location back
|
54
|
+
# to the original location.
|
31
55
|
#
|
32
56
|
def restore
|
33
57
|
FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
|
34
58
|
end
|
59
|
+
# The backup filename.
|
60
|
+
#
|
61
|
+
# TODO: Duplicate work done here?
|
62
|
+
#
|
35
63
|
def backup_file_path_of path
|
36
64
|
dir, name = ::File.split path
|
37
65
|
::File.join dir, 'backup', name
|
38
66
|
end
|
39
67
|
|
40
|
-
#
|
68
|
+
# Deletes the file.
|
41
69
|
#
|
42
70
|
def delete
|
43
71
|
`rm -Rf #{cache_path}`
|
@@ -46,16 +74,22 @@ module Index
|
|
46
74
|
# Checks.
|
47
75
|
#
|
48
76
|
|
49
|
-
# Is
|
77
|
+
# Is this cache file suspiciously small?
|
78
|
+
# (less than 8 Bytes in size)
|
50
79
|
#
|
51
80
|
def cache_small?
|
52
81
|
size_of(cache_path) < 8
|
53
82
|
end
|
54
|
-
# Is the cache ok?
|
83
|
+
# Is the cache ok? (existing and larger than
|
84
|
+
# zero Bytes in size)
|
85
|
+
#
|
86
|
+
# A small cache is still ok.
|
55
87
|
#
|
56
88
|
def cache_ok?
|
57
89
|
size_of(cache_path) > 0
|
58
90
|
end
|
91
|
+
# Extracts the size of the file in Bytes.
|
92
|
+
#
|
59
93
|
def size_of path
|
60
94
|
`ls -l #{path} | awk '{print $5}'`.to_i
|
61
95
|
end
|
@@ -2,17 +2,27 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index files dumped in the JSON format.
|
6
|
+
#
|
5
7
|
class JSON < Basic
|
6
8
|
|
9
|
+
# Uses the extension "json".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:json
|
9
13
|
end
|
14
|
+
# Loads the index hash from json format.
|
15
|
+
#
|
10
16
|
def load
|
11
17
|
Yajl::Parser.parse ::File.open(cache_path, 'r'), symbolize_keys: true
|
12
18
|
end
|
19
|
+
# Dumps the index hash in json format.
|
20
|
+
#
|
13
21
|
def dump hash
|
14
22
|
hash.dump_json cache_path
|
15
23
|
end
|
24
|
+
# A json file does not provide retrieve functionality.
|
25
|
+
#
|
16
26
|
def retrieve
|
17
27
|
raise "Can't retrieve from JSON file. Use text file."
|
18
28
|
end
|
@@ -2,17 +2,27 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index data in the Ruby Marshal format.
|
6
|
+
#
|
5
7
|
class Marshal < Basic
|
6
8
|
|
9
|
+
# Uses the extension "dump".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:dump
|
9
13
|
end
|
14
|
+
# Loads the index hash from marshal format.
|
15
|
+
#
|
10
16
|
def load
|
11
17
|
::Marshal.load ::File.open(cache_path, 'r:binary')
|
12
18
|
end
|
19
|
+
# Dumps the index hash in marshal format.
|
20
|
+
#
|
13
21
|
def dump hash
|
14
22
|
hash.dump_marshalled cache_path
|
15
23
|
end
|
24
|
+
# A marshal file does not provide retrieve functionality.
|
25
|
+
#
|
16
26
|
def retrieve
|
17
27
|
raise "Can't retrieve from marshalled file. Use text file."
|
18
28
|
end
|
@@ -2,18 +2,33 @@ module Index
|
|
2
2
|
|
3
3
|
module File
|
4
4
|
|
5
|
+
# Index data dumped in the text format.
|
6
|
+
#
|
5
7
|
class Text < Basic
|
6
8
|
|
9
|
+
# Uses the extension "txt".
|
10
|
+
#
|
7
11
|
def extension
|
8
12
|
:txt
|
9
13
|
end
|
14
|
+
# Text files are used exclusively for
|
15
|
+
# prepared data files.
|
16
|
+
#
|
10
17
|
def load
|
11
18
|
raise "Can't load from text file. Use JSON or Marshal."
|
12
19
|
end
|
20
|
+
# Text files are used exclusively for
|
21
|
+
# prepared data files.
|
22
|
+
#
|
13
23
|
def dump hash
|
14
24
|
raise "Can't dump to text file. Use JSON or Marshal."
|
15
25
|
end
|
16
26
|
|
27
|
+
# Retrieves prepared index data in the form
|
28
|
+
# * id,data\n
|
29
|
+
# * id,data\n
|
30
|
+
# * id,data\n
|
31
|
+
#
|
17
32
|
# Yields an id and a symbol token.
|
18
33
|
#
|
19
34
|
def retrieve
|
@@ -26,6 +41,13 @@ module Index
|
|
26
41
|
end
|
27
42
|
end
|
28
43
|
|
44
|
+
#
|
45
|
+
#
|
46
|
+
def open_for_indexing &block
|
47
|
+
::File.open cache_path, 'w:binary', &block
|
48
|
+
end
|
49
|
+
|
50
|
+
|
29
51
|
end
|
30
52
|
|
31
53
|
end
|
data/lib/picky/index/files.rb
CHANGED
@@ -2,59 +2,34 @@ module Index
|
|
2
2
|
|
3
3
|
class Files
|
4
4
|
|
5
|
-
attr_reader :bundle_name
|
5
|
+
attr_reader :bundle_name
|
6
6
|
attr_reader :prepared, :index, :similarity, :weights
|
7
7
|
|
8
|
-
|
8
|
+
delegate :index_name, :category_name, :to => :@configuration
|
9
|
+
|
10
|
+
def initialize bundle_name, configuration
|
9
11
|
@bundle_name = bundle_name
|
10
|
-
@
|
11
|
-
@type_name = type_name
|
12
|
+
@configuration = configuration
|
12
13
|
|
13
14
|
# Note: We marshal the similarity, as the
|
14
15
|
# Yajl json lib cannot load symbolized
|
15
16
|
# values, just keys.
|
16
17
|
#
|
17
|
-
@prepared = File::Text.new
|
18
|
-
@index = File::JSON.new
|
19
|
-
@similarity = File::Marshal.new
|
20
|
-
@weights = File::JSON.new
|
21
|
-
end
|
22
|
-
|
23
|
-
# Paths.
|
24
|
-
#
|
25
|
-
|
26
|
-
# Cache path, for File-s.
|
27
|
-
#
|
28
|
-
def cache_path name
|
29
|
-
::File.join cache_directory, "#{bundle_name}_#{category_name}_#{name}"
|
18
|
+
@prepared = File::Text.new configuration.prepared_index_path
|
19
|
+
@index = File::JSON.new configuration.index_path(bundle_name, :index)
|
20
|
+
@similarity = File::Marshal.new configuration.index_path(bundle_name, :similarity)
|
21
|
+
@weights = File::JSON.new configuration.index_path(bundle_name, :weights)
|
30
22
|
end
|
31
23
|
|
32
|
-
#
|
24
|
+
# Delegators.
|
33
25
|
#
|
34
|
-
def search_index_root
|
35
|
-
::File.join PICKY_ROOT, 'index'
|
36
|
-
end
|
37
26
|
|
38
|
-
#
|
27
|
+
# Retrieving data.
|
39
28
|
#
|
40
|
-
def create_directory
|
41
|
-
FileUtils.mkdir_p cache_directory
|
42
|
-
end
|
43
|
-
# TODO Move to config. Duplicate Code in field.rb.
|
44
|
-
#
|
45
|
-
def cache_directory
|
46
|
-
"#{search_index_root}/#{PICKY_ENVIRONMENT}/#{type_name}"
|
47
|
-
end
|
48
29
|
def retrieve &block
|
49
30
|
prepared.retrieve &block
|
50
31
|
end
|
51
32
|
|
52
|
-
# Single index/similarity/weights files delegation.
|
53
|
-
#
|
54
|
-
|
55
|
-
# Delegators.
|
56
|
-
#
|
57
|
-
|
58
33
|
# Dumping.
|
59
34
|
#
|
60
35
|
def dump_index index_hash
|