picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/application.rb
CHANGED
|
@@ -1,254 +1,264 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
# A Picky Application is where you configure the whole search engine.
|
|
4
|
-
#
|
|
5
|
-
# This is a step-by-step description on how to configure your Picky app.
|
|
6
|
-
#
|
|
7
|
-
# Start by subclassing Application:
|
|
8
|
-
# class MyGreatSearch < Application
|
|
9
|
-
# # Your configuration goes here.
|
|
10
|
-
# end
|
|
11
|
-
# The generator
|
|
12
|
-
# $ picky generate unicorn_server project_name
|
|
13
|
-
# will generate an example <tt>project_name/app/application.rb</tt> file for you
|
|
14
|
-
# with some example code inside.
|
|
15
|
-
#
|
|
16
|
-
# == Index::Memory.new(name)
|
|
17
|
-
#
|
|
18
|
-
# Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index::Memory.new</tt> method for that:
|
|
19
|
-
# my_index = Index::Memory.new :some_index_name
|
|
20
|
-
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
|
21
|
-
# class MyGreatSearch < Application
|
|
22
|
-
#
|
|
23
|
-
# books = Index::Memory.new :books do
|
|
24
|
-
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
25
|
-
# end
|
|
26
|
-
#
|
|
27
|
-
# end
|
|
28
|
-
# Now we have an index <tt>books</tt>.
|
|
29
|
-
#
|
|
30
|
-
# That on itself won't do much good.
|
|
31
|
-
#
|
|
32
|
-
# Note that a Redis index is also available: Index::Redis.new.
|
|
33
|
-
#
|
|
34
|
-
# == category(identifier, options = {})
|
|
35
|
-
#
|
|
36
|
-
# Picky needs us to define categories on the data.
|
|
37
|
-
#
|
|
38
|
-
# Categories help your user find data.
|
|
39
|
-
# It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
|
|
40
|
-
#
|
|
41
|
-
# Let's go ahead and define a category:
|
|
42
|
-
# class MyGreatSearch < Application
|
|
43
|
-
#
|
|
44
|
-
# books = Index::Memory.new :books do
|
|
45
|
-
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
46
|
-
# category :title
|
|
47
|
-
# end
|
|
48
|
-
#
|
|
49
|
-
# end
|
|
50
|
-
# Now we could already run the indexer:
|
|
51
|
-
# $ rake index
|
|
52
|
-
#
|
|
53
|
-
# (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
|
|
54
|
-
#
|
|
55
|
-
# So now we have indexed data (the title), but nobody to ask the index anything.
|
|
56
|
-
#
|
|
57
|
-
# == Search.new(*indexes, options = {})
|
|
58
|
-
#
|
|
59
|
-
# We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration):
|
|
60
|
-
# books_search = Search.new books
|
|
61
|
-
#
|
|
62
|
-
# Now we have somebody we can ask about the index. But no external interface.
|
|
63
|
-
#
|
|
64
|
-
# == route(/regexp1/ => search1, /regexp2/ => search2, ...)
|
|
65
|
-
#
|
|
66
|
-
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
|
67
|
-
# route %r{^/books$} => books_query
|
|
68
|
-
# In full glory:
|
|
69
|
-
# class MyGreatSearch < Application
|
|
70
|
-
#
|
|
71
|
-
# books = Index::Memory.new :books do
|
|
72
|
-
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
73
|
-
# category :title
|
|
74
|
-
# end
|
|
75
|
-
#
|
|
76
|
-
# route %r{^/books$} => Search.new(books)
|
|
77
|
-
#
|
|
78
|
-
# end
|
|
79
|
-
# That's it!
|
|
80
|
-
#
|
|
81
|
-
# Now run the indexer and server:
|
|
82
|
-
# $ rake index
|
|
83
|
-
# $ rake start
|
|
84
|
-
# Run your first query:
|
|
85
|
-
# $ curl 'localhost:8080/books?query=hello server'
|
|
86
|
-
#
|
|
87
|
-
# Nice, right? Your first query!
|
|
88
|
-
#
|
|
89
|
-
# Maybe you don't find everything. We need to process the data before it goes into the index.
|
|
90
|
-
#
|
|
91
|
-
# == indexing(options = {})
|
|
92
|
-
#
|
|
93
|
-
# That's what the <tt>indexing</tt> method is for:
|
|
94
|
-
# indexing options
|
|
95
|
-
# Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
|
|
96
|
-
#
|
|
97
|
-
# Same thing with the search text – we need to process that as well.
|
|
98
|
-
#
|
|
99
|
-
# == searching(options = {})
|
|
100
|
-
#
|
|
101
|
-
# Analog to the indexing method, we use the <tt>searching</tt> method.
|
|
102
|
-
# searching options
|
|
103
|
-
# Read more about the options here: http://github.com/floere/picky/wiki/Searching-Configuration
|
|
104
|
-
#
|
|
105
|
-
# And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
|
|
106
|
-
#
|
|
107
|
-
# == Wiki
|
|
108
|
-
#
|
|
109
|
-
# Read more in the Wiki: http://github.com/floere/picky/wiki
|
|
110
|
-
#
|
|
111
|
-
# Have fun!
|
|
112
|
-
#
|
|
113
|
-
# == Full example
|
|
114
|
-
#
|
|
115
|
-
# Our example, fully fleshed out with indexing, querying, and weights:
|
|
116
|
-
# class MyGreatSearch < Application
|
|
117
|
-
#
|
|
118
|
-
# indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
|
119
|
-
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
|
120
|
-
# splits_text_on: /\s/,
|
|
121
|
-
# removes_characters_after_splitting: /\./,
|
|
122
|
-
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
|
123
|
-
# normalizes_words: [
|
|
124
|
-
# [/(.*)hausen/, 'hn'],
|
|
125
|
-
# [/\b(\w*)str(eet)?/, 'st']
|
|
126
|
-
# ]
|
|
127
|
-
#
|
|
128
|
-
# searching removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
|
|
129
|
-
# stopwords: /\b(and|the|of|it|in|for)\b/,
|
|
130
|
-
# splits_text_on: /[\s\/\-\,\&]+/,
|
|
131
|
-
# removes_characters_after_splitting: /\./,
|
|
132
|
-
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
|
133
|
-
# maximum_tokens: 4
|
|
134
|
-
#
|
|
135
|
-
# books = Index::Memory.new :books do
|
|
136
|
-
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
137
|
-
# category :title,
|
|
138
|
-
# qualifiers: [:t, :title, :titre],
|
|
139
|
-
# partial: Partial::Substring.new(:from => 1),
|
|
140
|
-
# similarity: Similarity::DoubleMetaphone.new(2)
|
|
141
|
-
# category :author,
|
|
142
|
-
# partial: Partial::Substring.new(:from => -2)
|
|
143
|
-
# category :isbn
|
|
144
|
-
# end
|
|
145
|
-
#
|
|
146
|
-
# route %r{^/books$} => Search.new(books) do
|
|
147
|
-
# boost [:title, :author] => +3, [:author, :title] => -1
|
|
148
|
-
# end
|
|
149
|
-
#
|
|
150
|
-
# end
|
|
151
|
-
# That's actually already a full-blown Picky App!
|
|
152
|
-
#
|
|
153
|
-
class Application
|
|
1
|
+
module Picky
|
|
154
2
|
|
|
155
|
-
|
|
3
|
+
# = Picky Applications
|
|
4
|
+
#
|
|
5
|
+
# A Picky Application is where you configure the whole search engine.
|
|
6
|
+
#
|
|
7
|
+
# This is a step-by-step description on how to configure your Picky app.
|
|
8
|
+
#
|
|
9
|
+
# Start by subclassing Application:
|
|
10
|
+
# class MyGreatSearch < Application
|
|
11
|
+
# # Your configuration goes here.
|
|
12
|
+
# end
|
|
13
|
+
# The generator
|
|
14
|
+
# $ picky generate unicorn_server project_name
|
|
15
|
+
# will generate an example <tt>project_name/app/application.rb</tt> file for you
|
|
16
|
+
# with some example code inside.
|
|
17
|
+
#
|
|
18
|
+
# == Indexes::Memory.new(name)
|
|
19
|
+
#
|
|
20
|
+
# Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Indexes::Memory.new</tt> method for that:
|
|
21
|
+
# my_index = Indexes::Memory.new :some_index_name
|
|
22
|
+
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
|
23
|
+
# class MyGreatSearch < Application
|
|
24
|
+
#
|
|
25
|
+
# books = Indexes::Memory.new :books do
|
|
26
|
+
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
27
|
+
# end
|
|
28
|
+
#
|
|
29
|
+
# end
|
|
30
|
+
# Now we have an index <tt>books</tt>.
|
|
31
|
+
#
|
|
32
|
+
# That on itself won't do much good.
|
|
33
|
+
#
|
|
34
|
+
# Note that a Redis index is also available: Indexes::Redis.new.
|
|
35
|
+
#
|
|
36
|
+
# == category(identifier, options = {})
|
|
37
|
+
#
|
|
38
|
+
# Picky needs us to define categories on the data.
|
|
39
|
+
#
|
|
40
|
+
# Categories help your user find data.
|
|
41
|
+
# It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
|
|
42
|
+
#
|
|
43
|
+
# Let's go ahead and define a category:
|
|
44
|
+
# class MyGreatSearch < Application
|
|
45
|
+
#
|
|
46
|
+
# books = Indexes::Memory.new :books do
|
|
47
|
+
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
48
|
+
# category :title
|
|
49
|
+
# end
|
|
50
|
+
#
|
|
51
|
+
# end
|
|
52
|
+
# Now we could already run the indexer:
|
|
53
|
+
# $ rake index
|
|
54
|
+
#
|
|
55
|
+
# (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
|
|
56
|
+
#
|
|
57
|
+
# So now we have indexed data (the title), but nobody to ask the index anything.
|
|
58
|
+
#
|
|
59
|
+
# == Search.new(*indexes, options = {})
|
|
60
|
+
#
|
|
61
|
+
# We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration):
|
|
62
|
+
# books_search = Search.new books
|
|
63
|
+
#
|
|
64
|
+
# Now we have somebody we can ask about the index. But no external interface.
|
|
65
|
+
#
|
|
66
|
+
# == route(/regexp1/ => search1, /regexp2/ => search2, ...)
|
|
67
|
+
#
|
|
68
|
+
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
|
69
|
+
# route %r{^/books$} => books_query
|
|
70
|
+
# In full glory:
|
|
71
|
+
# class MyGreatSearch < Application
|
|
72
|
+
#
|
|
73
|
+
# books = Indexes::Memory.new :books do
|
|
74
|
+
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
75
|
+
# category :title
|
|
76
|
+
# end
|
|
77
|
+
#
|
|
78
|
+
# route %r{^/books$} => Search.new(books)
|
|
79
|
+
#
|
|
80
|
+
# end
|
|
81
|
+
# That's it!
|
|
82
|
+
#
|
|
83
|
+
# Now run the indexer and server:
|
|
84
|
+
# $ rake index
|
|
85
|
+
# $ rake start
|
|
86
|
+
# Run your first query:
|
|
87
|
+
# $ curl 'localhost:8080/books?query=hello server'
|
|
88
|
+
#
|
|
89
|
+
# Nice, right? Your first query!
|
|
90
|
+
#
|
|
91
|
+
# Maybe you don't find everything. We need to process the data before it goes into the index.
|
|
92
|
+
#
|
|
93
|
+
# == indexing(options = {})
|
|
94
|
+
#
|
|
95
|
+
# That's what the <tt>indexing</tt> method is for:
|
|
96
|
+
# indexing options
|
|
97
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
|
|
98
|
+
#
|
|
99
|
+
# Same thing with the search text – we need to process that as well.
|
|
100
|
+
#
|
|
101
|
+
# == searching(options = {})
|
|
102
|
+
#
|
|
103
|
+
# Analog to the indexing method, we use the <tt>searching</tt> method.
|
|
104
|
+
# searching options
|
|
105
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Searching-Configuration
|
|
106
|
+
#
|
|
107
|
+
# And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
|
|
108
|
+
#
|
|
109
|
+
# == Wiki
|
|
110
|
+
#
|
|
111
|
+
# Read more in the Wiki: http://github.com/floere/picky/wiki
|
|
112
|
+
#
|
|
113
|
+
# Have fun!
|
|
114
|
+
#
|
|
115
|
+
# == Full example
|
|
116
|
+
#
|
|
117
|
+
# Our example, fully fleshed out with indexing, querying, and weights:
|
|
118
|
+
# class MyGreatSearch < Application
|
|
119
|
+
#
|
|
120
|
+
# indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
|
121
|
+
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
|
122
|
+
# splits_text_on: /\s/,
|
|
123
|
+
# removes_characters_after_splitting: /\./,
|
|
124
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
|
125
|
+
# normalizes_words: [
|
|
126
|
+
# [/(.*)hausen/, 'hn'],
|
|
127
|
+
# [/\b(\w*)str(eet)?/, 'st']
|
|
128
|
+
# ]
|
|
129
|
+
#
|
|
130
|
+
# searching removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
|
|
131
|
+
# stopwords: /\b(and|the|of|it|in|for)\b/,
|
|
132
|
+
# splits_text_on: /[\s\/\-\,\&]+/,
|
|
133
|
+
# removes_characters_after_splitting: /\./,
|
|
134
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
|
135
|
+
# maximum_tokens: 4
|
|
136
|
+
#
|
|
137
|
+
# books = Indexes::Memory.new :books do
|
|
138
|
+
# source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
139
|
+
# category :title,
|
|
140
|
+
# qualifiers: [:t, :title, :titre],
|
|
141
|
+
# partial: Partial::Substring.new(:from => 1),
|
|
142
|
+
# similarity: Similarity::DoubleMetaphone.new(2)
|
|
143
|
+
# category :author,
|
|
144
|
+
# partial: Partial::Substring.new(:from => -2)
|
|
145
|
+
# category :isbn
|
|
146
|
+
# end
|
|
147
|
+
#
|
|
148
|
+
# route %r{^/books$} => Search.new(books) do
|
|
149
|
+
# boost [:title, :author] => +3, [:author, :title] => -1
|
|
150
|
+
# end
|
|
151
|
+
#
|
|
152
|
+
# end
|
|
153
|
+
# That's actually already a full-blown Picky App!
|
|
154
|
+
#
|
|
155
|
+
class Application
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
#
|
|
157
|
+
class << self
|
|
159
158
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
#
|
|
163
|
-
def indexing options = {}
|
|
164
|
-
Tokenizers::Index.default = Tokenizers::Index.new(options)
|
|
165
|
-
end
|
|
166
|
-
alias default_indexing indexing
|
|
159
|
+
# API
|
|
160
|
+
#
|
|
167
161
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
alias default_querying searching
|
|
175
|
-
alias querying searching
|
|
162
|
+
# Returns a configured tokenizer that
|
|
163
|
+
# is used for indexing by default.
|
|
164
|
+
#
|
|
165
|
+
def indexing options = {}
|
|
166
|
+
Tokenizers::Index.default = Tokenizers::Index.new(options)
|
|
167
|
+
end
|
|
176
168
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
169
|
+
# Returns a configured tokenizer that
|
|
170
|
+
# is used for querying by default.
|
|
171
|
+
#
|
|
172
|
+
def searching options = {}
|
|
173
|
+
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
|
174
|
+
end
|
|
180
175
|
|
|
181
|
-
|
|
182
|
-
|
|
176
|
+
# Routes.
|
|
177
|
+
#
|
|
178
|
+
delegate :route, :to => :rack_adapter
|
|
183
179
|
|
|
180
|
+
# A Picky application implements the Rack interface.
|
|
181
|
+
#
|
|
182
|
+
# Delegates to its routing to handle a request.
|
|
183
|
+
#
|
|
184
|
+
def call env
|
|
185
|
+
rack_adapter.call env
|
|
186
|
+
end
|
|
187
|
+
def rack_adapter # :nodoc:
|
|
188
|
+
@rack_adapter || reset_rack_adapter
|
|
189
|
+
end
|
|
190
|
+
def reset_rack_adapter
|
|
191
|
+
@rack_adapter = FrontendAdapters::Rack.new
|
|
192
|
+
end
|
|
184
193
|
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
end
|
|
194
|
+
# Reloads & finalizes the apps.
|
|
195
|
+
#
|
|
196
|
+
def reload
|
|
197
|
+
Loader.load_user 'app' # Sinatra app_file.
|
|
198
|
+
Loader.load_user 'app/logging' # Standard Picky logging.
|
|
199
|
+
Loader.load_user 'app/application' # Standard Picky appfile.
|
|
200
|
+
finalize_apps
|
|
201
|
+
exclaim "Application #{apps.map(&:name).join(', ')} loaded."
|
|
202
|
+
end
|
|
195
203
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
204
|
+
# Finalize the subclass as soon as it
|
|
205
|
+
# has finished loading.
|
|
206
|
+
#
|
|
207
|
+
attr_reader :apps # :nodoc:
|
|
208
|
+
def initialize_apps # :nodoc:
|
|
209
|
+
@apps ||= []
|
|
210
|
+
end
|
|
211
|
+
def inherited app # :nodoc:
|
|
212
|
+
initialize_apps
|
|
213
|
+
apps << app
|
|
214
|
+
end
|
|
215
|
+
def finalize_apps # :nodoc:
|
|
216
|
+
initialize_apps
|
|
217
|
+
apps.each &:finalize
|
|
218
|
+
end
|
|
219
|
+
# Finalizes the routes.
|
|
220
|
+
#
|
|
221
|
+
def finalize # :nodoc:
|
|
222
|
+
check
|
|
223
|
+
rack_adapter.finalize
|
|
224
|
+
end
|
|
225
|
+
# Checks app for missing things.
|
|
226
|
+
#
|
|
227
|
+
# Warns if something is missing.
|
|
228
|
+
#
|
|
229
|
+
def check # :nodoc:
|
|
230
|
+
warnings = []
|
|
231
|
+
warnings << check_external_interface
|
|
232
|
+
warn "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
|
|
233
|
+
end
|
|
234
|
+
def check_external_interface
|
|
235
|
+
"WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
|
|
236
|
+
end
|
|
229
237
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
\033[1mIndexing (default)\033[m:
|
|
233
|
-
#{Tokenizers::Index.default.indented_to_s}
|
|
238
|
+
def to_stats
|
|
239
|
+
<<-APP
|
|
240
|
+
\033[1mIndexing (default)\033[m:
|
|
241
|
+
#{Tokenizers::Index.default.indented_to_s}
|
|
234
242
|
|
|
235
|
-
\033[1mQuerying (default)\033[m:
|
|
236
|
-
#{Tokenizers::Query.default.indented_to_s}
|
|
243
|
+
\033[1mQuerying (default)\033[m:
|
|
244
|
+
#{Tokenizers::Query.default.indented_to_s}
|
|
237
245
|
|
|
238
|
-
\033[1mIndexes\033[m:
|
|
239
|
-
#{Indexes.to_s.indented_to_s}
|
|
246
|
+
\033[1mIndexes\033[m:
|
|
247
|
+
#{Indexes.to_s.indented_to_s}
|
|
240
248
|
|
|
241
|
-
\033[1mRoutes\033[m:
|
|
242
|
-
#{to_routes.indented_to_s}
|
|
243
|
-
APP
|
|
244
|
-
|
|
249
|
+
\033[1mRoutes\033[m:
|
|
250
|
+
#{to_routes.indented_to_s}
|
|
251
|
+
APP
|
|
252
|
+
end
|
|
245
253
|
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
254
|
+
def to_routes
|
|
255
|
+
rack_adapter.to_s
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def to_s # :nodoc:
|
|
259
|
+
self.name
|
|
260
|
+
end
|
|
249
261
|
|
|
250
|
-
def to_s # :nodoc:
|
|
251
|
-
self.name
|
|
252
262
|
end
|
|
253
263
|
|
|
254
264
|
end
|