picky 0.0.9 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/lib/picky/application.rb +38 -37
  2. data/lib/picky/cacher/partial/default.rb +1 -3
  3. data/lib/picky/cacher/partial/subtoken.rb +44 -18
  4. data/lib/picky/configuration/field.rb +6 -2
  5. data/lib/picky/configuration/indexes.rb +16 -7
  6. data/lib/picky/configuration/queries.rb +3 -13
  7. data/lib/picky/extensions/symbol.rb +19 -4
  8. data/lib/picky/generator.rb +9 -0
  9. data/lib/picky/helpers/measuring.rb +3 -3
  10. data/lib/picky/index/bundle.rb +5 -4
  11. data/lib/picky/index/category.rb +14 -7
  12. data/lib/picky/index/combined.rb +6 -1
  13. data/lib/picky/indexers/no_source_specified_error.rb +2 -0
  14. data/lib/picky/indexes.rb +3 -9
  15. data/lib/picky/query/allocation.rb +1 -1
  16. data/lib/picky/query/allocations.rb +2 -2
  17. data/lib/picky/rack/harakiri.rb +10 -8
  18. data/lib/picky/routing.rb +19 -21
  19. data/lib/picky/solr/schema_generator.rb +4 -4
  20. data/lib/picky/sources/base.rb +16 -4
  21. data/lib/picky/sources/csv.rb +3 -0
  22. data/lib/picky/sources/db.rb +30 -22
  23. data/lib/picky/tokenizers/base.rb +7 -5
  24. data/lib/picky/tokenizers/index.rb +5 -5
  25. data/lib/picky/tokenizers/query.rb +9 -9
  26. data/prototype_project/app/application.rb +36 -29
  27. data/prototype_project/app/db.yml +1 -1
  28. data/prototype_project/config.ru +3 -2
  29. data/spec/ext/performant_spec.rb +2 -2
  30. data/spec/lib/application_spec.rb +54 -8
  31. data/spec/lib/cacher/partial/default_spec.rb +15 -0
  32. data/spec/lib/cacher/partial/subtoken_spec.rb +54 -2
  33. data/spec/lib/extensions/symbol_spec.rb +124 -30
  34. data/spec/lib/index/bundle_partial_generation_speed_spec.rb +1 -1
  35. data/spec/lib/query/allocations_spec.rb +5 -5
  36. data/spec/lib/query/combinations_spec.rb +3 -3
  37. data/spec/lib/rack/harakiri_spec.rb +29 -0
  38. data/spec/lib/routing_spec.rb +22 -98
  39. data/spec/lib/tokenizers/index_spec.rb +1 -1
  40. data/spec/specific/speed_spec.rb +4 -5
  41. metadata +7 -3
@@ -1,5 +1,7 @@
1
1
  module Indexers
2
2
 
3
+ # Raised if no source is available on a field.
4
+ #
3
5
  class NoSourceSpecifiedException < StandardError; end
4
6
 
5
7
  end
data/lib/picky/indexes.rb CHANGED
@@ -171,16 +171,10 @@ module Indexes
171
171
  end
172
172
  end
173
173
 
174
- # Loads all index definitions.
175
- #
176
- def self.setup
177
- self.types ||= []
178
- self.type_mapping ||= {}
179
- configuration.types.each do |type|
180
- add type.generate
181
- end
182
- end
183
174
  def self.add type
175
+ self.type_mapping ||= {}
176
+ self.types ||= []
177
+
184
178
  self.type_mapping[type.name] = type
185
179
  self.types << type
186
180
  end
@@ -61,7 +61,7 @@ module Query
61
61
  # Transform the allocation into result form.
62
62
  #
63
63
  def to_result
64
- [self.result_type, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
64
+ [self.result_type, self.score, count, @combinations.to_result, self.ids] if count = self.count > 0
65
65
  end
66
66
 
67
67
  # Json representation of this allocation.
@@ -66,8 +66,8 @@ module Query
66
66
  # TODO can there be no @allocations???
67
67
  return [] if @allocations.empty?
68
68
  ids = @allocations.first.ids
69
- indexes = Array.new(ids.size) { |i| i }.sort_by { rand }
70
- indexes.first(amount).map { |i| ids[i] }
69
+ indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
70
+ indexes.first(amount).map { |id| ids[id] }
71
71
  end
72
72
 
73
73
  # This is the main method of this class that will replace ids and count.
@@ -1,22 +1,24 @@
1
- # Simple Rack Middleware to kill Unicorns after X requests.
2
- #
3
- # Use as follows in e.g. your rackup File:
4
- #
5
- # Rack::Harakiri.after = 50
6
- # use Rack::Harakiri
7
- #
8
1
  module Rack
2
+
3
+ # Simple Rack Middleware to kill Unicorns after X requests.
4
+ #
5
+ # Use as follows in e.g. your rackup File:
6
+ #
7
+ # Rack::Harakiri.after = 50
8
+ # use Rack::Harakiri
9
+ #
9
10
  class Harakiri
10
11
 
11
12
  # Set the amount of requests before the Unicorn commits Harakiri.
12
13
  #
13
14
  cattr_accessor :after
15
+ attr_reader :quit_after_requests
14
16
 
15
17
  def initialize app
16
18
  @app = app
17
19
 
18
20
  @requests = 0
19
- @quit_after_requests = @@after || 50
21
+ @quit_after_requests = self.class.after || 50
20
22
  end
21
23
 
22
24
  def call env
data/lib/picky/routing.rb CHANGED
@@ -14,14 +14,6 @@ class Routing
14
14
  @defaults = @@defaults.dup
15
15
  end
16
16
 
17
- # #
18
- # #
19
- # def define_using &block
20
- # reset_routes
21
- # instance_eval &block
22
- # routes.freeze
23
- # end
24
-
25
17
  #
26
18
  #
27
19
  def reset_routes
@@ -60,21 +52,27 @@ class Routing
60
52
 
61
53
  #
62
54
  #
63
- def route url, query, route_options = {}
64
- query.tokenizer = @defaults[:tokenizer] if @defaults[:tokenizer]
65
- routes.add_route generate_app(query, route_options), default_options(url, route_options)
55
+ def route options = {}
56
+ mappings, route_options = split options
57
+ mappings.each do |url, query|
58
+ route_one url, query, route_options
59
+ end
66
60
  end
67
- #
68
- #
69
- def live url, *indexes_and_options
70
- route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
71
- route url, Query::Live.new(*indexes_and_options), route_options
61
+ def split options
62
+ mappings = {}
63
+ route_options = {}
64
+ options.each_pair do |key, value|
65
+ if Regexp === key or String === key
66
+ mappings[key] = value
67
+ else
68
+ route_options[key] = value
69
+ end
70
+ end
71
+ [mappings, route_options]
72
72
  end
73
- #
74
- #
75
- def full url, *indexes_and_options
76
- route_options = Hash === indexes_and_options.last ? indexes_and_options.pop : {}
77
- route url, Query::Full.new(*indexes_and_options), route_options
73
+ def route_one url, query, route_options = {}
74
+ query.tokenizer = @defaults[:tokenizer] if @defaults[:tokenizer]
75
+ routes.add_route generate_app(query, route_options), default_options(url, route_options)
78
76
  end
79
77
  #
80
78
  #
@@ -54,8 +54,8 @@ module Solr
54
54
  def read_template
55
55
  template_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml.erb'
56
56
  schema = ''
57
- File.open(template_path, 'r') do |f|
58
- schema = f.read
57
+ File.open(template_path, 'r') do |file|
58
+ schema = file.read
59
59
  end
60
60
  schema
61
61
  end
@@ -64,8 +64,8 @@ module Solr
64
64
  #
65
65
  def write result
66
66
  schema_path = File.join PICKY_ROOT, 'solr', 'conf', 'schema.xml'
67
- File.open(schema_path, 'w') do |f|
68
- f << result
67
+ File.open(schema_path, 'w') do |file|
68
+ file << result
69
69
  end
70
70
  end
71
71
 
@@ -1,16 +1,28 @@
1
1
  module Sources
2
2
 
3
+ # Sources are where your data comes from.
4
+ # Harvest is the most important method as it is used always to get data.
5
+ #
3
6
  class Base
4
7
 
5
- def connect_backend
6
-
8
+ # Note: Methods listed for illustrative purposes.
9
+ #
10
+
11
+ # Yield the data (id, text for id) for the given type and field.
12
+ #
13
+ def harvest type, field
14
+ # yields nothing
7
15
  end
8
16
 
9
- def take_snapshot type
17
+ # Connects to the backend.
18
+ #
19
+ def connect_backend
10
20
 
11
21
  end
12
22
 
13
- def harvest type, field
23
+ # Take a snapshot of your data, if it is fast changing.
24
+ #
25
+ def take_snapshot type
14
26
 
15
27
  end
16
28
 
@@ -2,6 +2,9 @@ require 'csv'
2
2
 
3
3
  module Sources
4
4
 
5
+ # Describes a CSV source, a file with csv in it.
6
+ # Give it a sequence of field names and a file option with the filename.
7
+ #
5
8
  class NoCSVFileGiven < StandardError; end
6
9
 
7
10
  class CSV < Base
@@ -1,13 +1,16 @@
1
1
  module Sources
2
2
 
3
+ # Describes a database source. Just give it a select statement
4
+ # (with id in it), and a file option or the options from an AR config file.
5
+ #
3
6
  class DB < Base
4
7
 
5
8
  attr_reader :select_statement, :database, :connection_options
6
9
 
7
- def initialize select_statement, with_options = { :file => 'app/db.yml' }
10
+ def initialize select_statement, options = { :file => 'app/db.yml' }
8
11
  @select_statement = select_statement
9
12
  @database = create_database_adapter
10
- configure with_options
13
+ @options = options
11
14
  end
12
15
 
13
16
  # Get a configured Database backend.
@@ -34,7 +37,7 @@ module Sources
34
37
  #
35
38
  def configure options
36
39
  @connection_options = if filename = options[:file]
37
- File.open(File.join(PICKY_ROOT, filename)) { |f| YAML::load(f) }
40
+ File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
38
41
  else
39
42
  options
40
43
  end
@@ -44,6 +47,7 @@ module Sources
44
47
  # Connect the backend.
45
48
  #
46
49
  def connect_backend
50
+ configure @options
47
51
  return if PICKY_ENVIRONMENT.to_s == 'test' # TODO Unclean.
48
52
  raise "Database backend not configured" unless connection_options
49
53
  database.establish_connection connection_options
@@ -55,15 +59,17 @@ module Sources
55
59
  connect_backend
56
60
 
57
61
  origin = snapshot_table_name type
58
-
59
- database.connection.execute "DROP TABLE IF EXISTS #{origin}"
60
- database.connection.execute "CREATE TABLE #{origin} AS #{select_statement}"
61
- database.connection.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
62
- database.connection.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
63
-
62
+
63
+ on_database = database.connection
64
+
65
+ on_database.execute "DROP TABLE IF EXISTS #{origin}"
66
+ on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
67
+ on_database.execute "ALTER TABLE #{origin} CHANGE COLUMN id indexed_id INTEGER"
68
+ on_database.execute "ALTER TABLE #{origin} ADD COLUMN id INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
69
+
64
70
  # Execute any special queries this type needs executed.
65
71
  #
66
- database.connection.execute type.after_indexing if type.after_indexing
72
+ on_database.execute type.after_indexing if type.after_indexing
67
73
  end
68
74
 
69
75
  # Counts all the entries that are used for the index.
@@ -86,6 +92,8 @@ module Sources
86
92
  # Example:
87
93
  # "SELECT indexed_id, value FROM bla_table st WHERE kind = 'bla'"
88
94
  #
95
+ # TODO Perhaps it should be just harvest field.
96
+ #
89
97
  def harvest type, field
90
98
  connect_backend
91
99
 
@@ -98,24 +106,12 @@ module Sources
98
106
  end
99
107
  end
100
108
 
101
- # Override in subclasses.
102
- #
103
- def chunksize
104
- 25_000
105
- end
106
-
107
109
  # Gets database from the backend.
108
110
  #
109
111
  def get_data type, field, offset
110
112
  database.connection.execute harvest_statement_with_offset(type, field, offset)
111
113
  end
112
114
 
113
- # Base harvest statement for dbs.
114
- #
115
- def harvest_statement type, field
116
- "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
117
- end
118
-
119
115
  # Builds a harvest statement for getting data to index.
120
116
  #
121
117
  # TODO Use the adapter for this.
@@ -128,6 +124,18 @@ module Sources
128
124
  "#{statement} st.id > #{offset} LIMIT #{chunksize}"
129
125
  end
130
126
 
127
+ # Base harvest statement for dbs.
128
+ #
129
+ def harvest_statement type, field
130
+ "SELECT indexed_id, #{field.name} FROM #{snapshot_table_name(type)} st"
131
+ end
132
+
133
+ # Override in subclasses.
134
+ #
135
+ def chunksize
136
+ 25_000
137
+ end
138
+
131
139
  end
132
140
 
133
141
  end
@@ -1,5 +1,7 @@
1
1
  module Tokenizers
2
2
 
3
+ # Defines tokenizing processes used both in indexing and querying.
4
+ #
3
5
  class Base
4
6
 
5
7
  # Stopwords.
@@ -21,7 +23,7 @@ module Tokenizers
21
23
 
22
24
  # Contraction.
23
25
  #
24
- def self.contract_expressions what, to_what
26
+ def self.contracts_expressions what, to_what
25
27
  define_method :contract do |text|
26
28
  text.gsub! what, to_what
27
29
  end
@@ -32,7 +34,7 @@ module Tokenizers
32
34
  #
33
35
  # TODO Should there be a legal?
34
36
  #
35
- def self.illegal_characters regexp
37
+ def self.removes_characters regexp
36
38
  define_method :remove_illegals do |text|
37
39
  text.gsub! regexp, ''
38
40
  end
@@ -41,7 +43,7 @@ module Tokenizers
41
43
 
42
44
  # Splitting.
43
45
  #
44
- def self.split_text_on regexp
46
+ def self.splits_text_on regexp
45
47
  define_method :split do |text|
46
48
  text.split regexp
47
49
  end
@@ -50,7 +52,7 @@ module Tokenizers
50
52
 
51
53
  # Normalizing.
52
54
  #
53
- def self.normalize_words regexp_replaces
55
+ def self.normalizes_words regexp_replaces
54
56
  define_method :normalize_with_patterns do |text|
55
57
  regexp_replaces.each do |regex, replace|
56
58
  # This should be sufficient
@@ -65,7 +67,7 @@ module Tokenizers
65
67
 
66
68
  # Illegal after normalizing.
67
69
  #
68
- def self.illegal_characters_after_splitting regexp
70
+ def self.removes_characters_after_splitting regexp
69
71
  define_method :remove_after_normalizing_illegals do |text|
70
72
  text.gsub! regexp, ''
71
73
  end
@@ -9,12 +9,12 @@ module Tokenizers
9
9
 
10
10
  # Default handling definitions. Override in config.
11
11
  #
12
- illegal_characters(//)
12
+ removes_characters(//)
13
13
  stopwords(//)
14
- contract_expressions(//, '')
15
- split_text_on(/\s/)
16
- normalize_words([])
17
- illegal_characters_after_splitting(//)
14
+ contracts_expressions(//, '')
15
+ splits_text_on(/\s/)
16
+ normalizes_words([])
17
+ removes_characters_after_splitting(//)
18
18
 
19
19
  # Default indexing preprocessing hook.
20
20
  #
@@ -4,12 +4,12 @@ module Tokenizers
4
4
 
5
5
  # There are a few class methods that you can use to configure how a query works.
6
6
  #
7
- # illegal_characters regexp
7
+ # removes_characters regexp
8
8
  # illegal_after_normalizing regexp
9
9
  # stopwords regexp
10
- # contract_expressions regexp, to_string
11
- # split_text_on regexp
12
- # normalize_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
10
+ # contracts_expressions regexp, to_string
11
+ # splits_text_on regexp
12
+ # normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
13
13
  #
14
14
  class Query < Base
15
15
 
@@ -17,12 +17,12 @@ module Tokenizers
17
17
 
18
18
  # Default query tokenizer behaviour. Override in config.
19
19
  #
20
- illegal_characters(//)
20
+ removes_characters(//)
21
21
  stopwords(//)
22
- contract_expressions(//, '')
23
- split_text_on(/\s/)
24
- normalize_words([])
25
- illegal_characters_after_splitting(//)
22
+ contracts_expressions(//, '')
23
+ splits_text_on(/\s/)
24
+ normalizes_words([])
25
+ removes_characters_after_splitting(//)
26
26
 
27
27
  def preprocess text
28
28
  remove_illegals text # Remove illegal characters
@@ -4,43 +4,50 @@
4
4
  #
5
5
  # Have fun with Picky!
6
6
  #
7
- class PickySearch < Application # The App Constant needs to be identical in application.ru.
7
+ class PickySearch < Application # The App Constant needs to be identical in config.ru.
8
8
 
9
9
  # This is an example with books that you can adapt.
10
10
  #
11
11
  # Note: Much more is possible, but let's start out super easy.
12
12
  #
13
- # Ask me if you have questions or specific requests!
13
+ # Ask me if you have questions or specific requests.
14
14
  #
15
15
 
16
- indexes do
17
- illegal_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
18
- stopwords(/\b(and|the|of|it|in|for)\b/)
19
- split_text_on(/[\s\/\-\"\&\.]/)
16
+ indexing.removes_characters(/[^a-zA-Z0-9\s\/\-\"\&\.]/)
17
+ indexing.stopwords(/\b(and|the|of|it|in|for)\b/)
18
+ indexing.splits_text_on(/[\s\/\-\"\&\.]/)
20
19
 
21
- add_index :books,
22
- Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
23
- field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
24
- field(:author),
25
- field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
26
- end
20
+ books_index = index :books,
21
+ Sources::DB.new('SELECT id, title, author, isbn13 as isbn FROM books', :file => 'app/db.yml'),
22
+ field(:title, :similarity => Similarity::DoubleLevenshtone.new(3)), # Up to three similar title word indexed.
23
+ field(:author),
24
+ field(:isbn, :partial => Partial::None.new) # Partially searching on an ISBN makes not much sense.
27
25
 
28
- queries do
29
- maximum_tokens 5
30
- # Note that Picky needs the following characters to
31
- # pass through, as they are control characters: *"~:
32
- #
33
- illegal_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
34
- stopwords(/\b(and|the|of|it|in|for)\b/)
35
- split_text_on(/[\s\/\-\,\&]+/)
36
-
37
- # The example defines two queries that use the same index(es).
38
- #
39
- # A Full query returns ids, combinations, and counts.
40
- # A Live query does return all that Full returns, without ids.
41
- #
42
- route %r{^/books/full}, Query::Full.new(Indexes[:books])
43
- route %r{^/books/live}, Query::Live.new(Indexes[:books])
44
- end
26
+ # Defines the maximum tokens (words) that pass through to the engine.
27
+ #
28
+ querying.maximum_tokens 5
29
+
30
+ # Note that Picky needs the following characters to
31
+ # pass through, as they are control characters: *"~:
32
+ #
33
+ querying.removes_characters(/[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/)
34
+ querying.stopwords(/\b(and|the|of|it|in|for)\b/)
35
+ querying.splits_text_on(/[\s\/\-\,\&]+/)
36
+
37
+ # The example defines two queries that use the same index(es).
38
+ #
39
+ # A Full query returns ids, combinations, and counts.
40
+ # A Live query does return all that Full returns, without ids.
41
+ #
42
+ # Note: You can pass a query multiple indexes and it will combine them.
43
+ #
44
+ full_books = Query::Full.new books_index
45
+ live_books = Query::Live.new books_index
46
+
47
+ # Routing is simple.
48
+ # A path regexp pointing to a query that will be run.
49
+ #
50
+ route %r{^/books/full} => full_books
51
+ route %r{^/books/live} => live_books
45
52
 
46
53
  end