picky 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/picky CHANGED
@@ -2,13 +2,13 @@
2
2
  #
3
3
 
4
4
  begin
5
- require 'picky/generator'
5
+ require 'picky/cli'
6
6
  rescue LoadError => e
7
7
  require 'rubygems'
8
8
  picky_path = File.expand_path '../../lib', __FILE__
9
9
  $:.unshift(picky_path) if File.directory?(picky_path) && !$:.include?(picky_path)
10
- require 'picky/generator'
10
+ require 'picky/cli'
11
11
  end
12
12
 
13
- generator = Picky::Generator.new
14
- generator.generate ARGV
13
+ cli = Picky::CLI.new
14
+ cli.execute *ARGV
@@ -9,7 +9,7 @@
9
9
  # # Your configuration goes here.
10
10
  # end
11
11
  # The generator
12
- # $ picky project project_name
12
+ # $ picky generate unicorn_server project_name
13
13
  # will generate an example <tt>project_name/app/application.rb</tt> file for you
14
14
  # with some example code inside.
15
15
  #
data/lib/picky/cli.rb ADDED
@@ -0,0 +1,58 @@
1
+ module Picky
2
+
3
+ # A very simple CLI selector.
4
+ #
5
+ class CLI # :nodoc:all
6
+
7
+ def self.mapping
8
+ @@mapping
9
+ end
10
+
11
+ # Execute a command.
12
+ #
13
+ # Note: By default, help is displayed. I.e. when no command is given.
14
+ #
15
+ def execute selector = nil, *args
16
+ executor_class, *params = selector && @@mapping[selector.to_sym] || Help
17
+ executor = executor_class.new
18
+ executor.execute selector, args, params
19
+ end
20
+
21
+ class Base
22
+ def usage name, params
23
+ puts "Usage\n picky #{name} #{params_to_s(params)}"
24
+ end
25
+ def params_to_s params
26
+ params.map { |param| "<#{param}>" }.join(' ') if params
27
+ end
28
+ end
29
+ class Generate < Base
30
+ def execute name, args, params
31
+ system "picky-generate #{args.join(' ')}"
32
+ end
33
+ end
34
+ class Help < Base
35
+ # Displays usage information.
36
+ #
37
+ def execute name, args, params
38
+ commands = Picky::CLI.mapping.map do |command, object_and_params|
39
+ _, *params = object_and_params
40
+ " picky #{command} #{params_to_s(params)}"
41
+ end.join(?\n)
42
+
43
+ puts "Possible commands:\n" + commands
44
+ end
45
+ end
46
+
47
+ # Maps commands to the other gem's command.
48
+ #
49
+ # TODO Add optional params.
50
+ #
51
+ @@mapping = {
52
+ :generate => [Generate, 'thing_to_generate: e.g. "unicorn_server"', :parameters],
53
+ :help => [Help]
54
+ }
55
+
56
+ end
57
+
58
+ end
@@ -35,6 +35,8 @@ class Array # :nodoc:all
35
35
  def sort_by_levenshtein! from
36
36
  from = from.to_s
37
37
  sort! do |this, that|
38
+ # TODO Cache for speed?
39
+ #
38
40
  Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
39
41
  end
40
42
  end
data/lib/picky/loader.rb CHANGED
@@ -260,10 +260,6 @@ module Loader # :nodoc:all
260
260
  #
261
261
  # load_relative 'solr/schema_generator'
262
262
  load_relative 'cores'
263
-
264
- # Load generation.
265
- #
266
- load_relative 'generator'
267
263
  end
268
264
 
269
265
  end
@@ -47,6 +47,8 @@ module Query
47
47
  # [c,e]
48
48
  # ]
49
49
  #
50
+ # TODO Use transpose?
51
+ #
50
52
  expanded_combinations = expanded_combinations.shift.zip *expanded_combinations
51
53
 
52
54
  # Wrap into a real combination.
@@ -38,7 +38,7 @@ module Query
38
38
  # Note: Cache this if more complicated weighings become necessary.
39
39
  #
40
40
  def score combinations
41
- # TODO Beautify?
41
+ # TODO Beautify? Use categories for weights?
42
42
  #
43
43
  # weight_for combinations.map(&:category).clustered_uniq_fast.map!(&:name)
44
44
 
@@ -107,22 +107,23 @@ module Sources
107
107
 
108
108
  # Harvests the data to index in chunks.
109
109
  #
110
- def harvest type, category
110
+ def harvest type, category, &block
111
111
  connect_backend
112
112
 
113
113
  (0..count(type)).step(chunksize) do |offset|
114
- get_data(type, category, offset).each do |indexed_id, text|
115
- next unless text
116
- text.force_encoding 'utf-8' # TODO Still needed?
117
- yield indexed_id, text
118
- end
114
+ get_data type, category, offset, &block
119
115
  end
120
116
  end
121
117
 
122
118
  # Gets the data from the backend.
123
119
  #
124
- def get_data type, category, offset # :nodoc:
125
- database.connection.execute harvest_statement_with_offset(type, category, offset)
120
+ def get_data type, category, offset, &block # :nodoc:
121
+ select_statement = harvest_statement_with_offset(type, category, offset)
122
+ database.connection.execute(select_statement).each do |indexed_id, text|
123
+ next unless text
124
+ text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
125
+ yield indexed_id, text
126
+ end
126
127
  end
127
128
 
128
129
  # Builds a harvest statement for getting data to index.
@@ -4,8 +4,9 @@ module Tokenizers # :nodoc:all
4
4
  #
5
5
  class Base
6
6
 
7
- # TODO use frozen EMPTY_STRING for ''
7
+ # TODO Move EMPTY_STRING top level.
8
8
  #
9
+ EMPTY_STRING = ''.freeze
9
10
 
10
11
  # Stopwords.
11
12
  #
@@ -13,7 +14,7 @@ module Tokenizers # :nodoc:all
13
14
  @remove_stopwords_regexp = regexp
14
15
  end
15
16
  def remove_stopwords text
16
- text.gsub! @remove_stopwords_regexp, '' if @remove_stopwords_regexp
17
+ text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
17
18
  text
18
19
  end
19
20
  @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
@@ -30,7 +31,7 @@ module Tokenizers # :nodoc:all
30
31
  @removes_characters_regexp = regexp
31
32
  end
32
33
  def remove_illegals text
33
- text.gsub! @removes_characters_regexp, '' if @removes_characters_regexp
34
+ text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
34
35
  text
35
36
  end
36
37
 
@@ -66,7 +67,7 @@ module Tokenizers # :nodoc:all
66
67
  @removes_characters_after_splitting_regexp = regexp
67
68
  end
68
69
  def remove_after_normalizing_illegals text
69
- text.gsub! @removes_characters_after_splitting_regexp, '' if @removes_characters_after_splitting_regexp
70
+ text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
70
71
  end
71
72
 
72
73
  # Substitute Characters with this substituter.
@@ -1,4 +1,5 @@
1
1
  module Tokenizers
2
+
2
3
  # The base indexing tokenizer.
3
4
  #
4
5
  # Override in indexing subclasses and define in configuration.
@@ -15,11 +16,10 @@ module Tokenizers
15
16
  # Default indexing preprocessing hook.
16
17
  #
17
18
  # Does:
18
- # 1. Umlaut substitution.
19
- # 2. Downcasing.
20
- # 3. Remove illegal expressions.
21
- # 4. Contraction.
22
- # 5. Remove non-single stopwords. (Stopwords that occur with other words)
19
+ # 1. Character substitution.
20
+ # 2. Downcasing.
21
+ # 3. Remove illegal expressions.
22
+ # 4. Remove non-single stopwords. (Stopwords that occur with other words)
23
23
  #
24
24
  def preprocess text
25
25
  text = substitute_characters text
@@ -27,7 +27,7 @@ module Tokenizers
27
27
  remove_illegals text
28
28
  # we do not remove single stopwords for an entirely different
29
29
  # reason than in the query tokenizer.
30
- # An indexed thing with just name "UND" (a stopword) should not lose its name.
30
+ # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
31
31
  #
32
32
  remove_non_single_stopwords text
33
33
  text
@@ -36,8 +36,8 @@ module Tokenizers
36
36
  # Default indexing pretokenizing hook.
37
37
  #
38
38
  # Does:
39
- # 1. Split the text into words.
40
- # 2. Normalize each word.
39
+ # 1. Split the text into words.
40
+ # 2. Normalize each word.
41
41
  #
42
42
  # TODO Rename into wordize? Or somesuch?
43
43
  #
@@ -60,7 +60,7 @@ module Tokenizers
60
60
  #
61
61
  # Override in subclasses to redefine behaviour.
62
62
  #
63
- # TODO Make parametrizable! reject { |token| }
63
+ # TODO TODO TODO Make parametrizable! reject { |token| }
64
64
  #
65
65
  def reject tokens
66
66
  tokens.reject! &:blank?
@@ -14,7 +14,6 @@ namespace :server do
14
14
  # desc "Start the unicorns. (Wehee!)"
15
15
  task :start => :framework do
16
16
  chdir_to_root
17
- # Rake::Task[:"solr:start"].invoke # TODO Move to better place.
18
17
  daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
19
18
  command = "export PICKY_ENV=#{PICKY_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
20
19
  puts "Running \`#{command}\`."
@@ -24,7 +23,6 @@ namespace :server do
24
23
  # desc "Stop the unicorns. (Blam!)"
25
24
  task :stop => :framework do
26
25
  `kill -QUIT #{current_pid}` if current_pid
27
- # Rake::Task[:"solr:stop"].invoke # TODO Move to better place.
28
26
  end
29
27
 
30
28
  # desc "Restart the unicorns."
@@ -4,6 +4,7 @@ require 'spec_helper'
4
4
  describe Rack::Harakiri do
5
5
  before(:each) do
6
6
  @app = stub :app
7
+ Process.stub! :kill # not taking any chances
7
8
  end
8
9
  context "defaults" do
9
10
  before(:each) do
@@ -17,13 +17,33 @@ describe Sources::DB do
17
17
  end
18
18
 
19
19
  describe "get_data" do
20
- it "delegates" do
21
- type = stub :type, :name => :some_type
22
- category = stub :category, :from => :some_category
23
-
24
- @connection.should_receive(:execute).once.with 'SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000'
25
-
26
- @source.get_data type, category, :some_offset
20
+ before(:each) do
21
+ @type = stub :type, :name => :some_type
22
+ @category = stub :category, :from => :some_category
23
+ end
24
+ context 'no data' do
25
+ it "delegates to the connection" do
26
+
27
+ @connection.should_receive(:execute).
28
+ once.
29
+ with('SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000').
30
+ and_return []
31
+
32
+ @source.get_data @type, @category, :some_offset
33
+ end
34
+ end
35
+ context 'with data' do
36
+ it 'yields to the caller' do
37
+ @connection.should_receive(:execute).
38
+ any_number_of_times.
39
+ with('SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000').
40
+ and_return [[1, 'text']]
41
+
42
+ @source.get_data @type, @category, :some_offset do |id, text|
43
+ id.should == 1
44
+ text.should == 'text'
45
+ end
46
+ end
27
47
  end
28
48
  end
29
49
 
@@ -67,48 +87,22 @@ describe Sources::DB do
67
87
  end
68
88
  end
69
89
 
70
- # TODO Redo.
71
- #
72
- # describe "harvest" do
73
- # before(:each) do
74
- # @source.stub! :harvest_statement_with_offset
75
- # end
76
- # context 'expectations' do
77
- # before(:each) do
78
- # @connection.stub! :execute => []
79
- # @connection.stub! :select_value
80
- # end
81
- # after(:each) do
82
- # @source.harvest :type_name, :some_field
83
- # end
84
- # context "with WHERE" do
85
- # before(:each) do
86
- # @source.stub! :select_statement => 'bla WHERE blu'
87
- # end
88
- # it "should connect" do
89
- # @source.should_receive(:connect_backend).once.with
90
- # end
91
- # it "should call the harvest statement with an offset" do
92
- # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
93
- # end
94
- # end
95
- # context "without WHERE" do
96
- # it "should connect" do
97
- # @adapter.should_receive(:connect).once.with
98
- # end
99
- # it "should call the harvest statement with an offset" do
100
- # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
101
- # end
102
- # end
103
- # end
104
- # context 'returns' do
105
- # it "should return whatever the execute statement returns" do
106
- # @connection.stub! :execute => :some_result
107
- #
108
- # @source.harvest(:some_type, :some_field).should == :some_result
109
- # end
110
- # end
111
- # end
90
+ describe 'harvest' do
91
+ before(:each) do
92
+ @type = stub :type, :name => :some_type
93
+ @category = stub :category, :name => :some_category
94
+
95
+ @source.should_receive(:get_data).any_number_of_times.and_return [[:some_id, 'some_text']].cycle
96
+ @source.stub! :count => 17
97
+ end
98
+ it 'calls connect_backend' do
99
+ @source.should_receive(:connect_backend).once.with()
100
+
101
+ @source.harvest @type, @category do |id, text|
102
+ p [id, text]
103
+ end
104
+ end
105
+ end
112
106
 
113
107
  describe "harvest_statement_with_offset" do
114
108
  before(:each) do
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
+ - 1
7
8
  - 0
8
- - 0
9
- version: 1.0.0
9
+ version: 1.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-09 00:00:00 +01:00
17
+ date: 2010-12-12 00:00:00 +01:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -63,6 +63,7 @@ files:
63
63
  - lib/picky/cacher/weights_generator.rb
64
64
  - lib/picky/calculations/location.rb
65
65
  - lib/picky/character_substituters/west_european.rb
66
+ - lib/picky/cli.rb
66
67
  - lib/picky/configuration/index.rb
67
68
  - lib/picky/cores.rb
68
69
  - lib/picky/ext/maybe_compile.rb
@@ -72,7 +73,6 @@ files:
72
73
  - lib/picky/extensions/module.rb
73
74
  - lib/picky/extensions/object.rb
74
75
  - lib/picky/extensions/symbol.rb
75
- - lib/picky/generator.rb
76
76
  - lib/picky/helpers/cache.rb
77
77
  - lib/picky/helpers/gc.rb
78
78
  - lib/picky/helpers/measuring.rb
@@ -147,19 +147,6 @@ files:
147
147
  - lib/tasks/statistics.rake
148
148
  - lib/tasks/try.rake
149
149
  - lib/picky/ext/ruby19/performant.c
150
- - project_prototype/app/application.rb
151
- - project_prototype/app/db.yml
152
- - project_prototype/app/library.csv
153
- - project_prototype/app/logging.rb
154
- - project_prototype/app/README
155
- - project_prototype/config.ru
156
- - project_prototype/Gemfile
157
- - project_prototype/log/README
158
- - project_prototype/Rakefile
159
- - project_prototype/script/console
160
- - project_prototype/tmp/pids/README
161
- - project_prototype/tmp/README
162
- - project_prototype/unicorn.ru
163
150
  - spec/ext/performant_spec.rb
164
151
  - spec/lib/application_spec.rb
165
152
  - spec/lib/cacher/cacher_strategy_spec.rb
@@ -1,198 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'fileutils'
4
-
5
- module Picky
6
-
7
- # Thrown when no generator for the command
8
- # picky <command> <options>
9
- # is found.
10
- #
11
- class NoGeneratorError < StandardError # :nodoc:all
12
-
13
- def initialize generator
14
- super usage + possible_commands(generator.types)
15
- end
16
-
17
- def usage
18
- "\nUsage:\n" +
19
- "picky <command> <params>\n" +
20
- ?\n
21
- end
22
-
23
- def possible_commands types
24
- "Possible commands:\n" +
25
- types.map do |name, klass_params|
26
- result = "picky #{name}"
27
- _, params = *klass_params
28
- result << ' ' << [*params].map { |param| "<#{param}>" }.join(' ') if params
29
- result
30
- end.join(?\n) + ?\n
31
- end
32
-
33
- end
34
-
35
- # This is a very simple project generator.
36
- # Not at all like Padrino's or Rails'.
37
- # (No diss, just by way of a faster explanation)
38
- #
39
- # Basically copies a prototype project into a newly generated directory.
40
- #
41
- class Generator # :nodoc:all
42
-
43
- attr_reader :types
44
-
45
- def initialize
46
- @types = {
47
- project: [Project, :project_name]
48
- }
49
- end
50
-
51
- # Run the generators with this command.
52
- #
53
- # This will "route" the commands to the right specific generator.
54
- #
55
- def generate args
56
- generator = generator_for *args
57
- generator.generate
58
- end
59
-
60
- #
61
- #
62
- def generator_for identifier, *args
63
- generator_info = types[identifier.to_sym]
64
- raise NoGeneratorError.new(self) unless generator_info
65
- generator_class = generator_info.first
66
- generator_for_class generator_class, identifier, *args
67
- end
68
-
69
- #
70
- #
71
- def generator_for_class klass, *args
72
- klass.new *args
73
- end
74
-
75
- # Generates a new Picky project.
76
- #
77
- # Example:
78
- # > picky project my_lovely_project
79
- #
80
- class Project
81
-
82
- attr_reader :name, :project_prototype_basedir
83
-
84
- def initialize identifier, name, *args
85
- @name = name
86
- @project_prototype_basedir = File.expand_path '../../../project_prototype', __FILE__
87
- end
88
-
89
- #
90
- #
91
- def generate
92
- exclaim "Setting up Picky project \"#{name}\"."
93
- create_target_directory
94
- copy_all_files
95
- exclaim "\"#{name}\" is a great project name! Have fun :)\n"
96
- exclaim ""
97
- exclaim "Next steps:"
98
- exclaim "1. cd #{name}"
99
- exclaim "2. bundle install"
100
- exclaim "3. rake index"
101
- exclaim "4. rake start"
102
- exclaim "5. rake # (optional) shows you where Picky needs input from you"
103
- exclaim " # if you want to define your own search."
104
- end
105
-
106
- #
107
- #
108
- def create_target_directory
109
- if File.exists?(target_directory)
110
- exists target_directory
111
- else
112
- FileUtils.mkdir target_directory
113
- created target_directory
114
- end
115
- end
116
-
117
- #
118
- #
119
- def copy_all_files
120
- all_prototype_files.each do |filename|
121
- next if filename.match(/\.textile$/)
122
- copy_single_file filename
123
- end
124
- end
125
-
126
- #
127
- #
128
- def target_filename_for filename
129
- filename.gsub(%r{#{project_prototype_basedir}}, target_directory)
130
- end
131
- #
132
- #
133
- def copy_single_file filename
134
- target = target_filename_for filename
135
- if File.exists? target
136
- exists target
137
- else
138
- smart_copy filename, target
139
- end
140
- end
141
-
142
- # Well, "smart" ;)
143
- #
144
- def smart_copy filename, target
145
- # p "Trying to copy #{filename} -> #{target}"
146
- FileUtils.copy_file filename, target
147
- created target
148
- rescue Errno::EISDIR
149
- # p "EISDIR #{filename} -> #{target}"
150
- FileUtils.rm target
151
- FileUtils.mkdir_p target unless Dir.exists?(target)
152
- created target
153
- rescue Errno::EEXIST
154
- # p "EEXIST #{filename} -> #{target}"
155
- exists target
156
- rescue Errno::ENOTDIR
157
- # p "ENOTDIR #{filename} -> #{target}"
158
- FileUtils.mkdir_p File.dirname(target) rescue nil
159
- retry
160
- rescue Errno::ENOENT => e
161
- # p "ENOENT #{filename} -> #{target}"
162
- if File.exists? filename
163
- FileUtils.mkdir_p File.dirname(target)
164
- retry
165
- else
166
- raise e
167
- end
168
- end
169
-
170
- #
171
- #
172
- def all_prototype_files
173
- Dir[File.join(project_prototype_basedir, '**', '*')]
174
- end
175
-
176
- #
177
- #
178
- def target_directory
179
- File.expand_path name, Dir.pwd
180
- end
181
-
182
- def created entry
183
- exclaim "#{entry} \x1b[32mcreated\x1b[m."
184
- end
185
-
186
- def exists entry
187
- exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
188
- end
189
-
190
- def exclaim something
191
- puts something
192
- end
193
-
194
- end
195
-
196
- end
197
-
198
- end
@@ -1,30 +0,0 @@
1
- source :gemcutter
2
-
3
- # Gems required by Picky.
4
- #
5
- gem 'picky', '~> 1.0.0'
6
- gem 'rake'
7
- gem 'bundler'
8
- gem 'rack', '~> 1.2.1'
9
- gem 'rack-mount', '~> 0.6.9'
10
- gem 'text', '~> 0.2.0'
11
- gem 'yajl-ruby', '~> 0.7.8', :require => 'yajl'
12
-
13
- # Should be optional, but isn't yet. Sorry.
14
- #
15
- gem 'activerecord', '~> 2.3.8', :require => 'active_record'
16
-
17
-
18
-
19
-
20
- # Optional. Makes rack faster.
21
- #
22
- gem 'rack_fast_escape', '2009.06.24'
23
-
24
- # Optional. Use your preferred web server.
25
- #
26
- gem 'unicorn'
27
-
28
- # Optional. Use your preferred database adapter.
29
- #
30
- # gem 'mysql'
@@ -1,11 +0,0 @@
1
- require 'picky-tasks'
2
-
3
- desc "Finds where Picky still needs input from you."
4
- task :todo do
5
- if system "grep -e 'TO#{}DO.*' -n --color=always -R *"
6
- puts "Picky needs a bit of input from you there. Thanks."
7
- else
8
- puts "Picky seems to be fine (no TO#{}DOs found)."
9
- end
10
- end
11
- task :default => :todo
@@ -1,5 +0,0 @@
1
- All your application definitions go here.
2
-
3
- /app needs a few files defined by picky:
4
- * application.rb - how to handle indexing, queries, routing.
5
- * logging.rb