picky 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/picky CHANGED
@@ -2,13 +2,13 @@
2
2
  #
3
3
 
4
4
  begin
5
- require 'picky/generator'
5
+ require 'picky/cli'
6
6
  rescue LoadError => e
7
7
  require 'rubygems'
8
8
  picky_path = File.expand_path '../../lib', __FILE__
9
9
  $:.unshift(picky_path) if File.directory?(picky_path) && !$:.include?(picky_path)
10
- require 'picky/generator'
10
+ require 'picky/cli'
11
11
  end
12
12
 
13
- generator = Picky::Generator.new
14
- generator.generate ARGV
13
+ cli = Picky::CLI.new
14
+ cli.execute *ARGV
@@ -9,7 +9,7 @@
9
9
  # # Your configuration goes here.
10
10
  # end
11
11
  # The generator
12
- # $ picky project project_name
12
+ # $ picky generate unicorn_server project_name
13
13
  # will generate an example <tt>project_name/app/application.rb</tt> file for you
14
14
  # with some example code inside.
15
15
  #
data/lib/picky/cli.rb ADDED
@@ -0,0 +1,58 @@
1
+ module Picky
2
+
3
+ # A very simple CLI selector.
4
+ #
5
+ class CLI # :nodoc:all
6
+
7
+ def self.mapping
8
+ @@mapping
9
+ end
10
+
11
+ # Execute a command.
12
+ #
13
+ # Note: By default, help is displayed. I.e. when no command is given.
14
+ #
15
+ def execute selector = nil, *args
16
+ executor_class, *params = selector && @@mapping[selector.to_sym] || Help
17
+ executor = executor_class.new
18
+ executor.execute selector, args, params
19
+ end
20
+
21
+ class Base
22
+ def usage name, params
23
+ puts "Usage\n picky #{name} #{params_to_s(params)}"
24
+ end
25
+ def params_to_s params
26
+ params.map { |param| "<#{param}>" }.join(' ') if params
27
+ end
28
+ end
29
+ class Generate < Base
30
+ def execute name, args, params
31
+ system "picky-generate #{args.join(' ')}"
32
+ end
33
+ end
34
+ class Help < Base
35
+ # Displays usage information.
36
+ #
37
+ def execute name, args, params
38
+ commands = Picky::CLI.mapping.map do |command, object_and_params|
39
+ _, *params = object_and_params
40
+ " picky #{command} #{params_to_s(params)}"
41
+ end.join(?\n)
42
+
43
+ puts "Possible commands:\n" + commands
44
+ end
45
+ end
46
+
47
+ # Maps commands to the other gem's command.
48
+ #
49
+ # TODO Add optional params.
50
+ #
51
+ @@mapping = {
52
+ :generate => [Generate, 'thing_to_generate: e.g. "unicorn_server"', :parameters],
53
+ :help => [Help]
54
+ }
55
+
56
+ end
57
+
58
+ end
@@ -35,6 +35,8 @@ class Array # :nodoc:all
35
35
  def sort_by_levenshtein! from
36
36
  from = from.to_s
37
37
  sort! do |this, that|
38
+ # TODO Cache for speed?
39
+ #
38
40
  Text::Levenshtein.distance(this.to_s, from) <=> Text::Levenshtein.distance(that.to_s, from)
39
41
  end
40
42
  end
data/lib/picky/loader.rb CHANGED
@@ -260,10 +260,6 @@ module Loader # :nodoc:all
260
260
  #
261
261
  # load_relative 'solr/schema_generator'
262
262
  load_relative 'cores'
263
-
264
- # Load generation.
265
- #
266
- load_relative 'generator'
267
263
  end
268
264
 
269
265
  end
@@ -47,6 +47,8 @@ module Query
47
47
  # [c,e]
48
48
  # ]
49
49
  #
50
+ # TODO Use transpose?
51
+ #
50
52
  expanded_combinations = expanded_combinations.shift.zip *expanded_combinations
51
53
 
52
54
  # Wrap into a real combination.
@@ -38,7 +38,7 @@ module Query
38
38
  # Note: Cache this if more complicated weighings become necessary.
39
39
  #
40
40
  def score combinations
41
- # TODO Beautify?
41
+ # TODO Beautify? Use categories for weights?
42
42
  #
43
43
  # weight_for combinations.map(&:category).clustered_uniq_fast.map!(&:name)
44
44
 
@@ -107,22 +107,23 @@ module Sources
107
107
 
108
108
  # Harvests the data to index in chunks.
109
109
  #
110
- def harvest type, category
110
+ def harvest type, category, &block
111
111
  connect_backend
112
112
 
113
113
  (0..count(type)).step(chunksize) do |offset|
114
- get_data(type, category, offset).each do |indexed_id, text|
115
- next unless text
116
- text.force_encoding 'utf-8' # TODO Still needed?
117
- yield indexed_id, text
118
- end
114
+ get_data type, category, offset, &block
119
115
  end
120
116
  end
121
117
 
122
118
  # Gets the data from the backend.
123
119
  #
124
- def get_data type, category, offset # :nodoc:
125
- database.connection.execute harvest_statement_with_offset(type, category, offset)
120
+ def get_data type, category, offset, &block # :nodoc:
121
+ select_statement = harvest_statement_with_offset(type, category, offset)
122
+ database.connection.execute(select_statement).each do |indexed_id, text|
123
+ next unless text
124
+ text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
125
+ yield indexed_id, text
126
+ end
126
127
  end
127
128
 
128
129
  # Builds a harvest statement for getting data to index.
@@ -4,8 +4,9 @@ module Tokenizers # :nodoc:all
4
4
  #
5
5
  class Base
6
6
 
7
- # TODO use frozen EMPTY_STRING for ''
7
+ # TODO Move EMPTY_STRING top level.
8
8
  #
9
+ EMPTY_STRING = ''.freeze
9
10
 
10
11
  # Stopwords.
11
12
  #
@@ -13,7 +14,7 @@ module Tokenizers # :nodoc:all
13
14
  @remove_stopwords_regexp = regexp
14
15
  end
15
16
  def remove_stopwords text
16
- text.gsub! @remove_stopwords_regexp, '' if @remove_stopwords_regexp
17
+ text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
17
18
  text
18
19
  end
19
20
  @@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
@@ -30,7 +31,7 @@ module Tokenizers # :nodoc:all
30
31
  @removes_characters_regexp = regexp
31
32
  end
32
33
  def remove_illegals text
33
- text.gsub! @removes_characters_regexp, '' if @removes_characters_regexp
34
+ text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
34
35
  text
35
36
  end
36
37
 
@@ -66,7 +67,7 @@ module Tokenizers # :nodoc:all
66
67
  @removes_characters_after_splitting_regexp = regexp
67
68
  end
68
69
  def remove_after_normalizing_illegals text
69
- text.gsub! @removes_characters_after_splitting_regexp, '' if @removes_characters_after_splitting_regexp
70
+ text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
70
71
  end
71
72
 
72
73
  # Substitute Characters with this substituter.
@@ -1,4 +1,5 @@
1
1
  module Tokenizers
2
+
2
3
  # The base indexing tokenizer.
3
4
  #
4
5
  # Override in indexing subclasses and define in configuration.
@@ -15,11 +16,10 @@ module Tokenizers
15
16
  # Default indexing preprocessing hook.
16
17
  #
17
18
  # Does:
18
- # 1. Umlaut substitution.
19
- # 2. Downcasing.
20
- # 3. Remove illegal expressions.
21
- # 4. Contraction.
22
- # 5. Remove non-single stopwords. (Stopwords that occur with other words)
19
+ # 1. Character substitution.
20
+ # 2. Downcasing.
21
+ # 3. Remove illegal expressions.
22
+ # 4. Remove non-single stopwords. (Stopwords that occur with other words)
23
23
  #
24
24
  def preprocess text
25
25
  text = substitute_characters text
@@ -27,7 +27,7 @@ module Tokenizers
27
27
  remove_illegals text
28
28
  # we do not remove single stopwords for an entirely different
29
29
  # reason than in the query tokenizer.
30
- # An indexed thing with just name "UND" (a stopword) should not lose its name.
30
+ # An indexed thing with just name "UND" (a possible stopword) should not lose its name.
31
31
  #
32
32
  remove_non_single_stopwords text
33
33
  text
@@ -36,8 +36,8 @@ module Tokenizers
36
36
  # Default indexing pretokenizing hook.
37
37
  #
38
38
  # Does:
39
- # 1. Split the text into words.
40
- # 2. Normalize each word.
39
+ # 1. Split the text into words.
40
+ # 2. Normalize each word.
41
41
  #
42
42
  # TODO Rename into wordize? Or somesuch?
43
43
  #
@@ -60,7 +60,7 @@ module Tokenizers
60
60
  #
61
61
  # Override in subclasses to redefine behaviour.
62
62
  #
63
- # TODO Make parametrizable! reject { |token| }
63
+ # TODO TODO TODO Make parametrizable! reject { |token| }
64
64
  #
65
65
  def reject tokens
66
66
  tokens.reject! &:blank?
@@ -14,7 +14,6 @@ namespace :server do
14
14
  # desc "Start the unicorns. (Wehee!)"
15
15
  task :start => :framework do
16
16
  chdir_to_root
17
- # Rake::Task[:"solr:start"].invoke # TODO Move to better place.
18
17
  daemonize = PICKY_ENVIRONMENT == 'production' ? '-D' : ''
19
18
  command = "export PICKY_ENV=#{PICKY_ENVIRONMENT}; unicorn -c unicorn.ru #{daemonize}".strip
20
19
  puts "Running \`#{command}\`."
@@ -24,7 +23,6 @@ namespace :server do
24
23
  # desc "Stop the unicorns. (Blam!)"
25
24
  task :stop => :framework do
26
25
  `kill -QUIT #{current_pid}` if current_pid
27
- # Rake::Task[:"solr:stop"].invoke # TODO Move to better place.
28
26
  end
29
27
 
30
28
  # desc "Restart the unicorns."
@@ -4,6 +4,7 @@ require 'spec_helper'
4
4
  describe Rack::Harakiri do
5
5
  before(:each) do
6
6
  @app = stub :app
7
+ Process.stub! :kill # not taking any chances
7
8
  end
8
9
  context "defaults" do
9
10
  before(:each) do
@@ -17,13 +17,33 @@ describe Sources::DB do
17
17
  end
18
18
 
19
19
  describe "get_data" do
20
- it "delegates" do
21
- type = stub :type, :name => :some_type
22
- category = stub :category, :from => :some_category
23
-
24
- @connection.should_receive(:execute).once.with 'SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000'
25
-
26
- @source.get_data type, category, :some_offset
20
+ before(:each) do
21
+ @type = stub :type, :name => :some_type
22
+ @category = stub :category, :from => :some_category
23
+ end
24
+ context 'no data' do
25
+ it "delegates to the connection" do
26
+
27
+ @connection.should_receive(:execute).
28
+ once.
29
+ with('SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000').
30
+ and_return []
31
+
32
+ @source.get_data @type, @category, :some_offset
33
+ end
34
+ end
35
+ context 'with data' do
36
+ it 'yields to the caller' do
37
+ @connection.should_receive(:execute).
38
+ any_number_of_times.
39
+ with('SELECT indexed_id, some_category FROM some_type_type_index st WHERE st.id > some_offset LIMIT 25000').
40
+ and_return [[1, 'text']]
41
+
42
+ @source.get_data @type, @category, :some_offset do |id, text|
43
+ id.should == 1
44
+ text.should == 'text'
45
+ end
46
+ end
27
47
  end
28
48
  end
29
49
 
@@ -67,48 +87,22 @@ describe Sources::DB do
67
87
  end
68
88
  end
69
89
 
70
- # TODO Redo.
71
- #
72
- # describe "harvest" do
73
- # before(:each) do
74
- # @source.stub! :harvest_statement_with_offset
75
- # end
76
- # context 'expectations' do
77
- # before(:each) do
78
- # @connection.stub! :execute => []
79
- # @connection.stub! :select_value
80
- # end
81
- # after(:each) do
82
- # @source.harvest :type_name, :some_field
83
- # end
84
- # context "with WHERE" do
85
- # before(:each) do
86
- # @source.stub! :select_statement => 'bla WHERE blu'
87
- # end
88
- # it "should connect" do
89
- # @source.should_receive(:connect_backend).once.with
90
- # end
91
- # it "should call the harvest statement with an offset" do
92
- # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
93
- # end
94
- # end
95
- # context "without WHERE" do
96
- # it "should connect" do
97
- # @adapter.should_receive(:connect).once.with
98
- # end
99
- # it "should call the harvest statement with an offset" do
100
- # @source.should_receive(:harvest_statement_with_offset).once.with :some_type, :some_field, :some_offset
101
- # end
102
- # end
103
- # end
104
- # context 'returns' do
105
- # it "should return whatever the execute statement returns" do
106
- # @connection.stub! :execute => :some_result
107
- #
108
- # @source.harvest(:some_type, :some_field).should == :some_result
109
- # end
110
- # end
111
- # end
90
+ describe 'harvest' do
91
+ before(:each) do
92
+ @type = stub :type, :name => :some_type
93
+ @category = stub :category, :name => :some_category
94
+
95
+ @source.should_receive(:get_data).any_number_of_times.and_return [[:some_id, 'some_text']].cycle
96
+ @source.stub! :count => 17
97
+ end
98
+ it 'calls connect_backend' do
99
+ @source.should_receive(:connect_backend).once.with()
100
+
101
+ @source.harvest @type, @category do |id, text|
102
+ p [id, text]
103
+ end
104
+ end
105
+ end
112
106
 
113
107
  describe "harvest_statement_with_offset" do
114
108
  before(:each) do
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
+ - 1
7
8
  - 0
8
- - 0
9
- version: 1.0.0
9
+ version: 1.1.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Florian Hanke
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-09 00:00:00 +01:00
17
+ date: 2010-12-12 00:00:00 +01:00
18
18
  default_executable: picky
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -63,6 +63,7 @@ files:
63
63
  - lib/picky/cacher/weights_generator.rb
64
64
  - lib/picky/calculations/location.rb
65
65
  - lib/picky/character_substituters/west_european.rb
66
+ - lib/picky/cli.rb
66
67
  - lib/picky/configuration/index.rb
67
68
  - lib/picky/cores.rb
68
69
  - lib/picky/ext/maybe_compile.rb
@@ -72,7 +73,6 @@ files:
72
73
  - lib/picky/extensions/module.rb
73
74
  - lib/picky/extensions/object.rb
74
75
  - lib/picky/extensions/symbol.rb
75
- - lib/picky/generator.rb
76
76
  - lib/picky/helpers/cache.rb
77
77
  - lib/picky/helpers/gc.rb
78
78
  - lib/picky/helpers/measuring.rb
@@ -147,19 +147,6 @@ files:
147
147
  - lib/tasks/statistics.rake
148
148
  - lib/tasks/try.rake
149
149
  - lib/picky/ext/ruby19/performant.c
150
- - project_prototype/app/application.rb
151
- - project_prototype/app/db.yml
152
- - project_prototype/app/library.csv
153
- - project_prototype/app/logging.rb
154
- - project_prototype/app/README
155
- - project_prototype/config.ru
156
- - project_prototype/Gemfile
157
- - project_prototype/log/README
158
- - project_prototype/Rakefile
159
- - project_prototype/script/console
160
- - project_prototype/tmp/pids/README
161
- - project_prototype/tmp/README
162
- - project_prototype/unicorn.ru
163
150
  - spec/ext/performant_spec.rb
164
151
  - spec/lib/application_spec.rb
165
152
  - spec/lib/cacher/cacher_strategy_spec.rb
@@ -1,198 +0,0 @@
1
- # encoding: utf-8
2
- #
3
- require 'fileutils'
4
-
5
- module Picky
6
-
7
- # Thrown when no generator for the command
8
- # picky <command> <options>
9
- # is found.
10
- #
11
- class NoGeneratorError < StandardError # :nodoc:all
12
-
13
- def initialize generator
14
- super usage + possible_commands(generator.types)
15
- end
16
-
17
- def usage
18
- "\nUsage:\n" +
19
- "picky <command> <params>\n" +
20
- ?\n
21
- end
22
-
23
- def possible_commands types
24
- "Possible commands:\n" +
25
- types.map do |name, klass_params|
26
- result = "picky #{name}"
27
- _, params = *klass_params
28
- result << ' ' << [*params].map { |param| "<#{param}>" }.join(' ') if params
29
- result
30
- end.join(?\n) + ?\n
31
- end
32
-
33
- end
34
-
35
- # This is a very simple project generator.
36
- # Not at all like Padrino's or Rails'.
37
- # (No diss, just by way of a faster explanation)
38
- #
39
- # Basically copies a prototype project into a newly generated directory.
40
- #
41
- class Generator # :nodoc:all
42
-
43
- attr_reader :types
44
-
45
- def initialize
46
- @types = {
47
- project: [Project, :project_name]
48
- }
49
- end
50
-
51
- # Run the generators with this command.
52
- #
53
- # This will "route" the commands to the right specific generator.
54
- #
55
- def generate args
56
- generator = generator_for *args
57
- generator.generate
58
- end
59
-
60
- #
61
- #
62
- def generator_for identifier, *args
63
- generator_info = types[identifier.to_sym]
64
- raise NoGeneratorError.new(self) unless generator_info
65
- generator_class = generator_info.first
66
- generator_for_class generator_class, identifier, *args
67
- end
68
-
69
- #
70
- #
71
- def generator_for_class klass, *args
72
- klass.new *args
73
- end
74
-
75
- # Generates a new Picky project.
76
- #
77
- # Example:
78
- # > picky project my_lovely_project
79
- #
80
- class Project
81
-
82
- attr_reader :name, :project_prototype_basedir
83
-
84
- def initialize identifier, name, *args
85
- @name = name
86
- @project_prototype_basedir = File.expand_path '../../../project_prototype', __FILE__
87
- end
88
-
89
- #
90
- #
91
- def generate
92
- exclaim "Setting up Picky project \"#{name}\"."
93
- create_target_directory
94
- copy_all_files
95
- exclaim "\"#{name}\" is a great project name! Have fun :)\n"
96
- exclaim ""
97
- exclaim "Next steps:"
98
- exclaim "1. cd #{name}"
99
- exclaim "2. bundle install"
100
- exclaim "3. rake index"
101
- exclaim "4. rake start"
102
- exclaim "5. rake # (optional) shows you where Picky needs input from you"
103
- exclaim " # if you want to define your own search."
104
- end
105
-
106
- #
107
- #
108
- def create_target_directory
109
- if File.exists?(target_directory)
110
- exists target_directory
111
- else
112
- FileUtils.mkdir target_directory
113
- created target_directory
114
- end
115
- end
116
-
117
- #
118
- #
119
- def copy_all_files
120
- all_prototype_files.each do |filename|
121
- next if filename.match(/\.textile$/)
122
- copy_single_file filename
123
- end
124
- end
125
-
126
- #
127
- #
128
- def target_filename_for filename
129
- filename.gsub(%r{#{project_prototype_basedir}}, target_directory)
130
- end
131
- #
132
- #
133
- def copy_single_file filename
134
- target = target_filename_for filename
135
- if File.exists? target
136
- exists target
137
- else
138
- smart_copy filename, target
139
- end
140
- end
141
-
142
- # Well, "smart" ;)
143
- #
144
- def smart_copy filename, target
145
- # p "Trying to copy #{filename} -> #{target}"
146
- FileUtils.copy_file filename, target
147
- created target
148
- rescue Errno::EISDIR
149
- # p "EISDIR #{filename} -> #{target}"
150
- FileUtils.rm target
151
- FileUtils.mkdir_p target unless Dir.exists?(target)
152
- created target
153
- rescue Errno::EEXIST
154
- # p "EEXIST #{filename} -> #{target}"
155
- exists target
156
- rescue Errno::ENOTDIR
157
- # p "ENOTDIR #{filename} -> #{target}"
158
- FileUtils.mkdir_p File.dirname(target) rescue nil
159
- retry
160
- rescue Errno::ENOENT => e
161
- # p "ENOENT #{filename} -> #{target}"
162
- if File.exists? filename
163
- FileUtils.mkdir_p File.dirname(target)
164
- retry
165
- else
166
- raise e
167
- end
168
- end
169
-
170
- #
171
- #
172
- def all_prototype_files
173
- Dir[File.join(project_prototype_basedir, '**', '*')]
174
- end
175
-
176
- #
177
- #
178
- def target_directory
179
- File.expand_path name, Dir.pwd
180
- end
181
-
182
- def created entry
183
- exclaim "#{entry} \x1b[32mcreated\x1b[m."
184
- end
185
-
186
- def exists entry
187
- exclaim "#{entry} \x1b[31mexists\x1b[m, skipping."
188
- end
189
-
190
- def exclaim something
191
- puts something
192
- end
193
-
194
- end
195
-
196
- end
197
-
198
- end
@@ -1,30 +0,0 @@
1
- source :gemcutter
2
-
3
- # Gems required by Picky.
4
- #
5
- gem 'picky', '~> 1.0.0'
6
- gem 'rake'
7
- gem 'bundler'
8
- gem 'rack', '~> 1.2.1'
9
- gem 'rack-mount', '~> 0.6.9'
10
- gem 'text', '~> 0.2.0'
11
- gem 'yajl-ruby', '~> 0.7.8', :require => 'yajl'
12
-
13
- # Should be optional, but isn't yet. Sorry.
14
- #
15
- gem 'activerecord', '~> 2.3.8', :require => 'active_record'
16
-
17
-
18
-
19
-
20
- # Optional. Makes rack faster.
21
- #
22
- gem 'rack_fast_escape', '2009.06.24'
23
-
24
- # Optional. Use your preferred web server.
25
- #
26
- gem 'unicorn'
27
-
28
- # Optional. Use your preferred database adapter.
29
- #
30
- # gem 'mysql'
@@ -1,11 +0,0 @@
1
- require 'picky-tasks'
2
-
3
- desc "Finds where Picky still needs input from you."
4
- task :todo do
5
- if system "grep -e 'TO#{}DO.*' -n --color=always -R *"
6
- puts "Picky needs a bit of input from you there. Thanks."
7
- else
8
- puts "Picky seems to be fine (no TO#{}DOs found)."
9
- end
10
- end
11
- task :default => :todo
@@ -1,5 +0,0 @@
1
- All your application definitions go here.
2
-
3
- /app needs a few files defined by picky:
4
- * application.rb - how to handle indexing, queries, routing.
5
- * logging.rb