muck-raker 0.3.3 → 0.3.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -26,7 +26,7 @@ begin
26
26
  Jeweler::Tasks.new do |gemspec|
27
27
  gemspec.name = "muck-raker"
28
28
  gemspec.summary = "The aggregation and recommendation daemons for the muck system"
29
- gemspec.email = "justinball@gmail.com"
29
+ gemspec.email = "justin@tatemae.com"
30
30
  gemspec.homepage = "http://github.com/tatemae/muck-raker"
31
31
  gemspec.description = "The aggregation and recommendation daemons for the muck system."
32
32
  gemspec.authors = ["Joel Duffin Justin Ball"]
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.3
1
+ 0.3.4
@@ -1,5 +1,7 @@
1
1
  ENV['RAILS_ENV'] = (ENV['RAILS_ENV'] || 'development').dup
2
2
 
3
+ SOLR_PATH = "#{RAILS_ROOT}/config/solr" unless defined? SOLR_PATH
4
+
3
5
  unless defined? SOLR_LOGS_PATH
4
6
  SOLR_LOGS_PATH = ENV["SOLR_LOGS_PATH"] || "#{RAILS_ROOT}/log"
5
7
  end
@@ -7,14 +9,14 @@ unless defined? SOLR_PIDS_PATH
7
9
  SOLR_PIDS_PATH = ENV["SOLR_PIDS_PATH"] || "#{RAILS_ROOT}/tmp/pids/solr"
8
10
  end
9
11
  unless defined? SOLR_DATA_PATH
10
- SOLR_DATA_PATH = ENV["SOLR_DATA_PATH"] || "#{RAILS_ROOT}/solr_indexes/#{ENV['RAILS_ENV']}"
12
+ SOLR_DATA_PATH = ENV["SOLR_DATA_PATH"] || "#{RAILS_ROOT}/solr/#{ENV['RAILS_ENV']}"
11
13
  end
12
14
  unless defined? SOLR_CONFIG_PATH
13
- SOLR_CONFIG_PATH = ENV["SOLR_CONFIG_PATH"] || File.join(RAILS_ROOT, 'config', 'solr')
15
+ SOLR_CONFIG_PATH = ENV["SOLR_CONFIG_PATH"] || SOLR_PATH
14
16
  end
15
17
 
16
- unless defined? RAKER_LOG_FILE
17
- RAKER_LOG_FILE = File.join(RAILS_ROOT, 'log', 'recommenderd.log')
18
+ unless defined? RAKER_LOGS_PATH
19
+ RAKER_LOGS_PATH = ENV["RAKER_LOGS_PATH"] || "#{RAILS_ROOT}/log"
18
20
  end
19
21
  unless defined? RAKER_PIDS_PATH
20
22
  RAKER_PIDS_PATH = ENV["RAKER_PIDS_PATH"] || "#{RAILS_ROOT}/tmp/pids/raker"
@@ -22,21 +24,15 @@ end
22
24
  unless defined? RAKER_FEED_ARCHIVE_PATH
23
25
  if ENV['RAILS_ENV'] == 'production'
24
26
  RAKER_FEED_ARCHIVE_PATH = File.join(RAILS_ROOT, '..', '..', 'shared', 'feed_archive')
25
- else
27
+ elsif ENV['RAILS_ENV'] == 'development'
26
28
  RAKER_FEED_ARCHIVE_PATH = File.join(RAILS_ROOT, '..', 'feed_archive')
27
29
  end
28
30
  end
29
- unless defined? RAKER_DATABASE_CONFIG
31
+ unless defined? RAKER_DATABASE_CONFIG_FILE
30
32
  if ENV['RAILS_ENV'] == 'production'
31
- RAKER_DATABASE_CONFIG = File.join(RAILS_ROOT, '..', '..', 'shared', 'config', 'database.yml')
33
+ RAKER_DATABASE_CONFIG_FILE = File.join(RAILS_ROOT, '..', '..', 'shared', 'config', 'database.yml')
32
34
  else
33
- RAKER_DATABASE_CONFIG = File.join(RAILS_ROOT, 'config', 'database.yml')
34
- end
35
- end
36
- unless defined? RAKER_LOG_TO_CONSOLE
37
- if ENV['RAILS_ENV'] == 'production'
38
- RAKER_LOG_TO_CONSOLE = 'false'
39
- else
40
- RAKER_LOG_TO_CONSOLE = 'true'
35
+ RAKER_DATABASE_CONFIG_FILE = File.join(RAILS_ROOT, 'config', 'database.yml')
41
36
  end
42
37
  end
38
+
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -5,12 +5,14 @@ require 'fileutils'
5
5
  module MuckRaker
6
6
  class Tasks < ::Rake::TaskLib
7
7
  def initialize
8
+ ENV['DEBUG'] = 'true' unless ENV['DEBUG'] == 'false'
9
+ ENV['RAILS_ENV'] = 'development' unless ENV['RAILS_ENV']
8
10
  define
9
11
  end
10
-
12
+
11
13
  private
12
14
  def define
13
-
15
+
14
16
  namespace :muck do
15
17
 
16
18
  namespace :raker do
@@ -38,7 +40,7 @@ module MuckRaker
38
40
  end
39
41
  }
40
42
  end
41
-
43
+
42
44
  desc "Loads some feeds oai endpoints to get things started"
43
45
  task :bootstrap => :environment do
44
46
  require 'active_record/fixtures'
@@ -56,14 +58,14 @@ module MuckRaker
56
58
  ServiceCategory.delete_all
57
59
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"service_categories")
58
60
  Fixtures.new(Service.connection,"service_categories",ServiceCategory,yml).insert_fixtures
59
-
61
+
60
62
  Service.delete_all
61
63
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"services")
62
64
  Fixtures.new(Service.connection,"services",Service,yml).insert_fixtures
63
65
 
64
66
  end
65
67
 
66
- desc "Deletes and reloads all services and service categories"
68
+ desc "Deletes and reloads services and service categories"
67
69
  task :bootstrap_services => :environment do
68
70
  require 'active_record/fixtures'
69
71
  ActiveRecord::Base.establish_connection(RAILS_ENV.to_sym)
@@ -71,14 +73,15 @@ module MuckRaker
71
73
  ServiceCategory.delete_all
72
74
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"service_categories")
73
75
  Fixtures.new(Service.connection,"service_categories",ServiceCategory,yml).insert_fixtures
74
-
76
+
75
77
  Service.delete_all
76
78
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"services")
77
79
  Fixtures.new(Service.connection,"services",Service,yml).insert_fixtures
80
+
78
81
  end
79
-
82
+
80
83
  end
81
-
84
+
82
85
  desc "Sync files from muck raker."
83
86
  task :sync do
84
87
  path = File.join(File.dirname(__FILE__), *%w[.. ..])
@@ -87,122 +90,115 @@ module MuckRaker
87
90
  system "rsync -ruv #{path}/config/solr ./config"
88
91
  end
89
92
 
90
- def pid_file
91
- "#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid"
93
+
94
+ def show_options
95
+ puts "RAILS_ENV=#{ENV['RAILS_ENV']} "
96
+ puts "solr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
97
+ puts "solr.data.dir=\"#{SOLR_DATA_PATH}\""
98
+ puts "RAKER_PIDS_PATH=\"#{RAKER_PIDS_PATH}\" "
99
+ puts "recommender.database.config_file=\"#{RAKER_DATABASE_CONFIG_FILE}\" "
100
+ puts "recommender.log_file=\"#{RAKER_LOGS_PATH}/raker.log\" "
101
+ puts "recommender.log_to_console=\"#{ENV['DEBUG']}\" "
102
+ puts "aggregator.feed_archive_path=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
92
103
  end
93
104
 
94
- def raker_task task = 'daemon', param = ''
105
+ def daemon_task task = 'all', task_param = nil
95
106
  require File.expand_path("#{File.dirname(__FILE__)}/../../config/muck_raker_environment")
96
- show_options
107
+ task_param ||= (ENV['redo'] == 'true') ? 'redo' : ''
97
108
  FileUtils.mkdir_p(RAKER_PIDS_PATH)
98
109
  FileUtils.mkdir_p(RAKER_LOGS_PATH)
110
+ FileUtils.mkdir_p(RAKER_FEED_ARCHIVE_PATH)
111
+ show_options
112
+ options = "-DRAILS_ENV=#{ENV['RAILS_ENV']} "
113
+ options << "-DDEBUG=#{ENV['DEBUG']} "
114
+ options << "-Dsolr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
115
+ options << "-Dsolr.data.dir=\"#{SOLR_DATA_PATH}\" "
116
+ options << "-Drecommender.database.config_file=\"#{RAKER_DATABASE_CONFIG_FILE}\" "
117
+ options << "-Drecommender.log_file=\"#{RAKER_LOGS_PATH}/raker.log\" "
118
+ options << "-Drecommender.log_to_console=\"#{ENV['DEBUG']}\" "
119
+ options << "-Daggregator.feed_archive_path=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
120
+ javaclass = "edu.usu.cosl.recommenderd.Recommenderd "
99
121
  separator = (RUBY_PLATFORM =~ /(win|w)32$/ ? ';' : ':')
100
- puts "RAILS_ENV=" + ENV['RAILS_ENV']
101
122
  Dir.chdir(File.join(File.dirname(__FILE__), '../../', 'raker', 'lib')) do
102
123
  jars = Dir['*.jar','solr/*.jar'].join(separator)
103
- options = "-DRAILS_ENV=#{ENV['RAILS_ENV']} "
104
- options << "-Dsolr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
105
- options << "-Dsolr.data.dir=\"#{SOLR_DATA_PATH}\" "
106
- options << "-Draker.database.config=\"#{RAKER_DATABASE_CONFIG}\" "
107
- options << "-Draker.log.file=\"#{RAKER_LOG_FILE}\" "
108
- options << "-Draker.feed_archive_dir=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
109
- options << "-Draker.log_to_console=\"#{RAKER_LOG_TO_CONSOLE}\" "
110
124
  classpath = "-classpath #{jars}#{separator}. "
111
- memory_options = "-Xms32m -Xmx128m "
112
- javaclass = "edu.usu.cosl.recommenderd.Recommenderd "
113
- cmd = "java " + options + classpath + memory_options + javaclass + task + " " + param
125
+ cmd = "java " + options + classpath + javaclass + task + ' ' + task_param
114
126
  puts ("Executing: " + cmd)
115
127
  windows = RUBY_PLATFORM =~ /(win|w)32$/
116
128
  if windows
117
- exec cmd
129
+ exec cmd
118
130
  else
119
131
  pid = fork do
120
132
  exec cmd
121
133
  end
122
134
  end
123
135
  sleep(5)
124
- File.open(pid_file, "w"){ |f| f << pid} unless windows
125
- puts "#{ENV['RAILS_ENV']} Muck raker harvest command started successfully, pid: #{pid}."
136
+ File.open("#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid", "w"){ |f| f << pid} unless windows
137
+ puts "#{ENV['RAILS_ENV']} Muck raker command started successfully, pid: #{pid}."
126
138
  end
127
139
  end
128
-
129
- def show_options
130
- puts "RAILS_ENV=#{ENV['RAILS_ENV']} "
131
- puts "solr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
132
- puts "solr.data.dir=\"#{SOLR_DATA_PATH}\" "
133
- puts "raker.db.config=\"#{RAKER_DATABASE_CONFIG}\" "
134
- puts "raker.log.dir=\"#{RAKER_LOGS_PATH}\" "
135
- puts "raker.feed_archive_dir=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
136
- puts "raker.log_to_console=\"#{RAKER_LOG_TO_CONSOLE}\" "
137
- end
138
-
139
- desc "Print out muck raker dependent environment variables"
140
- task :show_options => :environment do
141
- require File.expand_path("#{File.dirname(__FILE__)}/../../config/muck_raker_environment")
142
- show_options
143
- end
144
140
 
145
- desc "Start the recommender daemon process"
141
+ desc "Start daemon."
146
142
  task :start => :environment do
147
- raker_task
143
+ daemon_task 'all'
148
144
  end
149
145
 
150
- desc "Redo everything (re-index, redo autogenerated subjects, rebuild tag clouds re-recommend)"
146
+ desc "Redo everything once and quit."
151
147
  task :rebuild => :environment do
152
- raker_task 'daemon', 'full'
153
- end
154
-
155
- desc "Get some data into the recommender system"
156
- task :bootstrap => :environment do
157
- raker_task 'bootstrap'
148
+ daemon_task 'all', 'redo'
158
149
  end
159
150
 
160
- desc "Harvest without recommending"
151
+ desc "Harvest stale feeds. Add redo=true to harvest all feeds."
161
152
  task :harvest => :environment do
162
- raker_task 'harvest'
153
+ daemon_task 'harvest'
163
154
  end
164
155
 
165
- desc "Update solr index to changes made to recommender database"
156
+ desc "Index new entries."
166
157
  task :index => :environment do
167
- raker_task 'index'
158
+ daemon_task 'index'
159
+ end
160
+
161
+ desc "Re-index all entries."
162
+ task :reindex => :environment do
163
+ daemon_task 'index', 'redo'
168
164
  end
169
165
 
170
- desc "Recommend without harvesting"
166
+ desc "Update recommendations."
171
167
  task :recommend => :environment do
172
- raker_task 'recommend'
168
+ daemon_task 'recommend'
173
169
  end
174
170
 
175
- desc "Redo recommendations only"
171
+ desc "Redo all recommendations."
176
172
  task :redo_recommendations => :environment do
177
- raker_task 'recommend', 'full'
173
+ daemon_task 'recommend', 'redo'
178
174
  end
179
175
 
180
- desc "Autogenerate subjects"
176
+ desc "Auto-generate tags for new entries that don't have at least 4. Add redo=true to regenerate for all entries."
181
177
  task :subjects => :environment do
182
- raker_task 'auto_generate_subjects'
178
+ daemon_task 'subjects'
183
179
  end
184
180
 
185
- desc "Generate tag clouds"
181
+ desc "Re-generate tag clouds."
186
182
  task :tag_clouds => :environment do
187
- raker_task 'tag_clouds'
183
+ daemon_task 'tag_clouds', 'redo'
188
184
  end
189
185
 
190
- desc "Stop a raker daemon process"
186
+ desc "Stop a raker daemon process."
191
187
  task :stop => :environment do
192
- file_path = pid_file
188
+ file_path = "#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid"
193
189
  if File.exists?(file_path)
194
190
  File.open(file_path, "r") do |f|
195
191
  pid = f.readline
196
192
  Process.kill('TERM', pid.to_i)
197
193
  end
198
194
  File.unlink(file_path)
199
- puts "Raker task successfully."
195
+ puts "Raker shutdown successfully."
200
196
  else
201
197
  puts "PID file not found at #{file_path}. Either Raker is not running or no PID file was written."
202
198
  end
203
199
  end
204
200
 
205
- end
201
+ end
206
202
 
207
203
  end
208
204
 
data/muck-raker.gemspec CHANGED
@@ -5,13 +5,13 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{muck-raker}
8
- s.version = "0.3.3"
8
+ s.version = "0.3.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Joel Duffin Justin Ball"]
12
- s.date = %q{2009-11-05}
12
+ s.date = %q{2009-11-13}
13
13
  s.description = %q{The aggregation and recommendation daemons for the muck system.}
14
- s.email = %q{justinball@gmail.com}
14
+ s.email = %q{justin@tatemae.com}
15
15
  s.extra_rdoc_files = [
16
16
  "README.rdoc"
17
17
  ]
Binary file
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: muck-raker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joel Duffin Justin Ball
@@ -9,12 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-05 00:00:00 -07:00
12
+ date: 2009-11-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
16
  description: The aggregation and recommendation daemons for the muck system.
17
- email: justinball@gmail.com
17
+ email: justin@tatemae.com
18
18
  executables: []
19
19
 
20
20
  extensions: []