muck-raker 0.3.3 → 0.3.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -26,7 +26,7 @@ begin
26
26
  Jeweler::Tasks.new do |gemspec|
27
27
  gemspec.name = "muck-raker"
28
28
  gemspec.summary = "The aggregation and recommendation daemons for the muck system"
29
- gemspec.email = "justinball@gmail.com"
29
+ gemspec.email = "justin@tatemae.com"
30
30
  gemspec.homepage = "http://github.com/tatemae/muck-raker"
31
31
  gemspec.description = "The aggregation and recommendation daemons for the muck system."
32
32
  gemspec.authors = ["Joel Duffin Justin Ball"]
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.3
1
+ 0.3.4
@@ -1,5 +1,7 @@
1
1
  ENV['RAILS_ENV'] = (ENV['RAILS_ENV'] || 'development').dup
2
2
 
3
+ SOLR_PATH = "#{RAILS_ROOT}/config/solr" unless defined? SOLR_PATH
4
+
3
5
  unless defined? SOLR_LOGS_PATH
4
6
  SOLR_LOGS_PATH = ENV["SOLR_LOGS_PATH"] || "#{RAILS_ROOT}/log"
5
7
  end
@@ -7,14 +9,14 @@ unless defined? SOLR_PIDS_PATH
7
9
  SOLR_PIDS_PATH = ENV["SOLR_PIDS_PATH"] || "#{RAILS_ROOT}/tmp/pids/solr"
8
10
  end
9
11
  unless defined? SOLR_DATA_PATH
10
- SOLR_DATA_PATH = ENV["SOLR_DATA_PATH"] || "#{RAILS_ROOT}/solr_indexes/#{ENV['RAILS_ENV']}"
12
+ SOLR_DATA_PATH = ENV["SOLR_DATA_PATH"] || "#{RAILS_ROOT}/solr/#{ENV['RAILS_ENV']}"
11
13
  end
12
14
  unless defined? SOLR_CONFIG_PATH
13
- SOLR_CONFIG_PATH = ENV["SOLR_CONFIG_PATH"] || File.join(RAILS_ROOT, 'config', 'solr')
15
+ SOLR_CONFIG_PATH = ENV["SOLR_CONFIG_PATH"] || SOLR_PATH
14
16
  end
15
17
 
16
- unless defined? RAKER_LOG_FILE
17
- RAKER_LOG_FILE = File.join(RAILS_ROOT, 'log', 'recommenderd.log')
18
+ unless defined? RAKER_LOGS_PATH
19
+ RAKER_LOGS_PATH = ENV["RAKER_LOGS_PATH"] || "#{RAILS_ROOT}/log"
18
20
  end
19
21
  unless defined? RAKER_PIDS_PATH
20
22
  RAKER_PIDS_PATH = ENV["RAKER_PIDS_PATH"] || "#{RAILS_ROOT}/tmp/pids/raker"
@@ -22,21 +24,15 @@ end
22
24
  unless defined? RAKER_FEED_ARCHIVE_PATH
23
25
  if ENV['RAILS_ENV'] == 'production'
24
26
  RAKER_FEED_ARCHIVE_PATH = File.join(RAILS_ROOT, '..', '..', 'shared', 'feed_archive')
25
- else
27
+ elsif ENV['RAILS_ENV'] == 'development'
26
28
  RAKER_FEED_ARCHIVE_PATH = File.join(RAILS_ROOT, '..', 'feed_archive')
27
29
  end
28
30
  end
29
- unless defined? RAKER_DATABASE_CONFIG
31
+ unless defined? RAKER_DATABASE_CONFIG_FILE
30
32
  if ENV['RAILS_ENV'] == 'production'
31
- RAKER_DATABASE_CONFIG = File.join(RAILS_ROOT, '..', '..', 'shared', 'config', 'database.yml')
33
+ RAKER_DATABASE_CONFIG_FILE = File.join(RAILS_ROOT, '..', '..', 'shared', 'config', 'database.yml')
32
34
  else
33
- RAKER_DATABASE_CONFIG = File.join(RAILS_ROOT, 'config', 'database.yml')
34
- end
35
- end
36
- unless defined? RAKER_LOG_TO_CONSOLE
37
- if ENV['RAILS_ENV'] == 'production'
38
- RAKER_LOG_TO_CONSOLE = 'false'
39
- else
40
- RAKER_LOG_TO_CONSOLE = 'true'
35
+ RAKER_DATABASE_CONFIG_FILE = File.join(RAILS_ROOT, 'config', 'database.yml')
41
36
  end
42
37
  end
38
+
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -38,8 +38,12 @@
38
38
  <maxBufferedDocs>1000</maxBufferedDocs>
39
39
  <maxMergeDocs>2147483647</maxMergeDocs>
40
40
  <maxFieldLength>10000</maxFieldLength>
41
- <writeLockTimeout>1000</writeLockTimeout>
42
- <commitLockTimeout>10000</commitLockTimeout>
41
+ <!--
42
+ <writeLockTimeout>1000</writeLockTimeout>
43
+ <commitLockTimeout>10000</commitLockTimeout>
44
+ <unlockOnStartup>true</unlockOnStartup>
45
+ -->
46
+ <lockType>single</lockType>
43
47
  </indexDefaults>
44
48
  <!-- options specific to the main on-disk lucene index -->
45
49
  <!--
@@ -5,12 +5,14 @@ require 'fileutils'
5
5
  module MuckRaker
6
6
  class Tasks < ::Rake::TaskLib
7
7
  def initialize
8
+ ENV['DEBUG'] = 'true' unless ENV['DEBUG'] == 'false'
9
+ ENV['RAILS_ENV'] = 'development' unless ENV['RAILS_ENV']
8
10
  define
9
11
  end
10
-
12
+
11
13
  private
12
14
  def define
13
-
15
+
14
16
  namespace :muck do
15
17
 
16
18
  namespace :raker do
@@ -38,7 +40,7 @@ module MuckRaker
38
40
  end
39
41
  }
40
42
  end
41
-
43
+
42
44
  desc "Loads some feeds oai endpoints to get things started"
43
45
  task :bootstrap => :environment do
44
46
  require 'active_record/fixtures'
@@ -56,14 +58,14 @@ module MuckRaker
56
58
  ServiceCategory.delete_all
57
59
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"service_categories")
58
60
  Fixtures.new(Service.connection,"service_categories",ServiceCategory,yml).insert_fixtures
59
-
61
+
60
62
  Service.delete_all
61
63
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"services")
62
64
  Fixtures.new(Service.connection,"services",Service,yml).insert_fixtures
63
65
 
64
66
  end
65
67
 
66
- desc "Deletes and reloads all services and service categories"
68
+ desc "Deletes and reloads services and service categories"
67
69
  task :bootstrap_services => :environment do
68
70
  require 'active_record/fixtures'
69
71
  ActiveRecord::Base.establish_connection(RAILS_ENV.to_sym)
@@ -71,14 +73,15 @@ module MuckRaker
71
73
  ServiceCategory.delete_all
72
74
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"service_categories")
73
75
  Fixtures.new(Service.connection,"service_categories",ServiceCategory,yml).insert_fixtures
74
-
76
+
75
77
  Service.delete_all
76
78
  yml = File.join(File.dirname(__FILE__), '..', '..', 'db', 'bootstrap',"services")
77
79
  Fixtures.new(Service.connection,"services",Service,yml).insert_fixtures
80
+
78
81
  end
79
-
82
+
80
83
  end
81
-
84
+
82
85
  desc "Sync files from muck raker."
83
86
  task :sync do
84
87
  path = File.join(File.dirname(__FILE__), *%w[.. ..])
@@ -87,122 +90,115 @@ module MuckRaker
87
90
  system "rsync -ruv #{path}/config/solr ./config"
88
91
  end
89
92
 
90
- def pid_file
91
- "#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid"
93
+
94
+ def show_options
95
+ puts "RAILS_ENV=#{ENV['RAILS_ENV']} "
96
+ puts "solr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
97
+ puts "solr.data.dir=\"#{SOLR_DATA_PATH}\""
98
+ puts "RAKER_PIDS_PATH=\"#{RAKER_PIDS_PATH}\" "
99
+ puts "recommender.database.config_file=\"#{RAKER_DATABASE_CONFIG_FILE}\" "
100
+ puts "recommender.log_file=\"#{RAKER_LOGS_PATH}/raker.log\" "
101
+ puts "recommender.log_to_console=\"#{ENV['DEBUG']}\" "
102
+ puts "aggregator.feed_archive_path=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
92
103
  end
93
104
 
94
- def raker_task task = 'daemon', param = ''
105
+ def daemon_task task = 'all', task_param = nil
95
106
  require File.expand_path("#{File.dirname(__FILE__)}/../../config/muck_raker_environment")
96
- show_options
107
+ task_param ||= (ENV['redo'] == 'true') ? 'redo' : ''
97
108
  FileUtils.mkdir_p(RAKER_PIDS_PATH)
98
109
  FileUtils.mkdir_p(RAKER_LOGS_PATH)
110
+ FileUtils.mkdir_p(RAKER_FEED_ARCHIVE_PATH)
111
+ show_options
112
+ options = "-DRAILS_ENV=#{ENV['RAILS_ENV']} "
113
+ options << "-DDEBUG=#{ENV['DEBUG']} "
114
+ options << "-Dsolr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
115
+ options << "-Dsolr.data.dir=\"#{SOLR_DATA_PATH}\" "
116
+ options << "-Drecommender.database.config_file=\"#{RAKER_DATABASE_CONFIG_FILE}\" "
117
+ options << "-Drecommender.log_file=\"#{RAKER_LOGS_PATH}/raker.log\" "
118
+ options << "-Drecommender.log_to_console=\"#{ENV['DEBUG']}\" "
119
+ options << "-Daggregator.feed_archive_path=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
120
+ javaclass = "edu.usu.cosl.recommenderd.Recommenderd "
99
121
  separator = (RUBY_PLATFORM =~ /(win|w)32$/ ? ';' : ':')
100
- puts "RAILS_ENV=" + ENV['RAILS_ENV']
101
122
  Dir.chdir(File.join(File.dirname(__FILE__), '../../', 'raker', 'lib')) do
102
123
  jars = Dir['*.jar','solr/*.jar'].join(separator)
103
- options = "-DRAILS_ENV=#{ENV['RAILS_ENV']} "
104
- options << "-Dsolr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
105
- options << "-Dsolr.data.dir=\"#{SOLR_DATA_PATH}\" "
106
- options << "-Draker.database.config=\"#{RAKER_DATABASE_CONFIG}\" "
107
- options << "-Draker.log.file=\"#{RAKER_LOG_FILE}\" "
108
- options << "-Draker.feed_archive_dir=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
109
- options << "-Draker.log_to_console=\"#{RAKER_LOG_TO_CONSOLE}\" "
110
124
  classpath = "-classpath #{jars}#{separator}. "
111
- memory_options = "-Xms32m -Xmx128m "
112
- javaclass = "edu.usu.cosl.recommenderd.Recommenderd "
113
- cmd = "java " + options + classpath + memory_options + javaclass + task + " " + param
125
+ cmd = "java " + options + classpath + javaclass + task + ' ' + task_param
114
126
  puts ("Executing: " + cmd)
115
127
  windows = RUBY_PLATFORM =~ /(win|w)32$/
116
128
  if windows
117
- exec cmd
129
+ exec cmd
118
130
  else
119
131
  pid = fork do
120
132
  exec cmd
121
133
  end
122
134
  end
123
135
  sleep(5)
124
- File.open(pid_file, "w"){ |f| f << pid} unless windows
125
- puts "#{ENV['RAILS_ENV']} Muck raker harvest command started successfully, pid: #{pid}."
136
+ File.open("#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid", "w"){ |f| f << pid} unless windows
137
+ puts "#{ENV['RAILS_ENV']} Muck raker command started successfully, pid: #{pid}."
126
138
  end
127
139
  end
128
-
129
- def show_options
130
- puts "RAILS_ENV=#{ENV['RAILS_ENV']} "
131
- puts "solr.solr.home=\"#{SOLR_CONFIG_PATH}\" "
132
- puts "solr.data.dir=\"#{SOLR_DATA_PATH}\" "
133
- puts "raker.db.config=\"#{RAKER_DATABASE_CONFIG}\" "
134
- puts "raker.log.dir=\"#{RAKER_LOGS_PATH}\" "
135
- puts "raker.feed_archive_dir=\"#{RAKER_FEED_ARCHIVE_PATH}\" "
136
- puts "raker.log_to_console=\"#{RAKER_LOG_TO_CONSOLE}\" "
137
- end
138
-
139
- desc "Print out muck raker dependent environment variables"
140
- task :show_options => :environment do
141
- require File.expand_path("#{File.dirname(__FILE__)}/../../config/muck_raker_environment")
142
- show_options
143
- end
144
140
 
145
- desc "Start the recommender daemon process"
141
+ desc "Start daemon."
146
142
  task :start => :environment do
147
- raker_task
143
+ daemon_task 'all'
148
144
  end
149
145
 
150
- desc "Redo everything (re-index, redo autogenerated subjects, rebuild tag clouds re-recommend)"
146
+ desc "Redo everything once and quit."
151
147
  task :rebuild => :environment do
152
- raker_task 'daemon', 'full'
153
- end
154
-
155
- desc "Get some data into the recommender system"
156
- task :bootstrap => :environment do
157
- raker_task 'bootstrap'
148
+ daemon_task 'all', 'redo'
158
149
  end
159
150
 
160
- desc "Harvest without recommending"
151
+ desc "Harvest stale feeds. Add redo=true to harvest all feeds."
161
152
  task :harvest => :environment do
162
- raker_task 'harvest'
153
+ daemon_task 'harvest'
163
154
  end
164
155
 
165
- desc "Update solr index to changes made to recommender database"
156
+ desc "Index new entries."
166
157
  task :index => :environment do
167
- raker_task 'index'
158
+ daemon_task 'index'
159
+ end
160
+
161
+ desc "Re-index all entries."
162
+ task :reindex => :environment do
163
+ daemon_task 'index', 'redo'
168
164
  end
169
165
 
170
- desc "Recommend without harvesting"
166
+ desc "Update recommendations."
171
167
  task :recommend => :environment do
172
- raker_task 'recommend'
168
+ daemon_task 'recommend'
173
169
  end
174
170
 
175
- desc "Redo recommendations only"
171
+ desc "Redo all recommendations."
176
172
  task :redo_recommendations => :environment do
177
- raker_task 'recommend', 'full'
173
+ daemon_task 'recommend', 'redo'
178
174
  end
179
175
 
180
- desc "Autogenerate subjects"
176
+ desc "Auto-generate tags for new entries that don't have at least 4. Add redo=true to regenerate for all entries."
181
177
  task :subjects => :environment do
182
- raker_task 'auto_generate_subjects'
178
+ daemon_task 'subjects'
183
179
  end
184
180
 
185
- desc "Generate tag clouds"
181
+ desc "Re-generate tag clouds."
186
182
  task :tag_clouds => :environment do
187
- raker_task 'tag_clouds'
183
+ daemon_task 'tag_clouds', 'redo'
188
184
  end
189
185
 
190
- desc "Stop a raker daemon process"
186
+ desc "Stop a raker daemon process."
191
187
  task :stop => :environment do
192
- file_path = pid_file
188
+ file_path = "#{RAKER_PIDS_PATH}/#{ENV['RAILS_ENV']}_pid"
193
189
  if File.exists?(file_path)
194
190
  File.open(file_path, "r") do |f|
195
191
  pid = f.readline
196
192
  Process.kill('TERM', pid.to_i)
197
193
  end
198
194
  File.unlink(file_path)
199
- puts "Raker task successfully."
195
+ puts "Raker shutdown successfully."
200
196
  else
201
197
  puts "PID file not found at #{file_path}. Either Raker is not running or no PID file was written."
202
198
  end
203
199
  end
204
200
 
205
- end
201
+ end
206
202
 
207
203
  end
208
204
 
data/muck-raker.gemspec CHANGED
@@ -5,13 +5,13 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{muck-raker}
8
- s.version = "0.3.3"
8
+ s.version = "0.3.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Joel Duffin Justin Ball"]
12
- s.date = %q{2009-11-05}
12
+ s.date = %q{2009-11-13}
13
13
  s.description = %q{The aggregation and recommendation daemons for the muck system.}
14
- s.email = %q{justinball@gmail.com}
14
+ s.email = %q{justin@tatemae.com}
15
15
  s.extra_rdoc_files = [
16
16
  "README.rdoc"
17
17
  ]
Binary file
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: muck-raker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Joel Duffin Justin Ball
@@ -9,12 +9,12 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-05 00:00:00 -07:00
12
+ date: 2009-11-13 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
16
  description: The aggregation and recommendation daemons for the muck system.
17
- email: justinball@gmail.com
17
+ email: justin@tatemae.com
18
18
  executables: []
19
19
 
20
20
  extensions: []