pampa_workers 0.0.38

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,49 @@
1
+ module BlackStack
2
+
3
+ # process class
4
+ class MyCrawlProcess < BlackStack::MyLocalProcess
5
+
6
+ attr_accessor :nErrors, :nSuccesses, :browser, :proxy, :bot
7
+
8
+ # constructor
9
# Builds a crawl process.
#
# All six arguments are handed, unchanged and in the same order, to the
# parent constructor. Afterwards the error and success counters are set to 0
# and the browser, proxy and bot attributes are set to nil.
def initialize(
  the_worker_name,
  the_division_name,
  the_minimum_enlapsed_seconds=MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
  the_verify_configuration=true,
  the_email=nil,
  the_password=nil
)
  super(
    the_worker_name,
    the_division_name,
    the_minimum_enlapsed_seconds,
    the_verify_configuration,
    the_email,
    the_password
  )

  # counters read by canRun? / whyCantRun
  self.nErrors = self.nSuccesses = 0

  # nothing is attached to the process yet
  self.browser = self.proxy = self.bot = nil
end
24
+
25
# Tells whether the crawl loop may keep going.
#
# Returns the parent's verdict as-is when that verdict is falsy. Otherwise
# returns true only while both counters are strictly below the limits read
# from Params ("crawl.company.discretion.max_errors" and
# "crawl.company.discretion.max_successes").
def canRun?()
  parent_verdict = super
  return parent_verdict unless parent_verdict

  return false unless nErrors < Params.getValue("crawl.company.discretion.max_errors")

  nSuccesses < Params.getValue("crawl.company.discretion.max_successes")
  # Disabled check kept for reference:
  #(Params.getValue("crawl.company.use_proxy")==false || Company.availableProxiesWithDiscretionForCrawl() > 0)
end
31
+
32
# Explains why canRun? is refusing to continue.
#
# A specific reason given by the parent class always wins. When the parent
# has nothing specific to say, the crawl limits are checked: first the error
# counter, then the success counter, each against its limit from Params.
#
# The parent is considered to have "nothing specific" when it returns an
# empty value or the string "unknown". The latter is the fallback answer of
# the base MyProcess#whyCantRun; without accepting it here the two
# crawl-specific messages could never be produced on top of that base
# implementation.
#
# Returns a String with the reason, or the parent's own answer when no crawl
# limit has been reached.
def whyCantRun()
  reason = super
  parent_has_reason = reason.to_s.size > 0 && reason.to_s != "unknown"
  return reason if parent_has_reason

  if self.nErrors >= Params.getValue("crawl.company.discretion.max_errors")
    return "Reached the max number of errors (#{self.nErrors.to_s})"
  end

  if self.nSuccesses >= Params.getValue("crawl.company.discretion.max_successes")
    return "Reached the max number of successes (#{self.nSuccesses.to_s})"
  end

  reason
end
45
+
46
+ end # class MyCrawlProcess
47
+
48
+
49
+ end # module BlackStack
@@ -0,0 +1,164 @@
1
+ module BlackStack
2
+
3
+ # Process located in the same LAN than the Database Server
4
+ class MyLocalProcess < BlackStack::MyChildProcess
5
+
6
+ # constructor
7
# Builds a local process. This constructor adds nothing of its own: every
# argument goes straight to the parent constructor.
def initialize(
  the_worker_name,
  the_division_name,
  the_minimum_enlapsed_seconds=BlackStack::MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
  the_verify_configuration=true,
  the_email=nil,
  the_password=nil
)
  # A bare `super` re-sends all six parameters of this method, in order.
  super
end
17
+
18
# Looks up the Division record for this process.
#
# * division_name == "local": the first division flagged :central.
# * otherwise: finds the division with that name, then returns the first
#   division sharing its db_name and flagged :home. Returns nil when no
#   division has that name.
def division()
  if self.division_name == "local"
    return BlackStack::Division.where(:central=>true).first
  end

  named = BlackStack::Division.where(:name=>self.division_name).first
  return nil if named.nil?

  BlackStack::Division.where(:db_name=>named.db_name, :home=>true).first
end
30
+
31
# Returns the first Worker record whose name equals this process's full
# worker name, or whatever `first` yields when there is no match.
def worker()
  matches = BlackStack::Worker.where(:name=>self.fullWorkerName)
  matches.first
end
34
+
35
+
36
+ # update worker configuration in the division
37
# Creates or refreshes this process's Worker record.
#
# The record is looked up by full worker name. When it does not exist a new
# one is prepared (fresh id from guid(), empty process, last_ping_time from
# now(), and the name). In both cases the assignment fields (assigned_process,
# id_object, division_name, id_division) are then copied from this process
# and the record is saved exactly once.
#
# Previously a new record was saved twice, the first time without
# id_division, and the update branch was guarded by a nil check that could
# never fail.
#
# Returns the result of the record's `save`.
def updateWorker()
  w = BlackStack::Worker.where(:name=>self.fullWorkerName).first

  if w.nil?
    # first time this worker is seen: fill the creation-only fields
    w = BlackStack::Worker.new
    w.id = guid()
    w.process = ''
    w.last_ping_time = now()
    w.name = self.fullWorkerName
  end

  # fields refreshed on every call, for new and existing workers alike
  w.assigned_process = self.assigned_process
  w.id_object = self.id_object
  w.division_name = self.division_name
  w.id_division = self.id_division
  w.save
end
58
+
59
# Main loop of a local process.
#
# Sequence:
# 1. call the parent `run`, build the remote logger, fetch parameters from
#    the central (`get`) and store them on the worker record (`updateWorker`);
# 2. while `canRun?` holds: disconnect from the database, load the worker
#    record, mark it active with the current process, ping, and call
#    `process(ARGV)`. Any StandardError raised in that step is logged and the
#    loop carries on. Then parameters are refreshed, the worker record is
#    updated again and `doSleep` is called with the iteration's start time;
# 3. once `canRun?` is false: log the reason from `whyCantRun` and
#    disconnect from the database.
#
# Changes from the previous version: the "Object:" log line no longer ends
# with a stray ")", and a missing worker record is reported with an explicit
# message instead of a NoMethodError on nil.
def run()
  super

  # create the logger object
  self.logger = RemoteLogger.new(
    "#{self.fullWorkerName}.log",
    BlackStack::Pampa::api_protocol,
    BlackStack::Pampa::api_domain,
    BlackStack::Pampa::api_port,
    BlackStack::Pampa::api_key
  )

  # announce myself in the log
  logger.log "Child process is alive!"

  # fetch the worker parameters
  logger.logs "Update from central (1)... "
  self.get
  logger.done

  # store this worker's data
  logger.logs "Update worker (1)... "
  self.updateWorker
  logger.done

  while (self.canRun?)
    # reset the logger's nesting counter
    self.logger.reset()

    # announce the iteration in the log
    logger.log "Going to Run Local"
    logger.log "Process: #{self.assigned_process.to_s}."
    logger.log "Object: #{(self.id_object.to_s.size==0)? 'n/a' : self.id_object.to_s}"

    # start time of this iteration, used by doSleep below
    start_time = Time.now

    begin
      # release resources
      logger.logs "Release resources... "
      GC.start
      DB.disconnect
      logger.done

      # load the worker record
      logger.logs "Load the worker... "
      the_worker = self.worker
      logger.done

      # fail with a readable message; handled by the rescue below
      raise "Worker '#{self.fullWorkerName}' not found." if the_worker.nil?

      # record which process this worker is currently running
      logger.logs "Update current process... "
      the_worker.process=self.assigned_process
      the_worker.active = true
      the_worker.save()
      logger.done

      logger.logs "Ping... "
      the_worker.ping()
      logger.done

      # do the actual work
      self.process(ARGV)

    rescue => e
      puts ""
      logger.log "Local Process Error: " + e.to_s + "\r\n" + e.backtrace.join("\r\n").to_s
    end

    # fetch the worker parameters again
    logger.logs "Update from central (2)... "
    self.get
    logger.done

    # store this worker's data again
    logger.logs "Update worker (2)... "
    self.updateWorker
    logger.done

    # sleep
    logger.logs "Sleep... "
    self.doSleep(start_time)
    logger.done

    logger.log "-------------------------------------------"

    DB.disconnect
    GC.start
  end # main while

  logger.log "Process Finish!"
  logger.log "Finish Reason: " + self.whyCantRun.to_s

  logger.logs "Disconnect to Database... "
  begin
    DB.disconnect()
    logger.done
  rescue => e
    logger.error(e)
  end
end # run
161
+
162
+ end # class MyLocalProcess
163
+
164
+ end # module BlackStack
@@ -0,0 +1,141 @@
1
+ module BlackStack
2
+
3
+ # es un proceso sin conexion a base de datos, que itera infinitamente.
4
+ # en cada iteracion saluda a la central (hello), obtiene parametros (get)
5
+ class MyParentProcess < BlackStack::MyProcess
6
# Endless supervisor loop of the parent process.
#
# Each iteration:
# 1. POSTs to the central's hello.json, then get.json; a non-success status
#    is logged and the rest of the iteration is skipped;
# 2. copies the returned parameters onto this object;
# 3. when a division is assigned, calls `notify` and, when a process is
#    assigned too, spawns `ruby <assigned_process> name=... division=...`
#    and waits for it to finish;
# 4. calls `doSleep` with the iteration's start time.
#
# An Interrupt kills the child that is still being waited for (if any), logs
# and calls exit(0). Any other StandardError is logged, followed by a sleep of
# minimum_enlapsed_seconds, and the loop goes on.
#
# Changes from the previous version:
# * the child is killed with `taskkill /pid`; `/im` expects an image name,
#   so passing a PID to it never matched anything;
# * `pid` is cleared once the child has been reaped, so a later Interrupt
#   cannot target a PID that no longer belongs to the child;
# * the child is spawned from an argument list rather than one interpolated
#   command string, so values received from the central are never parsed by
#   a shell and paths containing spaces stay in one piece.
def run()
  super

  # create the logger object
  self.logger = BlackStack::RemoteLogger.new(
    "#{self.fullWorkerName}.log",
    BlackStack::Pampa::api_protocol,
    BlackStack::Pampa::api_domain,
    BlackStack::Pampa::api_port,
    BlackStack::Pampa::api_key
  )

  # pid of the child being waited for; nil while no child is alive
  pid = nil
  while (true)
    begin
      GC.start # 331 - avoid lack of memory
      #DB.disconnect # this process is decoupled from the database connection

      # reset the logger's nesting counter
      self.logger.reset()

      # get the start time
      start_time = Time.now

      # say hello to the central
      url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/hello.json"
      logger.logs("Hello to the central... ")
      res = BlackStack::Netting::call_post(url, {
        'api_key' => BlackStack::Pampa::api_key,
        'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
      )
      parsed = JSON.parse(res.body)
      if (parsed['status'] != BlackStack::Netting::SUCCESS)
        self.logger.logf("Error: " + parsed['status'].to_s)
      else
        self.logger.done

        logger.logs("Get worker data... ")
        url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/get.json"
        res = BlackStack::Netting::call_post(url, {
          'api_key' => BlackStack::Pampa::api_key,
          'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
        )
        parsed = JSON.parse(res.body)
        if (parsed['status'] != BlackStack::Netting::SUCCESS)
          self.logger.logf("Error: " + parsed['status'].to_s)
        else
          # map response
          self.id = parsed['id']
          self.assigned_process = parsed['assigned_process']
          self.id_object = parsed['id_object']
          self.id_division = parsed['id_division']
          self.division_name = parsed['division_name']
          self.ws_url = parsed['ws_url']
          self.ws_port = parsed['ws_port']
          self.logger.logf "done (#{self.division_name})"

          self.logger.logs "Notify division... "
          if self.division_name.to_s.size == 0
            self.logger.logf "no division assigned"
          else
            self.notify # notify the division
            self.logger.done

            self.logger.logs "Spawn child process... "
            if self.assigned_process.to_s.size > 0
              # argument-list form: no shell is involved
              pid = Process.spawn(
                "ruby",
                self.assigned_process.to_s,
                "name=#{self.worker_name}",
                "division=#{self.division_name}"
              )
              logger.logf "done (pid=#{pid.to_s})"

              logger.log("Wait to child process to finish.")
              Process.wait(pid)

              # the child is gone; forget its pid
              pid = nil
            else
              self.logger.logf "no process assigned"
            end # if self.assigned_process.to_s.size > 0
          end # if self.division_name.to_s.size == 0
        end # get.json status
      end # hello.json status

      logger.logs "Sleep... "
      self.doSleep(start_time)
      logger.done

      logger.log "-------------------------------------------"

    rescue Interrupt => e
      logger.reset

      logger.log "Interrupt signal!"

      logger.logs "Kill process... "
      if (pid!=nil)
        # /pid selects by process id; /t also ends its child processes
        system("taskkill /pid #{pid.to_s} /f /t >nul 2>&1")
      end
      logger.done

      logger.logs "Disconnect to Database... "
      begin
        # DB.disconnect()
        logger.done
      rescue => e
        logger.error(e)
      end

      logger.log "Process is out."
      exit(0)

    rescue => e
      begin
        logger.log "Unhandled exception: #{e.to_s}\r\n#{e.backtrace.join("\r\n").to_s}"
        logger.logs "Sleep #{self.minimum_enlapsed_seconds.to_s} seconds... "
        sleep(self.minimum_enlapsed_seconds)
        logger.done
      rescue => e
        # the logger itself failed: fall back to standard output
        puts "Fatal error: #{e.to_s}"
        print "Sleep #{self.minimum_enlapsed_seconds.to_s} seconds... "
        sleep(self.minimum_enlapsed_seconds)
        puts
      end

    end # rescue

  end # while

end # def run()
138
+
139
+ end # class MyParentProcess
140
+
141
+ end # module BlackStack
data/lib/myprocess.rb ADDED
@@ -0,0 +1,264 @@
1
+ module BlackStack
2
+
3
+ class MyProcess
4
+ DEFAULT_MINIMUM_ENLAPSED_SECONDS = 60
5
+
6
+ attr_accessor :assigned_process_changed, :assigned_division_changed, :verify_configuration
7
+ attr_accessor :logger, :id, :worker_name, :division_name, :minimum_enlapsed_seconds, :assigned_process, :id_object, :id_division, :ws_url, :ws_port
8
+ attr_accessor :email, :password
9
+
10
+ # constructor
11
# Stores the constructor arguments on the new process.
#
# Besides copying the arguments, it clears both "changed" flags and records
# the expanded path of the running script ($0) as the assigned process. The
# worker name is stored as a String built from the given value.
def initialize(
  the_worker_name,
  the_division_name,
  the_minimum_enlapsed_seconds=MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
  the_verify_configuration=true,
  the_email=nil,
  the_password=nil
)
  # nothing has changed yet
  self.assigned_process_changed = self.assigned_division_changed = false

  # identity
  self.worker_name = "#{the_worker_name}"
  self.division_name = the_division_name
  self.assigned_process = File.expand_path($0)

  # behaviour
  self.minimum_enlapsed_seconds = the_minimum_enlapsed_seconds
  self.verify_configuration = the_verify_configuration

  # credentials
  self.email = the_email
  self.password = the_password
end
29
+
30
+ # retrieves the id of the current process
31
# Returns the id of the running Ruby process as a String.
def pid()
  "#{Process.pid}"
end
34
+
35
+ # Returns an array of hashes.
36
+ # => Each hash has the form {:executablepath, :pid, :ppid},
37
+ # => where executablepath is the full path of the process, pid is the process id
38
+ # => and ppid is the id of the parent process.
39
# Lists processes by shelling out to `wmic` (so, Windows only).
#
# Every output line that ends in two whitespace-separated numbers becomes a
# hash {:executablepath, :pid, :ppid}, all values Strings; other lines
# (header, blanks, rows without a path) are ignored.
#
# NOTE(review): the first number is stored as :ppid and the second as :pid,
# i.e. the opposite of the order requested on the command line. Presumably
# wmic prints its columns sorted by name (ParentProcessId before ProcessId),
# which would make this mapping correct - confirm on a Windows host.
def list()
  raw = `wmic process get executablepath,processid,parentprocessid`

  rows = raw.split(/\n+/).map { |line|
    line.strip.scan(/^(.+)\s+(\d+)\s+(\d+)$/)[0]
  }

  rows.compact.map { |path, first_number, second_number|
    {
      :executablepath => path.strip.to_s,
      :pid => second_number.to_s,
      :ppid => first_number.to_s,
    }
  }
end
56
+
57
+ # Runs TASKKILL /F /PID #{the_pid} and returns the result of `system` (not the command's output)
58
# Runs `TASKKILL /F /PID <the_pid>` through Kernel#system and returns what
# `system` returns (not the text printed by the command).
def self.kill(the_pid)
  command = "TASKKILL /F /PID #{the_pid}"
  system(command)
end
61
+
62
+ # Computes the difference in seconds between the current time and the_start_time.
63
+ # If the difference is less than minimum_enlapsed_seconds, sleeps for the remaining time.
64
# Pads an iteration up to minimum_enlapsed_seconds.
#
# the_start_time - Time at which the iteration started.
#
# Sleeps for whatever is left of minimum_enlapsed_seconds, measured from
# the_start_time; does nothing when that much time has already gone by.
# Returns the value of `sleep`, or nil when no sleep was needed.
def doSleep(the_start_time)
  remaining = self.minimum_enlapsed_seconds - (Time.now - the_start_time)
  sleep(remaining) if remaining > 0
end
73
+
74
+ # This function works in windows only
75
+ # TODO: Esta funcion no retorna la mac address completa
76
+ # TODO: Validar que no se retorne una macaddress virtual, con todos valores en 0
77
# Returns whatever BlackStack::SimpleHostMonitoring.macaddress returns.
def self.macaddress()
  host_monitor = BlackStack::SimpleHostMonitoring
  host_monitor.macaddress
end
80
+
81
# Builds "<hostname>.<macaddress>.<name>" from Socket.gethostname,
# MyProcess.macaddress and the given name.
def self.fullWorkerName(name)
  host = Socket.gethostname
  mac = MyProcess.macaddress
  "#{host}.#{mac}.#{name}"
end
84
+
85
# Full worker name of this instance: MyProcess.fullWorkerName applied to
# its worker_name.
def fullWorkerName()
  short_name = self.worker_name
  MyProcess.fullWorkerName(short_name)
end
88
+
89
+ # saluda a la central
90
# Says hello to the central.
#
# POSTs the API key, the full worker name and the data returned by
# BlackStack::RemoteHost.new.poll to <api_url>/api1.3/pampa/hello.json.
#
# Raises a RuntimeError carrying the returned status when that status is not
# BlackStack::Netting::SUCCESS. Returns nil otherwise.
def hello()
  url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/hello.json"
  payload = {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName
  }.merge( BlackStack::RemoteHost.new.poll )

  response = BlackStack::Netting::call_post(url, payload)
  status = JSON.parse(response.body)['status']

  raise status.to_s if status != BlackStack::Netting::SUCCESS
end # hello()
102
+
103
+ # notifico mis parametros (assigned_process, id_object) a la division asignada
104
# Sends the given assignment to the division's notify.json endpoint.
#
# new_assigned_process - value sent as 'assigned_process'.
# new_id_object        - value sent as 'id_object'.
#
# Does nothing and returns nil when ws_url or ws_port is blank. Otherwise
# returns the response of the POST; the response is not inspected here.
def set(new_assigned_process, new_id_object)
  return unless self.ws_url.to_s.size > 0 && self.ws_port.to_s.size > 0

  url = "#{BlackStack::Pampa::api_protocol}://#{self.ws_url.to_s}:#{self.ws_port.to_s}/api1.3/pampa/notify.json"
  payload = {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName,
    'assigned_process' => new_assigned_process,
    'id_object' => new_id_object
  }.merge( BlackStack::RemoteHost.new.poll )

  BlackStack::Netting::call_post(url, payload)
end
115
+
116
+ # obtiene sus parametros de la central
117
# Fetches this worker's parameters from the central and applies them.
#
# Steps:
# 1. POST to <api_url>/api1.3/pampa/get.json; a status other than
#    BlackStack::Netting::SUCCESS raises a RuntimeError with that status.
# 2. When verify_configuration is set, compare the new assignment with the
#    current one and update assigned_process_changed /
#    assigned_division_changed. Each flag is only touched when the current
#    value it compares against is non-blank.
# 3. Call `set` once with the parameters still pointing at the previous
#    division, copy the response onto this object, then call `set` again so
#    the newly assigned division is told as well.
#
# A null 'assigned_process' in the response is now treated as "the process
# changed" (it used to raise a TypeError from File.expand_path).
#
# Returns the result of the second `set` call.
def get()
  url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/get.json"
  res = BlackStack::Netting::call_post(url, {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
  )
  parsed = JSON.parse(res.body)
  raise parsed['status'].to_s if parsed['status'] != BlackStack::Netting::SUCCESS

  if self.verify_configuration
    # has a different process been assigned?
    if self.assigned_process.to_s.size > 0
      current_path = File.expand_path(self.assigned_process)
      new_process = parsed['assigned_process'].to_s
      # no process at all counts as a change
      new_path = new_process.empty? ? nil : File.expand_path(new_process)
      self.assigned_process_changed = (current_path != new_path)
    end

    # has a different division been assigned?
    if self.id_division.to_s.size > 0
      self.assigned_division_changed =
        (self.id_division.to_guid != parsed['id_division'].to_guid)
    end
  end # verify_configuration

  # tell the division assigned so far (if any) about the new configuration
  self.set(parsed['assigned_process'], parsed['id_object'])

  self.id = parsed['id']
  self.assigned_process = parsed['assigned_process']
  self.id_object = parsed['id_object']
  self.id_division = parsed['id_division']
  self.division_name = parsed['division_name']
  self.ws_url = parsed['ws_url']
  self.ws_port = parsed['ws_port']

  # tell the newly assigned division about the new configuration
  self.set(parsed['assigned_process'], parsed['id_object'])
end # get()
165
+
166
+
167
+ # update worker configuration in the division
168
# Placeholder meant to be overridden; this version always raises a
# RuntimeError with the message "Abstract Method.".
def updateWorker()
  raise RuntimeError, "Abstract Method."
end
171
+
172
+ # ping the central database
173
# Pings the central and, when ws_url and ws_port are both set, the division.
#
# Both requests POST the API key, the full worker name and the data from
# BlackStack::RemoteHost.new.poll to an /api1.3/pampa/ping.json endpoint.
#
# Raises a RuntimeError with the returned status when the central's status
# is not BlackStack::Netting::SUCCESS, or when the division's status is not
# "success". Returns nil otherwise.
def ping()
  # central
  central_url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/ping.json"
  central_reply = BlackStack::Netting::call_post(central_url, {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
  )
  central_status = JSON.parse(central_reply.body)['status']
  raise central_status.to_s if central_status != BlackStack::Netting::SUCCESS

  # division, only when its address is known
  return if self.ws_url.nil? || self.ws_port.nil?

  division_url = "#{BlackStack::Pampa::api_protocol}://#{self.ws_url.to_s}:#{self.ws_port.to_s}/api1.3/pampa/ping.json"
  division_reply = BlackStack::Netting::call_post(division_url, {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
  )
  division_status = JSON.parse(division_reply.body)['status']
  raise division_status.to_s if division_status != "success"
end # ping()
198
+
199
+ # se notifica al dispatcher de la division
200
# Notifies the division of this worker's current assignment.
#
# POSTs the API key, full worker name, assigned_process, id_object and the
# data from BlackStack::RemoteHost.new.poll to the division's
# /api1.3/pampa/notify.json endpoint.
#
# Raises a RuntimeError when ws_url or ws_port is nil, or (with the returned
# status as message) when the status is not "success". Returns nil otherwise.
def notify()
  if self.ws_url.nil? || self.ws_port.nil?
    raise "Cannot notify. Worker has not parameters."
  end

  url = "#{BlackStack::Pampa::api_protocol}://#{self.ws_url}:#{self.ws_port}/api1.3/pampa/notify.json"
  payload = {
    'api_key' => BlackStack::Pampa::api_key,
    'name' => self.fullWorkerName,
    'assigned_process' => self.assigned_process,
    'id_object' => self.id_object
  }.merge( BlackStack::RemoteHost.new.poll )

  reply = BlackStack::Netting::call_post(url, payload)
  status = JSON.parse(reply.body)['status']
  raise status.to_s if status != "success"
end
219
+
220
+ # Get the data object of the divison assigned to this worker.
221
+ # Needs database connections. So it's available for ChildProcess only.
222
# Placeholder meant to be overridden; this version always raises a
# RuntimeError with the message "This is an abstract method.".
def division()
  raise RuntimeError, "This is an abstract method."
end
225
+
226
+ # Get the data object of worker linked to this process.
227
+ # Needs database connections. So it's available for ChildProcess only.
228
# Placeholder meant to be overridden; this version always raises a
# RuntimeError with the message "This is an abstract method.".
def worker()
  raise RuntimeError, "This is an abstract method."
end
231
+
232
+ # retorna true si el proceso hijo (child) esta habilitado para trabajar.
233
# True only when both assigned_process_changed and assigned_division_changed
# are exactly false; false in every other case.
def canRun?()
  return false unless self.assigned_process_changed == false

  self.assigned_division_changed == false
end
237
+
238
# Names the reason canRun? is false.
#
# Returns "Assigned process has changed." when assigned_process_changed is
# true, else "Assigned division has changed." when assigned_division_changed
# is true, else "unknown".
def whyCantRun()
  return "Assigned process has changed." if self.assigned_process_changed == true
  return "Assigned division has changed." if self.assigned_division_changed == true

  "unknown"
end
247
+
248
+ # este metodo
249
+ # ejecuta el trabajo para el que fue creado el objeto.
250
# Placeholder for the actual work of the process, meant to be overridden.
# This version ignores argv and always raises a RuntimeError with the message
# "This is an abstract method.".
def process(argv)
  raise RuntimeError, "This is an abstract method."
end
253
+
254
+ # ejecuta el proceso, en modo parent, bot o child segun la clase que se implemente.
255
+ # en modo parent, hace un loop infinito.
256
+ # en modo bot o child, hace un loop hasta que el metodo canRun? retorne false.
257
+ # en modo bot o child, invoca al metodo process() en cada ciclo.
258
# Base `run`: does nothing and returns nil. A raise that would have made it
# abstract is kept below, disabled.
def run()
  #raise "This is an abstract method"
  nil
end # run
261
+
262
+ end # class MyProcess
263
+
264
+ end # module BlackStack