pampa_workers 0.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
module BlackStack

  # Local process for crawling jobs. On top of the run conditions inherited
  # from BlackStack::MyLocalProcess, it stops once the number of errors or
  # successes reaches the limits stored in the "crawl.company.discretion.*"
  # parameters.
  class MyCrawlProcess < BlackStack::MyLocalProcess

    # nErrors / nSuccesses: counters compared against the configured limits
    #   by canRun? and whyCantRun. This class only initialises them to 0;
    #   incrementing them is left to whoever uses the accessors.
    # browser / proxy / bot: initialised to nil and not used anywhere else
    #   in this class.
    attr_accessor :nErrors, :nSuccesses, :browser, :proxy, :bot

    # Forwards every argument unchanged to the superclass constructor, then
    # resets the counters and the browser/proxy/bot attributes.
    def initialize(
      the_worker_name,
      the_division_name,
      the_minimum_enlapsed_seconds=MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
      the_verify_configuration=true,
      the_email=nil,
      the_password=nil
    )
      super(the_worker_name, the_division_name, the_minimum_enlapsed_seconds, the_verify_configuration, the_email, the_password)
      self.nErrors = 0
      self.nSuccesses = 0
      self.browser = nil
      self.proxy = nil
      self.bot = nil
    end

    # Returns a truthy value while the superclass allows running and both
    # counters are still strictly below their configured maximums.
    def canRun?()
      super &&
        nErrors < Params.getValue("crawl.company.discretion.max_errors") &&
        nSuccesses < Params.getValue("crawl.company.discretion.max_successes")
      #(Params.getValue("crawl.company.use_proxy")==false || Company.availableProxiesWithDiscretionForCrawl() > 0)
    end

    # Returns a human readable reason why canRun? is false.
    #
    # A specific reason given by the superclass always wins. When the
    # superclass has nothing to report, the crawl limits are checked.
    # BlackStack::MyProcess#whyCantRun answers "unknown" (not an empty
    # string) in that situation, so both values are treated as "no reason";
    # checking only for an empty string would make the messages below
    # unreachable whenever that base implementation is the one in effect.
    def whyCantRun()
      ret = super
      return ret unless ret.to_s.empty? || ret.to_s == "unknown"

      if self.nErrors >= Params.getValue("crawl.company.discretion.max_errors")
        return "Reached the max number of errors (#{self.nErrors.to_s})"
      end

      if self.nSuccesses >= Params.getValue("crawl.company.discretion.max_successes")
        return "Reached the max number of successes (#{self.nSuccesses.to_s})"
      end

      ret
    end

  end # class MyCrawlProcess


end # module BlackStack
@@ -0,0 +1,164 @@
1
module BlackStack

  # Process located in the same LAN as the database server. It reads and
  # writes the Division and Worker records directly through the models.
  class MyLocalProcess < BlackStack::MyChildProcess

    # Forwards every argument unchanged to the superclass constructor.
    def initialize(
      the_worker_name,
      the_division_name,
      the_minimum_enlapsed_seconds=BlackStack::MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
      the_verify_configuration=true,
      the_email=nil,
      the_password=nil
    )
      super(the_worker_name, the_division_name, the_minimum_enlapsed_seconds, the_verify_configuration, the_email, the_password)
    end

    # Returns the Division record this process works for, or nil.
    #
    # * division_name == "local": the first division flagged :central.
    # * otherwise: looks the division up by name and returns the first
    #   division sharing its db_name that is flagged :home; nil when no
    #   division has that name.
    def division()
      if self.division_name == "local"
        return BlackStack::Division.where(:central=>true).first
      end

      d = BlackStack::Division.where(:name=>self.division_name).first
      return nil if d.nil?

      BlackStack::Division.where(:db_name=>d.db_name, :home=>true).first
    end

    # Returns the Worker record whose name is this process' full worker
    # name, or nil when there is none.
    def worker()
      BlackStack::Worker.where(:name=>self.fullWorkerName).first
    end


    # Creates or updates the Worker record of this process with its current
    # configuration (assigned_process, id_object, division_name,
    # id_division).
    #
    # The record is saved exactly once. A brand new worker is therefore
    # inserted with id_division already set, instead of being saved first
    # without it and then saved again.
    def updateWorker()
      w = BlackStack::Worker.where(:name=>self.fullWorkerName).first
      if w.nil?
        w = BlackStack::Worker.new
        w.id = guid()             # guid() and now() are helpers defined outside this file
        w.process = ''
        w.last_ping_time = now()
        w.name = self.fullWorkerName
      end

      w.assigned_process = self.assigned_process
      w.id_object = self.id_object
      w.division_name = self.division_name
      w.id_division = self.id_division
      w.save
    end

    # Main loop of the process.
    #
    # Creates the remote logger, pulls the configuration from the central
    # (get) and registers the worker (updateWorker). Then, while canRun? is
    # true, each cycle marks the worker as active, pings, calls
    # process(ARGV), refreshes the configuration and sleeps through
    # doSleep. Errors raised inside a cycle are logged and do not break the
    # loop. When the loop ends, the reason is logged and the database
    # connection is closed.
    def run()
      super

      # create the logger
      self.logger = BlackStack::RemoteLogger.new(
        "#{self.fullWorkerName}.log",
        BlackStack::Pampa::api_protocol,
        BlackStack::Pampa::api_domain,
        BlackStack::Pampa::api_port,
        BlackStack::Pampa::api_key
      )

      # announce myself in the log
      logger.log "Child process is alive!"

      # get the parameters of this worker
      logger.logs "Update from central (1)... "
      self.get
      logger.done

      # update the record of this worker
      logger.logs "Update worker (1)... "
      self.updateWorker
      logger.done

      while self.canRun?
        # reset the nesting counter of the logger
        self.logger.reset()

        # announce the cycle in the log
        logger.log "Going to Run Local"
        logger.log "Process: #{self.assigned_process.to_s}."
        logger.log "Object: #{(self.id_object.to_s.size==0)? 'n/a' : self.id_object.to_s})"

        # remember when the cycle started; doSleep needs it
        start_time = Time.now

        begin
          # release resources
          logger.logs "Release resources... "
          GC.start
          DB.disconnect
          logger.done

          # load the worker record
          logger.logs "Load the worker... "
          the_worker = self.worker
          logger.done

          # fail with an explicit message instead of a NoMethodError on nil;
          # the rescue below logs it like any other error of the cycle
          raise "Worker #{self.fullWorkerName} not found." if the_worker.nil?

          # store which process this worker is currently running
          logger.logs "Update current process... "
          the_worker.process = self.assigned_process
          the_worker.active = true
          the_worker.save()
          logger.done

          logger.logs "Ping... "
          the_worker.ping()
          logger.done

          # do the actual work
          self.process(ARGV)

        rescue => e
          puts ""
          logger.log "Local Process Error: " + e.to_s + "\r\n" + e.backtrace.join("\r\n").to_s
        end

        # get the parameters of this worker
        logger.logs "Update from central (2)... "
        self.get
        logger.done

        # update the record of this worker
        logger.logs "Update worker (2)... "
        self.updateWorker
        logger.done

        # sleep the remaining time of the cycle
        logger.logs "Sleep... "
        self.doSleep(start_time)
        logger.done

        logger.log "-------------------------------------------"

        DB.disconnect
        GC.start
      end # main while

      # the loop is over: log why
      logger.log "Process Finish!"
      logger.log "Finish Reason: " + self.whyCantRun.to_s

      # close the database connection; a failure here is only logged
      logger.logs "Disconnect to Database... "
      begin
        DB.disconnect()
        logger.done
      rescue => e
        logger.error(e)
      end
    end # run

  end # class MyLocalProcess

end # module BlackStack
@@ -0,0 +1,141 @@
1
module BlackStack

  # Process without database connection that iterates forever. In every
  # iteration it greets the central (hello), gets its parameters (get),
  # notifies the assigned division and spawns the assigned child process,
  # waiting for it to finish.
  class MyParentProcess < BlackStack::MyProcess

    # Main loop. Never returns normally: it ends with exit(0) when an
    # Interrupt is received. Any other error raised inside an iteration is
    # logged, followed by a sleep of minimum_enlapsed_seconds.
    def run()
      super

      # create the logger
      self.logger = BlackStack::RemoteLogger.new(
        "#{self.fullWorkerName}.log",
        BlackStack::Pampa::api_protocol,
        BlackStack::Pampa::api_domain,
        BlackStack::Pampa::api_port,
        BlackStack::Pampa::api_key
      )

      # pid of the child currently running; nil while there is none
      pid = nil
      loop do
        begin
          GC.start # 331 - avoid lack of memory
          #DB.disconnect # this process is decoupled from the database connection

          # reset the nesting counter of the logger
          self.logger.reset()

          # get the start time
          start_time = Time.now

          # greet the central
          logger.logs("Hello to the central... ")
          parsed = central_call('hello')
          if parsed['status'] != BlackStack::Netting::SUCCESS
            self.logger.logf("Error: " + parsed['status'].to_s)
          else
            self.logger.done

            logger.logs("Get worker data... ")
            parsed = central_call('get')
            if parsed['status'] != BlackStack::Netting::SUCCESS
              self.logger.logf("Error: " + parsed['status'].to_s)
            else
              # map response
              self.id = parsed['id']
              self.assigned_process = parsed['assigned_process']
              self.id_object = parsed['id_object']
              self.id_division = parsed['id_division']
              self.division_name = parsed['division_name']
              self.ws_url = parsed['ws_url']
              self.ws_port = parsed['ws_port']
              self.logger.logf "done (#{self.division_name})"

              self.logger.logs "Notify division... "
              if self.division_name.to_s.size == 0
                self.logger.logf "no division assigned"
              else
                self.notify # notify the division
                self.logger.done

                self.logger.logs "Spawn child process... "
                if self.assigned_process.to_s.size > 0
                  # Argument-list form: no shell is involved, so a script
                  # path containing spaces reaches ruby as one argument.
                  pid = Process.spawn(
                    'ruby',
                    self.assigned_process.to_s,
                    "name=#{self.worker_name}",
                    "division=#{self.division_name}"
                  )
                  logger.logf "done (pid=#{pid.to_s})"

                  logger.log("Wait to child process to finish.")
                  Process.wait(pid)

                  # the child is gone: a later Interrupt must not try to
                  # kill a stale pid
                  pid = nil
                else
                  self.logger.logf "no process assigned"
                end # if self.assigned_process.to_s.size > 0
              end # if self.division_name.to_s.size == 0
            end # status of get.json
          end # status of hello.json

          logger.logs "Sleep... "
          self.doSleep(start_time)
          logger.done

          logger.log "-------------------------------------------"

        rescue Interrupt
          logger.reset

          logger.log "Interrupt signal!"

          logger.logs "Kill process... "
          unless pid.nil?
            # /pid selects the process by id (/im expects an image name);
            # /t also terminates the processes started by the child.
            system("taskkill /pid #{pid.to_s} /f /t >nul 2>&1")
          end
          logger.done

          logger.logs "Disconnect to Database... "
          begin
            # DB.disconnect()
            logger.done
          rescue => e
            logger.error(e)
          end

          logger.log "Process is out."
          exit(0)

        rescue => e
          begin
            logger.log "Unhandled exception: #{e.to_s}\r\n#{e.backtrace.join("\r\n").to_s}"
            logger.logs "Sleep #{self.minimum_enlapsed_seconds.to_s} seconds... "
            sleep(self.minimum_enlapsed_seconds)
            logger.done
          rescue => fatal
            # not even the logger works: fall back to the standard output
            puts "Fatal error: #{fatal.to_s}"
            print "Sleep #{self.minimum_enlapsed_seconds.to_s} seconds... "
            sleep(self.minimum_enlapsed_seconds)
            puts
          end

        end # rescue

      end # loop

    end # def run()

    private

    # POSTs the api key, the full worker name and the data returned by
    # BlackStack::RemoteHost#poll to "<api_url>/api1.3/pampa/<endpoint>.json"
    # and returns the parsed JSON body. The status is NOT checked here.
    def central_call(endpoint)
      url = "#{BlackStack::Pampa::api_url}/api1.3/pampa/#{endpoint}.json"
      res = BlackStack::Netting::call_post(url, {
        'api_key' => BlackStack::Pampa::api_key,
        'name' => self.fullWorkerName }.merge( BlackStack::RemoteHost.new.poll )
      )
      JSON.parse(res.body)
    end

  end # class MyParentProcess

end # module BlackStack
data/lib/myprocess.rb ADDED
@@ -0,0 +1,264 @@
1
module BlackStack

  # Base class of the Pampa worker processes. It holds the configuration of
  # the worker and implements the calls to the central and to the assigned
  # division (hello, get, set, ping, notify). The methods division, worker,
  # updateWorker and process are abstract and raise here; run does nothing.
  class MyProcess
    DEFAULT_MINIMUM_ENLAPSED_SECONDS = 60

    attr_accessor :assigned_process_changed, :assigned_division_changed, :verify_configuration
    attr_accessor :logger, :id, :worker_name, :division_name, :minimum_enlapsed_seconds, :assigned_process, :id_object, :id_division, :ws_url, :ws_port
    attr_accessor :email, :password

    # the_worker_name: short name of the worker; stored as a string.
    # the_division_name: name of the division.
    # the_minimum_enlapsed_seconds: minimum duration of a cycle, see doSleep.
    # the_verify_configuration: when true, get() flags changes of the
    #   assigned process / division.
    # the_email, the_password: stored as received; not used by this class.
    #
    # assigned_process starts as the absolute path of the running script.
    def initialize(
      the_worker_name,
      the_division_name,
      the_minimum_enlapsed_seconds=MyProcess::DEFAULT_MINIMUM_ENLAPSED_SECONDS,
      the_verify_configuration=true,
      the_email=nil,
      the_password=nil
    )
      self.assigned_process_changed = false
      self.assigned_division_changed = false
      self.assigned_process = File.expand_path($0)
      self.worker_name = "#{the_worker_name}"
      self.division_name = the_division_name
      self.minimum_enlapsed_seconds = the_minimum_enlapsed_seconds
      self.verify_configuration = the_verify_configuration
      self.email = the_email
      self.password = the_password
    end

    # Returns the id of the current process, as a string.
    def pid()
      Process.pid.to_s
    end

    # Returns an array of hashes {:executablepath, :pid, :ppid}, one per
    # line of the wmic output that ends with two numeric columns; pid is
    # the process id and ppid the id of the parent process. Lines with an
    # empty executable path do not match and are skipped.
    #
    # NOTE(review): the second numeric column is taken as the pid even
    # though processid is requested before parentprocessid. Presumably wmic
    # prints the columns sorted by name (ExecutablePath, ParentProcessId,
    # ProcessId), which would explain what the original TODO notes described
    # as pid and ppid arriving "swapped" - confirm on a Windows host.
    def list()
      output = `wmic process get executablepath,processid,parentprocessid`
      output.split(/\n+/).each_with_object([]) { |line, processes|
        fields = line.strip.match(/^(.+)\s+(\d+)\s+(\d+)$/)
        next if fields.nil?
        processes << {
          :executablepath => fields[1].strip.to_s,
          :pid => fields[3].to_s,
          :ppid => fields[2].to_s,
        }
      }
    end

    # Runs TASKKILL /F /PID <the_pid>. Returns what Kernel#system returns
    # (true, false or nil), not the output of the command. The command is
    # passed as an argument list, so the_pid is never interpreted by a shell.
    def self.kill(the_pid)
      system("TASKKILL", "/F", "/PID", the_pid.to_s)
    end

    # Sleeps the time needed for the cycle started at the_start_time to last
    # at least minimum_enlapsed_seconds. Does nothing when that time has
    # already elapsed.
    def doSleep(the_start_time)
      elapsed_seconds = Time.now - the_start_time # in seconds
      remaining_seconds = self.minimum_enlapsed_seconds - elapsed_seconds
      sleep(remaining_seconds) if remaining_seconds > 0
    end

    # Delegates to BlackStack::SimpleHostMonitoring.macaddress.
    # Pending notes kept from the original code:
    # TODO: this function works on Windows only.
    # TODO: this function does not return the full mac address.
    # TODO: validate that a virtual mac address (all zeros) is not returned.
    def self.macaddress()
      BlackStack::SimpleHostMonitoring.macaddress
    end

    # Returns "<hostname>.<macaddress>.<name>".
    def self.fullWorkerName(name)
      "#{Socket.gethostname}.#{MyProcess.macaddress}.#{name}"
    end

    # Full worker name built from the worker_name of this process.
    def fullWorkerName()
      MyProcess.fullWorkerName(self.worker_name)
    end

    # Greets the central. Raises a RuntimeError carrying the returned
    # status when it is not BlackStack::Netting::SUCCESS.
    def hello()
      checked_post("#{BlackStack::Pampa::api_url}/api1.3/pampa/hello.json")
      nil
    end # hello()

    # Notifies the given parameters (assigned_process, id_object) to the
    # assigned division. Does nothing while ws_url or ws_port are blank.
    # The status of the response is not checked.
    def set(new_assigned_process, new_id_object)
      if (self.ws_url.to_s.size > 0 && self.ws_port.to_s.size > 0)
        url = "#{BlackStack::Pampa::api_protocol}://#{self.ws_url.to_s}:#{self.ws_port.to_s}/api1.3/pampa/notify.json"
        post_to_api(url, {
          'assigned_process' => new_assigned_process,
          'id_object' => new_id_object })
      end
    end

    # Gets the parameters of this worker from the central and stores them
    # in this object. Raises a RuntimeError carrying the returned status
    # when it is not BlackStack::Netting::SUCCESS.
    #
    # When verify_configuration is set, it first flags whether the assigned
    # process / division differ from the ones this object already had:
    # * the processes are compared as expanded paths;
    # * the divisions are compared through to_guid (an extension of the
    #   project, not part of the standard library);
    # * a missing value in the response counts as a change. The former code
    #   raised a TypeError / NoMethodError in that case.
    def get()
      parsed = checked_post("#{BlackStack::Pampa::api_url}/api1.3/pampa/get.json")

      if self.verify_configuration
        # only meaningful if a process was already assigned
        if self.assigned_process.to_s.size > 0
          new_process = parsed['assigned_process'].to_s
          self.assigned_process_changed =
            new_process.empty? ||
            File.expand_path(self.assigned_process) != File.expand_path(new_process)
        end

        # only meaningful if a division was already assigned
        if self.id_division.to_s.size > 0
          new_division = parsed['id_division']
          self.assigned_division_changed =
            new_division.nil? ||
            self.id_division.to_guid != new_division.to_guid
        end
      end # verify_configuration

      # ws_url / ws_port still point to the previous division here: tell it
      # about the new configuration
      self.set(parsed['assigned_process'], parsed['id_object'])

      self.id = parsed['id']
      self.assigned_process = parsed['assigned_process']
      self.id_object = parsed['id_object']
      self.id_division = parsed['id_division']
      self.division_name = parsed['division_name']
      self.ws_url = parsed['ws_url']
      self.ws_port = parsed['ws_port']

      # ws_url / ws_port now point to the newly assigned division: tell it too
      self.set(parsed['assigned_process'], parsed['id_object'])
    end # get()


    # Updates the worker configuration in the division. Abstract here.
    def updateWorker()
      raise "Abstract Method."
    end

    # Pings the central and, when ws_url and ws_port are not nil, the
    # assigned division as well. Raises a RuntimeError carrying the returned
    # status when any of them does not answer BlackStack::Netting::SUCCESS.
    def ping()
      checked_post("#{BlackStack::Pampa::api_url}/api1.3/pampa/ping.json")

      if (self.ws_url != nil && self.ws_port != nil)
        checked_post("#{BlackStack::Pampa::api_protocol}://#{self.ws_url.to_s}:#{self.ws_port.to_s}/api1.3/pampa/ping.json")
      end
      nil
    end # ping()

    # Notifies the current assigned_process and id_object to the dispatcher
    # of the division. Raises when ws_url or ws_port are nil, or when the
    # returned status is not BlackStack::Netting::SUCCESS.
    def notify()
      if (self.ws_url==nil || self.ws_port==nil)
        raise "Cannot notify. Worker has not parameters."
      end

      url = "#{BlackStack::Pampa::api_protocol}://#{self.ws_url}:#{self.ws_port}/api1.3/pampa/notify.json"
      checked_post(url, {
        'assigned_process' => self.assigned_process,
        'id_object' => self.id_object })
      nil
    end

    # Get the data object of the division assigned to this worker.
    # Needs database connections. So it's available for ChildProcess only.
    def division()
      raise "This is an abstract method."
    end

    # Get the data object of worker linked to this process.
    # Needs database connections. So it's available for ChildProcess only.
    def worker()
      raise "This is an abstract method."
    end

    # Returns true while neither the assigned process nor the assigned
    # division have been flagged as changed.
    def canRun?()
      self.assigned_process_changed == false &&
      self.assigned_division_changed == false
    end

    # Returns the reason why canRun? is false, or "unknown" when neither
    # flag is set.
    def whyCantRun()
      return "Assigned process has changed." if self.assigned_process_changed == true
      return "Assigned division has changed." if self.assigned_division_changed == true
      "unknown"
    end

    # Does the job this object was created for. Abstract here.
    def process(argv)
      raise "This is an abstract method."
    end

    # Runs the process in parent, bot or child mode, depending on the
    # subclass. Per the original notes: parent mode loops forever; bot and
    # child modes loop until canRun? returns false, calling process() in
    # every cycle. This base implementation does nothing.
    def run()
      #raise "This is an abstract method"
    end # run

    private

    # POSTs to url the api key, the full worker name, extra_params and the
    # data returned by BlackStack::RemoteHost#poll (merged in that order,
    # so later keys win). Returns the response object untouched.
    def post_to_api(url, extra_params = {})
      BlackStack::Netting::call_post(url, {
        'api_key' => BlackStack::Pampa::api_key,
        'name' => self.fullWorkerName }.merge(extra_params).merge( BlackStack::RemoteHost.new.poll )
      )
    end

    # Same as post_to_api, but parses the JSON body and raises a
    # RuntimeError carrying the status when it is not
    # BlackStack::Netting::SUCCESS. Returns the parsed hash.
    def checked_post(url, extra_params = {})
      parsed = JSON.parse(post_to_api(url, extra_params).body)
      if parsed['status'] != BlackStack::Netting::SUCCESS
        raise parsed['status'].to_s
      end
      parsed
    end

  end # class MyProcess

end # module BlackStack