mass-client 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,187 @@
1
+ module Mass
2
+ class Source < BlackStack::Base
3
+ attr_accessor :type
4
+
5
# Class-level name of this object type.
# Always returns the literal string 'source'.
class << self
  def object_name
    'source'
  end
end
8
+
9
# Build the source from its hash descriptor `h`.
#
# After the parent constructor has processed `h`, the `type` attribute is set
# to whatever `child_class_instance` returns for a `Mass::SourceType` built
# from `h['source_type_desc']`.
def initialize(h)
  super(h)
  source_type = Mass::SourceType.new(h['source_type_desc'])
  self.type = source_type.child_class_instance
end
13
+
14
# Convert the `source_type` attribute of the descriptor into the fully
# qualified name of the Ruby class used to create an instance.
# Example: 'Apollo' --> 'Mass::Apollo'
def class_name_from_source_type
  'Mass::' + self.desc['source_type'].to_s
end
20
+
21
# Create an instance of the class named by `class_name_from_source_type`,
# passing this object's descriptor (`desc`) to its constructor.
# Overrides the base method.
#
# Raises a RuntimeError when no constant with that name is defined.
def child_class_instance
  type_name = self.desc['source_type']
  class_name = self.class_name_from_source_type
  unless Kernel.const_defined?(class_name)
    raise "Source code of souurce type #{type_name} not found. Create a class #{class_name} in the folder `/lib` of your mass-sdk."
  end
  Kernel.const_get(class_name).new(self.desc)
end
30
+
31
# Tell whether `url` is a valid URL for this source.
#
# This base implementation does not inspect `url` and always returns `true`.
# Overload this method in the child class.
#
# Parameters:
# - url: the URL to validate.
#
# Raises a RuntimeError if the profile type `access` is not `rpa`.
#
# NOTE(review): `profile_type` is not defined in this class; presumably it is
# provided by the parent or by child classes -- confirm.
def valid_source_url?(url:)
  # `access` may arrive as a String or as a Symbol; normalize it before
  # comparing, the same way `do` does with `.to_sym`.
  access = self.profile_type.desc['access']
  raise "The method `valid_source_url?` is not allowed for #{access.to_s} access." if access.to_s.to_sym != :rpa
  true
end
44
+
45
# Return the same URL in a normalized form:
# - remove all GET parameters (everything from the first `?`).
# - remove leading/trailing whitespace.
# - remove all trailing slashes.
#
# Parameters:
# - url: the URL to normalize.
#
# Raises a RuntimeError if the profile type `access` is not `rpa`.
# Raises a RuntimeError if `valid_source_url?` returns a falsy value for `url`.
#
# Overload this method in the child class.
#
# NOTE(review): `profile_type` is not defined in this class; presumably it is
# provided by the parent or by child classes -- confirm.
def normalized_source_url(url:)
  # `access` may arrive as a String or as a Symbol; normalize it before
  # comparing, the same way `do` does with `.to_sym`.
  access = self.profile_type.desc['access']
  raise "The method `normalized_source_url` is not allowed for #{access.to_s} access." if access.to_s.to_sym != :rpa
  raise "The URL is not valid." if !self.valid_source_url?(url: url)
  # Drop the query string first, then surrounding whitespace, then trailing slashes.
  url = url.gsub(/\?.*$/, '').strip
  # The original code read an undefined local (`ret`) here, which raised NameError on every call.
  url = url.gsub(/\/+$/, '')
  url
end
68
+
69
# Tell whether `params` are valid parameters for this source.
#
# This base implementation only checks the type of `params` and then returns `true`.
#
# Parameters:
# - params: must be a Hash.
#
# Raises a RuntimeError if the profile type `access` is not `api`.
# Raises a RuntimeError if `params` is not a Hash.
#
# NOTE(review): `profile_type` is not defined in this class; presumably it is
# provided by the parent or by child classes -- confirm.
def valid_source_params?(params:)
  # `access` may arrive as a String or as a Symbol; normalize it before
  # comparing, the same way `do` does with `.to_sym`.
  access = self.profile_type.desc['access']
  raise "The method `valid_source_params?` is not allowed for #{access.to_s} access." if access.to_s.to_sym != :api
  raise "The parameter `params` must be a hash." unless params.is_a?(Hash)
  true
end
82
+
83
# Return the array of event elements found for `job`.
#
# This base implementation always raises a RuntimeError naming the receiver's
# class; `show_up_event_elements` relies on a working implementation of it.
def event_elements(job:)
  message = "The method `event_elements` is not implemented for #{self.class.name}."
  raise RuntimeError, message
end
87
+
88
# Scroll the page down, one step at a time, until `event_limit` event elements
# are counted.
#
# Scrolling goes on while fewer than `event_limit` events are counted AND
# either fewer than `max_scrolls` scrolls were performed or the latest count
# grew compared with the previous one. Events are counted (via
# `event_elements`) before each scroll.
#
# On every iteration the method scrolls the browser, sleeps 5 seconds and
# appends `job.profile.screenshot` to `job.desc['screenshots']`.
#
# Parameters:
# - job: object exposing `profile` (with `driver` and `screenshot`) and `desc`.
# - event_limit: number of events at which scrolling stops.
# - max_scrolls: number of scrolls after which scrolling stops unless new events keep appearing.
# - logger: a logger object to log the process. Default: nil.
#
# Returns nil.
def show_up_event_elements(job:, event_limit:, max_scrolls:, logger:nil)
  log = logger || BlackStack::DummyLogger.new(nil)
  browser = job.profile.driver

  scroll_count = 0
  previous_total = 0
  current_total = self.event_elements(job: job).size

  until (scroll_count >= max_scrolls && current_total <= previous_total) || current_total >= event_limit
    scroll_count += 1

    previous_total = current_total
    current_total = self.event_elements(job: job).size

    # scroll to `scroll_count` times the configured step (plus a random extra)
    log.logs "Scrolling down (#{scroll_count.to_s.blue}/#{max_scrolls.to_s.blue} - #{current_total.to_s.blue}/#{event_limit.to_s.blue} events showed up)... "
    offset = self.desc['scrolling_step'] + rand(self.desc['scrolling_step_random'].to_i)
    browser.execute_script("window.scrollTo(0, #{scroll_count.to_s}*#{offset})")
    sleep(5)
    log.logf "done".green

    # keep a screenshot of every scroll position
    log.logs 'Screenshot... '
    job.desc['screenshots'] << job.profile.screenshot
    log.logf 'done'.green + " (#{job.desc['screenshots'].size.to_s.blue} total)"
  end
end
120
+
121
# Return a hash descriptor of the events found.
#
# This base implementation performs no scraping: it only rejects `:mta`
# access and returns an empty result skeleton. The `job` and `logger`
# parameters are accepted but not used here.
#
# Parameters:
# - job: the job being processed.
# - logger: a logger object to log the process. Default: nil.
#
# Raises a RuntimeError if the profile type `access` is `:mta`.
#
# Output:
# {
#   'status' => :performed, # if it is not :performed, then it is an error description.
#   'screenshots' => [
#     # array of URLs to screenshots
#   ],
#   'snapshot_url' => nil, # URL to the HTML snapshot
#   'events' => [
#     # array of event descriptors, e.g.:
#     # {
#     #   'url' => 'https://facebook.com/john-doe/posts/12345', # normalized URL of the event
#     #   'title' => 'Join my Facebook Community!',
#     #   'content' => 'My name is John Doe and I invite everyone to join my Facebook Community: facebook.com/groups/john-doe-restaurants!',
#     #   'pictures' => [
#     #     # array of URLs to pictures scraped from the post
#     #   ],
#     #   'lead' => {
#     #     'name' => 'John Doe',
#     #     'url' => 'https://facebook.com/john-doe',
#     #     'headline' => "Founder & CEO at Doe's Restaurants",
#     #     'picture' => 'https://foo.com/john-doe.png'
#     #   }
#     # }
#   ],
# }
#
# NOTE(review): `profile_type` is not defined in this class; presumably it is
# provided by the parent or by child classes -- confirm.
def do(job:, logger:nil)
  # Per-access parameter checks (`bot_driver`, `api_key`, `bot_url`,
  # `api_params`, `event_count`) are not enforced here; only `:mta` is rejected.
  access = self.profile_type.desc['access']
  if access.to_sym == :mta
    raise "The method `do` is not allowed for #{access.to_s} access."
  end

  result = {}
  result['status'] = :performed
  result['screenshots'] = []
  result['snapshot_url'] = nil
  result['events'] = []
  result
end # def do
185
+
186
+ end # class Source
187
+ end # module Mass
@@ -0,0 +1,24 @@
1
+ module Mass
2
class SourceType < BlackStack::Base
  # NEVER EVER DOWNLOAD LINKEDIN/FACEBOOK IMAGES USING THE LOCAL IP ADDRESS.
  # USE THE SAME BROWSER DRIVER AND JAVASCRIPT INSTEAD OF THE LOCAL IP ADDRESS.
  #extend BlackStack::Storage # include the module with the class methods

  # Object assigned by the constructor when the descriptor names a profile type.
  attr_accessor :profile_type

  # Class-level name of this object type.
  # Always returns the literal string 'source_type'.
  def self.object_name
    'source_type'
  end

  # Build the source type from its hash descriptor `h`.
  #
  # When `h['profile_type']` is set, the first `Mass::ProfileType` whose
  # `name` filter matches it is looked up (page 1, limit 1) and its
  # `child_class_instance` is stored in `profile_type`.
  # Otherwise `profile_type` is left untouched.
  def initialize(h={})
    super(h)
    return unless h['profile_type']

    matches = Mass::ProfileType.page(
      page: 1,
      limit: 1,
      filters: { name: h['profile_type'] }
    )
    self.profile_type = matches.first.child_class_instance
  end
end # class SourceType
24
+ end # module Mass
@@ -0,0 +1,7 @@
1
+ module Mass
2
class Tag < BlackStack::Base
  class << self
    # Class-level name of this object type.
    # Always returns the literal string 'tag'.
    def object_name
      'tag'
    end
  end
end # class Tag
7
+ end # module Mass
@@ -0,0 +1,7 @@
1
+ module Mass
2
+ class ProfileAPI < Mass::Profile
3
+
4
+ # TODO: Code Me! No API-specific behavior is implemented yet; this class only inherits from Mass::Profile.
5
+
6
+ end # class ProfileAPI
7
+ end # module Mass
@@ -0,0 +1,147 @@
1
+ module Mass
2
+ class ProfileMTA < Mass::Profile
3
+
4
# Scrape the inbox of the profile over IMAP.
# Return an array of hash descriptors of outreach records, newest message first.
#
# Parameters:
# - limit: the maximum number of messages to scrape. Default: 100.
# - only_unread: accepted for interface compatibility; it is not used by this method. Default: true.
# - logger: a logger object to log the process. Default: nil.
#
# Side effects:
# - After a folder is processed, `desc['imap_inbox_last_id']` is set to the
#   message-id of the newest message seen, so that a later call stops as soon
#   as it reaches that message.
# - The IMAP connection is always closed, even when an error is raised.
#
# Example of a hash descriptor into the returned array:
# ```
# {
#   # a scraped message is always a :performed message
#   'status' => :performed,
#   # the outreach type
#   'outreach_type' => :GMail_DirectMessage,
#   # hash descriptor of the profile who is scraping the inbox
#   'profile' => self.desc,
#   # name and email of the sender, taken from the message envelope
#   'lead_or_company' => { 'name' => nil, 'email' => nil },
#   # scraped messages are always reported as :incoming
#   'direction' => :incoming,
#   # the content of the message (`body` is the full `BODY[]` fetched from the server)
#   'subject' => nil,
#   'body' => nil,
# }
# ```
#
def inboxcheck(limit: 100, only_unread:true, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  ret = []
  profile = self
  t = self.type
  sources = [
    {:folder=>profile.desc['inbox_label'] || t.desc['default_inbox_label'], :track_field=>'imap_inbox_last_id'},
    #{:folder=>profile.desc['spam_label'] || t.desc['default_inbox_label'], :track_field=>'imap_spam_last_id'},
  ]

  # connecting imap
  # Keyword options replace the obsolete positional `usessl` argument.
  l.logs "Connecting IMAP... "
  imap = Net::IMAP.new(
    profile.desc['imap_address'] || t.desc['default_imap_address'],
    port: profile.desc['imap_port'] || t.desc['default_imap_port'],
    ssl: true
  )

  begin
    conn = imap.login(
      profile.desc['imap_username'],
      profile.desc['imap_password']
    )
    l.logf "done (#{conn.name})"

    sources.each do |source|
      folder = source[:folder]
      track_field = source[:track_field]

      l.logs "Choosing mailbox #{folder}... "
      l.logs "Examine folder... "
      res = imap.examine(folder)
      l.logf "done (#{res.name})"

      # Getting latest `limit` messages received, in descending order (newer first),
      # in order to stop when the latest message processed before is found.
      # `first(limit)` returns at most `limit` ids (`[0..limit]` returned one too many).
      l.logs "Getting latest #{limit.to_s} messages... "
      ids = imap.search(["SUBJECT", profile.desc['search_all_wildcard']]).reverse.first(limit)
      l.logf "done (#{ids.size.to_s} messages)"

      # iterate the messages
      last_message_id = nil
      ids.each do |id|
        l.logs "Processing message #{id.to_s.blue}... "
        # getting the envelope
        envelope = imap.fetch(id, "ENVELOPE")[0].attr["ENVELOPE"]

        # TODO: develop a normalization function for mail.message_id
        # strip the angle brackets that wrap the message-id
        message_id = envelope.message_id.to_s.sub(/\A</, '').sub(/>\z/, '')

        # if this is the first (newest) message, then remember it
        last_message_id = message_id if last_message_id.nil?

        # check if this message_id is the latest processed
        if message_id == profile.desc[track_field]
          l.logf "skip".yellow + " (already processed)"
          break
        end

        sender = envelope.from[0]
        lead_email = sender.mailbox.to_s + '@' + sender.host.to_s
        lead_name = sender.name
        subject = envelope.subject
        body = imap.fetch(id, "BODY[]")[0].attr["BODY[]"]

        ret << {
          # a scraped message is always a :performed message
          'status' => :performed,
          # the outreach type
          'outreach_type' => :GMail_DirectMessage,
          # hash descriptor of the profile who is scraping the inbox
          'profile' => profile.desc,
          # descriptor of the sender, who is the conversation partner
          'lead_or_company' => {
            'name' => lead_name,
            'email' => lead_email,
          },
          # scraped messages are always reported as :incoming
          'direction' => :incoming,
          # the content of the message
          'subject' => subject,
          'body' => body,
        }

        l.logf "done".green
      end

      # remember the latest message_id processed
      profile.desc[track_field] = last_message_id if last_message_id

      l.done
    end # end sources.each

    # disconnect
    l.logs "Disconnecting IMAP... "
    res = imap.logout
    l.logf "done (#{res.name})"
  ensure
    # never leak the socket, whatever happened above
    imap.disconnect unless imap.disconnected?
  end

  # return
  ret
end # def inboxcheck
145
+
146
+ end # class ProfileMTA
147
+ end # module Mass