mass-client 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,187 @@
1
+ module Mass
2
+ class Source < BlackStack::Base
3
+ attr_accessor :type
4
+
5
+ def self.object_name # class-level name for this kind of object; always returns the string 'source'
6
+ 'source'
7
+ end
8
+
9
+ def initialize(h) # h: hash descriptor; forwarded unchanged to the parent constructor, and read here for 'source_type_desc'
10
+ super(h)
11
+ self.type = Mass::SourceType.new(h['source_type_desc']).child_class_instance # wrap the nested descriptor in a SourceType and keep whatever its `child_class_instance` returns
12
+ end
13
+
14
+ # convert the source_type into the ruby class to create an instance.
15
+ # example: Apollo --> "Mass::Apollo" (the class name is returned as a string)
16
+ def class_name_from_source_type # returns a String: "Mass::" followed by desc['source_type']
17
+ source_type = self.desc['source_type']
18
+ "Mass::#{source_type}" # no validation happens here: a nil source_type yields "Mass::"
19
+ end
20
+
21
+ # create an instance of the source type using the class named by the `desc['source_type']` attribute.
22
+ # override the base method
23
# Instantiate the class that implements this source's type.
#
# The class name comes from `class_name_from_source_type` (built from
# `desc['source_type']`); the new instance receives this object's `desc`.
#
# @return [Object] a new instance of the resolved class.
# @raise [RuntimeError] when the class is not loaded, including when the
#   source type is blank or is not a valid Ruby constant name.
def child_class_instance
  source_type = self.desc['source_type']
  key = self.class_name_from_source_type
  # `const_defined?` raises NameError for names such as "Mass::" or
  # "Mass::apollo"; treat those as "not found" so the caller always gets the
  # explanatory message below instead of "wrong constant name".
  found = begin
    Object.const_defined?(key)
  rescue NameError
    false
  end
  raise "Source code of souurce type #{source_type} not found. Create a class #{key} in the folder `/lib` of your mass-sdk." unless found
  Object.const_get(key).new(self.desc)
end
30
+
31
+ # If the profile `access` is not `:rpa`, raise an exception.
32
+ # Return `true` if the `url` is valid.
33
+ # Return `false` if the `url` is not valid.
34
+ #
35
+ # Overload this method in the child class.
36
+ #
37
# Base URL check for RPA sources; child classes are expected to override it.
#
# @param url [String] URL to validate (this base implementation does not inspect it).
# @return [true] always, once the access check has passed.
# @raise [RuntimeError] when the profile type's `access` is not `rpa`.
def valid_source_url?(url:)
  access = self.profile_type.desc['access']
  # `access` may be a Symbol or a String (other methods of this class call
  # `.to_sym` on it), so compare by name instead of by Symbol identity.
  raise "The method `valid_source_url?` is not allowed for #{access.to_s} access." if access.to_s != 'rpa'
  true
end
44
+
45
+ # Return the same URL in a normalized form:
46
+ # - remove all GET parameters.
47
+ # - remove all trailing slashes.
48
+ #
49
+ # If the profile `access` is not `:rpa`, raise an exception.
50
+ # If the `url` is not valid, raise an exception.
51
+ # Return the normalized URL.
52
+ #
53
+ # Overload this method in the child class.
54
+ #
55
# Return `url` in normalized form: query string removed, surrounding
# whitespace stripped, trailing slashes removed.
#
# @param url [String] URL to normalize.
# @return [String] the normalized URL.
# @raise [RuntimeError] when the profile type's `access` is not `rpa`, or
#   when `valid_source_url?` rejects the URL.
def normalized_source_url(url:)
  access = self.profile_type.desc['access']
  # `access` may be a Symbol or a String, so compare by name.
  raise "The method `normalized_source_url` is not allowed for #{access.to_s} access." if access.to_s != 'rpa'
  raise "The URL is not valid." unless self.valid_source_url?(url: url)
  # Everything from the first `?` goes first, then whitespace, then the slashes
  # that are left at the end (e.g. "https://x.com/a/?q=1" -> "https://x.com/a").
  url.gsub(/\?.*$/, '').strip.gsub(/\/+$/, '')
end
68
+
69
+ # If the profile `access` is not `:api`, raise an exception.
70
+ # Parameter `params` must be a hash.
71
+ # Return `true` if the `params` are valid.
72
+ # Return `false` if the `params` are not valid.
73
# Base parameter check for API sources.
#
# @param params [Hash] request parameters to validate (contents are not inspected here).
# @return [true] always, once both checks have passed.
# @raise [RuntimeError] when the profile type's `access` is not `api`, or
#   when `params` is not a Hash.
def valid_source_params?(params:)
  access = self.profile_type.desc['access']
  # `access` may be a Symbol or a String, so compare by name.
  raise "The method `valid_source_params?` is not allowed for #{access.to_s} access." if access.to_s != 'api'
  raise "The parameter `params` must be a hash." unless params.is_a?(Hash)
  true
end
82
+
83
+ # return array of event elements
84
+ def event_elements(job:) # placeholder: this implementation always raises; the message reports the receiver's class name
85
+ raise "The method `event_elements` is not implemented for #{self.class.name}."
86
+ end
87
+
88
+ # scroll down the page until N event elements are showed up
89
+ def show_up_event_elements(job:, event_limit:, max_scrolls:, logger:nil) # scrolls the job's browser and takes a screenshot per scroll; the loop's value is not captured, so nothing meaningful is returned
89
+ l = logger || BlackStack::DummyLogger.new(nil)
90
+ driver = job.profile.driver
91
+ # scroll counter and event counts used by the loop condition below
92
+ i = 0
93
+ prev_n_events = 0
94
+ security_height = 150 # only referenced by the commented-out viewport scroll further down
95
+ lis = self.event_elements(job: job)
96
+ n_events = lis.size
97
+ while (i<max_scrolls || n_events>prev_n_events) && n_events<event_limit # continue while scrolls remain OR the last recount grew, and only until event_limit elements are present
98
+ i += 1
99
+
100
+ prev_n_events = n_events
101
+ lis = self.event_elements(job: job) # recount happens BEFORE this iteration's scroll, so it reflects the previous scroll
102
+ n_events = lis.size
103
+
104
+ # scroll to the absolute offset i * step, where step is desc['scrolling_step'] plus a random extra
105
+ # (the viewport-height variant is kept commented out below; reference: https://stackoverflow.com/questions/1248081/how-to-get-the-browser-viewport-dimensions)
106
+ l.logs "Scrolling down (#{i.to_s.blue}/#{max_scrolls.to_s.blue} - #{n_events.to_s.blue}/#{event_limit.to_s.blue} events showed up)... "
107
+ step = self.desc['scrolling_step'] + rand(self.desc['scrolling_step_random'].to_i) # NOTE(review): rand(0) returns a Float in [0, 1), so step is fractional when 'scrolling_step_random' is nil or 0 - confirm this is acceptable
108
+ driver.execute_script("window.scrollTo(0, #{i.to_s}*#{step})")
109
+ #driver.execute_script("window.scrollTo(0, #{i.to_s}*(Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0)-#{security_height}))")
110
+ sleep(5) # fixed 5-second pause after each scroll - presumably to let the page load more elements
111
+ l.logf "done".green
112
+
113
+ # take a screenshot after every scroll and append it to the job descriptor
114
+ l.logs 'Screenshot... '
115
+ job.desc['screenshots'] << job.profile.screenshot # assumes job.desc['screenshots'] is already an Array - TODO confirm with the caller
116
+ l.logf 'done'.green + " (#{job.desc['screenshots'].size.to_s.blue} total)"
117
+ end
118
+ end
120
+
121
+ # Return a hash descriptor of the events found.
122
+ #
123
+ # Parameters:
124
+ # - If the profile `access` is `:rpa`, then the `bot_driver` parameter is mandatory.
125
+ # - If the profile `access` is `:api`, then the `api_key` parameter is mandatory.
126
+ #
127
+ # - If the profile `access` is `:mta`, raise an exception.
128
+ #
129
+ # - If the profile `access` is `:rpa`, then the `bot_url` parameter is mandatory, and it must be a valid URL.
130
+ # - If the profile `access` is `:api`, then the `api_params` parameter is mandatory and it must be a hash.
131
+ #
132
+ # - The `event_count` is for scrolling down (or perform any other required action) until finding `event_count` events.
133
+ #
134
+ # Output:
135
+ # {
136
+ # 'status' => :performed, # if it is not 'success', then it is an error description.
137
+ # 'snapshot_url' => 'https://foo.com/snapshot.png',
138
+ # 'screenshots' => [
139
+ # # array of URLs to screenshots
140
+ # ],
141
+ # 'events' => [
142
+ # 'url' => 'https://facebook.com/john-doe/posts/12345', # normalized URL of the event
143
+ # 'title' => 'Join my Facebook Community!'
144
+ # 'content' => 'My name is John Doe and I invite everyone to join my Facebook Community: facebook.com/groups/john-doe-restaurants!',
145
+ # 'pictures' => [
146
+ # # array of URLs to pictures scraped from the post and uploaded to our DropBox.
147
+ # ],
148
+ # 'lead' => {
149
+ # 'name' => 'John Doe',
150
+ # 'url' => 'https://facebook.com/john-doe',
151
+ # 'headline' => "Founder & CEO at Doe's Restaurants",
152
+ # 'picture' => 'https://foo.com/john-doe.png'
153
+ # }
154
+ # ],
155
+ # }
156
+ #
157
+ def do(job:, logger:nil) # base implementation: rejects :mta access, then returns an empty result skeleton; `job` and `logger` are not used here
158
+ # (check disabled below) If the profile `access` is `:rpa`, then the `bot_driver` parameter is mandatory.
159
+
160
+ #raise "The parameter `bot_driver` is mandatory." if bot_driver.nil? if self.profile_type.desc['access'].to_sym == :rpa
161
+ # (check disabled below) If the profile `access` is `:api`, then the `api_key` parameter is mandatory.
162
+ #raise "The parameter `api_key` is mandatory." if api_key.nil? if self.profile_type.desc['access'].to_sym == :api
163
+ # If the profile `access` is `:mta`, raise an exception. This is the only check that is active.
164
+ raise "The method `do` is not allowed for #{self.profile_type.desc['access'].to_s} access." if self.profile_type.desc['access'].to_sym == :mta
165
+ # (check disabled below) If the profile `access` is `:rpa`, then the `bot_url` parameter is mandatory, and it must be a valid URL.
166
+ #raise "The parameter `bot_url` is mandatory." if bot_url.nil? if self.profile_type.desc['access'].to_sym == :rpa
167
+ # (check disabled below) If the profile `access` is `:api`, then the `api_params` parameter is mandatory and it must be a hash.
168
+ #raise "The parameter `api_params` is mandatory." if api_params.nil? if self.profile_type.desc['access'].to_sym == :api
169
+ # (check disabled below) The `event_count` is for scrolling down (or performing any other required action) until finding `event_count` events.
170
+ #raise "The parameter `event_count` must be an integer higher or equal then 0." if !event_count.is_a?(Integer) || event_count < 0
171
+
172
+ # return the result skeleton: a status plus empty collections
173
+ return {
174
+ 'status' => :performed, # per the original note, any other value is an error description
175
+ 'screenshots' => [
176
+ # array of URLs to screenshots (empty here)
177
+ ],
178
+ # URL of the HTML snapshot; nil in this base implementation
179
+ 'snapshot_url' => nil,
180
+ 'events' => [
181
+ # array of event descriptors (empty here)
182
+ ],
183
+ }
184
+ end # def do
185
+
186
+ end # class Source
187
+ end # module Mass
@@ -0,0 +1,24 @@
1
+ module Mass
2
+ class SourceType < BlackStack::Base
3
+ # NEVER EVER DOWNLOAD LINKEDIN/FACEBOOK IMAGES USING THE LOCAL IP ADDRESS.
4
+ # USE THE SAME BROWSER DRIVER AND JAVASCRIPT INSTEAD OF THE LOCAL IP ADDRESS.
5
+ #extend BlackStack::Storage # include the module with the class methods
6
+
7
+ attr_accessor :profile_type
8
+
9
+ def initialize(h={}) # h: hash descriptor; `profile_type` is only assigned when h['profile_type'] is truthy
10
+ super(h)
11
+ self.profile_type = Mass::ProfileType.page( # request a single result filtered by name
12
+ page: 1,
13
+ limit: 1,
14
+ filters: {
15
+ name: h['profile_type']
16
+ }
17
+ ).first.child_class_instance if h['profile_type'] # NOTE(review): if `page` returns an empty collection, `.first` is presumably nil and this call raises NoMethodError - confirm
18
+ end
19
+
20
+ def self.object_name # class-level name for this kind of object; always returns the string 'source_type'
21
+ 'source_type'
22
+ end
23
+ end # class SourceType
24
+ end # module Mass
@@ -0,0 +1,7 @@
1
+ module Mass
2
+ class Tag < BlackStack::Base
3
+ def self.object_name # class-level name for this kind of object; always returns the string 'tag'
4
+ 'tag'
5
+ end
6
+ end # class Tag
7
+ end # module Mass
@@ -0,0 +1,7 @@
1
+ module Mass
2
+ class ProfileAPI < Mass::Profile
3
+
4
+ # TODO: Code Me! This subclass defines nothing of its own yet; all behavior comes from Mass::Profile.
5
+
6
+ end # class ProfileAPI
7
+ end # module Mass
@@ -0,0 +1,147 @@
1
+ module Mass
2
+ class ProfileMTA < Mass::Profile
3
+
4
+ # Scrape the inbox of the profile.
5
+ # Return an array of hash descriptors of outreach records.
6
+ #
7
+ # Parameters:
8
+ # - limit: the maximum number of messages to scrape. Default: 100.
9
+ # - only_unread: if true, then only the unread messages will be scraped. This parameter is not used by :mta profiles. Default: true.
10
+ # - logger: a logger object to log the process. Default: nil.
11
+ #
12
+ # Example of a hash descriptor in the returned array:
13
+ # ```
14
+ # {
15
+ # # a scraped message is always a :performed message
16
+ # 'status' => :performed,
17
+ # # what is the outreach type?
18
+ # # e.g.: :LinkedIn_DirectMessage
19
+ # # decide this in the child class.
20
+ # 'outreach_type' => nil,
21
+ # # hash descriptor of the profile who is scraping the inbox
22
+ # 'profile' => self.desc,
23
+ # # hash descriptor of the lead who is the conversation partner
24
+ # 'lead' => nil,
25
+ # # if the message has been sent by the profile, it is :outgoing.
26
+ # # if the message has been sent by the lead, it is :incoming.
27
+ # 'direction' => nil,
28
+ # # the content of the message
29
+ # 'subject' => nil,
30
+ # 'body' => nil,
31
+ # }
32
+ # ```
33
+ #
34
# Read this profile's IMAP inbox and return the messages found as outreach
# descriptors.
#
# For each folder, the search result is reversed (presumably newest first) and
# at most `limit` messages are read. Reading stops early at the message whose
# id equals the one stored in `desc[track_field]`, and that field is then
# updated with the id of the first message seen in this run.
#
# @param limit [Integer] maximum number of messages to read per folder.
# @param only_unread [Boolean] accepted for interface compatibility; not used here.
# @param logger [#logs, #logf, #done, nil] progress logger; a
#   `BlackStack::DummyLogger` is created when nil.
# @return [Array<Hash>] one hash per message with the keys 'status',
#   'outreach_type', 'profile', 'lead_or_company', 'direction', 'subject', 'body'.
# @raise [StandardError] any error raised by `Net::IMAP` propagates; the
#   connection is closed first.
def inboxcheck(limit: 100, only_unread:true, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  ret = []
  profile = self
  type = self.type
  sources = [
    {:folder=>profile.desc['inbox_label'] || type.desc['default_inbox_label'], :track_field=>'imap_inbox_last_id'},
    #{:folder=>profile.desc['spam_label'] || type.desc['default_inbox_label'], :track_field=>'imap_spam_last_id'},
  ]

  # connecting imap
  l.logs "Connecting IMAP... "
  imap = Net::IMAP.new(
    profile.desc['imap_address'] || type.desc['default_imap_address'],
    profile.desc['imap_port'] || type.desc['default_imap_port'],
    true
  )

  begin
    conn = imap.login(
      profile.desc['imap_username'],
      profile.desc['imap_password']
    )
    l.logf "done (#{conn.name})"

    sources.each do |source|
      folder = source[:folder]
      track_field = source[:track_field]

      l.logs "Choosing mailbox #{folder}... "
      l.logs "Examine folder... "
      res = imap.examine(folder)
      l.logf "done (#{res.name})"

      # Take the latest `limit` messages in reverse order, so the loop can stop
      # at the last message processed by a previous run. The range is
      # exclusive: `0..limit` would read one message too many.
      l.logs "Getting latest #{limit.to_s} messages... "
      ids = imap.search(["SUBJECT", profile.desc['search_all_wildcard']]).reverse[0...limit]
      l.logf "done (#{ids.size.to_s} messages)"

      # iterate the messages
      last_message_id = nil
      ids.each do |id|
        l.logs "Processing message #{id.to_s.blue}... "
        envelope = imap.fetch(id, "ENVELOPE")[0].attr["ENVELOPE"]

        # TODO: develop a normalization function for mail.message_id
        message_id = envelope.message_id.to_s.gsub(/^</, '').gsub(/>$/, '')

        # the first message seen becomes the new tracking mark
        last_message_id ||= message_id

        # stop at the message recorded by the previous run
        if message_id == profile.desc[track_field]
          l.logf "skip".yellow + " (already processed)"
          break
        end

        sender = envelope.from[0]
        # (bounce-report analysis with Sisimai is not implemented here)
        ret << {
          # a scraped message is always a :performed message
          'status' => :performed,
          'outreach_type' => :GMail_DirectMessage,
          # hash descriptor of the profile who is scraping the inbox
          'profile' => profile.desc,
          # the conversation partner, taken from the first From address
          'lead_or_company' => {
            'name' => sender.name,
            'email' => sender.mailbox.to_s + '@' + sender.host.to_s,
          },
          # every message read from the inbox is reported as :incoming
          'direction' => :incoming,
          'subject' => envelope.subject,
          'body' => imap.fetch(id, "BODY[]")[0].attr["BODY[]"],
        }

        l.logf "done".green
      end

      # remember the latest message_id processed
      profile.desc[track_field] = last_message_id if last_message_id

      l.done
    end

    # disconnect
    l.logs "Disconnecting IMAP... "
    res = imap.logout
    l.logf "done (#{res.name})"
  ensure
    # Always release the socket, including when a step above raised.
    imap.disconnect unless imap.disconnected?
  end

  ret
end
145
+
146
+ end # class ProfileMTA
147
+ end # module Mass