mass-client 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/base-line/channel.rb +7 -0
- data/lib/base-line/company.rb +7 -0
- data/lib/base-line/connectioncheck.rb +14 -0
- data/lib/base-line/data_type.rb +8 -0
- data/lib/base-line/enrichment.rb +51 -0
- data/lib/base-line/enrichment_type.rb +60 -0
- data/lib/base-line/event.rb +15 -0
- data/lib/base-line/headcount.rb +7 -0
- data/lib/base-line/inboxcheck.rb +14 -0
- data/lib/base-line/industry.rb +7 -0
- data/lib/base-line/job.rb +60 -0
- data/lib/base-line/lead.rb +7 -0
- data/lib/base-line/location.rb +7 -0
- data/lib/base-line/outreach.rb +42 -0
- data/lib/base-line/outreach_type.rb +76 -0
- data/lib/base-line/profile.rb +108 -0
- data/lib/base-line/profile_type.rb +10 -0
- data/lib/base-line/request.rb +40 -0
- data/lib/base-line/revenue.rb +7 -0
- data/lib/base-line/rule.rb +7 -0
- data/lib/base-line/source.rb +187 -0
- data/lib/base-line/source_type.rb +24 -0
- data/lib/base-line/tag.rb +7 -0
- data/lib/first-line/profile_api.rb +7 -0
- data/lib/first-line/profile_mta.rb +147 -0
- data/lib/first-line/profile_rpa.rb +375 -0
- data/lib/mass-client.rb +53 -0
- metadata +209 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
module Mass
|
2
|
+
class Source < BlackStack::Base
|
3
|
+
attr_accessor :type
|
4
|
+
|
5
|
+
# Name of this kind of object: always the string 'source'.
def self.object_name
  'source'
end
|
8
|
+
|
9
|
+
# Build a source from its hash descriptor `h`.
#
# The descriptor is passed to the parent constructor. The nested descriptor
# found under `h['source_type_desc']` is then wrapped into a
# `Mass::SourceType`, and the object returned by its `child_class_instance`
# is stored in the `type` attribute.
def initialize(h)
  super(h)
  source_type = Mass::SourceType.new(h['source_type_desc'])
  self.type = source_type.child_class_instance
end
|
13
|
+
|
14
|
+
# Convert the `source_type` of the descriptor into the fully qualified name
# of the Ruby class to instantiate: the value is prefixed with `Mass::`.
# Example: a `source_type` of 'ApolloAPI' gives 'Mass::ApolloAPI'.
def class_name_from_source_type
  'Mass::' + desc['source_type'].to_s
end
|
20
|
+
|
21
|
+
# Create an instance of the class that implements this source type.
# The class name is derived from the `desc['source_type']` attribute (see
# `class_name_from_source_type`), and the new object receives this very
# descriptor.
#
# Overrides the base method.
#
# Raises a RuntimeError when `source_type` is missing or when no class with
# the expected name has been loaded.
def child_class_instance
  source_type = desc['source_type']
  key = class_name_from_source_type
  # A blank source type would produce the invalid constant name "Mass::", for
  # which `const_defined?` raises an unrelated NameError; treat it as "not found".
  found = !source_type.to_s.empty? && Kernel.const_defined?(key)
  raise "Source code of source type #{source_type} not found. Create a class #{key} in the folder `/lib` of your mass-sdk." unless found
  Kernel.const_get(key).new(desc)
end
|
30
|
+
|
31
|
+
# Tell whether `url` is a valid URL for this source.
#
# This base implementation accepts every URL.
# Overload this method in the child class to perform a real validation.
#
# Raises a RuntimeError if the profile `access` is not `rpa`.
# Returns `true` if the `url` is valid.
# Returns `false` if the `url` is not valid.
#
# NOTE(review): `profile_type` is not declared on this class in the visible
# code (only `type` is) -- presumably it is provided elsewhere; confirm.
def valid_source_url?(url:)
  # `access` may be stored either as a String or as a Symbol; normalize it
  # before comparing, the same way `do` does with `.to_sym`.
  access = profile_type.desc['access'].to_s.to_sym
  raise "The method `valid_source_url?` is not allowed for #{access} access." if access != :rpa
  true
end
|
44
|
+
|
45
|
+
# Return the same URL in a normalized form:
# - remove all GET parameters (everything from the first `?`),
# - remove leading and trailing whitespace,
# - remove all trailing slashes.
#
# Raises a RuntimeError if the profile `access` is not `rpa`.
# Raises a RuntimeError if `valid_source_url?` rejects the `url`.
# Returns the normalized URL as a new string; `url` itself is not modified.
#
# Overload this method in the child class.
def normalized_source_url(url:)
  # `access` may be stored either as a String or as a Symbol; normalize it
  # before comparing, the same way `do` does with `.to_sym`.
  access = profile_type.desc['access'].to_s.to_sym
  raise "The method `normalized_source_url` is not allowed for #{access} access." if access != :rpa
  raise "The URL is not valid." unless valid_source_url?(url: url)
  # Order matters: strip the query string and the whitespace first, so that
  # slashes preceding them become trailing slashes and are removed too.
  url.gsub(/\?.*$/, '').strip.gsub(/\/+$/, '')
end
|
68
|
+
|
69
|
+
# Tell whether `params` are valid parameters for this source.
#
# This base implementation only checks the type of `params`; any hash is
# accepted.
#
# Raises a RuntimeError if the profile `access` is not `api`.
# Raises a RuntimeError if `params` is not a hash.
# Returns `true` if the `params` are valid.
# Returns `false` if the `params` are not valid.
def valid_source_params?(params:)
  # `access` may be stored either as a String or as a Symbol; normalize it
  # before comparing, the same way `do` does with `.to_sym`.
  access = profile_type.desc['access'].to_s.to_sym
  raise "The method `valid_source_params?` is not allowed for #{access} access." if access != :api
  raise "The parameter `params` must be a hash." unless params.is_a?(Hash)
  true
end
|
82
|
+
|
83
|
+
# Return the array of event elements for the given `job`.
#
# Not implemented at this level: this version always raises a RuntimeError
# naming the class it was called on.
def event_elements(job:)
  message = "The method `event_elements` is not implemented for #{self.class.name}."
  raise message
end
|
87
|
+
|
88
|
+
# Scroll down the page until `event_limit` event elements have shown up.
#
# Parameters:
# - job: the job being processed. `job.profile.driver` is the browser driver
#   used for scrolling, and one `job.profile.screenshot` is appended to
#   `job.desc['screenshots']` after every scroll.
# - event_limit: stop as soon as at least this many event elements are found.
# - max_scrolls: number of scrolls after which the loop is allowed to stop;
#   beyond it, scrolling only goes on while the last scroll revealed new events.
# - logger: optional logger. Default: nil (a `BlackStack::DummyLogger` is used).
#
# The i-th scroll moves to `i * step` pixels, where `step` is
# `desc['scrolling_step']` plus a random integer below
# `desc['scrolling_step_random']`.
#
# Returns nil.
def show_up_event_elements(job:, event_limit:, max_scrolls:, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  driver = job.profile.driver
  scrolls = 0
  prev_n_events = 0
  n_events = event_elements(job: job).size
  while (scrolls < max_scrolls || n_events > prev_n_events) && n_events < event_limit
    scrolls += 1

    l.logs "Scrolling down (#{scrolls.to_s.blue}/#{max_scrolls.to_s.blue} - #{n_events.to_s.blue}/#{event_limit.to_s.blue} events showed up)... "
    # `rand(0)` returns a Float in [0, 1) instead of an Integer, so only
    # randomize the step when a non-zero range is configured.
    jitter_range = desc['scrolling_step_random'].to_i
    step = desc['scrolling_step'] + (jitter_range.zero? ? 0 : rand(jitter_range))
    driver.execute_script("window.scrollTo(0, #{scrolls.to_s}*#{step})")
    # give the page some time to load the new elements
    sleep(5)
    l.logf "done".green

    # screenshot
    l.logs 'Screenshot... '
    job.desc['screenshots'] << job.profile.screenshot
    l.logf 'done'.green + " (#{job.desc['screenshots'].size.to_s.blue} total)"

    # Count the events AFTER scrolling, so the loop condition works on fresh
    # numbers and no extra scroll is spent once the limit has been reached.
    prev_n_events = n_events
    n_events = event_elements(job: job).size
  end
end
|
120
|
+
|
121
|
+
# Return a hash descriptor of the events found.
#
# This base implementation performs no scraping: it only refuses `mta`
# profiles and returns an empty result.
#
# Parameters:
# - job: the job to perform (not used by this base implementation).
# - logger: optional logger (not used by this base implementation).
#
# Raises a RuntimeError if the profile `access` is `mta`.
#
# The validations of `bot_driver`, `api_key`, `bot_url`, `api_params` and
# `event_count` described in earlier revisions are disabled: the method does
# not receive those parameters.
#
# Output:
# {
#   'status' => :performed, # if it is not 'success', then it is an error description.
#   'screenshots' => [
#     # array of URLs to screenshots
#   ],
#   'snapshot_url' => nil, # 'https://foo.com/snapshot.png'
#   'events' => [
#     # array of event descriptors, e.g.:
#     # {
#     #   'url' => 'https://facebook.com/john-doe/posts/12345', # normalized URL of the event
#     #   'title' => 'Join my Facebook Community!',
#     #   'content' => 'My name is John Doe and I invite everyone to join my Facebook Community: facebook.com/groups/john-doe-restaurants!',
#     #   'pictures' => [
#     #     # array of URLs to pictures scraped from the post and uploaded to our DropBox.
#     #   ],
#     #   'lead' => {
#     #     'name' => 'John Doe',
#     #     'url' => 'https://facebook.com/john-doe',
#     #     'headline' => "Founder & CEO at Doe's Restaurants",
#     #     'picture' => 'https://foo.com/john-doe.png'
#     #   }
#     # }
#   ],
# }
def do(job:, logger:nil)
  access = profile_type.desc['access']
  if access.to_sym == :mta
    raise "The method `do` is not allowed for #{access.to_s} access."
  end

  result = {}
  result['status'] = :performed
  result['screenshots'] = []
  result['snapshot_url'] = nil
  result['events'] = []
  result
end # def do
|
185
|
+
|
186
|
+
end # class Source
|
187
|
+
end # module Mass
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mass
|
2
|
+
class SourceType < BlackStack::Base
|
3
|
+
# NEVER EVER DOWNLOAD LINKEDIN/FACEBOOK IMAGES USING THE LOCAL IP ADDRESS.
|
4
|
+
# USE THE SAME BROWSER DRIVER AND JAVASCRIPT INSTEAD OF THE LOCAL IP ADDRESS.
|
5
|
+
#extend BlackStack::Storage # include the module with the class methods
|
6
|
+
|
7
|
+
attr_accessor :profile_type
|
8
|
+
|
9
|
+
# Build a source type from its hash descriptor `h`.
#
# The descriptor is passed to the parent constructor. When
# `h['profile_type']` is set, the profile types are queried for that name
# (first page, one record), and the `child_class_instance` of the first
# record is stored in the `profile_type` attribute. Otherwise the attribute
# is left untouched.
def initialize(h={})
  super(h)
  return unless h['profile_type']

  matches = Mass::ProfileType.page(
    page: 1,
    limit: 1,
    filters: { name: h['profile_type'] }
  )
  self.profile_type = matches.first.child_class_instance
end
|
19
|
+
|
20
|
+
# Name of this kind of object: always the string 'source_type'.
def self.object_name
  'source_type'
end
|
23
|
+
end # class SourceType
|
24
|
+
end # module Mass
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module Mass
|
2
|
+
class ProfileMTA < Mass::Profile
|
3
|
+
|
4
|
+
# Scrape the inbox of the profile over IMAP.
# Return an array of hash descriptors of outreach records, newest first.
#
# Parameters:
# - limit: the maximum number of messages to scrape. Default: 100. `nil` means no limit.
# - only_unread: if true, then only the unread messages will be scraped. This parameter is not used by :mta profiles. Default: true.
# - logger: a logger object to log the process. Default: nil.
#
# Connection settings are read from the profile descriptor
# (`imap_address`, `imap_port`, `inbox_label`), falling back to the
# `default_*` values of the profile type; `imap_username` and
# `imap_password` come from the profile descriptor only.
#
# Side effect: the id of the newest message seen is stored in
# `desc['imap_inbox_last_id']`; scanning stops when a message with the
# previously stored id is reached.
#
# The IMAP connection is always disconnected before returning, even when an
# error is raised in the middle of the process.
#
# Example of a hash descriptor in the returned array:
# ```
# {
#   # a scraped message is always a :performed message
#   'status' => :performed,
#   # what is the outreach type?
#   'outreach_type' => :GMail_DirectMessage,
#   # hash descriptor of the profile who is scraping the inbox
#   'profile' => self.desc,
#   # sender of the message, who is the conversation partner
#   'lead_or_company' => { 'name' => 'John Doe', 'email' => 'john@foo.com' },
#   # scraped messages are always reported as :incoming
#   'direction' => :incoming,
#   # the content of the message (`body` is the whole IMAP `BODY[]` section)
#   'subject' => nil,
#   'body' => nil,
# }
# ```
#
def inboxcheck(limit: 100, only_unread:true, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  ret = []
  profile = self
  t = self.type
  imap = nil
  sources = [
    {:folder=>profile.desc['inbox_label'] || t.desc['default_inbox_label'], :track_field=>'imap_inbox_last_id'},
    # the spam folder is not scanned for now
  ]

  # connecting imap
  l.logs "Connecting IMAP... "
  imap = Net::IMAP.new(
    profile.desc['imap_address'] || t.desc['default_imap_address'],
    profile.desc['imap_port'] || t.desc['default_imap_port'],
    true
  )
  conn = imap.login(
    profile.desc['imap_username'],
    profile.desc['imap_password']
  )
  l.logf "done (#{conn.name})"

  sources.each { |source|
    folder = source[:folder]
    track_field = source[:track_field]

    l.logs "Choosing mailbox #{folder}... "
    l.logs "Examine folder... "
    res = imap.examine(folder)
    l.logf "done (#{res.name})"

    # Getting latest `limit` messages received, in descending order (newer first),
    # in order to stop when I find the latest processed before.
    l.logs "Getting latest #{limit.to_s} messages... "
    ids = imap.search(["SUBJECT", profile.desc['search_all_wildcard']]).reverse
    # `first(limit)` keeps exactly `limit` ids (an inclusive `0..limit` range keeps one more).
    ids = ids.first(limit) if limit
    l.logf "done (#{ids.size.to_s} messages)"

    # iterate the messages
    last_message_id = nil
    ids.each { |id|
      l.logs "Processing message #{id.to_s.blue}... "
      # getting the envelope
      envelope = imap.fetch(id, "ENVELOPE")[0].attr["ENVELOPE"]

      # TODO: develop a normalization function for mail.message_id
      message_id = envelope.message_id.to_s.gsub(/^</, '').gsub(/>$/, '')

      # if this is the first message, then remember it
      last_message_id = message_id if last_message_id.nil?

      # check if this message_id is the latest processed
      if message_id == profile.desc[track_field]
        l.logf "skip".yellow + " (already processed)"
        break
      else
        sender = envelope.from[0]
        lead_email = sender.mailbox.to_s + '@' + sender.host.to_s
        lead_name = sender.name
        subject = envelope.subject
        body = imap.fetch(id, "BODY[]")[0].attr["BODY[]"]

        ret << {
          # a scraped message is always a :performed message
          'status' => :performed,
          # what is the outreach type?
          'outreach_type' => :GMail_DirectMessage,
          # hash descriptor of the profile who is scraping the inbox
          'profile' => profile.desc,
          # hash descriptor of the lead who is the conversation partner
          'lead_or_company' => {
            'name' => lead_name,
            'email' => lead_email,
          },
          # scraped messages are always reported as :incoming
          'direction' => :incoming,
          # the content of the message
          'subject' => subject,
          'body' => body,
        }

        l.logf "done".green
      end
    }

    # remember the latest message_id processed
    profile.desc[track_field] = last_message_id if last_message_id

    l.done

  } # end sources.each

  # disconnect
  l.logs "Disconnecting IMAP... "
  res = imap.logout
  l.logf "done (#{res.name})"

  # return
  return ret
ensure
  # Release the connection on success and on failure alike.
  imap.disconnect if imap && !imap.disconnected?
end # def inboxcheck
|
145
|
+
|
146
|
+
end # class ProfileMTA
|
147
|
+
end # module Mass
|