mass-client 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/base-line/channel.rb +7 -0
- data/lib/base-line/company.rb +7 -0
- data/lib/base-line/connectioncheck.rb +14 -0
- data/lib/base-line/data_type.rb +8 -0
- data/lib/base-line/enrichment.rb +51 -0
- data/lib/base-line/enrichment_type.rb +60 -0
- data/lib/base-line/event.rb +15 -0
- data/lib/base-line/headcount.rb +7 -0
- data/lib/base-line/inboxcheck.rb +14 -0
- data/lib/base-line/industry.rb +7 -0
- data/lib/base-line/job.rb +60 -0
- data/lib/base-line/lead.rb +7 -0
- data/lib/base-line/location.rb +7 -0
- data/lib/base-line/outreach.rb +42 -0
- data/lib/base-line/outreach_type.rb +76 -0
- data/lib/base-line/profile.rb +108 -0
- data/lib/base-line/profile_type.rb +10 -0
- data/lib/base-line/request.rb +40 -0
- data/lib/base-line/revenue.rb +7 -0
- data/lib/base-line/rule.rb +7 -0
- data/lib/base-line/source.rb +187 -0
- data/lib/base-line/source_type.rb +24 -0
- data/lib/base-line/tag.rb +7 -0
- data/lib/first-line/profile_api.rb +7 -0
- data/lib/first-line/profile_mta.rb +147 -0
- data/lib/first-line/profile_rpa.rb +375 -0
- data/lib/mass-client.rb +53 -0
- metadata +209 -0
@@ -0,0 +1,187 @@
|
|
1
|
+
module Mass
|
2
|
+
class Source < BlackStack::Base
|
3
|
+
attr_accessor :type
|
4
|
+
|
5
|
+
# Name that identifies this kind of object.
#
# NOTE(review): presumably consumed by BlackStack::Base to address the
# matching remote resource -- confirm against the base class.
#
# Return the string 'source'.
def self.object_name
  'source'
end
|
8
|
+
|
9
|
+
# Build the source from its hash descriptor.
#
# Parameters:
# - h: hash descriptor of the source. Its 'source_type_desc' entry is handed
#   to Mass::SourceType.new, and the `child_class_instance` of that source
#   type is stored in `type`.
def initialize(h)
  super(h)
  source_type = Mass::SourceType.new(h['source_type_desc'])
  self.type = source_type.child_class_instance
end
|
13
|
+
|
14
|
+
# Convert the `source_type` of this source into the name of the Ruby class
# that implements it, by prefixing it with the `Mass::` namespace.
# Example: a `source_type` of 'ApolloAPI' gives 'Mass::ApolloAPI'.
#
# Return the fully-qualified class name as a string.
def class_name_from_source_type
  'Mass::' + self.desc['source_type'].to_s
end
|
20
|
+
|
21
|
+
# Create an instance of the class that implements this source type, i.e. the
# class named by `class_name_from_source_type`, passing it this same hash
# descriptor (`self.desc`).
# Override the base method.
#
# Return the new instance.
# Raise an exception if no such class is loaded, or if the `source_type` does
# not form a valid constant name (e.g. it is missing).
def child_class_instance
  source_type = self.desc['source_type']
  key = self.class_name_from_source_type
  # `const_defined?` raises NameError on a malformed name (e.g. 'Mass::' when
  # `source_type` is missing). Treat that like "class not found", so the caller
  # always gets the actionable message below.
  found = begin
    Object.const_defined?(key)
  rescue NameError
    false
  end
  raise "Source code of source type #{source_type} not found. Create a class #{key} in the folder `/lib` of your mass-sdk." unless found
  Object.const_get(key).new(self.desc)
end
|
30
|
+
|
31
|
+
# Tell whether `url` is a valid URL for this source.
#
# If the profile `access` is not `rpa`, raise an exception.
# Return `true` if the `url` is valid.
# Return `false` if the `url` is not valid.
#
# This base implementation accepts every URL.
# Overload this method in the child class.
#
# NOTE(review): `profile_type` is not declared by this class in the visible
# code (only `type` is) -- confirm it is provided by the base class or by the
# child classes.
#
def valid_source_url?(url:)
  # The `access` value is compared as a string so that both 'rpa' and :rpa are
  # accepted (the `do` method normalizes it with `to_sym` for the same reason).
  access = self.profile_type.desc['access']
  raise "The method `valid_source_url?` is not allowed for #{access.to_s} access." if access.to_s != 'rpa'
  true
end
|
44
|
+
|
45
|
+
# Return the same URL in a normalized form:
# - remove all GET parameters (everything from the first `?`),
# - remove leading and trailing whitespace,
# - remove all trailing slashes.
#
# If the profile `access` is not `rpa`, raise an exception.
# If the `url` is not valid (see `valid_source_url?`), raise an exception.
# Return the normalized URL.
#
# Overload this method in the child class.
#
def normalized_source_url(url:)
  # Compare `access` as a string so that both 'rpa' and :rpa are accepted.
  access = self.profile_type.desc['access']
  raise "The method `normalized_source_url` is not allowed for #{access.to_s} access." if access.to_s != 'rpa'
  raise "The URL is not valid." if !self.valid_source_url?(url: url)
  # Drop the query string first, then surrounding blanks, then trailing slashes.
  without_params = url.gsub(/\?.*$/, '').strip
  without_params.gsub(/\/+$/, '')
end
|
68
|
+
|
69
|
+
# Tell whether `params` are valid parameters for this source.
#
# If the profile `access` is not `api`, raise an exception.
# Parameter `params` must be a hash; otherwise raise an exception.
# Return `true` if the `params` are valid.
# Return `false` if the `params` are not valid.
#
# This base implementation accepts every hash.
#
# NOTE(review): `profile_type` is not declared by this class in the visible
# code (only `type` is) -- confirm it is provided by the base class or by the
# child classes.
def valid_source_params?(params:)
  # The `access` value is compared as a string so that both 'api' and :api are
  # accepted (the `do` method normalizes it with `to_sym` for the same reason).
  access = self.profile_type.desc['access']
  raise "The method `valid_source_params?` is not allowed for #{access.to_s} access." if access.to_s != 'api'
  raise "The parameter `params` must be a hash." if !params.is_a?(Hash)
  true
end
|
82
|
+
|
83
|
+
# Return the array of event elements found for the `job`.
#
# This base implementation always raises an exception; it is meant to be
# overloaded in the child class.
def event_elements(job:)
  message = "The method `event_elements` is not implemented for #{self.class.name}."
  raise RuntimeError, message
end
|
87
|
+
|
88
|
+
# Scroll down the page until `event_limit` event elements are showed up.
#
# Parameters:
# - job: the job being processed. Its `profile.driver` runs the scrolling
#   script, and a screenshot (`job.profile.screenshot`) is appended to
#   `job.desc['screenshots']` after every scroll.
# - event_limit: stop as soon as this many event elements are found.
# - max_scrolls: number of scrolls to perform. Scrolling goes on beyond this
#   number for as long as the previous scroll showed up new events.
# - logger: a logger object to log the process. Default: nil.
#
# The position of the i-th scroll is `i * step` pixels, where `step` is
# `desc['scrolling_step']` plus a random amount below
# `desc['scrolling_step_random']`.
#
# Return nil.
def show_up_event_elements(job:, event_limit:, max_scrolls:, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  driver = job.profile.driver
  i = 0
  prev_n_events = 0
  n_events = self.event_elements(job: job).size
  while (i<max_scrolls || n_events>prev_n_events) && n_events<event_limit
    i += 1
    prev_n_events = n_events

    l.logs "Scrolling down (#{i.to_s.blue}/#{max_scrolls.to_s.blue} - #{n_events.to_s.blue}/#{event_limit.to_s.blue} events showed up)... "
    # `rand(0)` returns a Float in [0, 1), so only draw a random amount when
    # a randomization range is actually configured.
    jitter = self.desc['scrolling_step_random'].to_i
    step = self.desc['scrolling_step'] + (jitter.zero? ? 0 : rand(jitter))
    driver.execute_script("window.scrollTo(0, #{i.to_s}*#{step})")
    # give the page time to load the new elements
    sleep(5)
    # Count the events AFTER scrolling, so the loop condition is evaluated on
    # fresh data and no extra scroll is done once the limit has been reached.
    n_events = self.event_elements(job: job).size
    l.logf "done".green

    # screenshot
    l.logs 'Screenshot... '
    (job.desc['screenshots'] ||= []) << job.profile.screenshot
    l.logf 'done'.green + " (#{job.desc['screenshots'].size.to_s.blue} total)"
  end
end
|
120
|
+
|
121
|
+
# Return a hash descriptor of the events found.
#
# Parameters:
# - job: the job to perform. Not used by this base implementation.
# - logger: a logger object to log the process. Default: nil. Not used by
#   this base implementation.
#
# If the profile `access` is `:mta`, raise an exception.
#
# This base implementation finds nothing: it only returns the empty
# descriptor below. The validations once planned for it (mandatory
# `bot_driver` and `bot_url` for `:rpa` access, mandatory `api_key` and
# `api_params` for `:api` access, non-negative integer `event_count`) refer to
# values that are not parameters of this method, and are not performed.
#
# Output:
# {
#   'status' => :performed,
#   'screenshots' => [
#     # array of URLs to screenshots
#   ],
#   'snapshot_url' => 'https://foo.com/snapshot.png', # nil in this base implementation
#   'events' => [
#     {
#       'url' => 'https://facebook.com/john-doe/posts/12345', # normalized URL of the event
#       'title' => 'Join my Facebook Community!',
#       'content' => 'My name is John Doe and I invite everyone to join my Facebook Community: facebook.com/groups/john-doe-restaurants!',
#       'pictures' => [
#         # array of URLs to pictures scraped from the post and uploaded to our DropBox.
#       ],
#       'lead' => {
#         'name' => 'John Doe',
#         'url' => 'https://facebook.com/john-doe',
#         'headline' => "Founder & CEO at Doe's Restaurants",
#         'picture' => 'https://foo.com/john-doe.png'
#       }
#     },
#   ],
# }
#
def do(job:, logger:nil)
  access = self.profile_type.desc['access']
  if access.to_sym == :mta
    raise "The method `do` is not allowed for #{access.to_s} access."
  end

  result = {}
  result['status'] = :performed
  # array of URLs to screenshots
  result['screenshots'] = []
  # URL of the HTML snapshot
  result['snapshot_url'] = nil
  # array of event descriptors
  result['events'] = []
  result
end # def do
|
185
|
+
|
186
|
+
end # class Source
|
187
|
+
end # module Mass
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Mass
|
2
|
+
class SourceType < BlackStack::Base
|
3
|
+
# NEVER EVER DOWNLOAD LINKEDIN/FACEBOOK IMAGES USING THE LOCAL IP ADDRESS.
|
4
|
+
# USE THE SAME BROWSER DRIVER AND JAVASCRIPT INSTEAD OF THE LOCAL IP ADDRESS.
|
5
|
+
#extend BlackStack::Storage # include the module with the class methods
|
6
|
+
|
7
|
+
attr_accessor :profile_type
|
8
|
+
|
9
|
+
# Build the source type from its hash descriptor.
#
# Parameters:
# - h: hash descriptor of the source type. Default: {}.
#
# If the descriptor has a 'profile_type', the first profile type returned by
# Mass::ProfileType.page when filtering by that name is looked up, and its
# `child_class_instance` is stored in `profile_type`.
#
# NOTE(review): if the lookup returns no record, `first` presumably gives nil
# and this fails with NoMethodError -- confirm what `page` returns.
def initialize(h={})
  super(h)
  return unless h['profile_type']

  matches = Mass::ProfileType.page(
    page: 1,
    limit: 1,
    filters: { name: h['profile_type'] }
  )
  self.profile_type = matches.first.child_class_instance
end
|
19
|
+
|
20
|
+
# Name that identifies this kind of object.
#
# NOTE(review): presumably consumed by BlackStack::Base to address the
# matching remote resource -- confirm against the base class.
#
# Return the string 'source_type'.
def self.object_name
  'source_type'
end
|
23
|
+
end # class SourceType
|
24
|
+
end # module Mass
|
@@ -0,0 +1,147 @@
|
|
1
|
+
module Mass
|
2
|
+
class ProfileMTA < Mass::Profile
|
3
|
+
|
4
|
+
# Scrape the inbox of the profile over IMAP.
# Return an array of hash descriptors of outreach records, newer messages first.
#
# Parameters:
# - limit: the maximum number of messages to scrape. Default: 100.
# - only_unread: if true, then only the unread messages will be scraped. This parameter is not used by :mta profiles. Default: true.
# - logger: a logger object to log the process. Default: nil.
#
# The connection settings are taken from the profile descriptor
# ('imap_address', 'imap_port', 'imap_username', 'imap_password',
# 'inbox_label'), falling back to the 'default_*' values of the profile type
# for the address, port and inbox label.
#
# The message-id of the newest message seen is stored in
# `desc['imap_inbox_last_id']`; scraping stops at the message whose id matches
# the value stored there before this call.
#
# Example of a hash descriptor into the returned array:
# ```
# {
#   # a scraped message is always a :performed message
#   'status' => :performed,
#   # what is the outreach type?
#   'outreach_type' => :GMail_DirectMessage,
#   # hash descriptor of the profile who is scraping the inbox
#   'profile' => self.desc,
#   # hash descriptor of the sender of the message
#   'lead_or_company' => { 'name' => 'John Doe', 'email' => 'john@doe.com' },
#   # messages scraped from the inbox are always :incoming
#   'direction' => :incoming,
#   # the content of the message
#   'subject' => 'Hello',
#   'body' => '...full raw message (BODY[])...',
# }
# ```
#
def inboxcheck(limit: 100, only_unread:true, logger:nil)
  l = logger || BlackStack::DummyLogger.new(nil)
  ret = []
  profile = self
  t = self.type
  sources = [
    {:folder=>profile.desc['inbox_label'] || t.desc['default_inbox_label'], :track_field=>'imap_inbox_last_id'},
    # TODO: the spam folder is not scraped yet; it would be tracked with 'imap_spam_last_id'.
  ]

  # connecting imap
  # The options form (`port:`, `ssl:`) replaces the obsolete positional
  # `port, usessl` arguments.
  l.logs "Connecting IMAP... "
  imap = Net::IMAP.new(
    profile.desc['imap_address'] || t.desc['default_imap_address'],
    port: profile.desc['imap_port'] || t.desc['default_imap_port'],
    ssl: true
  )
  begin
    conn = imap.login(
      profile.desc['imap_username'],
      profile.desc['imap_password']
    )
    l.logf "done (#{conn.name})"

    sources.each do |source|
      folder = source[:folder]
      track_field = source[:track_field]

      # this log level is closed by the `l.done` at the end of the iteration
      l.logs "Choosing mailbox #{folder}... "
      l.logs "Examine folder... "
      res = imap.examine(folder)
      l.logf "done (#{res.name})"

      # Getting latest `limit` messages received, in descendent order (newer first),
      # in order to stop when I find the latest processed before.
      l.logs "Getting latest #{limit.to_s} messages... "
      # `first(limit)` takes exactly `limit` messages (`[0..limit]` took one more).
      ids = imap.search(["SUBJECT", profile.desc['search_all_wildcard']]).reverse.first(limit)
      l.logf "done (#{ids.size.to_s} messages)"

      # iterate the messages
      last_message_id = nil
      ids.each do |id|
        l.logs "Processing message #{id.to_s.blue}... "
        # getting the envelope
        envelope = imap.fetch(id, "ENVELOPE")[0].attr["ENVELOPE"]

        # strip the enclosing angle brackets
        # TODO: develop a normalization function for mail.message_id
        message_id = envelope.message_id.to_s.gsub(/^</, '').gsub(/>$/, '')

        # if this is the first (newest) message, then remember it
        last_message_id = message_id if last_message_id.nil?

        # check if this message_id is the latest processed
        if message_id == profile.desc[track_field]
          l.logf "skip".yellow + " (already processed)"
          break
        else
          sender = envelope.from[0]
          lead_email = sender.mailbox.to_s + '@' + sender.host.to_s
          lead_name = sender.name
          subject = envelope.subject
          body = imap.fetch(id, "BODY[]")[0].attr["BODY[]"]

          # TODO: bounce reports are not analyzed yet.

          ret << {
            # a scraped message is always a :performed message
            'status' => :performed,
            # what is the outreach type?
            'outreach_type' => :GMail_DirectMessage,
            # hash descriptor of the profile who is scraping the inbox
            'profile' => profile.desc,
            # hash descriptor of the sender, who is the conversation partner
            'lead_or_company' => {
              'name' => lead_name,
              'email' => lead_email,
            },
            # messages scraped from the inbox are always :incoming
            'direction' => :incoming,
            # the content of the message
            'subject' => subject,
            'body' => body,
          }

          l.logf "done".green
        end
      end

      # remember the latest message_id processed
      profile.desc[track_field] = last_message_id if last_message_id

      l.done
    end # end sources.each

    # disconnect
    l.logs "Disconnecting IMAP... "
    res = imap.logout
    l.logf "done (#{res.name})"
  ensure
    # Close the connection even if something failed half way through.
    imap.disconnect unless imap.disconnected?
  end

  # return
  ret
end # def inboxcheck
|
145
|
+
|
146
|
+
end # class ProfileMTA
|
147
|
+
end # module Mass
|