ish_models 0.0.33.75 → 0.0.33.76
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ish/crawler.rb +35 -0
- data/lib/ish/lead.rb +5 -0
- data/lib/ish_models.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deeec6fc835a3c8501c2b2441ae52be690fc05f9
|
4
|
+
data.tar.gz: a5ca96a086af904a497f264181d868e48cf68070
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f9332c01b89e1df6d74d85ad8452cd42ca3e36db82521b9451553246158b0622ec6ef26007f19208fc0c6f27c55d5d8c32d9690e2eaa59838df152584e2befbd
|
7
|
+
data.tar.gz: d5cea6d952ed121551506aa9b210a07d041b48a8296cf17ef48a2c97aa2efdd8fa7d54808f2f5537f8e4919a6280e79dc3c460308c3ee585c204fc40551b4fdd
|
data/lib/ish/crawler.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
# Debug helper: prints a "+++ <label>" header line followed by the
# inspected form of a value, both through Kernel#puts.
#
# a - the object to dump (rendered with #inspect).
# b - an optional label shown after the "+++ " marker (default: '').
#
# Returns nil (the return value of the final puts).
def puts!(a, b = '')
  puts "+++ #{b}"
  puts a.inspect
end
|
6
|
+
|
7
|
+
require 'uri'

module Ish
  # Scraping helpers: find the first Google result for a query and pull
  # email addresses out of a blob of text.
  class Crawler

    # Matches an email address anywhere inside a larger text. The group is
    # non-capturing on purpose: String#scan returns capture groups instead of
    # the whole match as soon as the pattern contains a capturing group.
    EMAIL_REGEX = /[\w+\-.]+@[a-z\d\-]+(?:\.[a-z\d\-]+)*\.[a-z]+/i.freeze

    # Searches Google for +text+, takes the text of the first <cite> element
    # on the results page as the target website, and fetches that website.
    #
    # text - the search query; it is form-encoded before being placed in the URL.
    #
    # Returns a Hash:
    #   { :url => website, :html => body }  when the website was fetched,
    #   { :url => website }                 when fetching it raised an SSL error,
    #   { :url => nil }                     when the results page has no <cite>.
    #
    # NOTE(review): :verify => false turns off TLS certificate verification for
    # both requests. Kept as-is to preserve behavior; confirm this is intended.
    def self.google_first_result(text)
      query = URI.encode_www_form_component(text.to_s)
      search = HTTParty.get("https://www.google.com/search?q=#{query}", :verify => false)

      # No <cite> element means there is no result to follow.
      cite = Nokogiri::HTML(search.body).css('cite').first
      return { :url => nil } unless cite

      website = cite.text
      website = "https://#{website}" unless website.start_with?('http')

      puts! website, 'website'

      begin
        page = HTTParty.get(website, :verify => false)
      rescue OpenSSL::SSL::SSLError => e
        puts! e, 'e'
        return { :url => website }
      end

      { :url => website, :html => page.body }
    end

    # Extracts every email address found in +text+.
    #
    # text - a String to search (nil is treated as an empty string).
    #
    # Returns the addresses joined with ',' in order of appearance, or nil
    # when none are found.
    def self.look_for_emails(text)
      emails = text.to_s.scan(EMAIL_REGEX)
      emails.empty? ? nil : emails.join(',')
    end

  end
end
|
data/lib/ish/lead.rb
CHANGED
@@ -17,6 +17,7 @@ class Ish::Lead
|
|
17
17
|
field :email
|
18
18
|
field :job_url
|
19
19
|
field :company_url
|
20
|
+
field :website_html
|
20
21
|
field :yelp_url
|
21
22
|
|
22
23
|
field :company
|
@@ -42,5 +43,9 @@ class Ish::Lead
|
|
42
43
|
write_attribute :phone, which.gsub(/\D/, '').to_i
|
43
44
|
end
|
44
45
|
field :phone, :type => Integer
|
46
|
+
field :address, :type => String
|
47
|
+
|
48
|
+
# If I crawl on 20180724, I add "20180724" here, so I don't crawl in the same way again.
|
49
|
+
field :extra, :type => Array, :default => []
|
45
50
|
|
46
51
|
end
|
data/lib/ish_models.rb
CHANGED
@@ -26,6 +26,7 @@ require 'co_tailors/order.rb'
|
|
26
26
|
# require 'co_tailors/order_item.rb' # this is required from within order.rb
|
27
27
|
require 'co_tailors/address.rb'
|
28
28
|
|
29
|
+
require 'ish/crawler.rb'
|
29
30
|
require 'ish/gallery_name.rb'
|
30
31
|
require 'ish/payment.rb'
|
31
32
|
require 'ish/stock_action.rb'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ish_models
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.33.75
|
4
|
+
version: 0.0.33.76
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- piousbox
|
@@ -122,6 +122,7 @@ files:
|
|
122
122
|
- lib/gallery.rb
|
123
123
|
- lib/gallery2.rb
|
124
124
|
- lib/ish/campaign.rb
|
125
|
+
- lib/ish/crawler.rb
|
125
126
|
- lib/ish/gallery_name.rb
|
126
127
|
- lib/ish/invoice.rb
|
127
128
|
- lib/ish/issue.rb
|