ish_models 0.0.33.75 → 0.0.33.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ish/crawler.rb +35 -0
- data/lib/ish/lead.rb +5 -0
- data/lib/ish_models.rb +1 -0
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: deeec6fc835a3c8501c2b2441ae52be690fc05f9
|
4
|
+
data.tar.gz: a5ca96a086af904a497f264181d868e48cf68070
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f9332c01b89e1df6d74d85ad8452cd42ca3e36db82521b9451553246158b0622ec6ef26007f19208fc0c6f27c55d5d8c32d9690e2eaa59838df152584e2befbd
|
7
|
+
data.tar.gz: d5cea6d952ed121551506aa9b210a07d041b48a8296cf17ef48a2c97aa2efdd8fa7d54808f2f5537f8e4919a6280e79dc3c460308c3ee585c204fc40551b4fdd
|
data/lib/ish/crawler.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
# Debug-print helper: writes a "+++ <label>" banner line followed by the
# inspected form of the value, both to standard output.
#
# @param a [Object] value to dump; printed via #inspect
# @param b [String] optional label shown after the "+++ " banner
# @return [nil] whatever Kernel#puts returns
def puts!(a, b = '')
  puts "+++ #{b}"
  puts a.inspect
end
|
6
|
+
|
7
|
+
require 'uri' # for URI.encode_www_form_component

module Ish
  # Scraping helpers: find a website through a Google search and pull email
  # addresses out of a chunk of text.
  class Crawler

    # Matches an email address anywhere inside a larger text. It is
    # deliberately unanchored and uses a non-capturing group, so that
    # String#scan returns whole addresses rather than group sub-matches.
    EMAIL_REGEX = /[\w+\-.]+@[a-z\d\-]+(?:\.[a-z\d\-]+)*\.[a-z]+/i

    # Searches Google for +text+, takes the first <cite> element of the results
    # page as the target website, and fetches that website.
    #
    # @param text [String] search query; URL-encoded before it is sent
    # @return [Hash, nil] { :url => ..., :html => ... } on success;
    #   { :url => ... } only when fetching the website raises an SSL error;
    #   nil when the results page contains no <cite> element
    def self.google_first_result(text)
      query = URI.encode_www_form_component(text.to_s)

      # NOTE(review): :verify => false switches off TLS certificate
      # verification for both requests. Kept as in the original, but confirm
      # this is intended before relying on the fetched content.
      search = HTTParty.get("https://www.google.com/search?q=#{query}", :verify => false)

      cite = Nokogiri::HTML(search.body).css('cite').first
      return nil if cite.nil? # no result to follow

      website = cite.text
      website = "https://#{website}" unless website.start_with?('http')

      puts! website, 'website'

      begin
        page = HTTParty.get(website, :verify => false)
      rescue OpenSSL::SSL::SSLError => e
        puts! e, 'e'
        return { :url => website }
      end

      { :url => website, :html => page.body }
    end

    # Collects every email address found in +text+.
    #
    # @param text [String, nil] arbitrary text (e.g. page HTML); nil is treated
    #   as empty
    # @return [String, nil] the addresses joined with ",", in order of
    #   appearance, or nil when none are found
    def self.look_for_emails(text)
      found = text.to_s.scan(EMAIL_REGEX)
      found.empty? ? nil : found.join(',')
    end

  end
end
|
data/lib/ish/lead.rb
CHANGED
@@ -17,6 +17,7 @@ class Ish::Lead
|
|
17
17
|
field :email
|
18
18
|
field :job_url
|
19
19
|
field :company_url
|
20
|
+
field :website_html
|
20
21
|
field :yelp_url
|
21
22
|
|
22
23
|
field :company
|
@@ -42,5 +43,9 @@ class Ish::Lead
|
|
42
43
|
write_attribute :phone, which.gsub(/\D/, '').to_i
|
43
44
|
end
|
44
45
|
field :phone, :type => Integer
|
46
|
+
field :address, :type => String
|
47
|
+
|
48
|
+
# If I crawl on 20180724, I add "20180724" here, so I don't crawl in the same way again.
|
49
|
+
field :extra, :type => Array, :default => []
|
45
50
|
|
46
51
|
end
|
data/lib/ish_models.rb
CHANGED
@@ -26,6 +26,7 @@ require 'co_tailors/order.rb'
|
|
26
26
|
# require 'co_tailors/order_item.rb' # this is required from within order.rb
|
27
27
|
require 'co_tailors/address.rb'
|
28
28
|
|
29
|
+
require 'ish/crawler.rb'
|
29
30
|
require 'ish/gallery_name.rb'
|
30
31
|
require 'ish/payment.rb'
|
31
32
|
require 'ish/stock_action.rb'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ish_models
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.33.75
|
4
|
+
version: 0.0.33.76
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- piousbox
|
@@ -122,6 +122,7 @@ files:
|
|
122
122
|
- lib/gallery.rb
|
123
123
|
- lib/gallery2.rb
|
124
124
|
- lib/ish/campaign.rb
|
125
|
+
- lib/ish/crawler.rb
|
125
126
|
- lib/ish/gallery_name.rb
|
126
127
|
- lib/ish/invoice.rb
|
127
128
|
- lib/ish/issue.rb
|