ish_models 0.0.33.75 → 0.0.33.76

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d6a34177992d8327e400da5ecd0b1761060f3ac
4
- data.tar.gz: 6c3bd3d89889af78955cdc3787d2ea211a45ab39
3
+ metadata.gz: deeec6fc835a3c8501c2b2441ae52be690fc05f9
4
+ data.tar.gz: a5ca96a086af904a497f264181d868e48cf68070
5
5
  SHA512:
6
- metadata.gz: 46fcdaf6bf4511d92f65d7dfffce1530840766ddf7af63c447f79965de07aeb6aad3b2ec097cb35c51333008acaafaee7bed166a5e4ca5e0166810fff5404899
7
- data.tar.gz: 9ffbb1d290ec5315f53b87ebde19cad6f23be90c8bf945f1e0675d8d5d1ab24d5bdcadec7bd62a5243d417a77734f248268afe0c41d6868a7f4b8a2d0cf565fd
6
+ metadata.gz: f9332c01b89e1df6d74d85ad8452cd42ca3e36db82521b9451553246158b0622ec6ef26007f19208fc0c6f27c55d5d8c32d9690e2eaa59838df152584e2befbd
7
+ data.tar.gz: d5cea6d952ed121551506aa9b210a07d041b48a8296cf17ef48a2c97aa2efdd8fa7d54808f2f5537f8e4919a6280e79dc3c460308c3ee585c204fc40551b4fdd
@@ -0,0 +1,35 @@
1
+
2
# Debug helper: prints a "+++ <label>" header line followed by the
# +inspect+ representation of the given object.
#
# @param a [Object] the object to dump
# @param b [Object] label interpolated into the header line (defaults to '')
# @return [nil]
def puts!(a, b = '')
  puts "+++ #{b}"
  puts a.inspect
end
6
+
7
module Ish
  # Crawling helpers: find the first Google hit for a query and pull e-mail
  # addresses out of a blob of text.
  class Crawler
    # Deliberately unanchored so +String#scan+ can find addresses embedded in
    # larger text, and with a non-capturing group so +scan+ yields the whole
    # match rather than only the group's capture.
    EMAIL_REGEX = /[\w+\-.]+@[a-z\d\-]+(?:\.[a-z\d\-]+)*\.[a-z]+/i.freeze

    # Searches Google for +text+, takes the text of the first <cite> element
    # on the results page as the website address, and fetches that site.
    #
    # @param text [String] the search query
    # @return [Hash] +{ :url => website, :html => body }+ on success;
    #   +{ :url => website }+ when fetching the site fails with an SSL error;
    #   +{ :url => nil }+ when the results page contains no <cite> element
    def self.google_first_result(text)
      # Pass the query via :query so HTTParty URL-encodes it (spaces, '&', '#', ...).
      # NOTE(review): certificate verification is disabled here, as in the original.
      search = HTTParty.get('https://www.google.com/search', :query => { :q => text }, :verify => false)
      cite = Nokogiri::HTML(search.body).css('cite').first
      return { :url => nil } unless cite

      website = cite.text
      website = "https://#{website}" unless website.start_with?('http')

      puts! website, 'website'

      begin
        page = HTTParty.get(website, :verify => false)
      rescue OpenSSL::SSL::SSLError => e
        puts! e, 'e'
        return { :url => website }
      end

      { :url => website, :html => page.body }
    end

    # Extracts every e-mail address found in +text+.
    #
    # @param text [String, nil] text to search (nil is treated as empty)
    # @return [String, nil] the addresses joined with ',' in order of
    #   appearance, or nil when none are found
    def self.look_for_emails(text)
      emails = text.to_s.scan(EMAIL_REGEX)
      emails.empty? ? nil : emails.join(',')
    end
  end
end
data/lib/ish/lead.rb CHANGED
@@ -17,6 +17,7 @@ class Ish::Lead
17
17
  field :email
18
18
  field :job_url
19
19
  field :company_url
20
+ field :website_html
20
21
  field :yelp_url
21
22
 
22
23
  field :company
@@ -42,5 +43,9 @@ class Ish::Lead
42
43
  write_attribute :phone, which.gsub(/\D/, '').to_i
43
44
  end
44
45
  field :phone, :type => Integer
46
+ field :address, :type => String
47
+
48
+ # If I crawl on 20180724, I add "20180724" here, so I don't crawl in the same way again.
49
+ field :extra, :type => Array, :default => []
45
50
 
46
51
  end
data/lib/ish_models.rb CHANGED
@@ -26,6 +26,7 @@ require 'co_tailors/order.rb'
26
26
  # require 'co_tailors/order_item.rb' # this is required from within order.rb
27
27
  require 'co_tailors/address.rb'
28
28
 
29
+ require 'ish/crawler.rb'
29
30
  require 'ish/gallery_name.rb'
30
31
  require 'ish/payment.rb'
31
32
  require 'ish/stock_action.rb'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ish_models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.33.75
4
+ version: 0.0.33.76
5
5
  platform: ruby
6
6
  authors:
7
7
  - piousbox
@@ -122,6 +122,7 @@ files:
122
122
  - lib/gallery.rb
123
123
  - lib/gallery2.rb
124
124
  - lib/ish/campaign.rb
125
+ - lib/ish/crawler.rb
125
126
  - lib/ish/gallery_name.rb
126
127
  - lib/ish/invoice.rb
127
128
  - lib/ish/issue.rb