ish_models 0.0.33.75 → 0.0.33.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d6a34177992d8327e400da5ecd0b1761060f3ac
4
- data.tar.gz: 6c3bd3d89889af78955cdc3787d2ea211a45ab39
3
+ metadata.gz: deeec6fc835a3c8501c2b2441ae52be690fc05f9
4
+ data.tar.gz: a5ca96a086af904a497f264181d868e48cf68070
5
5
  SHA512:
6
- metadata.gz: 46fcdaf6bf4511d92f65d7dfffce1530840766ddf7af63c447f79965de07aeb6aad3b2ec097cb35c51333008acaafaee7bed166a5e4ca5e0166810fff5404899
7
- data.tar.gz: 9ffbb1d290ec5315f53b87ebde19cad6f23be90c8bf945f1e0675d8d5d1ab24d5bdcadec7bd62a5243d417a77734f248268afe0c41d6868a7f4b8a2d0cf565fd
6
+ metadata.gz: f9332c01b89e1df6d74d85ad8452cd42ca3e36db82521b9451553246158b0622ec6ef26007f19208fc0c6f27c55d5d8c32d9690e2eaa59838df152584e2befbd
7
+ data.tar.gz: d5cea6d952ed121551506aa9b210a07d041b48a8296cf17ef48a2c97aa2efdd8fa7d54808f2f5537f8e4919a6280e79dc3c460308c3ee585c204fc40551b4fdd
@@ -0,0 +1,35 @@
1
+
2
# Debug print helper.
#
# Writes a "+++ <label>" header line followed by the inspected form of the
# value, each on its own line of standard output.
#
# @param a [Object] value to dump via #inspect
# @param b [Object] label interpolated into the header line (defaults to '')
# @return [nil]
def puts!(a, b = '')
  $stdout.puts("+++ #{b}")
  $stdout.puts(a.inspect)
end
6
+
7
require 'uri'

module Ish
  # Small scraping helpers: find the first Google hit for a query, fetch that
  # page, and pull e-mail addresses out of arbitrary text.
  class Crawler
    # Matches an e-mail address anywhere inside a larger text.
    # It is deliberately NOT anchored with \A / \z, and the repeated domain
    # part is a non-capturing group: String#scan returns the group captures
    # instead of the whole match as soon as the pattern contains a capture
    # group.
    EMAIL_REGEX = /[\w+\-.]+@[a-z\d\-]+(?:\.[a-z\d\-]+)*\.[a-z]+/i.freeze

    # Searches Google for +text+, takes the first <cite> element of the
    # result page as the target website, and downloads that website.
    #
    # NOTE(review): both requests are made with :verify => false, i.e. TLS
    # certificate verification is disabled. That is kept for backward
    # compatibility but should be confirmed as intentional.
    #
    # @param text [#to_s] search query; it is form-encoded before being
    #   placed in the URL so spaces, '&', '#', etc. do not corrupt the request
    # @return [Hash] +{ :url => String, :html => String }+ on success,
    #   +{ :url => String }+ when fetching the website raises an SSL error,
    #   and +{}+ when the result page contains no <cite> element
    def self.google_first_result(text)
      query = URI.encode_www_form_component(text.to_s)
      search = HTTParty.get("https://www.google.com/search?q=#{query}", :verify => false)

      cite = Nokogiri::HTML(search.body).css('cite').first
      # No result (or Google changed its markup): nothing to crawl.
      return {} unless cite

      website = cite.text
      website = "https://#{website}" unless website.start_with?('http')

      puts! website, 'website'

      begin
        page = HTTParty.get(website, :verify => false)
      rescue OpenSSL::SSL::SSLError => e
        # Best effort: still report which site was found, just without HTML.
        puts! e, 'e'
        return { :url => website }
      end

      { :url => website, :html => page.body }
    end

    # Extracts every e-mail address found in +text+.
    #
    # @param text [#to_s, nil] text (for example a page's HTML) to search
    # @return [String, nil] the addresses joined with ',' in order of
    #   appearance, or nil when none is found
    def self.look_for_emails(text)
      emails = text.to_s.scan(EMAIL_REGEX)
      emails.empty? ? nil : emails.join(',')
    end
  end
end
data/lib/ish/lead.rb CHANGED
@@ -17,6 +17,7 @@ class Ish::Lead
17
17
  field :email
18
18
  field :job_url
19
19
  field :company_url
20
+ field :website_html
20
21
  field :yelp_url
21
22
 
22
23
  field :company
@@ -42,5 +43,9 @@ class Ish::Lead
42
43
  write_attribute :phone, which.gsub(/\D/, '').to_i
43
44
  end
44
45
  field :phone, :type => Integer
46
+ field :address, :type => String
47
+
48
+ # If I crawl on 20180724, I add "20180724" here, so I don't crawl in the same way again.
49
+ field :extra, :type => Array, :default => []
45
50
 
46
51
  end
data/lib/ish_models.rb CHANGED
@@ -26,6 +26,7 @@ require 'co_tailors/order.rb'
26
26
  # require 'co_tailors/order_item.rb' # this is required from within order.rb
27
27
  require 'co_tailors/address.rb'
28
28
 
29
+ require 'ish/crawler.rb'
29
30
  require 'ish/gallery_name.rb'
30
31
  require 'ish/payment.rb'
31
32
  require 'ish/stock_action.rb'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ish_models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.33.75
4
+ version: 0.0.33.76
5
5
  platform: ruby
6
6
  authors:
7
7
  - piousbox
@@ -122,6 +122,7 @@ files:
122
122
  - lib/gallery.rb
123
123
  - lib/gallery2.rb
124
124
  - lib/ish/campaign.rb
125
+ - lib/ish/crawler.rb
125
126
  - lib/ish/gallery_name.rb
126
127
  - lib/ish/invoice.rb
127
128
  - lib/ish/issue.rb