whos_using_what 0.3.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ class BaseApiClient < Base
5
5
  require "uri"
6
6
  require "rest-client"
7
7
 
8
- def arraySearch(array, rawHtml)
8
+ def self.arraySearch(array, rawHtml)
9
9
 
10
10
  rawHtml = rawHtml.downcase
11
11
  array.each do |token|
@@ -16,7 +16,8 @@ class BaseApiClient < Base
16
16
  return false
17
17
  end
18
18
 
19
- def arry_to_str_delim array, delim
19
+
20
+ def self.arry_to_str_delim array, delim
20
21
 
21
22
  str = ""
22
23
  i = 0
@@ -34,7 +35,7 @@ class BaseApiClient < Base
34
35
  end
35
36
 
36
37
 
37
- def cleanup_url url
38
+ def self.cleanup_url url
38
39
  #clean up url
39
40
  url = url.strip
40
41
  if url["www."] != nil
@@ -49,19 +50,20 @@ class BaseApiClient < Base
49
50
 
50
51
  def determineIfUsesTechnology(technology, rawHtml)
51
52
 
52
- isJobPage = arraySearch(@jobPageTokens, rawHtml)
53
+ isJobPage = BaseApiClient.arraySearch(@jobPageTokens, rawHtml)
53
54
 
54
55
  return isJobPage
55
56
 
56
57
  end
57
58
 
58
59
 
59
- def starts_with?(string, prefix)
60
+ def self.starts_with?(string, prefix)
60
61
  prefix = prefix.to_s
61
62
  string[0, prefix.length] == prefix
62
63
  end
63
64
 
64
- def prepare_params_from_map_helper (base_url, params_map)
65
+
66
+ def self.prepare_params_from_map_helper (base_url, params_map)
65
67
 
66
68
  iter = 1
67
69
 
@@ -47,17 +47,17 @@ class GoogleClient < BaseApiClient
47
47
  return acceptedUrls
48
48
  end
49
49
 
50
- mustContainUrl = cleanup_url mustContainUrl
50
+ mustContainUrl = BaseApiClient.cleanup_url mustContainUrl
51
51
 
52
52
  urls.each do |url|
53
53
 
54
- url = cleanup_url url
54
+ url = BaseApiClient.cleanup_url url
55
55
 
56
56
  accept_url_bool = false
57
57
 
58
58
  @positiveMatchUrlPatterns.each do |token|
59
- if (starts_with? url, token) ||
60
- (starts_with? url, mustContainUrl)
59
+ if (BaseApiClient.starts_with? url, token) ||
60
+ (BaseApiClient.starts_with? url, mustContainUrl)
61
61
  accept_url_bool = true
62
62
  break
63
63
  end
@@ -78,7 +78,7 @@ class GoogleClient < BaseApiClient
78
78
  end
79
79
  end
80
80
 
81
- url = cleanup_url url
81
+ url = BaseApiClient.cleanup_url url
82
82
 
83
83
  if accept_url_bool &&
84
84
  url != nil && !(acceptedUrls.include? url)
@@ -88,33 +88,11 @@ class GoogleClient < BaseApiClient
88
88
  acceptedUrls
89
89
  end
90
90
 
91
- def generate_google_url site_url, search_keyword
92
- query_url = [
93
- "http://www.google.com/search?",
94
- "hl=en",
95
- "&as_q=" << "hiring+" << search_keyword,
96
- "&as_sitesearch=" << (cleanup_url (site_url))
97
- ]
98
91
 
99
- url = arry_to_str_delim query_url, ""
100
-
101
- end
102
-
103
- def generate_duckduckgo_url site_url, search_keyword
104
-
105
- query_url = [
106
- "http://duckduckgo.com/?",
107
- "q=" <<
108
- "site:" << (cleanup_url (site_url)) <<
109
- "+hiring+" << search_keyword,
110
- ]
111
-
112
- url = arry_to_str_delim query_url, ""
113
- end
114
92
 
115
93
  #performs a search engine search that is restricted to a company's website and then attempts to determine if they have job listings for a given technology.
116
94
  #If an ad exists it is returned as part of map
117
- def google_search queries, site_url
95
+ def perform_search queries, site_url, search_query_url_generator_closure
118
96
 
119
97
  ret_map = Hash.new
120
98
 
@@ -125,7 +103,7 @@ class GoogleClient < BaseApiClient
125
103
 
126
104
  begin
127
105
 
128
- url = generate_duckduckgo_url site_url, search_keyword
106
+ url = search_query_url_generator_closure.call( site_url, search_keyword )
129
107
 
130
108
  #perform initial search engine search
131
109
  @browser.goto url
@@ -16,7 +16,7 @@ class TechAdTagger < Base
16
16
 
17
17
 
18
18
  #iterates through array and updates company db record with technologies found from ads from their website
19
- def tag_company_with_technologies tech_keywords
19
+ def tag_company_with_technologies tech_keywords, search_query_url_generator_closure
20
20
 
21
21
  # uncomment if need to clear out all existing technologies
22
22
  =begin
@@ -37,7 +37,7 @@ class TechAdTagger < Base
37
37
 
38
38
  languages = Hash.new
39
39
 
40
- company_languages_map = @search_client.google_search tech_keywords, company["websiteUrl"]
40
+ company_languages_map = @search_client.perform_search tech_keywords, company["websiteUrl"], search_query_url_generator_closure
41
41
 
42
42
  company_languages_map.each do |key, value|
43
43
 
@@ -6,6 +6,7 @@ class DataPopulators
6
6
 
7
7
  $:.unshift(File.expand_path('../../data_gatherers', __FILE__))
8
8
  $:.unshift(File.expand_path('../../data_searchers', __FILE__))
9
+ $:.unshift(File.expand_path('../../api_clients', __FILE__))
9
10
  $:.unshift(File.expand_path('../../logging', __FILE__))
10
11
 
11
12
  end
@@ -18,6 +19,7 @@ class DataPopulators
18
19
  require 'logger_factory'
19
20
  require 'gather_companies'
20
21
  require 'tech_ad_tagger'
22
+ require 'base_api_client'
21
23
 
22
24
  end
23
25
 
@@ -52,7 +54,7 @@ class DataPopulators
52
54
 
53
55
  begin
54
56
 
55
- # @@gather_companies.load_companies_to_db 700, 0, @@facet_location
57
+ # @@gather_companies.load_companies_to_db 700, 0, @@facet_location
56
58
 
57
59
  rescue Exception => e
58
60
  puts e.message
@@ -64,9 +66,35 @@ class DataPopulators
64
66
 
65
67
  t2 = Thread.new do
66
68
 
69
+ # def site_url, search_keyword
70
+ generate_google_url = lambda { |site_url, search_keyword|
71
+ query_url = [
72
+ "http://www.google.com/search?",
73
+ "hl=en",
74
+ "&as_q=" << "hiring+" << search_keyword,
75
+ "&as_sitesearch=" << (BaseApiClient.cleanup_url (site_url))
76
+ ]
77
+
78
+ url = BaseApiClient.arry_to_str_delim query_url, ""
79
+ }
80
+
81
+
82
+ # def site_url, search_keyword
83
+ generate_duckduckgo_url= lambda { |site_url, search_keyword|
84
+ query_url = [
85
+ "http://duckduckgo.com/?",
86
+ "q=" <<
87
+ "site:" << (BaseApiClient.cleanup_url (site_url)) <<
88
+ "+hiring+" << search_keyword,
89
+ ]
90
+
91
+ url = BaseApiClient.arry_to_str_delim query_url, ""
92
+ }
93
+
94
+
67
95
  begin
68
96
 
69
- @@tech_ad_tagger.tag_company_with_technologies @@programming_languages
97
+ @@tech_ad_tagger.tag_company_with_technologies @@programming_languages, generate_duckduckgo_url
70
98
 
71
99
  rescue Exception => e
72
100
  puts e.message
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whos_using_what
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: