whos_using_what 0.3.3 → 1.0.0

This diff shows the changes between publicly available versions of the package that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ class BaseApiClient < Base
5
5
  require "uri"
6
6
  require "rest-client"
7
7
 
8
- def arraySearch(array, rawHtml)
8
+ def self.arraySearch(array, rawHtml)
9
9
 
10
10
  rawHtml = rawHtml.downcase
11
11
  array.each do |token|
@@ -16,7 +16,8 @@ class BaseApiClient < Base
16
16
  return false
17
17
  end
18
18
 
19
- def arry_to_str_delim array, delim
19
+
20
+ def self.arry_to_str_delim array, delim
20
21
 
21
22
  str = ""
22
23
  i = 0
@@ -34,7 +35,7 @@ class BaseApiClient < Base
34
35
  end
35
36
 
36
37
 
37
- def cleanup_url url
38
+ def self.cleanup_url url
38
39
  #clean up url
39
40
  url = url.strip
40
41
  if url["www."] != nil
@@ -49,19 +50,20 @@ class BaseApiClient < Base
49
50
 
50
51
  def determineIfUsesTechnology(technology, rawHtml)
51
52
 
52
- isJobPage = arraySearch(@jobPageTokens, rawHtml)
53
+ isJobPage = BaseApiClient.arraySearch(@jobPageTokens, rawHtml)
53
54
 
54
55
  return isJobPage
55
56
 
56
57
  end
57
58
 
58
59
 
59
- def starts_with?(string, prefix)
60
+ def self.starts_with?(string, prefix)
60
61
  prefix = prefix.to_s
61
62
  string[0, prefix.length] == prefix
62
63
  end
63
64
 
64
- def prepare_params_from_map_helper (base_url, params_map)
65
+
66
+ def self.prepare_params_from_map_helper (base_url, params_map)
65
67
 
66
68
  iter = 1
67
69
 
@@ -47,17 +47,17 @@ class GoogleClient < BaseApiClient
47
47
  return acceptedUrls
48
48
  end
49
49
 
50
- mustContainUrl = cleanup_url mustContainUrl
50
+ mustContainUrl = BaseApiClient.cleanup_url mustContainUrl
51
51
 
52
52
  urls.each do |url|
53
53
 
54
- url = cleanup_url url
54
+ url = BaseApiClient.cleanup_url url
55
55
 
56
56
  accept_url_bool = false
57
57
 
58
58
  @positiveMatchUrlPatterns.each do |token|
59
- if (starts_with? url, token) ||
60
- (starts_with? url, mustContainUrl)
59
+ if (BaseApiClient.starts_with? url, token) ||
60
+ (BaseApiClient.starts_with? url, mustContainUrl)
61
61
  accept_url_bool = true
62
62
  break
63
63
  end
@@ -78,7 +78,7 @@ class GoogleClient < BaseApiClient
78
78
  end
79
79
  end
80
80
 
81
- url = cleanup_url url
81
+ url = BaseApiClient.cleanup_url url
82
82
 
83
83
  if accept_url_bool &&
84
84
  url != nil && !(acceptedUrls.include? url)
@@ -88,33 +88,11 @@ class GoogleClient < BaseApiClient
88
88
  acceptedUrls
89
89
  end
90
90
 
91
- def generate_google_url site_url, search_keyword
92
- query_url = [
93
- "http://www.google.com/search?",
94
- "hl=en",
95
- "&as_q=" << "hiring+" << search_keyword,
96
- "&as_sitesearch=" << (cleanup_url (site_url))
97
- ]
98
91
 
99
- url = arry_to_str_delim query_url, ""
100
-
101
- end
102
-
103
- def generate_duckduckgo_url site_url, search_keyword
104
-
105
- query_url = [
106
- "http://duckduckgo.com/?",
107
- "q=" <<
108
- "site:" << (cleanup_url (site_url)) <<
109
- "+hiring+" << search_keyword,
110
- ]
111
-
112
- url = arry_to_str_delim query_url, ""
113
- end
114
92
 
115
93
  #performs a search engine search that is restricted to a company's website and then attempts to determine if they have job listings for a given technology.
116
94
  #If an ad exists it is returned as part of map
117
- def google_search queries, site_url
95
+ def perform_search queries, site_url, search_query_url_generator_closure
118
96
 
119
97
  ret_map = Hash.new
120
98
 
@@ -125,7 +103,7 @@ class GoogleClient < BaseApiClient
125
103
 
126
104
  begin
127
105
 
128
- url = generate_duckduckgo_url site_url, search_keyword
106
+ url = search_query_url_generator_closure.call( site_url, search_keyword )
129
107
 
130
108
  #perform initial search engine search
131
109
  @browser.goto url
@@ -16,7 +16,7 @@ class TechAdTagger < Base
16
16
 
17
17
 
18
18
  #iterates through array and updates company db record with technologies found from ads from their website
19
- def tag_company_with_technologies tech_keywords
19
+ def tag_company_with_technologies tech_keywords, search_query_url_generator_closure
20
20
 
21
21
  # uncomment if need to clear out all existing technologies
22
22
  =begin
@@ -37,7 +37,7 @@ class TechAdTagger < Base
37
37
 
38
38
  languages = Hash.new
39
39
 
40
- company_languages_map = @search_client.google_search tech_keywords, company["websiteUrl"]
40
+ company_languages_map = @search_client.perform_search tech_keywords, company["websiteUrl"], search_query_url_generator_closure
41
41
 
42
42
  company_languages_map.each do |key, value|
43
43
 
@@ -6,6 +6,7 @@ class DataPopulators
6
6
 
7
7
  $:.unshift(File.expand_path('../../data_gatherers', __FILE__))
8
8
  $:.unshift(File.expand_path('../../data_searchers', __FILE__))
9
+ $:.unshift(File.expand_path('../../api_clients', __FILE__))
9
10
  $:.unshift(File.expand_path('../../logging', __FILE__))
10
11
 
11
12
  end
@@ -18,6 +19,7 @@ class DataPopulators
18
19
  require 'logger_factory'
19
20
  require 'gather_companies'
20
21
  require 'tech_ad_tagger'
22
+ require 'base_api_client'
21
23
 
22
24
  end
23
25
 
@@ -52,7 +54,7 @@ class DataPopulators
52
54
 
53
55
  begin
54
56
 
55
- # @@gather_companies.load_companies_to_db 700, 0, @@facet_location
57
+ # @@gather_companies.load_companies_to_db 700, 0, @@facet_location
56
58
 
57
59
  rescue Exception => e
58
60
  puts e.message
@@ -64,9 +66,35 @@ class DataPopulators
64
66
 
65
67
  t2 = Thread.new do
66
68
 
69
+ # def site_url, search_keyword
70
+ generate_google_url = lambda { |site_url, search_keyword|
71
+ query_url = [
72
+ "http://www.google.com/search?",
73
+ "hl=en",
74
+ "&as_q=" << "hiring+" << search_keyword,
75
+ "&as_sitesearch=" << (BaseApiClient.cleanup_url (site_url))
76
+ ]
77
+
78
+ url = BaseApiClient.arry_to_str_delim query_url, ""
79
+ }
80
+
81
+
82
+ # def site_url, search_keyword
83
+ generate_duckduckgo_url= lambda { |site_url, search_keyword|
84
+ query_url = [
85
+ "http://duckduckgo.com/?",
86
+ "q=" <<
87
+ "site:" << (BaseApiClient.cleanup_url (site_url)) <<
88
+ "+hiring+" << search_keyword,
89
+ ]
90
+
91
+ url = BaseApiClient.arry_to_str_delim query_url, ""
92
+ }
93
+
94
+
67
95
  begin
68
96
 
69
- @@tech_ad_tagger.tag_company_with_technologies @@programming_languages
97
+ @@tech_ad_tagger.tag_company_with_technologies @@programming_languages, generate_duckduckgo_url
70
98
 
71
99
  rescue Exception => e
72
100
  puts e.message
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whos_using_what
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: