whos_using_what 0.3.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ class BaseApiClient < Base
|
|
5
5
|
require "uri"
|
6
6
|
require "rest-client"
|
7
7
|
|
8
|
-
def arraySearch(array, rawHtml)
|
8
|
+
def self.arraySearch(array, rawHtml)
|
9
9
|
|
10
10
|
rawHtml = rawHtml.downcase
|
11
11
|
array.each do |token|
|
@@ -16,7 +16,8 @@ class BaseApiClient < Base
|
|
16
16
|
return false
|
17
17
|
end
|
18
18
|
|
19
|
-
|
19
|
+
|
20
|
+
def self.arry_to_str_delim array, delim
|
20
21
|
|
21
22
|
str = ""
|
22
23
|
i = 0
|
@@ -34,7 +35,7 @@ class BaseApiClient < Base
|
|
34
35
|
end
|
35
36
|
|
36
37
|
|
37
|
-
def cleanup_url url
|
38
|
+
def self.cleanup_url url
|
38
39
|
#clean up url
|
39
40
|
url = url.strip
|
40
41
|
if url["www."] != nil
|
@@ -49,19 +50,20 @@ class BaseApiClient < Base
|
|
49
50
|
|
50
51
|
def determineIfUsesTechnology(technology, rawHtml)
|
51
52
|
|
52
|
-
isJobPage = arraySearch(@jobPageTokens, rawHtml)
|
53
|
+
isJobPage = BaseApiClient.arraySearch(@jobPageTokens, rawHtml)
|
53
54
|
|
54
55
|
return isJobPage
|
55
56
|
|
56
57
|
end
|
57
58
|
|
58
59
|
|
59
|
-
def starts_with?(string, prefix)
|
60
|
+
def self.starts_with?(string, prefix)
|
60
61
|
prefix = prefix.to_s
|
61
62
|
string[0, prefix.length] == prefix
|
62
63
|
end
|
63
64
|
|
64
|
-
|
65
|
+
|
66
|
+
def self.prepare_params_from_map_helper (base_url, params_map)
|
65
67
|
|
66
68
|
iter = 1
|
67
69
|
|
@@ -47,17 +47,17 @@ class GoogleClient < BaseApiClient
|
|
47
47
|
return acceptedUrls
|
48
48
|
end
|
49
49
|
|
50
|
-
mustContainUrl = cleanup_url mustContainUrl
|
50
|
+
mustContainUrl = BaseApiClient.cleanup_url mustContainUrl
|
51
51
|
|
52
52
|
urls.each do |url|
|
53
53
|
|
54
|
-
url = cleanup_url url
|
54
|
+
url = BaseApiClient.cleanup_url url
|
55
55
|
|
56
56
|
accept_url_bool = false
|
57
57
|
|
58
58
|
@positiveMatchUrlPatterns.each do |token|
|
59
|
-
if (starts_with? url, token) ||
|
60
|
-
(starts_with? url, mustContainUrl)
|
59
|
+
if (BaseApiClient.starts_with? url, token) ||
|
60
|
+
(BaseApiClient.starts_with? url, mustContainUrl)
|
61
61
|
accept_url_bool = true
|
62
62
|
break
|
63
63
|
end
|
@@ -78,7 +78,7 @@ class GoogleClient < BaseApiClient
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
url = cleanup_url url
|
81
|
+
url = BaseApiClient.cleanup_url url
|
82
82
|
|
83
83
|
if accept_url_bool &&
|
84
84
|
url != nil && !(acceptedUrls.include? url)
|
@@ -88,33 +88,11 @@ class GoogleClient < BaseApiClient
|
|
88
88
|
acceptedUrls
|
89
89
|
end
|
90
90
|
|
91
|
-
def generate_google_url site_url, search_keyword
|
92
|
-
query_url = [
|
93
|
-
"http://www.google.com/search?",
|
94
|
-
"hl=en",
|
95
|
-
"&as_q=" << "hiring+" << search_keyword,
|
96
|
-
"&as_sitesearch=" << (cleanup_url (site_url))
|
97
|
-
]
|
98
91
|
|
99
|
-
url = arry_to_str_delim query_url, ""
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
def generate_duckduckgo_url site_url, search_keyword
|
104
|
-
|
105
|
-
query_url = [
|
106
|
-
"http://duckduckgo.com/?",
|
107
|
-
"q=" <<
|
108
|
-
"site:" << (cleanup_url (site_url)) <<
|
109
|
-
"+hiring+" << search_keyword,
|
110
|
-
]
|
111
|
-
|
112
|
-
url = arry_to_str_delim query_url, ""
|
113
|
-
end
|
114
92
|
|
115
93
|
#performs a search engine search that is restricted to a company's website and then attempts to determine if they have job listings for a given technology.
|
116
94
|
#If an ad exists it is returned as part of map
|
117
|
-
def
|
95
|
+
def perform_search queries, site_url, search_query_url_generator_closure
|
118
96
|
|
119
97
|
ret_map = Hash.new
|
120
98
|
|
@@ -125,7 +103,7 @@ class GoogleClient < BaseApiClient
|
|
125
103
|
|
126
104
|
begin
|
127
105
|
|
128
|
-
url =
|
106
|
+
url = search_query_url_generator_closure.call( site_url, search_keyword )
|
129
107
|
|
130
108
|
#perform initial search engine search
|
131
109
|
@browser.goto url
|
@@ -16,7 +16,7 @@ class TechAdTagger < Base
|
|
16
16
|
|
17
17
|
|
18
18
|
#iterates through array and updates company db record with technologies found from ads from their website
|
19
|
-
def tag_company_with_technologies tech_keywords
|
19
|
+
def tag_company_with_technologies tech_keywords, search_query_url_generator_closure
|
20
20
|
|
21
21
|
# uncomment if need to clear out all existing technologies
|
22
22
|
=begin
|
@@ -37,7 +37,7 @@ class TechAdTagger < Base
|
|
37
37
|
|
38
38
|
languages = Hash.new
|
39
39
|
|
40
|
-
company_languages_map = @search_client.
|
40
|
+
company_languages_map = @search_client.perform_search tech_keywords, company["websiteUrl"], search_query_url_generator_closure
|
41
41
|
|
42
42
|
company_languages_map.each do |key, value|
|
43
43
|
|
@@ -6,6 +6,7 @@ class DataPopulators
|
|
6
6
|
|
7
7
|
$:.unshift(File.expand_path('../../data_gatherers', __FILE__))
|
8
8
|
$:.unshift(File.expand_path('../../data_searchers', __FILE__))
|
9
|
+
$:.unshift(File.expand_path('../../api_clients', __FILE__))
|
9
10
|
$:.unshift(File.expand_path('../../logging', __FILE__))
|
10
11
|
|
11
12
|
end
|
@@ -18,6 +19,7 @@ class DataPopulators
|
|
18
19
|
require 'logger_factory'
|
19
20
|
require 'gather_companies'
|
20
21
|
require 'tech_ad_tagger'
|
22
|
+
require 'base_api_client'
|
21
23
|
|
22
24
|
end
|
23
25
|
|
@@ -52,7 +54,7 @@ class DataPopulators
|
|
52
54
|
|
53
55
|
begin
|
54
56
|
|
55
|
-
|
57
|
+
# @@gather_companies.load_companies_to_db 700, 0, @@facet_location
|
56
58
|
|
57
59
|
rescue Exception => e
|
58
60
|
puts e.message
|
@@ -64,9 +66,35 @@ class DataPopulators
|
|
64
66
|
|
65
67
|
t2 = Thread.new do
|
66
68
|
|
69
|
+
# def site_url, search_keyword
|
70
|
+
generate_google_url = lambda { |site_url, search_keyword|
|
71
|
+
query_url = [
|
72
|
+
"http://www.google.com/search?",
|
73
|
+
"hl=en",
|
74
|
+
"&as_q=" << "hiring+" << search_keyword,
|
75
|
+
"&as_sitesearch=" << (BaseApiClient.cleanup_url (site_url))
|
76
|
+
]
|
77
|
+
|
78
|
+
url = BaseApiClient.arry_to_str_delim query_url, ""
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
# def site_url, search_keyword
|
83
|
+
generate_duckduckgo_url= lambda { |site_url, search_keyword|
|
84
|
+
query_url = [
|
85
|
+
"http://duckduckgo.com/?",
|
86
|
+
"q=" <<
|
87
|
+
"site:" << (BaseApiClient.cleanup_url (site_url)) <<
|
88
|
+
"+hiring+" << search_keyword,
|
89
|
+
]
|
90
|
+
|
91
|
+
url = BaseApiClient.arry_to_str_delim query_url, ""
|
92
|
+
}
|
93
|
+
|
94
|
+
|
67
95
|
begin
|
68
96
|
|
69
|
-
@@tech_ad_tagger.tag_company_with_technologies @@programming_languages
|
97
|
+
@@tech_ad_tagger.tag_company_with_technologies @@programming_languages, generate_duckduckgo_url
|
70
98
|
|
71
99
|
rescue Exception => e
|
72
100
|
puts e.message
|