whos_using_what 0.3.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ class BaseApiClient < Base
|
|
5
5
|
require "uri"
|
6
6
|
require "rest-client"
|
7
7
|
|
8
|
-
def arraySearch(array, rawHtml)
|
8
|
+
def self.arraySearch(array, rawHtml)
|
9
9
|
|
10
10
|
rawHtml = rawHtml.downcase
|
11
11
|
array.each do |token|
|
@@ -16,7 +16,8 @@ class BaseApiClient < Base
|
|
16
16
|
return false
|
17
17
|
end
|
18
18
|
|
19
|
-
|
19
|
+
|
20
|
+
def self.arry_to_str_delim array, delim
|
20
21
|
|
21
22
|
str = ""
|
22
23
|
i = 0
|
@@ -34,7 +35,7 @@ class BaseApiClient < Base
|
|
34
35
|
end
|
35
36
|
|
36
37
|
|
37
|
-
def cleanup_url url
|
38
|
+
def self.cleanup_url url
|
38
39
|
#clean up url
|
39
40
|
url = url.strip
|
40
41
|
if url["www."] != nil
|
@@ -49,19 +50,20 @@ class BaseApiClient < Base
|
|
49
50
|
|
50
51
|
def determineIfUsesTechnology(technology, rawHtml)
|
51
52
|
|
52
|
-
isJobPage = arraySearch(@jobPageTokens, rawHtml)
|
53
|
+
isJobPage = BaseApiClient.arraySearch(@jobPageTokens, rawHtml)
|
53
54
|
|
54
55
|
return isJobPage
|
55
56
|
|
56
57
|
end
|
57
58
|
|
58
59
|
|
59
|
-
def starts_with?(string, prefix)
|
60
|
+
def self.starts_with?(string, prefix)
|
60
61
|
prefix = prefix.to_s
|
61
62
|
string[0, prefix.length] == prefix
|
62
63
|
end
|
63
64
|
|
64
|
-
|
65
|
+
|
66
|
+
def self.prepare_params_from_map_helper (base_url, params_map)
|
65
67
|
|
66
68
|
iter = 1
|
67
69
|
|
@@ -47,17 +47,17 @@ class GoogleClient < BaseApiClient
|
|
47
47
|
return acceptedUrls
|
48
48
|
end
|
49
49
|
|
50
|
-
mustContainUrl = cleanup_url mustContainUrl
|
50
|
+
mustContainUrl = BaseApiClient.cleanup_url mustContainUrl
|
51
51
|
|
52
52
|
urls.each do |url|
|
53
53
|
|
54
|
-
url = cleanup_url url
|
54
|
+
url = BaseApiClient.cleanup_url url
|
55
55
|
|
56
56
|
accept_url_bool = false
|
57
57
|
|
58
58
|
@positiveMatchUrlPatterns.each do |token|
|
59
|
-
if (starts_with? url, token) ||
|
60
|
-
(starts_with? url, mustContainUrl)
|
59
|
+
if (BaseApiClient.starts_with? url, token) ||
|
60
|
+
(BaseApiClient.starts_with? url, mustContainUrl)
|
61
61
|
accept_url_bool = true
|
62
62
|
break
|
63
63
|
end
|
@@ -78,7 +78,7 @@ class GoogleClient < BaseApiClient
|
|
78
78
|
end
|
79
79
|
end
|
80
80
|
|
81
|
-
url = cleanup_url url
|
81
|
+
url = BaseApiClient.cleanup_url url
|
82
82
|
|
83
83
|
if accept_url_bool &&
|
84
84
|
url != nil && !(acceptedUrls.include? url)
|
@@ -88,33 +88,11 @@ class GoogleClient < BaseApiClient
|
|
88
88
|
acceptedUrls
|
89
89
|
end
|
90
90
|
|
91
|
-
def generate_google_url site_url, search_keyword
|
92
|
-
query_url = [
|
93
|
-
"http://www.google.com/search?",
|
94
|
-
"hl=en",
|
95
|
-
"&as_q=" << "hiring+" << search_keyword,
|
96
|
-
"&as_sitesearch=" << (cleanup_url (site_url))
|
97
|
-
]
|
98
91
|
|
99
|
-
url = arry_to_str_delim query_url, ""
|
100
|
-
|
101
|
-
end
|
102
|
-
|
103
|
-
def generate_duckduckgo_url site_url, search_keyword
|
104
|
-
|
105
|
-
query_url = [
|
106
|
-
"http://duckduckgo.com/?",
|
107
|
-
"q=" <<
|
108
|
-
"site:" << (cleanup_url (site_url)) <<
|
109
|
-
"+hiring+" << search_keyword,
|
110
|
-
]
|
111
|
-
|
112
|
-
url = arry_to_str_delim query_url, ""
|
113
|
-
end
|
114
92
|
|
115
93
|
#performs a search engine search that is restricted to a company's website and then attempts to determine if they have job listings for a given technology.
|
116
94
|
#If an ad exists it is returned as part of map
|
117
|
-
def
|
95
|
+
def perform_search queries, site_url, search_query_url_generator_closure
|
118
96
|
|
119
97
|
ret_map = Hash.new
|
120
98
|
|
@@ -125,7 +103,7 @@ class GoogleClient < BaseApiClient
|
|
125
103
|
|
126
104
|
begin
|
127
105
|
|
128
|
-
url =
|
106
|
+
url = search_query_url_generator_closure.call( site_url, search_keyword )
|
129
107
|
|
130
108
|
#perform initial search engine search
|
131
109
|
@browser.goto url
|
@@ -16,7 +16,7 @@ class TechAdTagger < Base
|
|
16
16
|
|
17
17
|
|
18
18
|
#iterates through array and updates company db record with technologies found from ads from their website
|
19
|
-
def tag_company_with_technologies tech_keywords
|
19
|
+
def tag_company_with_technologies tech_keywords, search_query_url_generator_closure
|
20
20
|
|
21
21
|
# uncomment if need to clear out all existing technologies
|
22
22
|
=begin
|
@@ -37,7 +37,7 @@ class TechAdTagger < Base
|
|
37
37
|
|
38
38
|
languages = Hash.new
|
39
39
|
|
40
|
-
company_languages_map = @search_client.
|
40
|
+
company_languages_map = @search_client.perform_search tech_keywords, company["websiteUrl"], search_query_url_generator_closure
|
41
41
|
|
42
42
|
company_languages_map.each do |key, value|
|
43
43
|
|
@@ -6,6 +6,7 @@ class DataPopulators
|
|
6
6
|
|
7
7
|
$:.unshift(File.expand_path('../../data_gatherers', __FILE__))
|
8
8
|
$:.unshift(File.expand_path('../../data_searchers', __FILE__))
|
9
|
+
$:.unshift(File.expand_path('../../api_clients', __FILE__))
|
9
10
|
$:.unshift(File.expand_path('../../logging', __FILE__))
|
10
11
|
|
11
12
|
end
|
@@ -18,6 +19,7 @@ class DataPopulators
|
|
18
19
|
require 'logger_factory'
|
19
20
|
require 'gather_companies'
|
20
21
|
require 'tech_ad_tagger'
|
22
|
+
require 'base_api_client'
|
21
23
|
|
22
24
|
end
|
23
25
|
|
@@ -52,7 +54,7 @@ class DataPopulators
|
|
52
54
|
|
53
55
|
begin
|
54
56
|
|
55
|
-
|
57
|
+
# @@gather_companies.load_companies_to_db 700, 0, @@facet_location
|
56
58
|
|
57
59
|
rescue Exception => e
|
58
60
|
puts e.message
|
@@ -64,9 +66,35 @@ class DataPopulators
|
|
64
66
|
|
65
67
|
t2 = Thread.new do
|
66
68
|
|
69
|
+
# def site_url, search_keyword
|
70
|
+
generate_google_url = lambda { |site_url, search_keyword|
|
71
|
+
query_url = [
|
72
|
+
"http://www.google.com/search?",
|
73
|
+
"hl=en",
|
74
|
+
"&as_q=" << "hiring+" << search_keyword,
|
75
|
+
"&as_sitesearch=" << (BaseApiClient.cleanup_url (site_url))
|
76
|
+
]
|
77
|
+
|
78
|
+
url = BaseApiClient.arry_to_str_delim query_url, ""
|
79
|
+
}
|
80
|
+
|
81
|
+
|
82
|
+
# def site_url, search_keyword
|
83
|
+
generate_duckduckgo_url= lambda { |site_url, search_keyword|
|
84
|
+
query_url = [
|
85
|
+
"http://duckduckgo.com/?",
|
86
|
+
"q=" <<
|
87
|
+
"site:" << (BaseApiClient.cleanup_url (site_url)) <<
|
88
|
+
"+hiring+" << search_keyword,
|
89
|
+
]
|
90
|
+
|
91
|
+
url = BaseApiClient.arry_to_str_delim query_url, ""
|
92
|
+
}
|
93
|
+
|
94
|
+
|
67
95
|
begin
|
68
96
|
|
69
|
-
@@tech_ad_tagger.tag_company_with_technologies @@programming_languages
|
97
|
+
@@tech_ad_tagger.tag_company_with_technologies @@programming_languages, generate_duckduckgo_url
|
70
98
|
|
71
99
|
rescue Exception => e
|
72
100
|
puts e.message
|