whos_using_what 0.1.5 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,14 +3,8 @@ require_relative "whos_using_what/search_client"
3
3
 
4
4
  class WhosUsingWhat
5
5
 
6
- def test
7
- puts "its working"
8
- end
9
-
10
6
  if __FILE__ == $PROGRAM_NAME
11
7
 
12
- client = SearchClient.new()
13
- #client.search("ebay.com", "ruby")
14
8
 
15
9
 
16
10
  end
@@ -7,14 +7,22 @@ class BaseApiClient
7
7
 
8
8
  def prepare_params_from_map_helper (base_url, params_map)
9
9
 
10
+ iter = 1
11
+
10
12
  base_url = base_url << "?"
11
13
  params_map.each do |key, value|
14
+
15
+ if iter != 1
16
+ base_url = base_url << "&"
17
+ end
18
+
12
19
  if starts_with?(key, "facet")
13
- base_url = base_url << "&" << key << "," << value
20
+
21
+ base_url = base_url << key << "," << value
14
22
  else
15
- base_url = base_url << "&" << key << "=" << value
23
+ base_url = base_url << key << "=" << value
16
24
  end
17
-
25
+ iter = iter + 1
18
26
  end
19
27
  base_url
20
28
 
@@ -0,0 +1,57 @@
1
+ require 'map_data_extraction_util'
2
+
3
# Client for the Google Maps geocoding HTTP endpoint. Looks up a zip code and
# exposes helpers for pulling coordinates out of the returned placemark.
class GoogleLocationsClient
  # Endpoint queried by #api_get_google_location_data.
  GEO_CODE_API_URL_BASE = "http://maps.google.com/maps/geo?"

  def initialize
    # TODO: pull this from a config file if this becomes needed
    @google_api_key = ""
    @geo_code_api_url_base = GEO_CODE_API_URL_BASE
  end

  # Requests location data for +zip_code+ and returns the first entry of the
  # response's "Placemark" list.
  #
  # Returns nil when the request fails, when the body is not valid JSON, or
  # when the response carries no placemark. Failures are printed, not raised.
  def api_get_google_location_data(zip_code)
    # Build the whole query in one expression; previously the output/key
    # parts were separate statements and never reached the URL.
    params = "q=#{zip_code}&output=json&key=#{@google_api_key}"
    url = "#{GEO_CODE_API_URL_BASE}#{params}"

    begin
      raw_response = RestClient.get(url)
    rescue StandardError => e
      puts e
      return nil
    end

    begin
      json_resp = JSON.parse(raw_response.body)
    rescue JSON::ParserError => e
      puts e
      return nil
    end

    placemarks = json_resp.is_a?(Hash) ? json_resp["Placemark"] : nil
    placemarks.is_a?(Array) ? placemarks[0] : nil
  end

  # Extracts the first two entries of resp_map["Point"]["coordinates"].
  #
  # Returns a two-element array; an element is nil when it cannot be found.
  # A nil +resp_map+ yields [nil, nil].
  def get_coords_from_google_location_resp_helper(resp_map)
    return [nil, nil] unless resp_map

    first = MapDataExtractionUtil.safe_extract_helper(["Point", "coordinates", 0], resp_map, nil, nil)
    second = MapDataExtractionUtil.safe_extract_helper(["Point", "coordinates", 1], resp_map, nil, nil)

    [first, second]
  end
end
@@ -0,0 +1,60 @@
1
+ require 'oauth'
2
+ require 'json'
3
+ require 'base_api_client'
4
+
5
# OAuth-signed client for LinkedIn's company-search REST resource.
class LinkedinClient < BaseApiClient
  # Query parameter appended to every request so responses come back as JSON.
  JSON_INDICATOR = "format=json".freeze

  # Root of the LinkedIn REST API used by this client.
  BASE_URL = "http://api.linkedin.com/v1/".freeze

  # Company fields requested by #query_companies.
  COMPANY_FIELDS = %w[
    id
    name
    universal-name
    website-url
    industries
    logo-url
    employee-count-range
    locations
  ].freeze

  # Field-selector resource for the company search.
  COMPANY_SEARCH_RESOURCE = "company-search:(companies:(#{COMPANY_FIELDS.join(',')}))".freeze

  # Builds the OAuth access token used to sign requests against +url+.
  def initialize(api_key, api_secret, user_token, user_secret, url)
    super()

    consumer = OAuth::Consumer.new(api_key, api_secret, {:site => url})

    @access_token = OAuth::AccessToken.new(consumer, user_token, user_secret)
  end

  # Runs a company search with the given query +params+ (a map of parameter
  # name to string value) and returns the parsed JSON response.
  def query_companies(params)
    # Interpolation yields a fresh, unfrozen string: the URL is handed to
    # prepare_params_from_map_helper, which appends to its argument in place.
    base_url = "#{BASE_URL}#{COMPANY_SEARCH_RESOURCE}"

    json_api_call_helper(base_url, params)
  end

  # Appends +params+ to +base_url+, performs the signed GET request and
  # returns the parsed JSON body.
  def json_api_call_helper(base_url, params)
    url = prepare_params_from_map_helper(base_url, params)

    # remove white spaces; queries may be written with spaces / line breaks
    # for ease of reading
    url = url.gsub(/\s+/, "")

    puts url

    response = @access_token.get("#{url}&#{JSON_INDICATOR}")

    JSON.parse(response.body)
  end
end
@@ -0,0 +1,54 @@
1
+ require 'mongo_helper'
2
+ require 'linkedin_client'
3
+
4
# Meant to be usable as a long-running process that pages through LinkedIn
# company-search results and saves each company document to the database.
class GatherCompanies
  # Sets up the 'companies' collection handle and the LinkedIn client, reading
  # LinkedIn credentials from config/linkedin.env.
  def initialize
    @linkedin_tech_industry_codes = "4,132,6,96,113"

    @mongo_client = MongoHelper.get_mongo_connection

    @companies_coll = @mongo_client['companies']

    @li_config = YAML.load_file(File.expand_path("../../config/linkedin.env", __FILE__))

    @linkedin_client = LinkedinClient.new(
      @li_config["api_key"],
      @li_config["api_secret"],
      @li_config["user_token"],
      @li_config["user_secret"],
      @li_config["url"]
    )
  end

  # Fetches +num_iterations+ pages of 20 companies each, starting at offset
  # +cur_start_position+, and inserts every returned company into the
  # 'companies' collection. Sleeps 1..35 seconds after each page.
  #
  # Returns nil.
  def load_companies_to_db(num_iterations, cur_start_position)
    increment = 20

    1.upto(num_iterations) do
      puts cur_start_position.to_s

      # "start" and "count" are separate parameters; the parameter helper
      # joins them with "&", giving the same query string as before.
      resp = @linkedin_client.query_companies({
        "start" => cur_start_position.to_s,
        "count" => increment.to_s,
        "facet=industry" => @linkedin_tech_industry_codes,
        "locations:(address:(postal-code))" => "95688"
      })

      # Read the result list by key rather than by position, so a response
      # that omits one of the other fields does not pick the wrong value.
      companies = resp.is_a?(Hash) ? resp['companies'] : nil
      docs = companies.is_a?(Hash) ? companies['values'] : nil

      if docs
        docs.each do |doc|
          puts doc
          @companies_coll.insert(doc)
        end
      end

      cur_start_position += increment

      # rand(1-35) was rand(-34), i.e. 0..33; a range states the intent.
      sleep_seconds = rand(1..35)
      puts "sleeping for: #{sleep_seconds} seconds"
      sleep(sleep_seconds)
    end

    nil
  end
end
@@ -0,0 +1,151 @@
1
+ require 'mongo_helper'
2
+ require 'map_data_extraction_util'
3
+ require 'google_locations_client'
4
+
5
+ require "rest-client"
6
+
7
# Resolves company zip codes to coordinates through GoogleLocationsClient,
# caches them in the 'coordinates' collection, and copies them onto the
# documents of the 'companies' collection.
class GeoTagger
  # Opens both collections, drops cached coordinate documents that have no
  # "loc", and ensures a 2d index on "loc" in both collections.
  def initialize
    @mongo_client = MongoHelper.get_mongo_connection
    @companies_coll = @mongo_client['companies']
    @coords_coll = @mongo_client['coordinates']

    @locations_client = GoogleLocationsClient.new

    # TODO: this code should possibly be moved somewhere else for clarity / encapsulation
    @coords_coll.remove({"loc" => nil})

    @coords_coll.ensure_index([["loc", Mongo::GEO2D]])
    @companies_coll.ensure_index([["loc", Mongo::GEO2D]])
  end

  # Returns true only for a non-nil zip code that starts with "9".
  def zip_acceptance_predicate(zip_code)
    return false unless zip_code

    zip_code.start_with?("9")
  end

  # Returns a lambda that, when called, geocodes +zip_code+ and caches the
  # result in the 'coordinates' collection. Exposed as a method so other
  # classes can trigger geocoding of a single zip code.
  def process_zip_closure(zip_code)
    lambda { store_coordinates_for(zip_code) }
  end

  # Walks every company, and for each location whose 5-character zip code
  # passes #zip_acceptance_predicate, geocodes and caches that zip code.
  # Errors for a single zip code are printed and do not stop the run.
  def load_geolocations_into_db
    @companies_coll.find().to_a.each do |company|
      next unless company

      locations = MapDataExtractionUtil.safe_extract_helper(['locations', 'values'], company, nil, nil)
      next unless locations

      locations.each do |location|
        zip_code = main_zip_code(location)
        next unless zip_code && zip_acceptance_predicate(zip_code)

        begin
          process_zip_closure(zip_code).call
        rescue StandardError => e
          puts e.message
          puts e.backtrace
        end
      end
    end
  end

  # Copies cached coordinates onto each company that has a location whose zip
  # code is present in the 'coordinates' collection.
  def update_companies_with_latitude_longitude
    @companies_coll.find().to_a.each do |company|
      next unless company

      locations = MapDataExtractionUtil.safe_extract_helper(["locations", "values"], company, nil, nil)
      next unless locations

      locations.each do |location|
        # Coordinates are cached under the 5-character zip, so the lookup
        # must use the same truncated form.
        zip = main_zip_code(location)
        next unless zip

        coords = @coords_coll.find_one({:zip => zip})
        next unless coords && coords["loc"]

        company["loc"] = coords["loc"]

        @companies_coll.update({"_id" => company["_id"]}, company)
      end
    end
  end

  private

  # Returns the first five characters of the location's postal code, or nil
  # when the location has no postal code of at least five characters.
  def main_zip_code(location)
    return nil unless location

    zip_code = MapDataExtractionUtil.safe_extract_helper(["address", "postalCode"], location, nil, nil).to_s
    return nil if zip_code.size < 5

    # strip off anything past 5 characters, as we only want main part of zip code
    zip_code[0...5]
  end

  # Geocodes +zip_code+ and inserts one document (zip, loc, city, state,
  # country) into the 'coordinates' collection, provided the country code is
  # "US" and no document for that zip code exists yet.
  def store_coordinates_for(zip_code)
    resp_map = @locations_client.api_get_google_location_data(zip_code)
    return unless resp_map

    doc = {}
    doc[:zip] = zip_code

    coords = @locations_client.get_coords_from_google_location_resp_helper(resp_map)
    if coords[0] && coords[1]
      doc[:loc] = {"lon" => coords[0], "lat" => coords[1]}
    end

    keys_arr = ["AddressDetails", "Country", "AdministrativeArea", "Locality", "LocalityName"]
    MapDataExtractionUtil.safe_extract_helper(keys_arr, resp_map, :city, doc)

    keys_arr = ["AddressDetails", "Country", "AdministrativeArea", "AdministrativeAreaName"]
    MapDataExtractionUtil.safe_extract_helper(keys_arr, resp_map, :state, doc)

    keys_arr = ["AddressDetails", "Country", "CountryNameCode"]
    MapDataExtractionUtil.safe_extract_helper(keys_arr, resp_map, :country, doc)

    return unless doc.size > 1 && doc[:country] == "US"

    existing = @coords_coll.find(zip: zip_code).to_a
    @coords_coll.insert(doc) if existing.size < 1
  end
end
@@ -0,0 +1,41 @@
1
# Finds companies near a coordinate pair or near a zip code, using the
# "loc" field of the 'companies' collection.
class CompaniesSearcher
  def initialize
    @mongo_client = MongoHelper.get_mongo_connection
    @companies_coll = @mongo_client['companies']
    @coords_coll = @mongo_client['coordinates']

    @geo_tagger = GeoTagger.new
  end

  # Returns the result of a "$near" query on "loc" for the given point.
  #
  # The point is sent longitude first, matching how "loc" is stored
  # ({"lon" => ..., "lat" => ...}).
  def geospatial_search(lon, lat)
    @companies_coll.find({"loc" => {"$near" => [lon, lat]}})
  end

  # Returns an array of companies near +zip_code+, or nil when the zip code
  # has no usable coordinates. An uncached zip code is geocoded first.
  def zip_code_search(zip_code)
    zip_doc = @coords_coll.find_one({:zip => zip_code})

    unless zip_doc
      # NOTE: relies on GeoTagger exposing process_zip_closure(zip_code) and
      # returning a callable that caches the zip code's coordinates.
      @geo_tagger.process_zip_closure(zip_code).call

      zip_doc = @coords_coll.find_one({:zip => zip_code})
    end

    return nil unless zip_doc

    # A cached zip document may lack coordinates; treat it as not found.
    loc = zip_doc["loc"]
    return nil unless loc

    geospatial_search(loc["lon"], loc["lat"]).to_a
  end
end
@@ -0,0 +1,15 @@
1
+ require 'logger'
2
+
3
# Single access point for the application's shared Logger.
class LoggerFactory
  # Path of the log file the shared logger writes to.
  @@log_path = "log.txt"

  # The shared logger; created once, when this class body is evaluated.
  @@log = Logger.new(@@log_path)

  class << self
    # Returns the shared Logger instance.
    def get_default_logger
      @@log
    end
  end
end
@@ -0,0 +1,39 @@
1
+ require 'mongo'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'yaml'
5
+
6
+ include Mongo
7
+
8
# Resolves MongoDB connection settings and hands out one shared database
# handle. The connection is opened when this class body is evaluated.
class MongoHelper
  # Returns the shared database handle, opening it on first use from the
  # "mongo.uri" environment entry. Authenticates only when the URI carries
  # both a user and a password.
  def self.get_connection
    return @db_connection if @db_connection

    db = URI.parse(ENV["mongo.uri"].strip)
    db_name = db.path.gsub(/^\//, '')
    @db_connection = Mongo::Connection.new(db.host, db.port).db(db_name)
    # Previously the user was checked twice and the password never.
    @db_connection.authenticate(db.user, db.password) unless db.user.nil? || db.password.nil?
    @db_connection
  end

  # When no URI is supplied through the environment, read the settings from
  # config/mongo.env and publish them (and the derived URI) via ENV.
  if ENV["mongo.uri"].nil?
    @li_config = YAML.load_file(File.expand_path("../../config/mongo.env", __FILE__))
    ENV["mongo.host"] = @li_config["mongo.host"]
    ENV["mongo.port"] = @li_config["mongo.port"].to_s
    ENV["mongo.user"] = @li_config["mongo.user"]
    ENV["mongo.pass"] = @li_config["mongo.pass"]
    ENV["mongo.dbname"] = @li_config["mongo.dbname"]

    # Credentials are optional; leave them out of the URI when either part is
    # missing instead of failing while concatenating nil.
    credentials = ""
    if ENV["mongo.user"] && ENV["mongo.pass"]
      credentials = "#{ENV['mongo.user']}:#{ENV['mongo.pass']}@"
    end

    ENV["mongo.uri"] = "mongodb://#{credentials}#{ENV['mongo.host']}:#{ENV['mongo.port']}/#{ENV['mongo.dbname']}"
  end

  get_connection

  # Returns the shared database handle (same object as .get_connection).
  def self.get_mongo_connection
    get_connection
  end
end
@@ -0,0 +1,25 @@
1
# Helpers for reading values out of nested Hash / Array structures without
# raising when part of the path is missing.
class MapDataExtractionUtil
  # Follows +keys_arr+ through +map+, one level per key, and returns the value
  # found at the end of the path.
  #
  # keys_arr - path of keys / array indexes to follow
  # map      - the nested structure to read from (may be nil)
  # doc_key  - key under which to store the value in +doc+
  # doc      - hash that receives the value; pass doc_key = nil and doc = nil
  #            to only return the value
  #
  # Returns nil (and leaves +doc+ untouched) as soon as any level is missing,
  # nil or false, or cannot be indexed with the given key. An empty
  # +keys_arr+ returns "" and does not touch +doc+.
  def self.safe_extract_helper(keys_arr, map, doc_key, doc)
    val = ""

    keys_arr.each do |key|
      val = lookup_level(map, key)
      return nil unless val

      map = val
    end

    doc[doc_key] = val if doc && !keys_arr.empty?

    val
  end

  # Reads container[key], returning nil when the container is nil, cannot be
  # indexed, or rejects the key type (e.g. an Array indexed by a String).
  def self.lookup_level(container, key)
    return nil unless container.respond_to?(:[])

    container[key]
  rescue TypeError, ArgumentError
    nil
  end
  private_class_method :lookup_level
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whos_using_what
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -91,16 +91,54 @@ dependencies:
91
91
  - - ! '>='
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: uri
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: mongo
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
94
126
  description: What companies are using what technologies
95
127
  email: r.dane1010@gmail.com
96
128
  executables: []
97
129
  extensions: []
98
130
  extra_rdoc_files: []
99
131
  files:
100
- - lib/whos_using_what/base_api_client.rb
101
- - lib/whos_using_what/search_client.rb
102
- - lib/whos_using_what/linkedin_client.rb
103
- - lib/whos_using_what/config_module.rb
132
+ - lib/whos_using_what/data_searchers/companies_searcher.rb
133
+ - lib/whos_using_what/util/map_data_extraction_util.rb
134
+ - lib/whos_using_what/logging/logger_factory.rb
135
+ - lib/whos_using_what/api_clients/base_api_client.rb
136
+ - lib/whos_using_what/api_clients/search_client.rb
137
+ - lib/whos_using_what/api_clients/linkedin_client.rb
138
+ - lib/whos_using_what/api_clients/google_locations_client.rb
139
+ - lib/whos_using_what/data_gatherers/gather_companies.rb
140
+ - lib/whos_using_what/data_gatherers/geo_tagger.rb
141
+ - lib/whos_using_what/no_sql/mongo_helper.rb
104
142
  - lib/whos_using_what.rb
105
143
  homepage: http://rubygems.org/gems/whos_using_what
106
144
  licenses: []
@@ -1,10 +0,0 @@
1
- module ConfigModule
2
-
3
- attr_accessor :linkedin_tech_industry_codes
4
-
5
-
6
- def initialize
7
- @linkedin_tech_industry_codes = "4,132,6,96,113";
8
- end
9
-
10
- end
@@ -1,98 +0,0 @@
1
- require 'oauth'
2
- require 'json'
3
- require_relative 'config_module'
4
- require_relative 'base_api_client'
5
-
6
- class LinkedinClient < BaseApiClient
7
-
8
- #the company industry codes to search for, see: https://developer.linkedin.com/documents/industry-codes
9
- attr :access_token, true
10
- attr :companyUrls
11
- @@json_indicator = "format=json"
12
-
13
- include ConfigModule
14
-
15
-
16
- def initialize(api_key, api_secret, user_token, user_secret, url)
17
- super()
18
-
19
- @numberResults = "5"
20
- @start = "15"
21
-
22
- consumer = OAuth::Consumer.new(api_key, api_secret, {:site => url})
23
-
24
- @access_token = OAuth::AccessToken.new(consumer, user_token, user_secret)
25
-
26
- #this appears to be the most that linkedin will give back per request
27
- @max_results = 20
28
-
29
- end
30
-
31
- def add_json_to_map(key_field_name, raw_json_map, output_map)
32
- raw_json_map.each do |value|
33
- output_map[value[key_field_name]] = value
34
- end
35
- end
36
-
37
-
38
- #todo this should be put into module for re-use
39
- def json_api_call_helper (base_url, params)
40
- url = prepare_params_from_map_helper(base_url, params)
41
- json = @access_token.get(url << "&" << @@json_indicator)
42
-
43
- JSON.parse(json.body)
44
- end
45
-
46
- # this method searches for people from a specified company for a specific job type
47
- def people_search_for_company (location_code, title, company_name)
48
-
49
- params = Hash.new
50
- params["facet=location"] = "us:" <<location_code
51
- params["current-company"]= "true"
52
- params["title"]=title
53
- params["company-name"] = company_name
54
-
55
- base_url = "http://api.linkedin.com/v1/people-search:(people:(id,first-name,headline,positions:(company:(name)),public-profile-url,last-name,picture-url),facets)"
56
-
57
- puts json_api_call_helper(base_url, params)['people']['values']
58
-
59
- end
60
-
61
-
62
- def gather_company_data(start, number_to_collect, industry_codes)
63
-
64
- if number_to_collect == nil
65
- number_to_collect = 20
66
- end
67
-
68
- request_num = number_to_collect
69
- cnt = 0
70
- div = number_to_collect / @max_results
71
- if (div <1)
72
- div = 1
73
- end
74
-
75
- results = Hash.new
76
-
77
- if (industry_codes == nil)
78
- industry_codes = @linkedin_tech_industry_codes
79
- end
80
-
81
- while cnt < div do
82
- base_url = "http://api.linkedin.com/v1/company-search:(companies:(universal-name,id,website-url,locations:(address:(city,state))),facets,num-results)"
83
-
84
- params = Hash.new
85
- params["start"] = (start * @max_results + 1).to_s
86
- params["count"] = @max_results.to_s
87
- params["facet=location"] = "us:84"
88
- params["facet=industry"] = industry_codes
89
-
90
- raw_json_map = json_api_call_helper(base_url, params)['companies']['values']
91
- add_json_to_map("universalName", raw_json_map, results)
92
-
93
- cnt = cnt + 1
94
- end
95
- results
96
- end
97
-
98
- end