whos_using_what 0.1.5 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,14 +3,8 @@ require_relative "whos_using_what/search_client"
3
3
 
4
4
  class WhosUsingWhat
5
5
 
6
- def test
7
- puts "its working"
8
- end
9
-
10
6
  if __FILE__ == $PROGRAM_NAME
11
7
 
12
- client = SearchClient.new()
13
- #client.search("ebay.com", "ruby")
14
8
 
15
9
 
16
10
  end
@@ -7,14 +7,22 @@ class BaseApiClient
7
7
 
8
8
  def prepare_params_from_map_helper (base_url, params_map)
9
9
 
10
+ iter = 1
11
+
10
12
  base_url = base_url << "?"
11
13
  params_map.each do |key, value|
14
+
15
+ if iter != 1
16
+ base_url = base_url << "&"
17
+ end
18
+
12
19
  if starts_with?(key, "facet")
13
- base_url = base_url << "&" << key << "," << value
20
+
21
+ base_url = base_url << key << "," << value
14
22
  else
15
- base_url = base_url << "&" << key << "=" << value
23
+ base_url = base_url << key << "=" << value
16
24
  end
17
-
25
+ iter = iter + 1
18
26
  end
19
27
  base_url
20
28
 
@@ -0,0 +1,57 @@
1
+ require 'map_data_extraction_util'
2
+
3
# Thin client around the Google Maps "geo" HTTP endpoint, used to turn a zip
# code into a placemark (coordinates plus address details).
class GoogleLocationsClient

  # Endpoint that the query string is appended to.
  GEO_CODE_API_URL_BASE = "http://maps.google.com/maps/geo?"

  def initialize
    #todo pull this from config file if this becomes needed
    @google_api_key = ""
  end

  # Queries the geocoding endpoint for +zip_code+.
  #
  # zip_code - the value sent as the "q" parameter.
  #
  # Returns the first entry of the response's "Placemark" array, or nil when
  # the request fails, the body is not valid JSON, or no placemark is present.
  def api_get_google_location_data(zip_code)
    # All three parameters must be part of ONE string; previously the
    # "&output=json" and "&key=" pieces were separate no-op statements and
    # never reached the URL.
    # NOTE(review): zip_code is not URL-encoded here; callers currently pass
    # short zip strings - confirm before passing arbitrary text.
    params = "q=#{zip_code}&output=json&key=#{@google_api_key}"

    begin
      raw_response = RestClient.get(GEO_CODE_API_URL_BASE + params)
      json_resp = JSON.parse(raw_response.body)
    rescue StandardError => e
      # Best effort: report the problem and signal "no data" to the caller
      # instead of continuing with a nil response.
      puts e
      return nil
    end

    placemarks = json_resp.is_a?(Hash) ? json_resp["Placemark"] : nil
    placemarks.is_a?(Array) ? placemarks[0] : nil
  end

  # Extracts the first two "Point" -> "coordinates" entries from a placemark.
  #
  # resp_map - a placemark Hash as returned by api_get_google_location_data,
  #            or nil.
  #
  # Returns a two-element Array; either element is nil when it is missing, and
  # both are nil when resp_map itself is nil.
  def get_coords_from_google_location_resp_helper(resp_map)
    return [nil, nil] unless resp_map

    [0, 1].map do |index|
      MapDataExtractionUtil.safe_extract_helper(["Point", "coordinates", index], resp_map, nil, nil)
    end
  end

end
@@ -0,0 +1,60 @@
1
+ require 'oauth'
2
+ require 'json'
3
+ require 'base_api_client'
4
+
5
# Client for the api.linkedin.com/v1 endpoints; requests are issued through an
# OAuth access token built from the credentials given to the constructor.
class LinkedinClient < BaseApiClient

  # Appended to every request URL; the response body is parsed as JSON.
  JSON_INDICATOR = "format=json".freeze

  API_BASE_URL = "http://api.linkedin.com/v1/".freeze

  # Company fields requested by query_companies.
  COMPANY_FIELDS = %w[
    id
    name
    universal-name
    website-url
    industries
    logo-url
    employee-count-range
    locations
  ].freeze

  # api_key, api_secret     - OAuth consumer credentials.
  # user_token, user_secret - OAuth access token credentials.
  # url                     - site URL handed to the OAuth consumer.
  def initialize(api_key, api_secret, user_token, user_secret, url)
    super()

    consumer = OAuth::Consumer.new(api_key, api_secret, {:site => url})

    @access_token = OAuth::AccessToken.new(consumer, user_token, user_secret)
  end

  # Runs a company search.
  #
  # params - Hash of query parameters, passed to
  #          prepare_params_from_map_helper.
  #
  # Returns the parsed JSON response.
  def query_companies(params)
    # Interpolation builds a fresh, mutable String on every call; the helper
    # below appends to the string it is given, so a shared constant must not
    # be handed over directly.
    base_url = "#{API_BASE_URL}company-search:(companies:(#{COMPANY_FIELDS.join(',')}))"

    json_api_call_helper(base_url, params)
  end

  # Builds the final URL from +base_url+ and +params+, performs the GET and
  # parses the body.
  #
  # Returns the parsed JSON response.
  def json_api_call_helper(base_url, params)
    url = prepare_params_from_map_helper(base_url, params)

    #remove white spaces, for ease in reading queries, they may have white spaces / line breaks
    url = url.gsub(/\s+/, "")

    puts url

    response = @access_token.get("#{url}&#{JSON_INDICATOR}")

    JSON.parse(response.body)
  end

end
@@ -0,0 +1,54 @@
1
+ require 'mongo_helper'
2
+ require 'linkedin_client'
3
+
4
+ #meant to be able to be used as long-running process to save company data to DB
5
# Pages through LinkedIn company-search results and stores every returned
# company document in the 'companies' collection.
class GatherCompanies

  # Number of results requested per call.
  PAGE_SIZE = 20

  # Upper bound (inclusive) of the random pause between calls, in seconds.
  MAX_SLEEP_SECONDS = 35

  def initialize
    @linkedin_tech_industry_codes = "4,132,6,96,113"

    @mongo_client = MongoHelper.get_mongo_connection

    @companies_coll = @mongo_client['companies']

    @li_config = YAML.load_file(File.expand_path("../../config/linkedin.env", __FILE__))

    @linkedin_client = LinkedinClient.new(@li_config["api_key"],
                                          @li_config["api_secret"],
                                          @li_config["user_token"],
                                          @li_config["user_secret"],
                                          @li_config["url"])
  end

  # Fetches +num_iterations+ pages of companies and inserts them into the DB.
  #
  # num_iterations     - how many pages to request.
  # cur_start_position - offset of the first result of the first page; it is
  #                      advanced by PAGE_SIZE after each request.
  #
  # Sleeps a random 1..MAX_SLEEP_SECONDS seconds after every request.
  # Returns nil.
  def load_companies_to_db(num_iterations, cur_start_position)
    1.upto(num_iterations) do
      puts cur_start_position.to_s

      # "count" is its own parameter instead of being appended to the "start"
      # value; Hash insertion order keeps it directly after "start".
      resp = @linkedin_client.query_companies({
        "start" => cur_start_position.to_s,
        "count" => PAGE_SIZE.to_s,
        "facet=industry" => @linkedin_tech_industry_codes,
        "locations:(address:(postal-code))" => "95688"
      })

      extract_company_docs(resp).each do |doc|
        puts doc
        @companies_coll.insert(doc)
      end

      cur_start_position += PAGE_SIZE

      # rand(1-35) evaluated to rand(-34), i.e. 0..33; this gives 1..35.
      sleep_seconds = 1 + rand(MAX_SLEEP_SECONDS)
      puts "sleeping for: #{sleep_seconds} seconds"
      sleep(sleep_seconds)
    end

    nil
  end

  private

  # Returns the Array under resp['companies']['values'], or an empty Array
  # when the response is nil or does not have that shape.
  def extract_company_docs(resp)
    companies = resp.is_a?(Hash) ? resp['companies'] : nil
    docs = companies.is_a?(Hash) ? companies['values'] : nil
    docs || []
  end

end
@@ -0,0 +1,151 @@
1
+ require 'mongo_helper'
2
+ require 'map_data_extraction_util'
3
+ require 'google_locations_client'
4
+
5
+ require "rest-client"
6
+
7
+ class GeoTagger
8
+
9
+ def initialize
10
+ @mongo_client = MongoHelper.get_mongo_connection
11
+ @companies_coll = @mongo_client['companies']
12
+ @coords_coll = @mongo_client['coordinates']
13
+
14
+ @locations_client = GoogleLocationsClient.new
15
+
16
+ #todo this code should possibly be moved somewhere else for clarity / encapsulation
17
+ @coords_coll.remove({"loc" => nil})
18
+
19
+ @coords_coll.ensure_index([["loc", Mongo::GEO2D]])
20
+ @companies_coll.ensure_index([["loc", Mongo::GEO2D]])
21
+
22
+ end
23
+
24
+
25
+ def zip_acceptance_predicate zip_code
26
+
27
+ if !zip_code
28
+ return false
29
+ end
30
+
31
+ accept = true
32
+
33
+ if !zip_code.start_with? ("9")
34
+ accept = false
35
+ end
36
+
37
+ accept
38
+
39
+ end
40
+
41
+
42
+ def load_geolocations_into_db
43
+
44
+ @companies_coll.find().to_a.each do |company|
45
+
46
+ if !company
47
+ next
48
+ end
49
+
50
+
51
+ keys_arr = ['locations', 'values']
52
+ locations = MapDataExtractionUtil.safe_extract_helper keys_arr, company, :nil, nil
53
+
54
+ if !locations
55
+ next
56
+ end
57
+ locations.each do |location|
58
+
59
+ zip_code = location['address']['postalCode']
60
+
61
+ #strip off anything past 5 characters, as we only want main part of zip code
62
+ if !zip_code || zip_code.size < 5
63
+ next
64
+ end
65
+
66
+ zip_code = zip_code[0...5]
67
+
68
+ if (zip_acceptance_predicate (zip_code))
69
+
70
+
71
+ begin
72
+ #todo figure how to do this now that we are using instance variables instead of class variables
73
+ process_zip_closure = lambda {
74
+
75
+ resp_map = @locations_client.api_get_google_location_data zip_code
76
+
77
+ doc = {}
78
+
79
+
80
+ doc[:zip] = zip_code
81
+
82
+ coords = @locations_client.get_coords_from_google_location_resp_helper resp_map
83
+ if coords[0] && coords[1]
84
+ doc[:loc] = {"lon" => coords[0], "lat" => coords[1]}
85
+ end
86
+
87
+
88
+ keys_arr = ["AddressDetails", "Country", "AdministrativeArea", "Locality", "LocalityName"]
89
+ MapDataExtractionUtil.safe_extract_helper keys_arr, resp_map, :city, doc
90
+
91
+ keys_arr = ["AddressDetails", "Country", "AdministrativeArea", "AdministrativeAreaName"]
92
+ MapDataExtractionUtil.safe_extract_helper keys_arr, resp_map, :state, doc
93
+
94
+ keys_arr = ["AddressDetails", "Country", "CountryNameCode"]
95
+ MapDataExtractionUtil.safe_extract_helper keys_arr, resp_map, :country, doc
96
+
97
+
98
+ if (doc.size > 1 && doc[:country] == "US")
99
+ coll = @coords_coll.find(zip: zip_code).to_a
100
+ end
101
+
102
+ if coll && coll.size < 1
103
+
104
+
105
+ @coords_coll.insert(doc)
106
+
107
+ end
108
+ }
109
+
110
+
111
+ process_zip_closure.call
112
+ rescue Exception => e
113
+ puts e.message
114
+ puts e.backtrace
115
+
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+
122
+ def update_companies_with_latitude_longitude
123
+
124
+ @companies_coll.find().to_a.each do |company|
125
+
126
+
127
+ locations = MapDataExtractionUtil.safe_extract_helper ["locations", "values"], company, nil, nil
128
+
129
+ if locations
130
+
131
+ locations.each do |location|
132
+
133
+ zip = MapDataExtractionUtil.safe_extract_helper ["address", "postalCode"], location, nil, nil
134
+
135
+ if zip
136
+
137
+ coords = @coords_coll.find_one({:zip => zip})
138
+ if coords != nil
139
+
140
+ company["loc"] = coords["loc"]
141
+
142
+ @companies_coll.update({"_id" => company["_id"]}, company)
143
+
144
+ end
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
150
+
151
+ end
@@ -0,0 +1,41 @@
1
# Finds stored companies near a coordinate or near a zip code, using the
# 'companies' and 'coordinates' collections.
class CompaniesSearcher

  def initialize
    @mongo_client = MongoHelper.get_mongo_connection
    @companies_coll = @mongo_client['companies']
    @coords_coll = @mongo_client['coordinates']

    @geo_tagger = GeoTagger.new
  end

  # Runs a "$near" query on the companies' "loc" field.
  #
  # lon - longitude of the search point.
  # lat - latitude of the search point.
  #
  # Returns whatever the collection's find returns for the query.
  def geospatial_search(lon, lat)
    # "loc" documents are stored as {"lon" => ..., "lat" => ...}, so the query
    # point has to be given in the same order: longitude first.
    @companies_coll.find({"loc" => {"$near" => [lon, lat]}})
  end

  # Finds companies near +zip_code+.
  #
  # When the zip is not cached in the 'coordinates' collection, the geo
  # tagger is asked to resolve it first.
  #
  # Returns an Array of company documents, or nil when no location is known
  # for the zip.
  def zip_code_search(zip_code)
    zip_doc = find_zip_doc(zip_code)

    unless zip_doc
      geotag_zip(zip_code)
      zip_doc = find_zip_doc(zip_code)
    end

    location = zip_doc && zip_doc["loc"]
    return nil unless location

    geospatial_search(location["lon"], location["lat"]).to_a
  end

  private

  def find_zip_doc(zip_code)
    @coords_coll.find_one({:zip => zip_code})
  end

  # Asks the geo tagger to resolve and cache +zip_code+. The tagger only
  # offers this when it responds to process_zip_closure; without it the
  # lookup is skipped and the search reports "not found" instead of raising
  # NoMethodError.
  def geotag_zip(zip_code)
    return unless @geo_tagger.respond_to?(:process_zip_closure)

    @geo_tagger.process_zip_closure(zip_code).call
  end

end
@@ -0,0 +1,15 @@
1
+ require 'logger'
2
+
3
# Hands out the single Logger shared by the application.
class LoggerFactory

  # File the default logger writes to (relative to the working directory).
  LOG_PATH = "log.txt"

  # Created once when the class is loaded. Held in a class-level instance
  # variable rather than a class variable so it is not shared with subclasses.
  @default_logger = Logger.new(LOG_PATH)

  # Returns the shared Logger instance.
  def self.get_default_logger
    @default_logger
  end

end
@@ -0,0 +1,39 @@
1
+ require 'mongo'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'yaml'
5
+
6
+ include Mongo
7
+
8
# Owns the process-wide MongoDB database handle. The connection settings come
# from ENV["mongo.uri"]; when that is unset they are read from
# config/mongo.env and exported into ENV when this class is loaded.
class MongoHelper

  # Opens (once) and returns the database handle described by
  # ENV["mongo.uri"]. Authenticates only when the URI carries both a user and
  # a password.
  def self.get_connection
    return @db_connection if @db_connection

    db = URI.parse(ENV["mongo.uri"].strip)
    db_name = db.path.sub(%r{\A/}, '')
    @db_connection = Mongo::Connection.new(db.host, db.port).db(db_name)
    # The second check used to repeat db.user, so a URI with a user but no
    # password still attempted to authenticate.
    @db_connection.authenticate(db.user, db.password) unless db.user.nil? || db.password.nil?
    @db_connection
  end

  # Returns the shared database handle, opening it first if necessary.
  def self.get_mongo_connection
    get_connection
  end

  if ENV["mongo.uri"].nil?
    config = YAML.load_file(File.expand_path("../../config/mongo.env", __FILE__))
    ENV["mongo.host"] = config["mongo.host"]
    ENV["mongo.port"] = config["mongo.port"].to_s
    ENV["mongo.user"] = config["mongo.user"]
    ENV["mongo.pass"] = config["mongo.pass"]
    ENV["mongo.dbname"] = config["mongo.dbname"]

    # Leave the credentials out of the URI when the config has none, instead
    # of failing while concatenating nil.
    credentials = ""
    if ENV["mongo.user"] && ENV["mongo.pass"]
      credentials = "#{ENV["mongo.user"]}:#{ENV["mongo.pass"]}@"
    end

    ENV["mongo.uri"] = "mongodb://#{credentials}#{ENV["mongo.host"]}:#{ENV["mongo.port"]}/#{ENV["mongo.dbname"]}"
  end

  # Connect eagerly at load time, as before.
  get_connection

end
@@ -0,0 +1,25 @@
1
# Helpers for reading values out of nested Hash / Array structures without
# raising when part of the path is missing.
class MapDataExtractionUtil

  # Follows +keys_arr+ through +map+, one level per key.
  #
  # keys_arr - Array of Hash keys and/or Integer Array indexes, outermost
  #            first.
  # map      - the Hash or Array to read from; nil means "nothing to find".
  # doc_key  - key under which the found value is stored in +doc+.
  # doc      - optional Hash receiving the value; pass nil (with doc_key nil)
  #            to only get the value returned.
  #
  # Returns the value at the end of the path. Returns nil as soon as a step
  # is missing, is nil/false, or the current level is neither a Hash nor an
  # Array (or an Array is indexed with a non-Integer key); +doc+ is left
  # untouched in that case. An empty +keys_arr+ returns "" and does not touch
  # +doc+.
  def self.safe_extract_helper(keys_arr, map, doc_key, doc)
    val = ""

    keys_arr.each do |key|
      val = step_into(map, key)
      return nil unless val

      map = val
    end

    doc[doc_key] = val if doc && !keys_arr.empty?
    val
  end

  # Reads +key+ from +container+ when that is possible without raising;
  # returns nil otherwise.
  def self.step_into(container, key)
    case container
    when Hash then container[key]
    when Array then key.is_a?(Integer) ? container[key] : nil
    end
  end
  private_class_method :step_into

end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whos_using_what
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -91,16 +91,54 @@ dependencies:
91
91
  - - ! '>='
92
92
  - !ruby/object:Gem::Version
93
93
  version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: uri
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: mongo
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
94
126
  description: What companies are using what technologies
95
127
  email: r.dane1010@gmail.com
96
128
  executables: []
97
129
  extensions: []
98
130
  extra_rdoc_files: []
99
131
  files:
100
- - lib/whos_using_what/base_api_client.rb
101
- - lib/whos_using_what/search_client.rb
102
- - lib/whos_using_what/linkedin_client.rb
103
- - lib/whos_using_what/config_module.rb
132
+ - lib/whos_using_what/data_searchers/companies_searcher.rb
133
+ - lib/whos_using_what/util/map_data_extraction_util.rb
134
+ - lib/whos_using_what/logging/logger_factory.rb
135
+ - lib/whos_using_what/api_clients/base_api_client.rb
136
+ - lib/whos_using_what/api_clients/search_client.rb
137
+ - lib/whos_using_what/api_clients/linkedin_client.rb
138
+ - lib/whos_using_what/api_clients/google_locations_client.rb
139
+ - lib/whos_using_what/data_gatherers/gather_companies.rb
140
+ - lib/whos_using_what/data_gatherers/geo_tagger.rb
141
+ - lib/whos_using_what/no_sql/mongo_helper.rb
104
142
  - lib/whos_using_what.rb
105
143
  homepage: http://rubygems.org/gems/whos_using_what
106
144
  licenses: []
@@ -1,10 +0,0 @@
1
- module ConfigModule
2
-
3
- attr_accessor :linkedin_tech_industry_codes
4
-
5
-
6
- def initialize
7
- @linkedin_tech_industry_codes = "4,132,6,96,113";
8
- end
9
-
10
- end
@@ -1,98 +0,0 @@
1
- require 'oauth'
2
- require 'json'
3
- require_relative 'config_module'
4
- require_relative 'base_api_client'
5
-
6
- class LinkedinClient < BaseApiClient
7
-
8
- #the company industry codes to search for, see: https://developer.linkedin.com/documents/industry-codes
9
- attr :access_token, true
10
- attr :companyUrls
11
- @@json_indicator = "format=json"
12
-
13
- include ConfigModule
14
-
15
-
16
- def initialize(api_key, api_secret, user_token, user_secret, url)
17
- super()
18
-
19
- @numberResults = "5"
20
- @start = "15"
21
-
22
- consumer = OAuth::Consumer.new(api_key, api_secret, {:site => url})
23
-
24
- @access_token = OAuth::AccessToken.new(consumer, user_token, user_secret)
25
-
26
- #this appears to be the most that linkedin will give back per request
27
- @max_results = 20
28
-
29
- end
30
-
31
- def add_json_to_map(key_field_name, raw_json_map, output_map)
32
- raw_json_map.each do |value|
33
- output_map[value[key_field_name]] = value
34
- end
35
- end
36
-
37
-
38
- #todo this should be put into module for re-use
39
- def json_api_call_helper (base_url, params)
40
- url = prepare_params_from_map_helper(base_url, params)
41
- json = @access_token.get(url << "&" << @@json_indicator)
42
-
43
- JSON.parse(json.body)
44
- end
45
-
46
- # this method searches for people from a specified company for a specific job type
47
- def people_search_for_company (location_code, title, company_name)
48
-
49
- params = Hash.new
50
- params["facet=location"] = "us:" <<location_code
51
- params["current-company"]= "true"
52
- params["title"]=title
53
- params["company-name"] = company_name
54
-
55
- base_url = "http://api.linkedin.com/v1/people-search:(people:(id,first-name,headline,positions:(company:(name)),public-profile-url,last-name,picture-url),facets)"
56
-
57
- puts json_api_call_helper(base_url, params)['people']['values']
58
-
59
- end
60
-
61
-
62
- def gather_company_data(start, number_to_collect, industry_codes)
63
-
64
- if number_to_collect == nil
65
- number_to_collect = 20
66
- end
67
-
68
- request_num = number_to_collect
69
- cnt = 0
70
- div = number_to_collect / @max_results
71
- if (div <1)
72
- div = 1
73
- end
74
-
75
- results = Hash.new
76
-
77
- if (industry_codes == nil)
78
- industry_codes = @linkedin_tech_industry_codes
79
- end
80
-
81
- while cnt < div do
82
- base_url = "http://api.linkedin.com/v1/company-search:(companies:(universal-name,id,website-url,locations:(address:(city,state))),facets,num-results)"
83
-
84
- params = Hash.new
85
- params["start"] = (start * @max_results + 1).to_s
86
- params["count"] = @max_results.to_s
87
- params["facet=location"] = "us:84"
88
- params["facet=industry"] = industry_codes
89
-
90
- raw_json_map = json_api_call_helper(base_url, params)['companies']['values']
91
- add_json_to_map("universalName", raw_json_map, results)
92
-
93
- cnt = cnt + 1
94
- end
95
- results
96
- end
97
-
98
- end