socrates_scraper 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YmFmZDIxNDVmOGFhYTI5ODY1NDQ5YmRmMWMwNDczZjNjNzU0NTkzYQ==
5
+ data.tar.gz: !binary |-
6
+ NmFmYTRhYWZmZjI4YmNlZmQyNGE2NDIyOTVkZTE4ZjFlZjE4ZWUwYw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NjdjMTE0ODVkZjQ0ZDQ2NWJhNzY2Yzk1ZTFkNzYzMGMxY2RhZDMyNGUxYjk2
10
+ ZDlkYTI4N2JmZWUyMjA0YjBmMTM4ZDIxZjFiMTQ0YmYzMjYxY2UzODBhZWNl
11
+ ZDczMGMxNGMyNGE4ZjBjYjQxMDg0Y2U4ZjZmNjE1NmIwOWUyYjQ=
12
+ data.tar.gz: !binary |-
13
+ YjliMDU0YTY1NmUwZjQ2Y2RlNjQxNTdlMTNhNjdiNDExZTUxYzFhZTJhOWM5
14
+ ZjdjM2MzODFjOTEwMWM0Y2ZlNzIyOGIzYTk5MDk3YzBmZTBiOWMzYTEzMDRh
15
+ M2RmNTdlNjY5YmNhMGJhOTQwOTlkMTBiMjM4ZjY0M2Y5Mjc5ZjY=
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.html
2
+ *.css
data/README.md ADDED
File without changes
data/gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in socrates_scraper.gemspec
4
+ gemspec
@@ -0,0 +1,2 @@
1
+ require_relative 'socrates_scraper/scraper'
2
+ require_relative 'socrates_scraper/student'
@@ -0,0 +1,88 @@
1
+ require 'mechanize'
2
+
3
# Logs in to Socrates with Mechanize, opens a cohort page and turns every
# linked user profile into a hash of :name, :image and :facebook_id.
class Scraper
  URL = "https://socrates.devbootcamp.com/login"
  COHORTS_URL = "https://socrates.devbootcamp.com/cohorts"

  # Cohort scraped when the caller does not ask for a specific one.
  DEFAULT_COHORT_ID = 78

  # Zero-based position of the <dd> read by get_facebook_id.
  # NOTE(review): presumably the fifth <dd> of a profile holds the Facebook
  # URL -- this is a page-layout assumption; verify against the live page.
  FACEBOOK_DD_INDEX = 4

  # @param email [String] login e-mail typed into the login form
  # @param password [String] login password typed into the login form
  # @param cohort_id [Integer, String] id appended to the cohorts URL
  #   (defaults to 78, the cohort this scraper always used before)
  def initialize(email, password, cohort_id = DEFAULT_COHORT_ID)
    @email = email
    @password = password
    @cohort_id = cohort_id

    @agent = Mechanize.new
  end

  # Logs in, collects the user links on the cohort page and scrapes each one.
  #
  # @return [Array<Hash>] one hash per user with :name, :image, :facebook_id
  # @raise [RuntimeError] when the cohort page yields no user links, which is
  #   reported as an invalid username or password
  def get_user_profiles
    cohort_page = get_cohort_page
    user_links = get_user_links(cohort_page)

    raise "Invalid username or password" if user_links.empty?

    puts "Getting user profiles..."
    create_user_profiles(user_links)
  end

  private

  # Opens the login page, submits the credentials, then fetches the cohort page.
  def get_cohort_page
    @agent.get(URL)
    login
    @agent.get("#{COHORTS_URL}/#{@cohort_id}")
  end

  # Fills in and submits the first form on the agent's current page.
  #
  # @raise [RuntimeError] when the current page has no form at all
  def login
    form = @agent.page.forms.first
    raise "Login form not found at #{URL}" if form.nil?

    form.email = @email
    form.password = @password
    form.submit
  end

  # @param page [#links_with] the cohort page
  # @return [Array] links whose href matches /users/, without the first match
  #   and with duplicate hrefs collapsed
  def get_user_links(page)
    users = page.links_with(href: /users/)

    # drop the top link ('My Profile') without mutating the caller's array,
    # then deduplicate (each user has a photo link and a text link)
    users.drop(1).uniq { |link| link.href }
  end

  # Scrapes every link into a person hash.
  def create_user_profiles(user_links)
    user_links.map do |link|
      # reset to cohort page after every user
      @agent.transact do
        convert_html_to_person_hash(extract_profile_html(link))
      end
    end
  end

  # Follows +link+ and returns a Nokogiri document made of the page's
  # div.profile markup plus a <p class='soc_id'> element carrying the numeric
  # id taken from the end of the link's href.
  def extract_profile_html(link)
    page = @agent.click(link)

    socrates_id = link.href[%r{.*/(\d*)}, 1]
    socrates_tag = "<p class='soc_id'>#{socrates_id}</p>"

    Nokogiri::HTML(page.search('div.profile').to_s + socrates_tag)
  end

  # @return [Hash] with keys :name, :image and :facebook_id (values may be nil
  #   when the corresponding element is missing from the profile)
  def convert_html_to_person_hash(html)
    {
      name: get_name(html),
      image: get_image(html),
      facebook_id: get_facebook_id(html)
    }
  end

  # @return [String, nil] stripped text of the first direct text node of <h1>
  def get_name(html)
    node = html.css('h1 > text()').first
    node && node.text.strip
  end

  # @return [String, nil] src attribute of the first '.user > img' element
  def get_image(html)
    img = html.css('.user > img').first
    img && img['src']
  end

  # @return [String, nil] whatever follows the last '/' in the text of the
  #   <dd> at FACEBOOK_DD_INDEX; nil when that <dd> is absent or has no '/'
  def get_facebook_id(html)
    entry = html.css('dd')[FACEBOOK_DD_INDEX]
    entry && entry.text[%r{.*/(.*)}, 1]
  end
end
@@ -0,0 +1,9 @@
1
# Read-only record of one student: name, Facebook id and image.
class Student
  attr_reader :name, :facebook_id, :image

  # @param args [Hash] must contain :name, :facebook_id and :image
  # @raise [KeyError] when any of the three keys is missing
  def initialize(args)
    @name, @facebook_id, @image =
      [:name, :facebook_id, :image].map { |key| args.fetch(key) }
  end
end
@@ -0,0 +1,25 @@
1
# Gem specification for socrates_scraper.
# Puts the gem's lib directory on the load path before the spec is built.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name        = 'socrates_scraper'
  spec.version     = '0.0.1'
  spec.date        = '2014-08-22'
  spec.summary     = "Get user info from a Socrates cohort"
  spec.description = "Scrapes user info from a cohort in Socrates"
  spec.authors     = ["James Robinson"]
  spec.email       = 'james.michael.robinson@gmail.com'
  # Previously pointed at an unrelated gem (facebook_word_counter).
  spec.homepage    = 'https://rubygems.org/gems/socrates_scraper'
  spec.license     = 'MIT'

  # The packaged file list is whatever git tracks. An earlier hard-coded
  # spec.files assignment was always overwritten by this line and is removed.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "nokogiri", '~> 1.6'
  spec.add_runtime_dependency 'mechanize', '~> 2.7.3'
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: socrates_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - James Robinson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.7.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.7.3
69
+ description: Scrapes user info from a cohort in Socrates
70
+ email: james.michael.robinson@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - .gitignore
76
+ - README.md
77
+ - gemfile
78
+ - lib/socrates_scraper.rb
79
+ - lib/socrates_scraper/scraper.rb
80
+ - lib/socrates_scraper/student.rb
81
+ - socrates_scraper.gemspec
82
+ homepage: http://rubygems.org/gems/facebook_word_counter
83
+ licenses:
84
+ - MIT
85
+ metadata: {}
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 2.1.5
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Get user info from a Socrates cohort
106
+ test_files: []