socrates_scraper 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YmFmZDIxNDVmOGFhYTI5ODY1NDQ5YmRmMWMwNDczZjNjNzU0NTkzYQ==
5
+ data.tar.gz: !binary |-
6
+ NmFmYTRhYWZmZjI4YmNlZmQyNGE2NDIyOTVkZTE4ZjFlZjE4ZWUwYw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ NjdjMTE0ODVkZjQ0ZDQ2NWJhNzY2Yzk1ZTFkNzYzMGMxY2RhZDMyNGUxYjk2
10
+ ZDlkYTI4N2JmZWUyMjA0YjBmMTM4ZDIxZjFiMTQ0YmYzMjYxY2UzODBhZWNl
11
+ ZDczMGMxNGMyNGE4ZjBjYjQxMDg0Y2U4ZjZmNjE1NmIwOWUyYjQ=
12
+ data.tar.gz: !binary |-
13
+ YjliMDU0YTY1NmUwZjQ2Y2RlNjQxNTdlMTNhNjdiNDExZTUxYzFhZTJhOWM5
14
+ ZjdjM2MzODFjOTEwMWM0Y2ZlNzIyOGIzYTk5MDk3YzBmZTBiOWMzYTEzMDRh
15
+ M2RmNTdlNjY5YmNhMGJhOTQwOTlkMTBiMjM4ZjY0M2Y5Mjc5ZjY=
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.html
2
+ *.css
data/README.md ADDED
File without changes
data/gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in socrates_scraper.gemspec
4
+ gemspec
@@ -0,0 +1,2 @@
1
+ require_relative 'socrates_scraper/scraper'
2
+ require_relative 'socrates_scraper/student'
@@ -0,0 +1,88 @@
1
+ require 'mechanize'
2
+
3
+ class Scraper
4
+ URL = "https://socrates.devbootcamp.com/login"
5
+
6
+ def initialize(email, password)
7
+ @email = email
8
+ @password = password
9
+
10
+ @agent = Mechanize.new
11
+ end
12
+
13
+ def get_user_profiles
14
+ cohort_page = get_cohort_page
15
+ user_links = get_user_links(cohort_page)
16
+
17
+ raise "Invalid username or password" if user_links.empty?
18
+
19
+ puts "Getting user profiles..."
20
+ create_user_profiles(user_links)
21
+ end
22
+
23
+ private
24
+
25
+ def get_cohort_page
26
+ @agent.get(URL)
27
+ login
28
+ @agent.get("https://socrates.devbootcamp.com/cohorts/78")
29
+ end
30
+
31
+ def login
32
+ form = @agent.page.forms.first
33
+ form.email = @email
34
+ form.password = @password
35
+ form.submit
36
+ end
37
+
38
+ def get_user_links(page)
39
+ users = page.links_with(href: /users/)
40
+
41
+ # remove top link 'My Profile'
42
+ users.shift
43
+
44
+ # deduplicate links (photo link + text link)
45
+ users.uniq { |link| link.href }
46
+ end
47
+
48
+ def create_user_profiles(user_links)
49
+ user_links.map do |link|
50
+ # reset to cohort page after every user
51
+ @agent.transact do
52
+ user_html = extract_profile_html(link)
53
+ convert_html_to_person_hash(user_html)
54
+ end
55
+ end
56
+ end
57
+
58
+ def extract_profile_html(link)
59
+ page = @agent.click(link)
60
+
61
+ # get the socrates id and convert into an
62
+ # easily identifiable html element
63
+ socrates_id = link.href[/.*\/(\d*)/, 1]
64
+ socrates_tag = "<p class='soc_id'>#{socrates_id}</p>"
65
+
66
+ Nokogiri::HTML(page.search('div.profile').to_s << socrates_tag)
67
+ end
68
+
69
+ def convert_html_to_person_hash(html)
70
+ person = {}
71
+ person[:name] = get_name(html)
72
+ person[:image] = get_image(html)
73
+ person[:facebook_id] = get_facebook_id(html)
74
+ person
75
+ end
76
+
77
+ def get_name(html)
78
+ html.css('h1 > text()').first.text.lstrip.rstrip
79
+ end
80
+
81
+ def get_image(html)
82
+ html.css('.user > img').first['src']
83
+ end
84
+
85
+ def get_facebook_id(html)
86
+ html.css('dd')[4].text[/.*\/(.*)/, 1]
87
+ end
88
+ end
@@ -0,0 +1,9 @@
1
# Value holder exposing a name, a Facebook id and an image through readers.
class Student
  attr_reader :name
  attr_reader :facebook_id
  attr_reader :image

  # @param args [Hash] must contain the :name, :facebook_id and :image keys
  # @raise [KeyError] when any of those keys is absent
  def initialize(args)
    # Fetch in the same order as the readers so the first missing key is the
    # one reported.
    %w[name facebook_id image].each do |field|
      instance_variable_set("@#{field}", args.fetch(field.to_sym))
    end
  end
end
@@ -0,0 +1,25 @@
1
# Put the lib directory that sits next to this file on the load path while
# the specification is evaluated.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name          = 'socrates_scraper'
  spec.version       = '0.0.1'
  spec.date          = '2014-08-22'
  spec.summary       = "Get user info from a Socrates cohort"
  spec.description   = "Scrapes user info from a cohort in Socrates"
  spec.authors       = ["James Robinson"]
  spec.email         = 'james.michael.robinson@gmail.com'
  # Points at this gem's own page; the previous value named a different gem
  # (facebook_word_counter).
  spec.homepage      = 'http://rubygems.org/gems/socrates_scraper'
  spec.license       = 'MIT'

  # Package whatever git tracks. An earlier hard-coded
  # spec.files = ["lib/facebook_word_counter.rb"] was dropped: this assignment
  # overwrote it, so it never had any effect.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "nokogiri", '~> 1.6'
  spec.add_runtime_dependency 'mechanize', '~> 2.7.3'
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: socrates_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - James Robinson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '1.6'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '1.6'
55
+ - !ruby/object:Gem::Dependency
56
+ name: mechanize
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 2.7.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 2.7.3
69
+ description: Scrapes user info from a cohort in Socrates
70
+ email: james.michael.robinson@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - .gitignore
76
+ - README.md
77
+ - gemfile
78
+ - lib/socrates_scraper.rb
79
+ - lib/socrates_scraper/scraper.rb
80
+ - lib/socrates_scraper/student.rb
81
+ - socrates_scraper.gemspec
82
+ homepage: http://rubygems.org/gems/facebook_word_counter
83
+ licenses:
84
+ - MIT
85
+ metadata: {}
86
+ post_install_message:
87
+ rdoc_options: []
88
+ require_paths:
89
+ - lib
90
+ required_ruby_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ! '>='
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ required_rubygems_version: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - ! '>='
98
+ - !ruby/object:Gem::Version
99
+ version: '0'
100
+ requirements: []
101
+ rubyforge_project:
102
+ rubygems_version: 2.1.5
103
+ signing_key:
104
+ specification_version: 4
105
+ summary: Get user info from a Socrates cohort
106
+ test_files: []