socrates_scraper 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +2 -0
- data/README.md +0 -0
- data/gemfile +4 -0
- data/lib/socrates_scraper.rb +2 -0
- data/lib/socrates_scraper/scraper.rb +88 -0
- data/lib/socrates_scraper/student.rb +9 -0
- data/socrates_scraper.gemspec +25 -0
- metadata +106 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
!binary "U0hBMQ==":
|
|
3
|
+
metadata.gz: !binary |-
|
|
4
|
+
YmFmZDIxNDVmOGFhYTI5ODY1NDQ5YmRmMWMwNDczZjNjNzU0NTkzYQ==
|
|
5
|
+
data.tar.gz: !binary |-
|
|
6
|
+
NmFmYTRhYWZmZjI4YmNlZmQyNGE2NDIyOTVkZTE4ZjFlZjE4ZWUwYw==
|
|
7
|
+
SHA512:
|
|
8
|
+
metadata.gz: !binary |-
|
|
9
|
+
NjdjMTE0ODVkZjQ0ZDQ2NWJhNzY2Yzk1ZTFkNzYzMGMxY2RhZDMyNGUxYjk2
|
|
10
|
+
ZDlkYTI4N2JmZWUyMjA0YjBmMTM4ZDIxZjFiMTQ0YmYzMjYxY2UzODBhZWNl
|
|
11
|
+
ZDczMGMxNGMyNGE4ZjBjYjQxMDg0Y2U4ZjZmNjE1NmIwOWUyYjQ=
|
|
12
|
+
data.tar.gz: !binary |-
|
|
13
|
+
YjliMDU0YTY1NmUwZjQ2Y2RlNjQxNTdlMTNhNjdiNDExZTUxYzFhZTJhOWM5
|
|
14
|
+
ZjdjM2MzODFjOTEwMWM0Y2ZlNzIyOGIzYTk5MDk3YzBmZTBiOWMzYTEzMDRh
|
|
15
|
+
M2RmNTdlNjY5YmNhMGJhOTQwOTlkMTBiMjM4ZjY0M2Y5Mjc5ZjY=
|
data/.gitignore
ADDED
data/README.md
ADDED
|
File without changes
|
data/gemfile
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
require 'mechanize'
|
|
2
|
+
|
|
3
|
+
# Logs in to socrates.devbootcamp.com with Mechanize and collects basic
# profile data (name, image URL, Facebook id) for every user linked from a
# cohort page.
#
# Example:
#   Scraper.new(email, password).get_user_profiles
#   # => [{ :name => "...", :image => "...", :facebook_id => "..." }, ...]
class Scraper
  URL = "https://socrates.devbootcamp.com/login"
  COHORTS_URL = "https://socrates.devbootcamp.com/cohorts"

  # Cohort scraped when the caller does not ask for a specific one.
  DEFAULT_COHORT_ID = 78

  # email     - login email for the Socrates account.
  # password  - login password for the Socrates account.
  # cohort_id - id of the cohort page to scrape (defaults to 78, the value
  #             that used to be hard-coded).
  def initialize(email, password, cohort_id = DEFAULT_COHORT_ID)
    @email = email
    @password = password
    @cohort_id = cohort_id

    @agent = Mechanize.new
  end

  # Logs in, reads the cohort page and visits every user profile on it.
  #
  # Returns an Array of Hashes with the keys :name, :image and :facebook_id.
  # A value is nil when the corresponding element is missing from a profile.
  # Raises RuntimeError when the cohort page contains no user links, which is
  # treated as a failed login.
  def get_user_profiles
    cohort_page = get_cohort_page
    user_links = get_user_links(cohort_page)

    raise "Invalid username or password" if user_links.empty?

    puts "Getting user profiles..."
    create_user_profiles(user_links)
  end

  private

  # Address of the cohort page selected at construction time.
  def cohort_url
    "#{COHORTS_URL}/#{@cohort_id}"
  end

  # Opens the login page, submits the credentials and returns the cohort page.
  def get_cohort_page
    @agent.get(URL)
    login
    @agent.get(cohort_url)
  end

  # Fills in and submits the first form on the page the agent currently holds.
  #
  # Raises RuntimeError when that page has no form at all, instead of failing
  # later with a NoMethodError on nil.
  def login
    form = @agent.page.forms.first
    raise "Login form not found at #{URL}" if form.nil?

    form.email = @email
    form.password = @password
    form.submit
  end

  # Returns the links on +page+ whose href matches /users/, without the first
  # match and with duplicate hrefs collapsed.
  def get_user_links(page)
    users = page.links_with(href: /users/)

    # remove top link 'My Profile'
    users.shift

    # deduplicate links (photo link + text link)
    users.uniq { |link| link.href }
  end

  # Visits each link and converts the profile it leads to into a Hash.
  def create_user_profiles(user_links)
    user_links.map do |link|
      # reset to cohort page after every user
      @agent.transact do
        user_html = extract_profile_html(link)
        convert_html_to_person_hash(user_html)
      end
    end
  end

  # Follows +link+ and returns a Nokogiri document made of the page's
  # div.profile markup plus a <p class='soc_id'> element carrying the numeric
  # id taken from the link's href.
  #
  # NOTE(review): nothing in this class reads the soc_id element back;
  # presumably a consumer of the document was meant to - confirm.
  def extract_profile_html(link)
    page = @agent.click(link)

    # get the socrates id and convert into an
    # easily identifiable html element
    socrates_id = link.href[/.*\/(\d*)/, 1]
    socrates_tag = "<p class='soc_id'>#{socrates_id}</p>"

    Nokogiri::HTML(page.search('div.profile').to_s << socrates_tag)
  end

  # Builds the per-user Hash from a profile document.
  def convert_html_to_person_hash(html)
    {
      :name => get_name(html),
      :image => get_image(html),
      :facebook_id => get_facebook_id(html)
    }
  end

  # Text directly inside the first <h1>, stripped of surrounding whitespace,
  # or nil when there is no such text node.
  def get_name(html)
    heading = html.css('h1 > text()').first
    heading && heading.text.strip
  end

  # The src attribute of the first image directly under .user, or nil when
  # the profile has no such image.
  def get_image(html)
    image = html.css('.user > img').first
    image && image['src']
  end

  # Whatever follows the last "/" in the text of the fifth <dd>, or nil when
  # the profile has fewer than five <dd> elements or the text has no "/".
  #
  # NOTE(review): assumes the fifth <dd> holds the Facebook URL - confirm
  # against the profile markup.
  def get_facebook_id(html)
    field = html.css('dd')[4]
    field && field.text[/.*\/(.*)/, 1]
  end
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Gem specification for socrates_scraper.

# Make the gem's own lib/ directory loadable while the spec is evaluated.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

Gem::Specification.new do |spec|
  spec.name = 'socrates_scraper'
  spec.version = '0.0.1'
  spec.date = '2014-08-22'
  spec.summary = "Get user info from a Socrates cohort"
  spec.description = "Scrapes user info from a cohort in Socrates"
  spec.authors = ["James Robinson"]
  spec.email = 'james.michael.robinson@gmail.com'
  # Points at this gem's own page; it previously named a different gem
  # (facebook_word_counter).
  spec.homepage = 'http://rubygems.org/gems/socrates_scraper'
  spec.license = 'MIT'

  # Package every file tracked by git. An earlier hard-coded assignment to
  # spec.files was always overwritten by this one and has been removed.
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_runtime_dependency "nokogiri", '~> 1.6'
  spec.add_runtime_dependency 'mechanize', '~> 2.7.3'
end
|
metadata
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: socrates_scraper
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- James Robinson
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2014-08-22 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ~>
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.3'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ~>
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.3'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ! '>='
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ! '>='
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: nokogiri
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - ~>
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.6'
|
|
48
|
+
type: :runtime
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - ~>
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.6'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: mechanize
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ~>
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 2.7.3
|
|
62
|
+
type: :runtime
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ~>
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 2.7.3
|
|
69
|
+
description: Scrapes user info from a cohort in Socrates
|
|
70
|
+
email: james.michael.robinson@gmail.com
|
|
71
|
+
executables: []
|
|
72
|
+
extensions: []
|
|
73
|
+
extra_rdoc_files: []
|
|
74
|
+
files:
|
|
75
|
+
- .gitignore
|
|
76
|
+
- README.md
|
|
77
|
+
- gemfile
|
|
78
|
+
- lib/socrates_scraper.rb
|
|
79
|
+
- lib/socrates_scraper/scraper.rb
|
|
80
|
+
- lib/socrates_scraper/student.rb
|
|
81
|
+
- socrates_scraper.gemspec
|
|
82
|
+
homepage: http://rubygems.org/gems/facebook_word_counter
|
|
83
|
+
licenses:
|
|
84
|
+
- MIT
|
|
85
|
+
metadata: {}
|
|
86
|
+
post_install_message:
|
|
87
|
+
rdoc_options: []
|
|
88
|
+
require_paths:
|
|
89
|
+
- lib
|
|
90
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - ! '>='
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '0'
|
|
95
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
96
|
+
requirements:
|
|
97
|
+
- - ! '>='
|
|
98
|
+
- !ruby/object:Gem::Version
|
|
99
|
+
version: '0'
|
|
100
|
+
requirements: []
|
|
101
|
+
rubyforge_project:
|
|
102
|
+
rubygems_version: 2.1.5
|
|
103
|
+
signing_key:
|
|
104
|
+
specification_version: 4
|
|
105
|
+
summary: Get user info from a Socrates cohort
|
|
106
|
+
test_files: []
|