insta_scrape 0.1.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +75 -19
- data/insta_scrape.gemspec +6 -6
- data/lib/dependencies.rb +7 -0
- data/lib/init/poltergeist.rb +5 -0
- data/lib/insta_scrape/version.rb +2 -2
- data/lib/insta_scrape.rb +104 -33
- data/lib/models/instagram_post.rb +7 -0
- data/lib/models/instagram_user.rb +11 -0
- data/lib/models/instagram_user_with_posts.rb +12 -0
- metadata +42 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abbaf8a5b61d0f1d25d3480a64b16eedacd32b58
|
4
|
+
data.tar.gz: a67b9a14e265d90f47dbd566b21d64e42694fe43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a2cf697cafb2e5a46ac4dad8453666cb50d4c2923dae7d6344b9fd35aef517798d4a45c8b0d260c7f780b1036505eafd6efaa716f3b0759892ed5b6f97fd1fd
|
7
|
+
data.tar.gz: 235eb28eee937dfa4350ef708b1117b96f49689d89a2c65f95bc3fcd3e4514f170cb491544c58507225ba71a1a7403c235bd1e72543e775119f8d2da19bb0ca0
|
data/README.md
CHANGED
@@ -5,9 +5,11 @@
|
|
5
5
|
A Ruby scraper for Instagram in 2016, because the hashtag deprecation in the API is just silly.
|
6
6
|
This gem is dependent on Capybara, PhantomJS, and Poltergeist.
|
7
7
|
|
8
|
+
Using this gem, you can access multiple facets of the Instagram API without needing authorization, most importantly hashtags.
|
9
|
+
|
8
10
|
## Note
|
9
11
|
|
10
|
-
The number of results may vary as this isn't an official endpoint.
|
12
|
+
The number of results may vary when using certain methods as this isn't an official endpoint.
|
11
13
|
|
12
14
|
## Todo
|
13
15
|
|
@@ -32,41 +34,95 @@ Or install it yourself as:
|
|
32
34
|
|
33
35
|
## Usage
|
34
36
|
|
35
|
-
|
37
|
+
### Available methods
|
38
|
+
|
39
|
+
As of right now, each method accepts only one argument - a hashtag or a username.
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
#scrape a hashtag for as many results as possible
|
43
|
+
InstaScrape.hashtag("test")
|
44
|
+
#scrape all user info
|
45
|
+
InstaScrape.user_info("dannyvassallo")
|
46
|
+
#scrape all user info and posts
|
47
|
+
InstaScrape.user_info_and_posts('foofighters')
|
48
|
+
#scrape just a user's posts (as many as possible)
|
49
|
+
InstaScrape.user_posts('foofighters')
|
50
|
+
#scrape a user's follower count
|
51
|
+
InstaScrape.user_follower_count('foofighters')
|
52
|
+
#scrape a user's following count
|
53
|
+
InstaScrape.user_following_count('foofighters')
|
54
|
+
#scrape a user's post count
|
55
|
+
InstaScrape.user_post_count('foofighters')
|
56
|
+
#scrape a user's description
|
57
|
+
InstaScrape.user_description('foofighters')
|
58
|
+
```
|
36
59
|
|
37
|
-
|
60
|
+
#### Hashtag, User Post, and Nested Posts Scrape
|
38
61
|
|
39
62
|
```ruby
|
40
|
-
#
|
41
|
-
|
42
|
-
scrape_result =
|
63
|
+
#basic use case
|
64
|
+
|
65
|
+
#scrape_result = InstaScrape.user_info_and_posts('foofighters').posts
|
66
|
+
#scrape_result = InstaScrape.user_posts('foofighters')
|
67
|
+
scrape_result = InstaScrape.hashtag("test")
|
43
68
|
scrape_result.each do |post|
|
44
|
-
puts post
|
45
|
-
puts post
|
69
|
+
puts post.image
|
70
|
+
puts post.link
|
46
71
|
end
|
47
72
|
```
|
48
73
|
|
49
74
|
Here is a `.erb` example using MaterializeCSS to render the posts as cards:
|
50
|
-
```ruby
|
51
75
|
|
76
|
+
```ruby
|
52
77
|
#in your controller or helper assuming you aren't storing the posts
|
53
|
-
@insta_scrape = InstaScrape.new
|
54
|
-
@posts = @insta_scrape.hashtag("test")
|
55
78
|
|
79
|
+
#@posts = InstaScrape.user_info_and_posts('foofighters').posts
|
80
|
+
#@posts = InstaScrape.user_posts('foofighters')
|
81
|
+
@posts = InstaScrape.hashtag("test")
|
82
|
+
```
|
83
|
+
|
84
|
+
```ruby
|
56
85
|
# your .erb file
|
57
|
-
|
58
|
-
<div class="
|
59
|
-
|
60
|
-
|
61
|
-
<div class="card
|
62
|
-
|
86
|
+
# access post attributes using dot notation
|
87
|
+
<div class="row">
|
88
|
+
<% @posts.each do |post| %>
|
89
|
+
<div class="col s12 m6 l4">
|
90
|
+
<div class="card hoverable">
|
91
|
+
<div class="card-image"><a href="<%= post.link %>"><img src="<%= post.image %>"></a></div>
|
92
|
+
<div class="card-content">
|
93
|
+
<!-- <p></p> -->
|
94
|
+
</div>
|
95
|
+
<div class="card-action center-align"><a class="btn black" href="<%= post.link %>">Open Post</a></div>
|
63
96
|
</div>
|
64
|
-
<div class="card-action center-align"><a class="btn black" href="<%= post['link'] %>">Open Post</a></div>
|
65
97
|
</div>
|
98
|
+
<% end %>
|
66
99
|
</div>
|
67
|
-
<% end %>
|
68
100
|
```
|
69
101
|
|
102
|
+
#### User Info
|
103
|
+
|
104
|
+
All user information is accessible using dot notation.
|
105
|
+
If we run:
|
106
|
+
```ruby
|
107
|
+
u = InstaScrape.user_info("foofighters")
|
108
|
+
```
|
109
|
+
We then have access to the following attributes:
|
110
|
+
```ruby
|
111
|
+
u.image
|
112
|
+
#returns => "https://instagram.fewr1-2.fna.fbcdn.net/t51.2885-19/11856782_370180896524950_961003442_a.jpg"
|
113
|
+
u.post_count
|
114
|
+
#returns => "305"
|
115
|
+
u.follower_count
|
116
|
+
#returns => "1.5m"
|
117
|
+
u.following_count
|
118
|
+
#returns => "35"
|
119
|
+
```
|
120
|
+
|
121
|
+
Each of these attributes is accessible using the methods listed above as well.
|
122
|
+
|
123
|
+
Using `u = InstaScrape.user_info_and_posts('foofighters')` also gives access to the `u.posts` attribute, which can be iterated through.
|
124
|
+
The example above covers this.
|
125
|
+
|
70
126
|
## Development
|
71
127
|
|
72
128
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/insta_scrape.gemspec
CHANGED
@@ -22,12 +22,12 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_development_dependency "bundler", "~> 1.11"
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "rspec", "~> 3.0"
|
25
|
-
spec.add_development_dependency "capybara", "~> 2.7.1"
|
26
|
-
spec.add_development_dependency "phantomjs", "~> 2.1.1.0"
|
27
|
-
spec.add_development_dependency "poltergeist", "~> 1.9.0"
|
25
|
+
spec.add_development_dependency "capybara", "~> 2.7.1", ">= 2.7.1"
|
26
|
+
spec.add_development_dependency "phantomjs", "~> 2.1.1.0", ">= 2.1.1.0"
|
27
|
+
spec.add_development_dependency "poltergeist", "~> 1.9.0", ">= 1.9.0"
|
28
28
|
|
29
|
-
spec.add_runtime_dependency "capybara", ">= 2.7.1"
|
30
|
-
spec.add_runtime_dependency "phantomjs", ">= 2.1.1.0"
|
31
|
-
spec.add_runtime_dependency "poltergeist", ">= 1.9.0"
|
29
|
+
spec.add_runtime_dependency "capybara", "~> 2.7.1", ">= 2.7.1"
|
30
|
+
spec.add_runtime_dependency "phantomjs", "~> 2.1.1.0", ">= 2.1.1.0"
|
31
|
+
spec.add_runtime_dependency "poltergeist", "~> 1.9.0", ">= 1.9.0"
|
32
32
|
|
33
33
|
end
|
data/lib/dependencies.rb
ADDED
data/lib/insta_scrape/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "
|
1
|
+
module InstaScrape
|
2
|
+
VERSION = "1.0.0"
|
3
3
|
end
|
data/lib/insta_scrape.rb
CHANGED
@@ -1,22 +1,94 @@
|
|
1
|
-
require "
|
2
|
-
require "capybara"
|
3
|
-
require "capybara/dsl"
|
4
|
-
require "capybara/poltergeist"
|
5
|
-
require "phantomjs"
|
6
|
-
|
7
|
-
Capybara.register_driver :poltergeist do |app|
|
8
|
-
Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
|
9
|
-
end
|
10
|
-
|
11
|
-
Capybara.default_driver = :poltergeist
|
1
|
+
require "dependencies"
|
12
2
|
|
13
|
-
|
14
|
-
|
3
|
+
module InstaScrape
|
4
|
+
extend Capybara::DSL
|
15
5
|
|
16
|
-
|
6
|
+
#get a hashtag
|
7
|
+
def self.hashtag(hashtag)
|
17
8
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
18
9
|
@posts = []
|
10
|
+
scrape_posts
|
11
|
+
end
|
12
|
+
|
13
|
+
#get user info
|
14
|
+
def self.user_info(username)
|
15
|
+
scrape_user_info(username)
|
16
|
+
@user = InstaScrape::InstagramUser.new(username, @image, @post_count, @follower_count, @following_count, @description)
|
17
|
+
end
|
18
|
+
|
19
|
+
#get user info and posts
|
20
|
+
def self.user_info_and_posts(username)
|
21
|
+
scrape_user_info(username)
|
22
|
+
scrape_user_posts(username)
|
23
|
+
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
24
|
+
end
|
25
|
+
|
26
|
+
#get user posts only
|
27
|
+
def self.user_posts(username)
|
28
|
+
scrape_user_posts(username)
|
29
|
+
end
|
30
|
+
|
31
|
+
#get user follower count
|
32
|
+
def self.user_follower_count(username)
|
33
|
+
scrape_user_info(username)
|
34
|
+
return @follower_count
|
35
|
+
end
|
36
|
+
|
37
|
+
#get user following count
|
38
|
+
def self.user_following_count(username)
|
39
|
+
scrape_user_info(username)
|
40
|
+
return @following_count
|
41
|
+
end
|
42
|
+
|
43
|
+
#get user post count
|
44
|
+
def self.user_post_count(username)
|
45
|
+
scrape_user_info(username)
|
46
|
+
return @post_count
|
47
|
+
end
|
48
|
+
|
49
|
+
#get user description
|
50
|
+
def self.user_description(username)
|
51
|
+
scrape_user_info(username)
|
52
|
+
return @description
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
#post iteration method
|
57
|
+
def self.iterate_through_posts
|
58
|
+
all("article div div div a").each do |post|
|
59
|
+
|
60
|
+
link = post["href"]
|
61
|
+
image = post.find("img")["src"]
|
62
|
+
info = InstaScrape::InstagramPost.new(link, image)
|
63
|
+
@posts << info
|
64
|
+
|
65
|
+
end
|
19
66
|
|
67
|
+
#log
|
68
|
+
puts "POST COUNT: #{@posts.length}"
|
69
|
+
self.log_posts
|
70
|
+
#return result
|
71
|
+
return @posts
|
72
|
+
end
|
73
|
+
|
74
|
+
#user info scraper method
|
75
|
+
def self.scrape_user_info(username)
|
76
|
+
visit "https://www.instagram.com/#{username}/"
|
77
|
+
@image = page.find('article header div img')["src"]
|
78
|
+
within("header") do
|
79
|
+
post_count_html = page.find('span', :text => "posts", exact: true)['innerHTML']
|
80
|
+
@post_count = get_span_value(post_count_html)
|
81
|
+
follower_count_html = page.find('span', :text => "followers", exact: true)['innerHTML']
|
82
|
+
@follower_count = get_span_value(follower_count_html)
|
83
|
+
following_count_html = page.find('span', :text => "following", exact: true)['innerHTML']
|
84
|
+
@following_count = get_span_value(following_count_html)
|
85
|
+
description = page.find('h2').first(:xpath,".//..")['innerHTML']
|
86
|
+
@description = Nokogiri::HTML(description).text
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
#scrape posts
|
91
|
+
def self.scrape_posts
|
20
92
|
begin
|
21
93
|
page.find('a', :text => "Load more", exact: true).click
|
22
94
|
max_iteration = 10
|
@@ -34,28 +106,27 @@ class InstaScrape
|
|
34
106
|
end
|
35
107
|
end
|
36
108
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
link = post["href"]
|
43
|
-
image = post.find("img")["src"]
|
44
|
-
|
45
|
-
info = {
|
46
|
-
"link" => link,
|
47
|
-
"image" => image
|
48
|
-
}
|
49
|
-
|
50
|
-
@posts << info
|
109
|
+
def self.scrape_user_posts(username)
|
110
|
+
@posts = []
|
111
|
+
visit "https://www.instagram.com/#{username}/"
|
112
|
+
scrape_posts
|
113
|
+
end
|
51
114
|
|
115
|
+
#post logger
|
116
|
+
def self.log_posts
|
117
|
+
@posts.each do |post|
|
118
|
+
puts "\n"
|
119
|
+
puts "Image: #{post.image}\n"
|
120
|
+
puts "Link: #{post.link}\n"
|
52
121
|
end
|
122
|
+
puts "\n"
|
123
|
+
end
|
53
124
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
return
|
125
|
+
#split away span tags from user info numbers
|
126
|
+
def self.get_span_value(element)
|
127
|
+
begin_split = "\">"
|
128
|
+
end_split = "</span>"
|
129
|
+
return element[/#{begin_split}(.*?)#{end_split}/m, 1]
|
59
130
|
end
|
60
131
|
|
61
132
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class InstaScrape::InstagramUser
|
2
|
+
attr_accessor :username, :image, :post_count, :follower_count, :following_count, :description
|
3
|
+
def initialize(username, image, post_count, follower_count, following_count, description)
|
4
|
+
@username = username
|
5
|
+
@image = image
|
6
|
+
@post_count = post_count
|
7
|
+
@follower_count = follower_count
|
8
|
+
@following_count = following_count
|
9
|
+
@description = description
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class InstaScrape::InstagramUserWithPosts
|
2
|
+
attr_accessor :username, :image, :post_count, :follower_count, :following_count, :description, :posts
|
3
|
+
def initialize(username, image, post_count, follower_count, following_count, description, posts)
|
4
|
+
@username = username
|
5
|
+
@image = image
|
6
|
+
@post_count = post_count
|
7
|
+
@follower_count = follower_count
|
8
|
+
@following_count = following_count
|
9
|
+
@description = description
|
10
|
+
@posts = posts
|
11
|
+
end
|
12
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: insta_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dannyvassallo
|
@@ -59,6 +59,9 @@ dependencies:
|
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 2.7.1
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 2.7.1
|
62
65
|
type: :development
|
63
66
|
prerelease: false
|
64
67
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -66,6 +69,9 @@ dependencies:
|
|
66
69
|
- - "~>"
|
67
70
|
- !ruby/object:Gem::Version
|
68
71
|
version: 2.7.1
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 2.7.1
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: phantomjs
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,6 +79,9 @@ dependencies:
|
|
73
79
|
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
81
|
version: 2.1.1.0
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 2.1.1.0
|
76
85
|
type: :development
|
77
86
|
prerelease: false
|
78
87
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -80,6 +89,9 @@ dependencies:
|
|
80
89
|
- - "~>"
|
81
90
|
- !ruby/object:Gem::Version
|
82
91
|
version: 2.1.1.0
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 2.1.1.0
|
83
95
|
- !ruby/object:Gem::Dependency
|
84
96
|
name: poltergeist
|
85
97
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,6 +99,9 @@ dependencies:
|
|
87
99
|
- - "~>"
|
88
100
|
- !ruby/object:Gem::Version
|
89
101
|
version: 1.9.0
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: 1.9.0
|
90
105
|
type: :development
|
91
106
|
prerelease: false
|
92
107
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -94,10 +109,16 @@ dependencies:
|
|
94
109
|
- - "~>"
|
95
110
|
- !ruby/object:Gem::Version
|
96
111
|
version: 1.9.0
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 1.9.0
|
97
115
|
- !ruby/object:Gem::Dependency
|
98
116
|
name: capybara
|
99
117
|
requirement: !ruby/object:Gem::Requirement
|
100
118
|
requirements:
|
119
|
+
- - "~>"
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: 2.7.1
|
101
122
|
- - ">="
|
102
123
|
- !ruby/object:Gem::Version
|
103
124
|
version: 2.7.1
|
@@ -105,6 +126,9 @@ dependencies:
|
|
105
126
|
prerelease: false
|
106
127
|
version_requirements: !ruby/object:Gem::Requirement
|
107
128
|
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 2.7.1
|
108
132
|
- - ">="
|
109
133
|
- !ruby/object:Gem::Version
|
110
134
|
version: 2.7.1
|
@@ -112,6 +136,9 @@ dependencies:
|
|
112
136
|
name: phantomjs
|
113
137
|
requirement: !ruby/object:Gem::Requirement
|
114
138
|
requirements:
|
139
|
+
- - "~>"
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 2.1.1.0
|
115
142
|
- - ">="
|
116
143
|
- !ruby/object:Gem::Version
|
117
144
|
version: 2.1.1.0
|
@@ -119,6 +146,9 @@ dependencies:
|
|
119
146
|
prerelease: false
|
120
147
|
version_requirements: !ruby/object:Gem::Requirement
|
121
148
|
requirements:
|
149
|
+
- - "~>"
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: 2.1.1.0
|
122
152
|
- - ">="
|
123
153
|
- !ruby/object:Gem::Version
|
124
154
|
version: 2.1.1.0
|
@@ -126,6 +156,9 @@ dependencies:
|
|
126
156
|
name: poltergeist
|
127
157
|
requirement: !ruby/object:Gem::Requirement
|
128
158
|
requirements:
|
159
|
+
- - "~>"
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
version: 1.9.0
|
129
162
|
- - ">="
|
130
163
|
- !ruby/object:Gem::Version
|
131
164
|
version: 1.9.0
|
@@ -133,6 +166,9 @@ dependencies:
|
|
133
166
|
prerelease: false
|
134
167
|
version_requirements: !ruby/object:Gem::Requirement
|
135
168
|
requirements:
|
169
|
+
- - "~>"
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: 1.9.0
|
136
172
|
- - ">="
|
137
173
|
- !ruby/object:Gem::Version
|
138
174
|
version: 1.9.0
|
@@ -155,8 +191,13 @@ files:
|
|
155
191
|
- bin/console
|
156
192
|
- bin/setup
|
157
193
|
- insta_scrape.gemspec
|
194
|
+
- lib/dependencies.rb
|
195
|
+
- lib/init/poltergeist.rb
|
158
196
|
- lib/insta_scrape.rb
|
159
197
|
- lib/insta_scrape/version.rb
|
198
|
+
- lib/models/instagram_post.rb
|
199
|
+
- lib/models/instagram_user.rb
|
200
|
+
- lib/models/instagram_user_with_posts.rb
|
160
201
|
homepage: https://github.com/dannyvassallo/insta_scrape
|
161
202
|
licenses:
|
162
203
|
- MIT
|