insta_scrape 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +75 -19
- data/insta_scrape.gemspec +6 -6
- data/lib/dependencies.rb +7 -0
- data/lib/init/poltergeist.rb +5 -0
- data/lib/insta_scrape/version.rb +2 -2
- data/lib/insta_scrape.rb +104 -33
- data/lib/models/instagram_post.rb +7 -0
- data/lib/models/instagram_user.rb +11 -0
- data/lib/models/instagram_user_with_posts.rb +12 -0
- metadata +42 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abbaf8a5b61d0f1d25d3480a64b16eedacd32b58
|
4
|
+
data.tar.gz: a67b9a14e265d90f47dbd566b21d64e42694fe43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a2cf697cafb2e5a46ac4dad8453666cb50d4c2923dae7d6344b9fd35aef517798d4a45c8b0d260c7f780b1036505eafd6efaa716f3b0759892ed5b6f97fd1fd
|
7
|
+
data.tar.gz: 235eb28eee937dfa4350ef708b1117b96f49689d89a2c65f95bc3fcd3e4514f170cb491544c58507225ba71a1a7403c235bd1e72543e775119f8d2da19bb0ca0
|
data/README.md
CHANGED
@@ -5,9 +5,11 @@
|
|
5
5
|
A Ruby scraper for Instagram in 2016. Because the hashtag deprecation in the API is just silly.
|
6
6
|
This gem is dependent on Capybara, PhantomJS, and Poltergeist.
|
7
7
|
|
8
|
+
Using this gem you can access multiple facets of the instagram API without needing authorization, most importantly the hashtag.
|
9
|
+
|
8
10
|
## Note
|
9
11
|
|
10
|
-
The number of results may vary as this isn't an official endpoint.
|
12
|
+
The number of results may vary when using certain methods as this isn't an official endpoint.
|
11
13
|
|
12
14
|
## Todo
|
13
15
|
|
@@ -32,41 +34,95 @@ Or install it yourself as:
|
|
32
34
|
|
33
35
|
## Usage
|
34
36
|
|
35
|
-
|
37
|
+
### Available methods
|
38
|
+
|
39
|
+
As of right now, each method accepts only one argument - a hashtag or a username.
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
#scrape a hashtag for as many results as possible
|
43
|
+
InstaScrape.hashtag("test")
|
44
|
+
#scrape all user info
|
45
|
+
InstaScrape.user_info("dannyvassallo")
|
46
|
+
#scrape all user info and posts
|
47
|
+
InstaScrape.user_info_and_posts('foofighters')
|
48
|
+
#scrape just a user's posts (as many as possible)
|
49
|
+
InstaScrape.user_posts('foofighters')
|
50
|
+
#scrape a user's follower count
|
51
|
+
InstaScrape.user_follower_count('foofighters')
|
52
|
+
#scrape a user's following count
|
53
|
+
InstaScrape.user_following_count('foofighters')
|
54
|
+
#scrape a user's post count
|
55
|
+
InstaScrape.user_post_count('foofighters')
|
56
|
+
#scrape a user's description
|
57
|
+
InstaScrape.user_description('foofighters')
|
58
|
+
```
|
36
59
|
|
37
|
-
|
60
|
+
#### Hashtag, User Post, and Nested Posts Scrape
|
38
61
|
|
39
62
|
```ruby
|
40
|
-
#
|
41
|
-
|
42
|
-
scrape_result =
|
63
|
+
#basic use case
|
64
|
+
|
65
|
+
#scrape_result = InstaScrape.user_info_and_posts('foofighters').posts
|
66
|
+
#scrape_result = InstaScrape.user_posts('foofighters')
|
67
|
+
scrape_result = InstaScrape.hashtag("test")
|
43
68
|
scrape_result.each do |post|
|
44
|
-
puts post
|
45
|
-
puts post
|
69
|
+
puts post.image
|
70
|
+
puts post.link
|
46
71
|
end
|
47
72
|
```
|
48
73
|
|
49
74
|
Here is a `.erb` example using MaterializeCSS to render the posts as cards:
|
50
|
-
```ruby
|
51
75
|
|
76
|
+
```ruby
|
52
77
|
#in your controller or helper assuming you aren't storing the posts
|
53
|
-
@insta_scrape = InstaScrape.new
|
54
|
-
@posts = @insta_scrape.hashtag("test")
|
55
78
|
|
79
|
+
#@posts = InstaScrape.user_info_and_posts('foofighters').posts
|
80
|
+
#@posts = InstaScrape.user_posts('foofighters')
|
81
|
+
@posts = InstaScrape.hashtag("test")
|
82
|
+
```
|
83
|
+
|
84
|
+
```ruby
|
56
85
|
# your .erb file
|
57
|
-
|
58
|
-
<div class="
|
59
|
-
|
60
|
-
|
61
|
-
<div class="card
|
62
|
-
|
86
|
+
# access post attributes using dot notation
|
87
|
+
<div class="row">
|
88
|
+
<% @posts.each do |post| %>
|
89
|
+
<div class="col s12 m6 l4">
|
90
|
+
<div class="card hoverable">
|
91
|
+
<div class="card-image"><a href="<%= post.link %>"><img src="<%= post.image %>"></a></div>
|
92
|
+
<div class="card-content">
|
93
|
+
<!-- <p></p> -->
|
94
|
+
</div>
|
95
|
+
<div class="card-action center-align"><a class="btn black" href="<%= post.link %>">Open Post</a></div>
|
63
96
|
</div>
|
64
|
-
<div class="card-action center-align"><a class="btn black" href="<%= post['link'] %>">Open Post</a></div>
|
65
97
|
</div>
|
98
|
+
<% end %>
|
66
99
|
</div>
|
67
|
-
<% end %>
|
68
100
|
```
|
69
101
|
|
102
|
+
#### User Info
|
103
|
+
|
104
|
+
All user information is accessible using dot notation.
|
105
|
+
If we run:
|
106
|
+
```ruby
|
107
|
+
u = InstaScrape.user_info("foofighters")
|
108
|
+
```
|
109
|
+
We then have access to the following attributes:
|
110
|
+
```ruby
|
111
|
+
u.image
|
112
|
+
#returns => "https://instagram.fewr1-2.fna.fbcdn.net/t51.2885-19/11856782_370180896524950_961003442_a.jpg"
|
113
|
+
u.post_count
|
114
|
+
#returns => "305"
|
115
|
+
u.follower_count
|
116
|
+
#returns => "1.5m"
|
117
|
+
u.following_count
|
118
|
+
#returns => "35"
|
119
|
+
```
|
120
|
+
|
121
|
+
Each of these attributes is accessible using the methods listed above as well.
|
122
|
+
|
123
|
+
Using `u = InstaScrape.user_info_and_posts('foofighters')` gives access to the `u.posts` attribute, which can be iterated over.
|
124
|
+
The example above covers this.
|
125
|
+
|
70
126
|
## Development
|
71
127
|
|
72
128
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/insta_scrape.gemspec
CHANGED
@@ -22,12 +22,12 @@ Gem::Specification.new do |spec|
|
|
22
22
|
spec.add_development_dependency "bundler", "~> 1.11"
|
23
23
|
spec.add_development_dependency "rake", "~> 10.0"
|
24
24
|
spec.add_development_dependency "rspec", "~> 3.0"
|
25
|
-
spec.add_development_dependency "capybara", "~> 2.7.1"
|
26
|
-
spec.add_development_dependency "phantomjs", "~> 2.1.1.0"
|
27
|
-
spec.add_development_dependency "poltergeist", "~> 1.9.0"
|
25
|
+
spec.add_development_dependency "capybara", "~> 2.7.1", ">= 2.7.1"
|
26
|
+
spec.add_development_dependency "phantomjs", "~> 2.1.1.0", ">= 2.1.1.0"
|
27
|
+
spec.add_development_dependency "poltergeist", "~> 1.9.0", ">= 1.9.0"
|
28
28
|
|
29
|
-
spec.add_runtime_dependency "capybara", ">= 2.7.1"
|
30
|
-
spec.add_runtime_dependency "phantomjs", ">= 2.1.1.0"
|
31
|
-
spec.add_runtime_dependency "poltergeist", ">= 1.9.0"
|
29
|
+
spec.add_runtime_dependency "capybara", "~> 2.7.1", ">= 2.7.1"
|
30
|
+
spec.add_runtime_dependency "phantomjs", "~> 2.1.1.0", ">= 2.1.1.0"
|
31
|
+
spec.add_runtime_dependency "poltergeist", "~> 1.9.0", ">= 1.9.0"
|
32
32
|
|
33
33
|
end
|
data/lib/dependencies.rb
ADDED
data/lib/insta_scrape/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
VERSION = "
|
1
|
+
module InstaScrape
|
2
|
+
VERSION = "1.0.0"
|
3
3
|
end
|
data/lib/insta_scrape.rb
CHANGED
@@ -1,22 +1,94 @@
|
|
1
|
-
require "
|
2
|
-
require "capybara"
|
3
|
-
require "capybara/dsl"
|
4
|
-
require "capybara/poltergeist"
|
5
|
-
require "phantomjs"
|
6
|
-
|
7
|
-
Capybara.register_driver :poltergeist do |app|
|
8
|
-
Capybara::Poltergeist::Driver.new(app, :phantomjs => Phantomjs.path)
|
9
|
-
end
|
10
|
-
|
11
|
-
Capybara.default_driver = :poltergeist
|
1
|
+
require "dependencies"
|
12
2
|
|
13
|
-
|
14
|
-
|
3
|
+
module InstaScrape
|
4
|
+
extend Capybara::DSL
|
15
5
|
|
16
|
-
|
6
|
+
#get a hashtag
|
7
|
+
def self.hashtag(hashtag)
|
17
8
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
18
9
|
@posts = []
|
10
|
+
scrape_posts
|
11
|
+
end
|
12
|
+
|
13
|
+
#get user info
|
14
|
+
def self.user_info(username)
|
15
|
+
scrape_user_info(username)
|
16
|
+
@user = InstaScrape::InstagramUser.new(username, @image, @post_count, @follower_count, @following_count, @description)
|
17
|
+
end
|
18
|
+
|
19
|
+
#get user info and posts
|
20
|
+
def self.user_info_and_posts(username)
|
21
|
+
scrape_user_info(username)
|
22
|
+
scrape_user_posts(username)
|
23
|
+
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
24
|
+
end
|
25
|
+
|
26
|
+
#get user posts only
|
27
|
+
def self.user_posts(username)
|
28
|
+
scrape_user_posts(username)
|
29
|
+
end
|
30
|
+
|
31
|
+
#get user follower count
|
32
|
+
def self.user_follower_count(username)
|
33
|
+
scrape_user_info(username)
|
34
|
+
return @follower_count
|
35
|
+
end
|
36
|
+
|
37
|
+
#get user following count
|
38
|
+
def self.user_following_count(username)
|
39
|
+
scrape_user_info(username)
|
40
|
+
return @following_count
|
41
|
+
end
|
42
|
+
|
43
|
+
#get user post count
|
44
|
+
def self.user_post_count(username)
|
45
|
+
scrape_user_info(username)
|
46
|
+
return @post_count
|
47
|
+
end
|
48
|
+
|
49
|
+
#get user description
|
50
|
+
def self.user_description(username)
|
51
|
+
scrape_user_info(username)
|
52
|
+
return @description
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
#post iteration method
|
57
|
+
def self.iterate_through_posts
|
58
|
+
all("article div div div a").each do |post|
|
59
|
+
|
60
|
+
link = post["href"]
|
61
|
+
image = post.find("img")["src"]
|
62
|
+
info = InstaScrape::InstagramPost.new(link, image)
|
63
|
+
@posts << info
|
64
|
+
|
65
|
+
end
|
19
66
|
|
67
|
+
#log
|
68
|
+
puts "POST COUNT: #{@posts.length}"
|
69
|
+
self.log_posts
|
70
|
+
#return result
|
71
|
+
return @posts
|
72
|
+
end
|
73
|
+
|
74
|
+
#user info scraper method
|
75
|
+
def self.scrape_user_info(username)
|
76
|
+
visit "https://www.instagram.com/#{username}/"
|
77
|
+
@image = page.find('article header div img')["src"]
|
78
|
+
within("header") do
|
79
|
+
post_count_html = page.find('span', :text => "posts", exact: true)['innerHTML']
|
80
|
+
@post_count = get_span_value(post_count_html)
|
81
|
+
follower_count_html = page.find('span', :text => "followers", exact: true)['innerHTML']
|
82
|
+
@follower_count = get_span_value(follower_count_html)
|
83
|
+
following_count_html = page.find('span', :text => "following", exact: true)['innerHTML']
|
84
|
+
@following_count = get_span_value(following_count_html)
|
85
|
+
description = page.find('h2').first(:xpath,".//..")['innerHTML']
|
86
|
+
@description = Nokogiri::HTML(description).text
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
#scrape posts
|
91
|
+
def self.scrape_posts
|
20
92
|
begin
|
21
93
|
page.find('a', :text => "Load more", exact: true).click
|
22
94
|
max_iteration = 10
|
@@ -34,28 +106,27 @@ class InstaScrape
|
|
34
106
|
end
|
35
107
|
end
|
36
108
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
link = post["href"]
|
43
|
-
image = post.find("img")["src"]
|
44
|
-
|
45
|
-
info = {
|
46
|
-
"link" => link,
|
47
|
-
"image" => image
|
48
|
-
}
|
49
|
-
|
50
|
-
@posts << info
|
109
|
+
def self.scrape_user_posts(username)
|
110
|
+
@posts = []
|
111
|
+
visit "https://www.instagram.com/#{username}/"
|
112
|
+
scrape_posts
|
113
|
+
end
|
51
114
|
|
115
|
+
#post logger
|
116
|
+
def self.log_posts
|
117
|
+
@posts.each do |post|
|
118
|
+
puts "\n"
|
119
|
+
puts "Image: #{post.image}\n"
|
120
|
+
puts "Link: #{post.link}\n"
|
52
121
|
end
|
122
|
+
puts "\n"
|
123
|
+
end
|
53
124
|
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
return
|
125
|
+
#split away span tags from user info numbers
|
126
|
+
def self.get_span_value(element)
|
127
|
+
begin_split = "\">"
|
128
|
+
end_split = "</span>"
|
129
|
+
return element[/#{begin_split}(.*?)#{end_split}/m, 1]
|
59
130
|
end
|
60
131
|
|
61
132
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
class InstaScrape::InstagramUser
|
2
|
+
attr_accessor :username, :image, :post_count, :follower_count, :following_count, :description
|
3
|
+
def initialize(username, image, post_count, follower_count, following_count, description)
|
4
|
+
@username = username
|
5
|
+
@image = image
|
6
|
+
@post_count = post_count
|
7
|
+
@follower_count = follower_count
|
8
|
+
@following_count = following_count
|
9
|
+
@description = description
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
class InstaScrape::InstagramUserWithPosts
|
2
|
+
attr_accessor :username, :image, :post_count, :follower_count, :following_count, :description, :posts
|
3
|
+
def initialize(username, image, post_count, follower_count, following_count, description, posts)
|
4
|
+
@username = username
|
5
|
+
@image = image
|
6
|
+
@post_count = post_count
|
7
|
+
@follower_count = follower_count
|
8
|
+
@following_count = following_count
|
9
|
+
@description = description
|
10
|
+
@posts = posts
|
11
|
+
end
|
12
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: insta_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dannyvassallo
|
@@ -59,6 +59,9 @@ dependencies:
|
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 2.7.1
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 2.7.1
|
62
65
|
type: :development
|
63
66
|
prerelease: false
|
64
67
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -66,6 +69,9 @@ dependencies:
|
|
66
69
|
- - "~>"
|
67
70
|
- !ruby/object:Gem::Version
|
68
71
|
version: 2.7.1
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: 2.7.1
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
76
|
name: phantomjs
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
@@ -73,6 +79,9 @@ dependencies:
|
|
73
79
|
- - "~>"
|
74
80
|
- !ruby/object:Gem::Version
|
75
81
|
version: 2.1.1.0
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: 2.1.1.0
|
76
85
|
type: :development
|
77
86
|
prerelease: false
|
78
87
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -80,6 +89,9 @@ dependencies:
|
|
80
89
|
- - "~>"
|
81
90
|
- !ruby/object:Gem::Version
|
82
91
|
version: 2.1.1.0
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: 2.1.1.0
|
83
95
|
- !ruby/object:Gem::Dependency
|
84
96
|
name: poltergeist
|
85
97
|
requirement: !ruby/object:Gem::Requirement
|
@@ -87,6 +99,9 @@ dependencies:
|
|
87
99
|
- - "~>"
|
88
100
|
- !ruby/object:Gem::Version
|
89
101
|
version: 1.9.0
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: 1.9.0
|
90
105
|
type: :development
|
91
106
|
prerelease: false
|
92
107
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -94,10 +109,16 @@ dependencies:
|
|
94
109
|
- - "~>"
|
95
110
|
- !ruby/object:Gem::Version
|
96
111
|
version: 1.9.0
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: 1.9.0
|
97
115
|
- !ruby/object:Gem::Dependency
|
98
116
|
name: capybara
|
99
117
|
requirement: !ruby/object:Gem::Requirement
|
100
118
|
requirements:
|
119
|
+
- - "~>"
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: 2.7.1
|
101
122
|
- - ">="
|
102
123
|
- !ruby/object:Gem::Version
|
103
124
|
version: 2.7.1
|
@@ -105,6 +126,9 @@ dependencies:
|
|
105
126
|
prerelease: false
|
106
127
|
version_requirements: !ruby/object:Gem::Requirement
|
107
128
|
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 2.7.1
|
108
132
|
- - ">="
|
109
133
|
- !ruby/object:Gem::Version
|
110
134
|
version: 2.7.1
|
@@ -112,6 +136,9 @@ dependencies:
|
|
112
136
|
name: phantomjs
|
113
137
|
requirement: !ruby/object:Gem::Requirement
|
114
138
|
requirements:
|
139
|
+
- - "~>"
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 2.1.1.0
|
115
142
|
- - ">="
|
116
143
|
- !ruby/object:Gem::Version
|
117
144
|
version: 2.1.1.0
|
@@ -119,6 +146,9 @@ dependencies:
|
|
119
146
|
prerelease: false
|
120
147
|
version_requirements: !ruby/object:Gem::Requirement
|
121
148
|
requirements:
|
149
|
+
- - "~>"
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: 2.1.1.0
|
122
152
|
- - ">="
|
123
153
|
- !ruby/object:Gem::Version
|
124
154
|
version: 2.1.1.0
|
@@ -126,6 +156,9 @@ dependencies:
|
|
126
156
|
name: poltergeist
|
127
157
|
requirement: !ruby/object:Gem::Requirement
|
128
158
|
requirements:
|
159
|
+
- - "~>"
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
version: 1.9.0
|
129
162
|
- - ">="
|
130
163
|
- !ruby/object:Gem::Version
|
131
164
|
version: 1.9.0
|
@@ -133,6 +166,9 @@ dependencies:
|
|
133
166
|
prerelease: false
|
134
167
|
version_requirements: !ruby/object:Gem::Requirement
|
135
168
|
requirements:
|
169
|
+
- - "~>"
|
170
|
+
- !ruby/object:Gem::Version
|
171
|
+
version: 1.9.0
|
136
172
|
- - ">="
|
137
173
|
- !ruby/object:Gem::Version
|
138
174
|
version: 1.9.0
|
@@ -155,8 +191,13 @@ files:
|
|
155
191
|
- bin/console
|
156
192
|
- bin/setup
|
157
193
|
- insta_scrape.gemspec
|
194
|
+
- lib/dependencies.rb
|
195
|
+
- lib/init/poltergeist.rb
|
158
196
|
- lib/insta_scrape.rb
|
159
197
|
- lib/insta_scrape/version.rb
|
198
|
+
- lib/models/instagram_post.rb
|
199
|
+
- lib/models/instagram_user.rb
|
200
|
+
- lib/models/instagram_user_with_posts.rb
|
160
201
|
homepage: https://github.com/dannyvassallo/insta_scrape
|
161
202
|
licenses:
|
162
203
|
- MIT
|