insta_scrape 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -3
- data/lib/insta_scrape.rb +35 -28
- data/lib/insta_scrape/version.rb +1 -1
- data/lib/models/instagram_post.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c55fecbadd0f789dfa52d2d26dab8175fb7acdd
|
4
|
+
data.tar.gz: ed3fa91e7b6422f435e6095d67a5abb175702b41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 135f7ac6592b5a7f03d04874af4341f472d9e30b6e6378a6a6219ef2854fdba2f4f4a494d1ab3daa5145a7c36f7d5a4722cf6723b74920453dca7ce2b31f4426
|
7
|
+
data.tar.gz: 13287580a7b8731f3ffaa29cfe4b2acec51bb14d40e76bed5d4af2b16ce74a613860d268f245c776c25f8a227bc1b85591553081ef21d2cff2b81432527dc4e0
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
[![Build Status](https://travis-ci.org/dannyvassallo/insta_scrape.svg?branch=master)](https://travis-ci.org/dannyvassallo/insta_scrape)[![Gem Version](https://badge.fury.io/rb/insta_scrape.svg)](https://badge.fury.io/rb/insta_scrape)
|
1
|
+
[![Build Status](https://travis-ci.org/dannyvassallo/insta_scrape.svg?branch=master)](https://travis-ci.org/dannyvassallo/insta_scrape)[![Gem Version](https://badge.fury.io/rb/insta_scrape.svg)](https://badge.fury.io/rb/insta_scrape)![](http://ruby-gem-downloads-badge.herokuapp.com/insta_scrape?type=total&color=brightgreen)
|
2
|
+
|
2
3
|
![alt text](https://s3-us-west-2.amazonaws.com/instascrape/instascrapelogo.png "logo")
|
3
4
|
# InstaScrape
|
4
5
|
|
@@ -7,7 +8,8 @@ This gem is dependent on Capybara, PhantomJS, and Poltergeist.
|
|
7
8
|
|
8
9
|
Using this gem you can access multiple facets of the instagram API without needing authorization, most importantly the hashtag.
|
9
10
|
|
10
|
-
v.1.1.0 introducing "long_scrape" methods! Now with more
|
11
|
+
v.1.1.0 introducing "long_scrape" methods! Now with more instagram posts!
|
12
|
+
v.1.1.1 introducing "long_scrape" methods! Now with more instagram posts!
|
11
13
|
|
12
14
|
## Note
|
13
15
|
|
@@ -41,6 +43,23 @@ Or install it yourself as:
|
|
41
43
|
Long scrape methods take two arguments -- (hashtag || username, time_in_seconds)
|
42
44
|
Each other method accepts only one argument - a hashtag or a username.
|
43
45
|
|
46
|
+
An additional feature has been added to grab some extra meta information (dates) from the posts.
|
47
|
+
While this feature makes the scrape perform much slower, you can opt in to using it with
|
48
|
+
the following option as a second or third argument on most methods:
|
49
|
+
|
50
|
+
`include_meta_data: true`
|
51
|
+
|
52
|
+
For Example:
|
53
|
+
|
54
|
+
```
|
55
|
+
InstaScrape.hashtag("foofighters", include_meta_data: true)
|
56
|
+
|
57
|
+
#OR
|
58
|
+
|
59
|
+
InstaScrape.long_scrape_hashtag("foofighters", 30, include_meta_data: true)
|
60
|
+
```
|
61
|
+
|
62
|
+
If you call the methods without this option, you won't get the additional meta information.
|
44
63
|
|
45
64
|
#### Long Scrape Methods
|
46
65
|
```ruby
|
@@ -174,4 +193,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/dannyv
|
|
174
193
|
## License
|
175
194
|
|
176
195
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
177
|
-
|
data/lib/insta_scrape.rb
CHANGED
@@ -4,29 +4,29 @@ module InstaScrape
|
|
4
4
|
extend Capybara::DSL
|
5
5
|
|
6
6
|
#get a hashtag
|
7
|
-
def self.hashtag(hashtag)
|
7
|
+
def self.hashtag(hashtag, include_meta_data: false)
|
8
8
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
9
9
|
@posts = []
|
10
|
-
scrape_posts
|
10
|
+
scrape_posts(include_meta_data: include_meta_data)
|
11
11
|
end
|
12
12
|
|
13
13
|
#long scrape a hashtag
|
14
|
-
def self.long_scrape_hashtag(hashtag, scrape_length)
|
14
|
+
def self.long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false)
|
15
15
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
16
16
|
@posts = []
|
17
|
-
long_scrape_posts(scrape_length)
|
17
|
+
long_scrape_posts(scrape_length, include_meta_data: include_meta_data)
|
18
18
|
end
|
19
19
|
|
20
20
|
#long scrape a user's posts
|
21
|
-
def self.long_scrape_user_posts(username, scrape_length)
|
21
|
+
def self.long_scrape_user_posts(username, scrape_length, include_meta_data: false)
|
22
22
|
@posts = []
|
23
|
-
long_scrape_user_posts_method(username, scrape_length)
|
23
|
+
long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)
|
24
24
|
end
|
25
25
|
|
26
26
|
#get user info and posts
|
27
|
-
def self.long_scrape_user_info_and_posts(username, scrape_length)
|
27
|
+
def self.long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false)
|
28
28
|
scrape_user_info(username)
|
29
|
-
long_scrape_user_posts_method(username, scrape_length)
|
29
|
+
long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)
|
30
30
|
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
31
31
|
end
|
32
32
|
|
@@ -37,15 +37,15 @@ module InstaScrape
|
|
37
37
|
end
|
38
38
|
|
39
39
|
#get user info and posts
|
40
|
-
def self.user_info_and_posts(username)
|
40
|
+
def self.user_info_and_posts(username, include_meta_data: false)
|
41
41
|
scrape_user_info(username)
|
42
|
-
scrape_user_posts(username)
|
42
|
+
# forward the caller's include_meta_data flag; a hard-coded false silently disabled meta data for this method
scrape_user_posts(username, include_meta_data: include_meta_data)
|
43
43
|
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
44
44
|
end
|
45
45
|
|
46
46
|
#get user posts only
|
47
|
-
def self.user_posts(username)
|
48
|
-
scrape_user_posts(username)
|
47
|
+
def self.user_posts(username, include_meta_data: false)
|
48
|
+
scrape_user_posts(username, include_meta_data: include_meta_data)
|
49
49
|
end
|
50
50
|
|
51
51
|
#get user follower count
|
@@ -74,14 +74,21 @@ module InstaScrape
|
|
74
74
|
|
75
75
|
private
|
76
76
|
#post iteration method
|
77
|
-
def self.iterate_through_posts
|
78
|
-
all("article div div div a").
|
77
|
+
def self.iterate_through_posts(include_meta_data:)
|
78
|
+
posts = all("article div div div a").collect do |post|
|
79
|
+
{ link: post["href"],
|
80
|
+
image: post.find("img")["src"]}
|
81
|
+
end
|
79
82
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
+
posts.each do |post|
|
84
|
+
if include_meta_data
|
85
|
+
visit(post[:link])
|
86
|
+
date = page.find('time')["datetime"]
|
87
|
+
info = InstaScrape::InstagramPost.new(post[:link], post[:image], date)
|
88
|
+
else
|
89
|
+
info = InstaScrape::InstagramPost.new(post[:link], post[:image])
|
90
|
+
end
|
83
91
|
@posts << info
|
84
|
-
|
85
92
|
end
|
86
93
|
|
87
94
|
#log
|
@@ -108,7 +115,7 @@ module InstaScrape
|
|
108
115
|
end
|
109
116
|
|
110
117
|
#scrape posts
|
111
|
-
def self.scrape_posts
|
118
|
+
def self.scrape_posts(include_meta_data:)
|
112
119
|
begin
|
113
120
|
page.find('a', :text => "Load more", exact: true).click
|
114
121
|
max_iteration = 10
|
@@ -120,15 +127,15 @@ module InstaScrape
|
|
120
127
|
page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
|
121
128
|
sleep 0.1
|
122
129
|
end
|
123
|
-
iterate_through_posts
|
130
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
124
131
|
rescue Capybara::ElementNotFound => e
|
125
132
|
begin
|
126
|
-
iterate_through_posts
|
133
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
127
134
|
end
|
128
135
|
end
|
129
136
|
end
|
130
137
|
|
131
|
-
def self.long_scrape_posts(scrape_length_in_seconds)
|
138
|
+
def self.long_scrape_posts(scrape_length_in_seconds, include_meta_data:)
|
132
139
|
begin
|
133
140
|
page.find('a', :text => "Load more", exact: true).click
|
134
141
|
max_iteration = (scrape_length_in_seconds / 0.3)
|
@@ -145,24 +152,24 @@ module InstaScrape
|
|
145
152
|
@loader << "."
|
146
153
|
system "clear"
|
147
154
|
end
|
148
|
-
iterate_through_posts
|
155
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
149
156
|
rescue Capybara::ElementNotFound => e
|
150
157
|
begin
|
151
|
-
iterate_through_posts
|
158
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
152
159
|
end
|
153
160
|
end
|
154
161
|
end
|
155
162
|
|
156
|
-
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds)
|
163
|
+
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:)
|
157
164
|
@posts = []
|
158
165
|
visit "https://www.instagram.com/#{username}/"
|
159
|
-
long_scrape_posts(scrape_length_in_seconds)
|
166
|
+
long_scrape_posts(scrape_length_in_seconds, include_meta_data: include_meta_data)
|
160
167
|
end
|
161
168
|
|
162
|
-
def self.scrape_user_posts(username)
|
169
|
+
def self.scrape_user_posts(username, include_meta_data:)
|
163
170
|
@posts = []
|
164
171
|
visit "https://www.instagram.com/#{username}/"
|
165
|
-
scrape_posts
|
172
|
+
scrape_posts(include_meta_data: include_meta_data)
|
166
173
|
end
|
167
174
|
|
168
175
|
#post logger
|
data/lib/insta_scrape/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: insta_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dannyvassallo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|