insta_scrape 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -3
- data/lib/insta_scrape.rb +35 -28
- data/lib/insta_scrape/version.rb +1 -1
- data/lib/models/instagram_post.rb +3 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c55fecbadd0f789dfa52d2d26dab8175fb7acdd
|
4
|
+
data.tar.gz: ed3fa91e7b6422f435e6095d67a5abb175702b41
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 135f7ac6592b5a7f03d04874af4341f472d9e30b6e6378a6a6219ef2854fdba2f4f4a494d1ab3daa5145a7c36f7d5a4722cf6723b74920453dca7ce2b31f4426
|
7
|
+
data.tar.gz: 13287580a7b8731f3ffaa29cfe4b2acec51bb14d40e76bed5d4af2b16ce74a613860d268f245c776c25f8a227bc1b85591553081ef21d2cff2b81432527dc4e0
|
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
[](https://travis-ci.org/dannyvassallo/insta_scrape)[](https://badge.fury.io/rb/insta_scrape)
|
1
|
+
[](https://travis-ci.org/dannyvassallo/insta_scrape)[](https://badge.fury.io/rb/insta_scrape)
|
2
|
+
|
2
3
|

|
3
4
|
# InstaScrape
|
4
5
|
|
@@ -7,7 +8,8 @@ This gem is dependent on Capybara, PhantomJS, and Poltergeist.
|
|
7
8
|
|
8
9
|
Using this gem you can access multiple facets of the instagram API without needing authorization, most importantly the hashtag.
|
9
10
|
|
10
|
-
v.1.1.0 introducing "long_scrape" methods! Now with more
|
11
|
+
v.1.1.0 introducing "long_scrape" methods! Now with more instagram posts!
|
12
|
+
v.1.1.1 introducing "long_scrape" methods! Now with more instagram posts!
|
11
13
|
|
12
14
|
## Note
|
13
15
|
|
@@ -41,6 +43,23 @@ Or install it yourself as:
|
|
41
43
|
Long scrape methods take two arguments -- (hashtag || username, time_in_seconds)
|
42
44
|
Each other method accepts only one argument - a hashtag or a username.
|
43
45
|
|
46
|
+
An additional feature has been added to grab some extra meta information (dates) from the posts.
|
47
|
+
While this feature makes the scrape perform much slower, you can opt in to using it with
|
48
|
+
the following option as a second or third argument on most methods:
|
49
|
+
|
50
|
+
`include_meta_data: true`
|
51
|
+
|
52
|
+
For Example:
|
53
|
+
|
54
|
+
```
|
55
|
+
InstaScrape.hashtag("foofighters", include_meta_data: true)
|
56
|
+
|
57
|
+
#OR
|
58
|
+
|
59
|
+
InstaScrape.long_scrape_hashtag("foofighters", 30, include_meta_data: true)
|
60
|
+
```
|
61
|
+
|
62
|
+
If you call the methods without this option, you won't get the additional meta information.
|
44
63
|
|
45
64
|
####Long Scrape Methods
|
46
65
|
```ruby
|
@@ -174,4 +193,3 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/dannyv
|
|
174
193
|
## License
|
175
194
|
|
176
195
|
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
177
|
-
|
data/lib/insta_scrape.rb
CHANGED
@@ -4,29 +4,29 @@ module InstaScrape
|
|
4
4
|
extend Capybara::DSL
|
5
5
|
|
6
6
|
#get a hashtag
|
7
|
-
def self.hashtag(hashtag)
|
7
|
+
def self.hashtag(hashtag, include_meta_data: false)
|
8
8
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
9
9
|
@posts = []
|
10
|
-
scrape_posts
|
10
|
+
scrape_posts(include_meta_data: include_meta_data)
|
11
11
|
end
|
12
12
|
|
13
13
|
#long scrape a hashtag
|
14
|
-
def self.long_scrape_hashtag(hashtag, scrape_length)
|
14
|
+
def self.long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false)
|
15
15
|
visit "https://www.instagram.com/explore/tags/#{hashtag}/"
|
16
16
|
@posts = []
|
17
|
-
long_scrape_posts(scrape_length)
|
17
|
+
long_scrape_posts(scrape_length, include_meta_data: include_meta_data)
|
18
18
|
end
|
19
19
|
|
20
20
|
#long scrape a user's posts
|
21
|
-
def self.long_scrape_user_posts(username, scrape_length)
|
21
|
+
def self.long_scrape_user_posts(username, scrape_length, include_meta_data: false)
|
22
22
|
@posts = []
|
23
|
-
long_scrape_user_posts_method(username, scrape_length)
|
23
|
+
long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)
|
24
24
|
end
|
25
25
|
|
26
26
|
#get user info and posts
|
27
|
-
def self.long_scrape_user_info_and_posts(username, scrape_length)
|
27
|
+
def self.long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false)
|
28
28
|
scrape_user_info(username)
|
29
|
-
long_scrape_user_posts_method(username, scrape_length)
|
29
|
+
long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)
|
30
30
|
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
31
31
|
end
|
32
32
|
|
@@ -37,15 +37,15 @@ module InstaScrape
|
|
37
37
|
end
|
38
38
|
|
39
39
|
#get user info and posts
|
40
|
-
def self.user_info_and_posts(username)
|
40
|
+
def self.user_info_and_posts(username, include_meta_data: false)
|
41
41
|
scrape_user_info(username)
|
42
|
-
scrape_user_posts(username)
|
42
|
+
scrape_user_posts(username, include_meta_data: include_meta_data) # forward the caller's opt-in instead of hard-coding false
|
43
43
|
@user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
|
44
44
|
end
|
45
45
|
|
46
46
|
#get user posts only
|
47
|
-
def self.user_posts(username)
|
48
|
-
scrape_user_posts(username)
|
47
|
+
def self.user_posts(username, include_meta_data: false)
|
48
|
+
scrape_user_posts(username, include_meta_data: include_meta_data)
|
49
49
|
end
|
50
50
|
|
51
51
|
#get user follower count
|
@@ -74,14 +74,21 @@ module InstaScrape
|
|
74
74
|
|
75
75
|
private
|
76
76
|
#post iteration method
|
77
|
-
def self.iterate_through_posts
|
78
|
-
all("article div div div a").
|
77
|
+
def self.iterate_through_posts(include_meta_data:)
|
78
|
+
posts = all("article div div div a").collect do |post|
|
79
|
+
{ link: post["href"],
|
80
|
+
image: post.find("img")["src"]}
|
81
|
+
end
|
79
82
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
+
posts.each do |post|
|
84
|
+
if include_meta_data
|
85
|
+
visit(post[:link])
|
86
|
+
date = page.find('time')["datetime"]
|
87
|
+
info = InstaScrape::InstagramPost.new(post[:link], post[:image], date)
|
88
|
+
else
|
89
|
+
info = InstaScrape::InstagramPost.new(post[:link], post[:image])
|
90
|
+
end
|
83
91
|
@posts << info
|
84
|
-
|
85
92
|
end
|
86
93
|
|
87
94
|
#log
|
@@ -108,7 +115,7 @@ module InstaScrape
|
|
108
115
|
end
|
109
116
|
|
110
117
|
#scrape posts
|
111
|
-
def self.scrape_posts
|
118
|
+
def self.scrape_posts(include_meta_data:)
|
112
119
|
begin
|
113
120
|
page.find('a', :text => "Load more", exact: true).click
|
114
121
|
max_iteration = 10
|
@@ -120,15 +127,15 @@ module InstaScrape
|
|
120
127
|
page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
|
121
128
|
sleep 0.1
|
122
129
|
end
|
123
|
-
iterate_through_posts
|
130
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
124
131
|
rescue Capybara::ElementNotFound => e
|
125
132
|
begin
|
126
|
-
iterate_through_posts
|
133
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
127
134
|
end
|
128
135
|
end
|
129
136
|
end
|
130
137
|
|
131
|
-
def self.long_scrape_posts(scrape_length_in_seconds)
|
138
|
+
def self.long_scrape_posts(scrape_length_in_seconds, include_meta_data:)
|
132
139
|
begin
|
133
140
|
page.find('a', :text => "Load more", exact: true).click
|
134
141
|
max_iteration = (scrape_length_in_seconds / 0.3)
|
@@ -145,24 +152,24 @@ module InstaScrape
|
|
145
152
|
@loader << "."
|
146
153
|
system "clear"
|
147
154
|
end
|
148
|
-
iterate_through_posts
|
155
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
149
156
|
rescue Capybara::ElementNotFound => e
|
150
157
|
begin
|
151
|
-
iterate_through_posts
|
158
|
+
iterate_through_posts(include_meta_data: include_meta_data)
|
152
159
|
end
|
153
160
|
end
|
154
161
|
end
|
155
162
|
|
156
|
-
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds)
|
163
|
+
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:)
|
157
164
|
@posts = []
|
158
165
|
visit "https://www.instagram.com/#{username}/"
|
159
|
-
long_scrape_posts(scrape_length_in_seconds)
|
166
|
+
long_scrape_posts(scrape_length_in_seconds, include_meta_data: include_meta_data)
|
160
167
|
end
|
161
168
|
|
162
|
-
def self.scrape_user_posts(username)
|
169
|
+
def self.scrape_user_posts(username, include_meta_data:)
|
163
170
|
@posts = []
|
164
171
|
visit "https://www.instagram.com/#{username}/"
|
165
|
-
scrape_posts
|
172
|
+
scrape_posts(include_meta_data: include_meta_data)
|
166
173
|
end
|
167
174
|
|
168
175
|
#post logger
|
data/lib/insta_scrape/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: insta_scrape
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- dannyvassallo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|