insta_scraper 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -2
- data/insta_scraper.gemspec +2 -0
- data/lib/insta_scraper/html/account.rb +0 -7
- data/lib/insta_scraper/html/media.rb +0 -7
- data/lib/insta_scraper/html.rb +7 -0
- data/lib/insta_scraper/json/account.rb +0 -7
- data/lib/insta_scraper/json/account_media.rb +0 -7
- data/lib/insta_scraper/json/media.rb +0 -7
- data/lib/insta_scraper/json/media_comment.rb +1 -7
- data/lib/insta_scraper/json/media_comment_stream.rb +91 -0
- data/lib/insta_scraper/json.rb +14 -7
- data/lib/insta_scraper/version.rb +1 -1
- data/lib/insta_scraper.rb +3 -0
- metadata +31 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c93649bb03ee2ae908a682a6e363dc49653bcc28
|
4
|
+
data.tar.gz: 7c9671c32fa26a221ec53753850087f7a967d872
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 51a6b1f4c29c3769bca1d6adf0b7d3db0ad1377180b1039d4846e229e6b8305e51f7cbe0b1481e34905e54c00e6cd4ba0bc2ba18646557a671f25d01c90af368
|
7
|
+
data.tar.gz: 2d4c24a9967d5e03654013615b27aa0a864c5350883479ab1414ca4884a6ec00557bb3f6de45f1ab81f1ea9902ee125f1aaec68b1ffeaf22af6a3a90faa81016
|
data/README.md
CHANGED
@@ -70,14 +70,24 @@ account_media.data #=> #<Hashie::Mash...
|
|
70
70
|
account_media.data.fetch('items') #=> [...]
|
71
71
|
```
|
72
72
|
|
73
|
-
* InstaScraper::JSON::MediaComment
|
73
|
+
* InstaScraper::JSON::MediaComment (deprecated, use MediaCommentStream)
|
74
74
|
|
75
75
|
```ruby
|
76
76
|
media_comment = InstaScraper::JSON::MediaComment.new('BLeceL9BZNT', 17843235427151917, 20)
|
77
77
|
# shortcode, last_comment_id, per_page
|
78
78
|
|
79
79
|
media_comment.data #=> #<Hashie::Mash...
|
80
|
-
media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden
|
80
|
+
media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
|
81
|
+
```
|
82
|
+
|
83
|
+
* InstaScraper::JSON::MediaCommentStream
|
84
|
+
|
85
|
+
```ruby
|
86
|
+
media_comments = InstaScraper::JSON::MediaCommentStream.new('BLeceL9BZNT', options = {})
|
87
|
+
# optional_hash = { last_comment_id: '17845167841178842', max_comments: 200, per_page: 20}
|
88
|
+
|
89
|
+
media_comments.data #=> #<Hashie::Mash...
|
90
|
+
media_comments.data['comments'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
|
81
91
|
```
|
82
92
|
|
83
93
|
|
data/insta_scraper.gemspec
CHANGED
@@ -19,6 +19,8 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ['lib']
|
20
20
|
|
21
21
|
spec.add_dependency 'hashie', '3.4.4'
|
22
|
+
spec.add_dependency 'faraday', '0.9.2'
|
23
|
+
spec.add_dependency 'faraday-cookie_jar', '0.0.6'
|
22
24
|
|
23
25
|
spec.add_development_dependency 'bundler', '~> 1.12'
|
24
26
|
spec.add_development_dependency 'rake', '~> 10.0'
|
@@ -14,12 +14,5 @@ module InstaScraper
|
|
14
14
|
def url
|
15
15
|
"https://www.instagram.com/#{username}/#{serialize_params}"
|
16
16
|
end
|
17
|
-
|
18
|
-
def data
|
19
|
-
@data ||=
|
20
|
-
Hashie::Mash.new(::JSON.parse(shared_data))
|
21
|
-
.extend(Hashie::Extensions::DeepFetch)
|
22
|
-
.extend(Hashie::Extensions::DeepFind)
|
23
|
-
end
|
24
17
|
end
|
25
18
|
end
|
@@ -14,12 +14,5 @@ module InstaScraper
|
|
14
14
|
def url
|
15
15
|
"https://www.instagram.com/p/#{code}/#{serialize_params}"
|
16
16
|
end
|
17
|
-
|
18
|
-
def data
|
19
|
-
@data ||=
|
20
|
-
Hashie::Mash.new(::JSON.parse(shared_data))
|
21
|
-
.extend(Hashie::Extensions::DeepFetch)
|
22
|
-
.extend(Hashie::Extensions::DeepFind)
|
23
|
-
end
|
24
17
|
end
|
25
18
|
end
|
data/lib/insta_scraper/html.rb
CHANGED
@@ -9,12 +9,5 @@ module InstaScraper
|
|
9
9
|
def url
|
10
10
|
"https://www.instagram.com/#{username}/?__a=1"
|
11
11
|
end
|
12
|
-
|
13
|
-
def data
|
14
|
-
@data ||=
|
15
|
-
Hashie::Mash.new(::JSON.parse(raw_json))
|
16
|
-
.extend(Hashie::Extensions::DeepFetch)
|
17
|
-
.extend(Hashie::Extensions::DeepFind)
|
18
|
-
end
|
19
12
|
end
|
20
13
|
end
|
@@ -11,12 +11,5 @@ module InstaScraper
|
|
11
11
|
def url
|
12
12
|
"https://www.instagram.com/#{username}/media/#{serialize_params}"
|
13
13
|
end
|
14
|
-
|
15
|
-
def data
|
16
|
-
@data ||=
|
17
|
-
Hashie::Mash.new(::JSON.parse(raw_json))
|
18
|
-
.extend(Hashie::Extensions::DeepFetch)
|
19
|
-
.extend(Hashie::Extensions::DeepFind)
|
20
|
-
end
|
21
14
|
end
|
22
15
|
end
|
@@ -9,12 +9,5 @@ module InstaScraper
|
|
9
9
|
def url
|
10
10
|
"https://www.instagram.com/p/#{code}/?__a=1"
|
11
11
|
end
|
12
|
-
|
13
|
-
def data
|
14
|
-
@data ||=
|
15
|
-
Hashie::Mash.new(::JSON.parse(raw_json))
|
16
|
-
.extend(Hashie::Extensions::DeepFetch)
|
17
|
-
.extend(Hashie::Extensions::DeepFind)
|
18
|
-
end
|
19
12
|
end
|
20
13
|
end
|
@@ -5,6 +5,7 @@ module InstaScraper
|
|
5
5
|
:per_page
|
6
6
|
|
7
7
|
def initialize(shortcode, last_comment_id, per_page = 20)
|
8
|
+
warn 'Instagram changed things, this no longer works, use MediaCommentStream'
|
8
9
|
@shortcode = shortcode
|
9
10
|
@last_comment_id = last_comment_id
|
10
11
|
@per_page = per_page
|
@@ -13,12 +14,5 @@ module InstaScraper
|
|
13
14
|
def url
|
14
15
|
"https://www.instagram.com/query/?q=ig_shortcode(#{shortcode}){comments.before(#{last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
|
15
16
|
end
|
16
|
-
|
17
|
-
def data
|
18
|
-
@data ||=
|
19
|
-
Hashie::Mash.new(::JSON.parse(raw_json))
|
20
|
-
.extend(Hashie::Extensions::DeepFetch)
|
21
|
-
.extend(Hashie::Extensions::DeepFind)
|
22
|
-
end
|
23
17
|
end
|
24
18
|
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module InstaScraper
|
2
|
+
|
3
|
+
class JSON::MediaCommentStream < InstaScraper::JSON
|
4
|
+
attr_reader :shortcode,
|
5
|
+
:last_comment_id,
|
6
|
+
:per_page,
|
7
|
+
:max_comments,
|
8
|
+
:responses,
|
9
|
+
:comments,
|
10
|
+
:error
|
11
|
+
|
12
|
+
def initialize(shortcode, options = {})#, last_comment_id = nil, max_comments = nil, per_page = 20)
|
13
|
+
@shortcode = shortcode
|
14
|
+
@last_comment_id = options[:last_comment_id] || nil
|
15
|
+
@per_page = options[:per_page] || 20
|
16
|
+
@max_comments = options[:max_comments] || nil
|
17
|
+
@responses = {}
|
18
|
+
@comments = {}
|
19
|
+
end
|
20
|
+
|
21
|
+
def data
|
22
|
+
Hashie::Mash.new({ comments: _data })
|
23
|
+
.extend(Hashie::Extensions::DeepFetch)
|
24
|
+
.extend(Hashie::Extensions::DeepFind)
|
25
|
+
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
def _data(current_last_comment_id = nil)
|
30
|
+
current_last_comment_id ||= (last_comment_id || default_last_comment_id)
|
31
|
+
|
32
|
+
response = connection.post do |req|
|
33
|
+
req.url '/query/'
|
34
|
+
#req.headers['Content-Type'] = 'application/json'
|
35
|
+
req.headers['content-type'] = 'application/x-www-form-urlencoded'
|
36
|
+
req.headers['X-CSRFToken'] = csrf_token
|
37
|
+
req.headers['Referer'] = "https://www.instagram.com/#{shortcode}/"
|
38
|
+
req.headers['x-instagram-ajax'] = '1'
|
39
|
+
req.headers['x-requested-with'] = 'XMLHttpRequest'
|
40
|
+
req.body = "q=ig_shortcode(#{shortcode}){comments.before(#{current_last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
|
41
|
+
end
|
42
|
+
|
43
|
+
current_comments = ::JSON.parse(response.body)['comments']['nodes']
|
44
|
+
|
45
|
+
@responses[current_last_comment_id] = response
|
46
|
+
@comments[current_last_comment_id] = current_comments
|
47
|
+
|
48
|
+
if current_comments.any? && !reached_max_comments?
|
49
|
+
_data(current_comments.first['id'])
|
50
|
+
end
|
51
|
+
|
52
|
+
all_comments
|
53
|
+
rescue => e
|
54
|
+
warn e.inspect
|
55
|
+
@error = e
|
56
|
+
all_comments
|
57
|
+
end
|
58
|
+
|
59
|
+
def all_comments
|
60
|
+
comments.values.flatten
|
61
|
+
end
|
62
|
+
|
63
|
+
def reached_max_comments?
|
64
|
+
return false unless max_comments
|
65
|
+
|
66
|
+
all_comments.count > max_comments
|
67
|
+
end
|
68
|
+
|
69
|
+
def connection
|
70
|
+
@connection ||= Faraday.new(:url => "https://www.instagram.com/") do |builder|
|
71
|
+
builder.use :cookie_jar
|
72
|
+
builder.adapter Faraday.default_adapter
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
def connection_response
|
77
|
+
@connection_response ||= connection.get "/p/#{shortcode}/?__a=1"
|
78
|
+
end
|
79
|
+
|
80
|
+
def csrf_token
|
81
|
+
connection_response.headers["set-cookie"].match(%r{csrftoken=(?<token>\w+);})['token']
|
82
|
+
end
|
83
|
+
|
84
|
+
def default_last_comment_id
|
85
|
+
media = InstaScraper::JSON::Media.new(shortcode)
|
86
|
+
media.response = connection_response
|
87
|
+
media.data['media']['comments']['nodes'].last['id']
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
data/lib/insta_scraper/json.rb
CHANGED
@@ -1,7 +1,20 @@
|
|
1
1
|
module InstaScraper
|
2
2
|
class JSON
|
3
3
|
def raw_json
|
4
|
-
@raw_json ||=
|
4
|
+
@raw_json ||= response.body
|
5
|
+
end
|
6
|
+
|
7
|
+
def data
|
8
|
+
@data ||=
|
9
|
+
Hashie::Mash.new(::JSON.parse(raw_json))
|
10
|
+
.extend(Hashie::Extensions::DeepFetch)
|
11
|
+
.extend(Hashie::Extensions::DeepFind)
|
12
|
+
end
|
13
|
+
|
14
|
+
attr_writer :response
|
15
|
+
|
16
|
+
def response
|
17
|
+
@response ||= Faraday.get(url)
|
5
18
|
end
|
6
19
|
|
7
20
|
protected
|
@@ -11,11 +24,5 @@ module InstaScraper
|
|
11
24
|
|
12
25
|
"?" + params.map {|k, v| "#{k}=#{v}"}.join('&')
|
13
26
|
end
|
14
|
-
|
15
|
-
private
|
16
|
-
|
17
|
-
def get_json
|
18
|
-
open(url).read
|
19
|
-
end
|
20
27
|
end
|
21
28
|
end
|
data/lib/insta_scraper.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'insta_scraper/version'
|
2
2
|
|
3
3
|
require 'open-uri'
|
4
|
+
require 'faraday'
|
5
|
+
require 'faraday-cookie_jar'
|
4
6
|
require 'json'
|
5
7
|
|
6
8
|
require 'hashie'
|
@@ -14,3 +16,4 @@ require 'insta_scraper/json/account'
|
|
14
16
|
require 'insta_scraper/json/media'
|
15
17
|
require 'insta_scraper/json/account_media'
|
16
18
|
require 'insta_scraper/json/media_comment'
|
19
|
+
require 'insta_scraper/json/media_comment_stream'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: insta_scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- preciz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hashie
|
@@ -24,6 +24,34 @@ dependencies:
|
|
24
24
|
- - '='
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 3.4.4
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: faraday
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.2
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: faraday-cookie_jar
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.0.6
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.0.6
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: bundler
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -90,6 +118,7 @@ files:
|
|
90
118
|
- lib/insta_scraper/json/account_media.rb
|
91
119
|
- lib/insta_scraper/json/media.rb
|
92
120
|
- lib/insta_scraper/json/media_comment.rb
|
121
|
+
- lib/insta_scraper/json/media_comment_stream.rb
|
93
122
|
- lib/insta_scraper/version.rb
|
94
123
|
homepage:
|
95
124
|
licenses:
|