insta_scraper 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4039c631a9e86f78c8fb7b25f46e63fd23945d45
4
- data.tar.gz: 807caaab6fc4daa7a4aa8967b525d2a9c446e073
3
+ metadata.gz: c93649bb03ee2ae908a682a6e363dc49653bcc28
4
+ data.tar.gz: 7c9671c32fa26a221ec53753850087f7a967d872
5
5
  SHA512:
6
- metadata.gz: b2ba575596682082d83e7c84549c0db54408007f7031ef469856b5988b8cc5315d4ac22247fba94c9840a057aa060efeb4ddc06f99e8ead3a5a3d606d5ec8795
7
- data.tar.gz: 4f85446094701beb5ba1929adf6a3353d2f3d2aa1358331e6973224d0c9ca10c7678986777cd516d976940009b61293c6cbe326a2abbfb154f5590b8ca64796b
6
+ metadata.gz: 51a6b1f4c29c3769bca1d6adf0b7d3db0ad1377180b1039d4846e229e6b8305e51f7cbe0b1481e34905e54c00e6cd4ba0bc2ba18646557a671f25d01c90af368
7
+ data.tar.gz: 2d4c24a9967d5e03654013615b27aa0a864c5350883479ab1414ca4884a6ec00557bb3f6de45f1ab81f1ea9902ee125f1aaec68b1ffeaf22af6a3a90faa81016
data/README.md CHANGED
@@ -70,14 +70,24 @@ account_media.data #=> #<Hashie::Mash...
70
70
  account_media.data.fetch('items') #=> [...]
71
71
  ```
72
72
 
73
- * InstaScraper::JSON::MediaComment
73
+ * InstaScraper::JSON::MediaComment (deprecated, use MediaCommentStream)
74
74
 
75
75
  ```ruby
76
76
  media_comment = InstaScraper::JSON::MediaComment.new('BLeceL9BZNT', 17843235427151917, 20)
77
77
  # shortcode, last_comment_id, per_page
78
78
 
79
79
  media_comment.data #=> #<Hashie::Mash...
80
- media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden penis ship!"
80
+ media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
81
+ ```
82
+
83
+ * InstaScraper::JSON::MediaCommentStream
84
+
85
+ ```ruby
86
+ media_comments = InstaScraper::JSON::MediaCommentStream.new('BLeceL9BZNT', options = {})
87
+ # options = { last_comment_id: '17845167841178842', max_comments: 200, per_page: 20 }
88
+
89
+ media_comments.data #=> #<Hashie::Mash...
90
+ media_comments.data['comments'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
81
91
  ```
82
92
 
83
93
 
@@ -19,6 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_dependency 'hashie', '3.4.4'
22
+ spec.add_dependency 'faraday', '0.9.2'
23
+ spec.add_dependency 'faraday-cookie_jar', '0.0.6'
22
24
 
23
25
  spec.add_development_dependency 'bundler', '~> 1.12'
24
26
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -14,12 +14,5 @@ module InstaScraper
14
14
  def url
15
15
  "https://www.instagram.com/#{username}/#{serialize_params}"
16
16
  end
17
-
18
- def data
19
- @data ||=
20
- Hashie::Mash.new(::JSON.parse(shared_data))
21
- .extend(Hashie::Extensions::DeepFetch)
22
- .extend(Hashie::Extensions::DeepFind)
23
- end
24
17
  end
25
18
  end
@@ -14,12 +14,5 @@ module InstaScraper
14
14
  def url
15
15
  "https://www.instagram.com/p/#{code}/#{serialize_params}"
16
16
  end
17
-
18
- def data
19
- @data ||=
20
- Hashie::Mash.new(::JSON.parse(shared_data))
21
- .extend(Hashie::Extensions::DeepFetch)
22
- .extend(Hashie::Extensions::DeepFind)
23
- end
24
17
  end
25
18
  end
@@ -4,6 +4,13 @@ module InstaScraper
4
4
  @html ||= get_html
5
5
  end
6
6
 
7
+ def data
8
+ @data ||=
9
+ Hashie::Mash.new(::JSON.parse(shared_data))
10
+ .extend(Hashie::Extensions::DeepFetch)
11
+ .extend(Hashie::Extensions::DeepFind)
12
+ end
13
+
7
14
  protected
8
15
 
9
16
  def serialize_params
@@ -9,12 +9,5 @@ module InstaScraper
9
9
  def url
10
10
  "https://www.instagram.com/#{username}/?__a=1"
11
11
  end
12
-
13
- def data
14
- @data ||=
15
- Hashie::Mash.new(::JSON.parse(raw_json))
16
- .extend(Hashie::Extensions::DeepFetch)
17
- .extend(Hashie::Extensions::DeepFind)
18
- end
19
12
  end
20
13
  end
@@ -11,12 +11,5 @@ module InstaScraper
11
11
  def url
12
12
  "https://www.instagram.com/#{username}/media/#{serialize_params}"
13
13
  end
14
-
15
- def data
16
- @data ||=
17
- Hashie::Mash.new(::JSON.parse(raw_json))
18
- .extend(Hashie::Extensions::DeepFetch)
19
- .extend(Hashie::Extensions::DeepFind)
20
- end
21
14
  end
22
15
  end
@@ -9,12 +9,5 @@ module InstaScraper
9
9
  def url
10
10
  "https://www.instagram.com/p/#{code}/?__a=1"
11
11
  end
12
-
13
- def data
14
- @data ||=
15
- Hashie::Mash.new(::JSON.parse(raw_json))
16
- .extend(Hashie::Extensions::DeepFetch)
17
- .extend(Hashie::Extensions::DeepFind)
18
- end
19
12
  end
20
13
  end
@@ -5,6 +5,7 @@ module InstaScraper
5
5
  :per_page
6
6
 
7
7
  def initialize(shortcode, last_comment_id, per_page = 20)
8
+ warn 'Instagram changed things, this no longer works, use MediaCommentStream'
8
9
  @shortcode = shortcode
9
10
  @last_comment_id = last_comment_id
10
11
  @per_page = per_page
@@ -13,12 +14,5 @@ module InstaScraper
13
14
  def url
14
15
  "https://www.instagram.com/query/?q=ig_shortcode(#{shortcode}){comments.before(#{last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
15
16
  end
16
-
17
- def data
18
- @data ||=
19
- Hashie::Mash.new(::JSON.parse(raw_json))
20
- .extend(Hashie::Extensions::DeepFetch)
21
- .extend(Hashie::Extensions::DeepFind)
22
- end
23
17
  end
24
18
  end
@@ -0,0 +1,91 @@
1
+ module InstaScraper
2
+
3
+ class JSON::MediaCommentStream < InstaScraper::JSON
4
+ attr_reader :shortcode,
5
+ :last_comment_id,
6
+ :per_page,
7
+ :max_comments,
8
+ :responses,
9
+ :comments,
10
+ :error
11
+
12
+ def initialize(shortcode, options = {})#, last_comment_id = nil, max_comments = nil, per_page = 20)
13
+ @shortcode = shortcode
14
+ @last_comment_id = options[:last_comment_id] || nil
15
+ @per_page = options[:per_page] || 20
16
+ @max_comments = options[:max_comments] || nil
17
+ @responses = {}
18
+ @comments = {}
19
+ end
20
+
21
+ def data
22
+ Hashie::Mash.new({ comments: _data })
23
+ .extend(Hashie::Extensions::DeepFetch)
24
+ .extend(Hashie::Extensions::DeepFind)
25
+ end
26
+
27
+ private
28
+
29
+ def _data(current_last_comment_id = nil)
30
+ current_last_comment_id ||= (last_comment_id || default_last_comment_id)
31
+
32
+ response = connection.post do |req|
33
+ req.url '/query/'
34
+ #req.headers['Content-Type'] = 'application/json'
35
+ req.headers['content-type'] = 'application/x-www-form-urlencoded'
36
+ req.headers['X-CSRFToken'] = csrf_token
37
+ req.headers['Referer'] = "https://www.instagram.com/#{shortcode}/"
38
+ req.headers['x-instagram-ajax'] = '1'
39
+ req.headers['x-requested-with'] = 'XMLHttpRequest'
40
+ req.body = "q=ig_shortcode(#{shortcode}){comments.before(#{current_last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
41
+ end
42
+
43
+ current_comments = ::JSON.parse(response.body)['comments']['nodes']
44
+
45
+ @responses[current_last_comment_id] = response
46
+ @comments[current_last_comment_id] = current_comments
47
+
48
+ if current_comments.any? && !reached_max_comments?
49
+ _data(current_comments.first['id'])
50
+ end
51
+
52
+ all_comments
53
+ rescue => e
54
+ warn e.inspect
55
+ @error = e
56
+ all_comments
57
+ end
58
+
59
+ def all_comments
60
+ comments.values.flatten
61
+ end
62
+
63
+ def reached_max_comments?
64
+ return false unless max_comments
65
+
66
+ all_comments.count > max_comments
67
+ end
68
+
69
+ def connection
70
+ @connection ||= Faraday.new(:url => "https://www.instagram.com/") do |builder|
71
+ builder.use :cookie_jar
72
+ builder.adapter Faraday.default_adapter
73
+ end
74
+ end
75
+
76
+ def connection_response
77
+ @connection_response ||= connection.get "/p/#{shortcode}/?__a=1"
78
+ end
79
+
80
+ def csrf_token
81
+ connection_response.headers["set-cookie"].match(%r{csrftoken=(?<token>\w+);})['token']
82
+ end
83
+
84
+ def default_last_comment_id
85
+ media = InstaScraper::JSON::Media.new(shortcode)
86
+ media.response = connection_response
87
+ media.data['media']['comments']['nodes'].last['id']
88
+ end
89
+ end
90
+
91
+ end
@@ -1,7 +1,20 @@
1
1
  module InstaScraper
2
2
  class JSON
3
3
  def raw_json
4
- @raw_json ||= get_json
4
+ @raw_json ||= response.body
5
+ end
6
+
7
+ def data
8
+ @data ||=
9
+ Hashie::Mash.new(::JSON.parse(raw_json))
10
+ .extend(Hashie::Extensions::DeepFetch)
11
+ .extend(Hashie::Extensions::DeepFind)
12
+ end
13
+
14
+ attr_writer :response
15
+
16
+ def response
17
+ @response ||= Faraday.get(url)
5
18
  end
6
19
 
7
20
  protected
@@ -11,11 +24,5 @@ module InstaScraper
11
24
 
12
25
  "?" + params.map {|k, v| "#{k}=#{v}"}.join('&')
13
26
  end
14
-
15
- private
16
-
17
- def get_json
18
- open(url).read
19
- end
20
27
  end
21
28
  end
@@ -1,3 +1,3 @@
1
1
  module InstaScraper
2
- VERSION = '0.5.0'.freeze
2
+ VERSION = '0.6.0'.freeze
3
3
  end
data/lib/insta_scraper.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'insta_scraper/version'
2
2
 
3
3
  require 'open-uri'
4
+ require 'faraday'
5
+ require 'faraday-cookie_jar'
4
6
  require 'json'
5
7
 
6
8
  require 'hashie'
@@ -14,3 +16,4 @@ require 'insta_scraper/json/account'
14
16
  require 'insta_scraper/json/media'
15
17
  require 'insta_scraper/json/account_media'
16
18
  require 'insta_scraper/json/media_comment'
19
+ require 'insta_scraper/json/media_comment_stream'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: insta_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - preciz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-14 00:00:00.000000000 Z
11
+ date: 2016-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hashie
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
26
  version: 3.4.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday-cookie_jar
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.0.6
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.6
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -90,6 +118,7 @@ files:
90
118
  - lib/insta_scraper/json/account_media.rb
91
119
  - lib/insta_scraper/json/media.rb
92
120
  - lib/insta_scraper/json/media_comment.rb
121
+ - lib/insta_scraper/json/media_comment_stream.rb
93
122
  - lib/insta_scraper/version.rb
94
123
  homepage:
95
124
  licenses: