insta_scraper 0.5.0 → 0.6.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4039c631a9e86f78c8fb7b25f46e63fd23945d45
4
- data.tar.gz: 807caaab6fc4daa7a4aa8967b525d2a9c446e073
3
+ metadata.gz: c93649bb03ee2ae908a682a6e363dc49653bcc28
4
+ data.tar.gz: 7c9671c32fa26a221ec53753850087f7a967d872
5
5
  SHA512:
6
- metadata.gz: b2ba575596682082d83e7c84549c0db54408007f7031ef469856b5988b8cc5315d4ac22247fba94c9840a057aa060efeb4ddc06f99e8ead3a5a3d606d5ec8795
7
- data.tar.gz: 4f85446094701beb5ba1929adf6a3353d2f3d2aa1358331e6973224d0c9ca10c7678986777cd516d976940009b61293c6cbe326a2abbfb154f5590b8ca64796b
6
+ metadata.gz: 51a6b1f4c29c3769bca1d6adf0b7d3db0ad1377180b1039d4846e229e6b8305e51f7cbe0b1481e34905e54c00e6cd4ba0bc2ba18646557a671f25d01c90af368
7
+ data.tar.gz: 2d4c24a9967d5e03654013615b27aa0a864c5350883479ab1414ca4884a6ec00557bb3f6de45f1ab81f1ea9902ee125f1aaec68b1ffeaf22af6a3a90faa81016
data/README.md CHANGED
@@ -70,14 +70,24 @@ account_media.data #=> #<Hashie::Mash...
70
70
  account_media.data.fetch('items') #=> [...]
71
71
  ```
72
72
 
73
- * InstaScraper::JSON::MediaComment
73
+ * InstaScraper::JSON::MediaComment (deprecated, use MediaCommentStream)
74
74
 
75
75
  ```ruby
76
76
  media_comment = InstaScraper::JSON::MediaComment.new('BLeceL9BZNT', 17843235427151917, 20)
77
77
  # shortcode, last_comment_id, per_page
78
78
 
79
79
  media_comment.data #=> #<Hashie::Mash...
80
- media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden penis ship!"
80
+ media_comment.data['comments']['nodes'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
81
+ ```
82
+
83
+ * InstaScraper::JSON::MediaCommentStream
84
+
85
+ ```ruby
86
+ media_comments = InstaScraper::JSON::MediaCommentStream.new('BLeceL9BZNT', options = {})
87
+ # optional_hash = { last_comment_id: '17845167841178842', max_comments: 200, per_page: 20}
88
+
89
+ media_comments.data #=> #<Hashie::Mash...
90
+ media_comments.data['comments'] #=> [#<Hashie::Mash created_at=1476385792.0 id="17843206063151917" text="Now we can get the golden ship!"
81
91
  ```
82
92
 
83
93
 
@@ -19,6 +19,8 @@ Gem::Specification.new do |spec|
19
19
  spec.require_paths = ['lib']
20
20
 
21
21
  spec.add_dependency 'hashie', '3.4.4'
22
+ spec.add_dependency 'faraday', '0.9.2'
23
+ spec.add_dependency 'faraday-cookie_jar', '0.0.6'
22
24
 
23
25
  spec.add_development_dependency 'bundler', '~> 1.12'
24
26
  spec.add_development_dependency 'rake', '~> 10.0'
@@ -14,12 +14,5 @@ module InstaScraper
14
14
  def url
15
15
  "https://www.instagram.com/#{username}/#{serialize_params}"
16
16
  end
17
-
18
- def data
19
- @data ||=
20
- Hashie::Mash.new(::JSON.parse(shared_data))
21
- .extend(Hashie::Extensions::DeepFetch)
22
- .extend(Hashie::Extensions::DeepFind)
23
- end
24
17
  end
25
18
  end
@@ -14,12 +14,5 @@ module InstaScraper
14
14
  def url
15
15
  "https://www.instagram.com/p/#{code}/#{serialize_params}"
16
16
  end
17
-
18
- def data
19
- @data ||=
20
- Hashie::Mash.new(::JSON.parse(shared_data))
21
- .extend(Hashie::Extensions::DeepFetch)
22
- .extend(Hashie::Extensions::DeepFind)
23
- end
24
17
  end
25
18
  end
@@ -4,6 +4,13 @@ module InstaScraper
4
4
  @html ||= get_html
5
5
  end
6
6
 
7
+ def data
8
+ @data ||=
9
+ Hashie::Mash.new(::JSON.parse(shared_data))
10
+ .extend(Hashie::Extensions::DeepFetch)
11
+ .extend(Hashie::Extensions::DeepFind)
12
+ end
13
+
7
14
  protected
8
15
 
9
16
  def serialize_params
@@ -9,12 +9,5 @@ module InstaScraper
9
9
  def url
10
10
  "https://www.instagram.com/#{username}/?__a=1"
11
11
  end
12
-
13
- def data
14
- @data ||=
15
- Hashie::Mash.new(::JSON.parse(raw_json))
16
- .extend(Hashie::Extensions::DeepFetch)
17
- .extend(Hashie::Extensions::DeepFind)
18
- end
19
12
  end
20
13
  end
@@ -11,12 +11,5 @@ module InstaScraper
11
11
  def url
12
12
  "https://www.instagram.com/#{username}/media/#{serialize_params}"
13
13
  end
14
-
15
- def data
16
- @data ||=
17
- Hashie::Mash.new(::JSON.parse(raw_json))
18
- .extend(Hashie::Extensions::DeepFetch)
19
- .extend(Hashie::Extensions::DeepFind)
20
- end
21
14
  end
22
15
  end
@@ -9,12 +9,5 @@ module InstaScraper
9
9
  def url
10
10
  "https://www.instagram.com/p/#{code}/?__a=1"
11
11
  end
12
-
13
- def data
14
- @data ||=
15
- Hashie::Mash.new(::JSON.parse(raw_json))
16
- .extend(Hashie::Extensions::DeepFetch)
17
- .extend(Hashie::Extensions::DeepFind)
18
- end
19
12
  end
20
13
  end
@@ -5,6 +5,7 @@ module InstaScraper
5
5
  :per_page
6
6
 
7
7
  def initialize(shortcode, last_comment_id, per_page = 20)
8
+ warn 'Instagram changed things, this no longer works, use MediaCommentStream'
8
9
  @shortcode = shortcode
9
10
  @last_comment_id = last_comment_id
10
11
  @per_page = per_page
@@ -13,12 +14,5 @@ module InstaScraper
13
14
  def url
14
15
  "https://www.instagram.com/query/?q=ig_shortcode(#{shortcode}){comments.before(#{last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
15
16
  end
16
-
17
- def data
18
- @data ||=
19
- Hashie::Mash.new(::JSON.parse(raw_json))
20
- .extend(Hashie::Extensions::DeepFetch)
21
- .extend(Hashie::Extensions::DeepFind)
22
- end
23
17
  end
24
18
  end
@@ -0,0 +1,91 @@
1
+ module InstaScraper
2
+
3
+ class JSON::MediaCommentStream < InstaScraper::JSON
4
+ attr_reader :shortcode,
5
+ :last_comment_id,
6
+ :per_page,
7
+ :max_comments,
8
+ :responses,
9
+ :comments,
10
+ :error
11
+
12
+ def initialize(shortcode, options = {})#, last_comment_id = nil, max_comments = nil, per_page = 20)
13
+ @shortcode = shortcode
14
+ @last_comment_id = options[:last_comment_id] || nil
15
+ @per_page = options[:per_page] || 20
16
+ @max_comments = options[:max_comments] || nil
17
+ @responses = {}
18
+ @comments = {}
19
+ end
20
+
21
+ def data
22
+ Hashie::Mash.new({ comments: _data })
23
+ .extend(Hashie::Extensions::DeepFetch)
24
+ .extend(Hashie::Extensions::DeepFind)
25
+ end
26
+
27
+ private
28
+
29
+ def _data(current_last_comment_id = nil)
30
+ current_last_comment_id ||= (last_comment_id || default_last_comment_id)
31
+
32
+ response = connection.post do |req|
33
+ req.url '/query/'
34
+ #req.headers['Content-Type'] = 'application/json'
35
+ req.headers['content-type'] = 'application/x-www-form-urlencoded'
36
+ req.headers['X-CSRFToken'] = csrf_token
37
+ req.headers['Referer'] = "https://www.instagram.com/#{shortcode}/"
38
+ req.headers['x-instagram-ajax'] = '1'
39
+ req.headers['x-requested-with'] = 'XMLHttpRequest'
40
+ req.body = "q=ig_shortcode(#{shortcode}){comments.before(#{current_last_comment_id},#{per_page}){count,nodes{id,created_at,text,user{id,profile_pic_url,username,follows{count},followed_by{count},biography,full_name,media{count},is_private,external_url,is_verified}},page_info}}"
41
+ end
42
+
43
+ current_comments = ::JSON.parse(response.body)['comments']['nodes']
44
+
45
+ @responses[current_last_comment_id] = response
46
+ @comments[current_last_comment_id] = current_comments
47
+
48
+ if current_comments.any? && !reached_max_comments?
49
+ _data(current_comments.first['id'])
50
+ end
51
+
52
+ all_comments
53
+ rescue => e
54
+ warn e.inspect
55
+ @error = e
56
+ all_comments
57
+ end
58
+
59
+ def all_comments
60
+ comments.values.flatten
61
+ end
62
+
63
+ def reached_max_comments?
64
+ return false unless max_comments
65
+
66
+ all_comments.count > max_comments
67
+ end
68
+
69
+ def connection
70
+ @connection ||= Faraday.new(:url => "https://www.instagram.com/") do |builder|
71
+ builder.use :cookie_jar
72
+ builder.adapter Faraday.default_adapter
73
+ end
74
+ end
75
+
76
+ def connection_response
77
+ @connection_response ||= connection.get "/p/#{shortcode}/?__a=1"
78
+ end
79
+
80
+ def csrf_token
81
+ connection_response.headers["set-cookie"].match(%r{csrftoken=(?<token>\w+);})['token']
82
+ end
83
+
84
+ def default_last_comment_id
85
+ media = InstaScraper::JSON::Media.new(shortcode)
86
+ media.response = connection_response
87
+ media.data['media']['comments']['nodes'].last['id']
88
+ end
89
+ end
90
+
91
+ end
@@ -1,7 +1,20 @@
1
1
  module InstaScraper
2
2
  class JSON
3
3
  def raw_json
4
- @raw_json ||= get_json
4
+ @raw_json ||= response.body
5
+ end
6
+
7
+ def data
8
+ @data ||=
9
+ Hashie::Mash.new(::JSON.parse(raw_json))
10
+ .extend(Hashie::Extensions::DeepFetch)
11
+ .extend(Hashie::Extensions::DeepFind)
12
+ end
13
+
14
+ attr_writer :response
15
+
16
+ def response
17
+ @response ||= Faraday.get(url)
5
18
  end
6
19
 
7
20
  protected
@@ -11,11 +24,5 @@ module InstaScraper
11
24
 
12
25
  "?" + params.map {|k, v| "#{k}=#{v}"}.join('&')
13
26
  end
14
-
15
- private
16
-
17
- def get_json
18
- open(url).read
19
- end
20
27
  end
21
28
  end
@@ -1,3 +1,3 @@
1
1
  module InstaScraper
2
- VERSION = '0.5.0'.freeze
2
+ VERSION = '0.6.0'.freeze
3
3
  end
data/lib/insta_scraper.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'insta_scraper/version'
2
2
 
3
3
  require 'open-uri'
4
+ require 'faraday'
5
+ require 'faraday-cookie_jar'
4
6
  require 'json'
5
7
 
6
8
  require 'hashie'
@@ -14,3 +16,4 @@ require 'insta_scraper/json/account'
14
16
  require 'insta_scraper/json/media'
15
17
  require 'insta_scraper/json/account_media'
16
18
  require 'insta_scraper/json/media_comment'
19
+ require 'insta_scraper/json/media_comment_stream'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: insta_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - preciz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-10-14 00:00:00.000000000 Z
11
+ date: 2016-12-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: hashie
@@ -24,6 +24,34 @@ dependencies:
24
24
  - - '='
25
25
  - !ruby/object:Gem::Version
26
26
  version: 3.4.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: faraday-cookie_jar
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 0.0.6
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 0.0.6
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: bundler
29
57
  requirement: !ruby/object:Gem::Requirement
@@ -90,6 +118,7 @@ files:
90
118
  - lib/insta_scraper/json/account_media.rb
91
119
  - lib/insta_scraper/json/media.rb
92
120
  - lib/insta_scraper/json/media_comment.rb
121
+ - lib/insta_scraper/json/media_comment_stream.rb
93
122
  - lib/insta_scraper/version.rb
94
123
  homepage:
95
124
  licenses: