insta_scraper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 38215b1a3acd833f33afb46c2eed6ce9540f8c99
4
+ data.tar.gz: 2fa1a3112f193457316479b58ff3b2e08ccdbb94
5
+ SHA512:
6
+ metadata.gz: a0dd314dd16c00601e15c8c2575861dd4ef10d1a933c159f460abd39196723205be9103aa9bbbe70d7ace64b5fc7ca5396f2c21ce18abc93588918a2523f1968
7
+ data.tar.gz: 94c355556e21a3223ce191828439db47d711e997adfd818f8c6bbb65f2597bf618b7fe9bc0d0842d38dc5b607128ddf6ff7a8faa1bde7567ca6b76f521b99207
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in insta_scraper.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 preciz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,67 @@
1
+ # InstaScraper
2
+
3
+ Scrapes Instagram
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'insta_scraper'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install insta_scraper
20
+
21
+ ## Usage
22
+
23
+ Subclasses of `InstaScraper::HTML` scrape HTML endpoints.
24
+ Subclasses of `InstaScraper::JSON` scrape JSON endpoints.
25
+
26
+ * InstaScraper::HTML::Account
27
+
28
+ ```ruby
29
+ account = InstaScraper::HTML::Account.new('barna.kovacs.codes')
30
+
31
+ account.json #=> #<Hashie::Mash...
32
+ account.json.deep_find('followed_by').fetch('count') #=> 4
33
+ ```
34
+
35
+ * InstaScraper::HTML::Media
36
+
37
+ ```ruby
38
+ media = InstaScraper::HTML::Media.new('BGFVAPPIaBQ')
39
+
40
+ media.json #=> #<Hashie::Mash...
41
+ media.json.deep_find('comments').fetch('count') #=> 1892
42
+ ```
43
+
44
+ * InstaScraper::JSON::AccountMedia
45
+
46
+ ```ruby
47
+ account_media = InstaScraper::JSON::AccountMedia.new('barna.kovacs.codes')
48
+
49
+ account_media.json #=> #<Hashie::Mash...
50
+ account_media.json.fetch('items') #=> [...]
51
+ ```
52
+
53
+ ## Development
54
+
55
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
56
+
57
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
58
+
59
+ ## Contributing
60
+
61
+ Bug reports and pull requests are welcome on GitHub at https://github.com/preciz/insta_scraper.
62
+
63
+
64
+ ## License
65
+
66
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
67
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << 'test'
6
+ t.libs << 'lib'
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task default: :test
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'bundler/setup'
4
+ require 'insta_scraper'
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require 'irb'
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,26 @@
1
# coding: utf-8
# Gem specification for insta_scraper.
#
# The gem's own lib directory is put on the load path first so the version
# constant can be required before the gem is installed.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'insta_scraper/version'

Gem::Specification.new do |spec|
  spec.name          = 'insta_scraper'
  spec.version       = InstaScraper::VERSION
  spec.authors       = ['preciz']

  spec.summary       = 'Scrapes Instagram accounts'
  spec.description   = 'Scrapes Instagram accounts public data'
  # Public repository, as referenced in the README's "Contributing" section.
  spec.homepage      = 'https://github.com/preciz/insta_scraper'
  spec.license       = 'MIT'

  # Package every file tracked by git except test/spec/feature files.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = 'exe'
  # Executables are the basenames of any packaged files under exe/.
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_dependency 'hashie', '3.4.4'

  spec.add_development_dependency 'bundler', '~> 1.12'
  spec.add_development_dependency 'rake', '~> 10.0'
  spec.add_development_dependency 'minitest', '~> 5.0'
end
@@ -0,0 +1,23 @@
1
module InstaScraper
  # Scraper for an Instagram account page.
  #
  # Built from a username, a ready-made HTML string, or both. The HTML
  # string, when given, is stored in @html, which the parent class's #html
  # returns in preference to fetching #url.
  class HTML::Account < InstaScraper::HTML
    attr_reader :username

    # @param username [String, nil] account name used to build #url
    # @param html [String, nil] already-fetched page source
    # @raise [ArgumentError] when neither argument is given
    def initialize(username = nil, html = nil)
      unless username || html
        raise ArgumentError, 'Provide a username or html string'
      end

      @html = html
      @username = username
    end

    # @return [String] address of the account page for #username
    def url
      "https://www.instagram.com/#{username}/"
    end

    # Parses the string returned by #shared_data as JSON and wraps it in a
    # Hashie::Mash extended with DeepFetch and DeepFind. Memoized.
    #
    # @return [Hashie::Mash]
    def json
      @json ||= begin
        mash = Hashie::Mash.new(::JSON.parse(shared_data))
        mash.extend(Hashie::Extensions::DeepFetch)
        mash.extend(Hashie::Extensions::DeepFind)
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module InstaScraper
  # Scraper for a single Instagram media (post) page.
  #
  # Built from a media code, a ready-made HTML string, or both. The HTML
  # string, when given, is stored in @html, which the parent class's #html
  # returns in preference to fetching #url.
  class HTML::Media < InstaScraper::HTML
    attr_reader :code

    # @param code [String, nil] media code used to build #url
    # @param html [String, nil] already-fetched page source
    # @raise [ArgumentError] when neither argument is given
    def initialize(code = nil, html = nil)
      unless code || html
        raise ArgumentError, 'Provide a code or html string'
      end

      @html = html
      @code = code
    end

    # @return [String] address of the media page for #code
    def url
      "https://www.instagram.com/p/#{code}/"
    end

    # Parses the string returned by #shared_data as JSON and wraps it in a
    # Hashie::Mash extended with DeepFetch and DeepFind. Memoized.
    #
    # @return [Hashie::Mash]
    def json
      @json ||= begin
        mash = Hashie::Mash.new(::JSON.parse(shared_data))
        mash.extend(Hashie::Extensions::DeepFetch)
        mash.extend(Hashie::Extensions::DeepFind)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module InstaScraper
  # Base class for scrapers that extract the value assigned to `sharedData`
  # from an HTML page.
  #
  # Subclasses supply #url; they may also set @html up front, in which case
  # nothing is downloaded.
  class HTML
    # Raised when the page has no line holding a complete
    # `sharedData ... };<` assignment.
    class SharedDataNotFound < StandardError; end

    # Distance from the start of the text "sharedData" to the first character
    # of the payload; equals the length of "sharedData = ".
    # NOTE(review): assumes the marker is always followed by " = " — confirm
    # against a live page.
    SHARED_DATA_OFFSET = 13

    # @return [String] the page source, downloaded from #url on first use
    def html
      @html ||= get_html
    end

    private

    # Downloads #url through open-uri.
    #
    # The string is parsed into a URI first instead of being handed to
    # Kernel#open, which would run a subprocess for input starting with "|"
    # and no longer opens URLs on Ruby 3+.
    #
    # @raise [URI::InvalidURIError] when #url is not a valid URI
    def get_html
      URI.parse(url).read
    end

    # First line of the page that mentions "sharedData". Memoized so the page
    # is scanned only once.
    #
    # @raise [SharedDataNotFound] when no such line exists
    def line_with_data
      @line_with_data ||=
        html.each_line.detect { |line| line.include?('sharedData') } ||
        raise(SharedDataNotFound, 'no line containing "sharedData" found in html')
    end

    # The text between "sharedData = " and the closing "}" of the first
    # "};<" that follows it, both braces included.
    #
    # @return [String]
    # @raise [SharedDataNotFound] when the marker or the terminator is missing
    def shared_data
      finish = shared_data_end
      raise SharedDataNotFound, 'no "};<" terminator after "sharedData" in html' unless finish

      line_with_data[shared_data_start..finish]
    end

    # Index of the first payload character within #line_with_data.
    def shared_data_start
      line_with_data.index('sharedData') + SHARED_DATA_OFFSET
    end

    # Index of the "}" that closes the payload, or nil when absent. The search
    # starts at the payload so an earlier "};<" on the same line is ignored.
    def shared_data_end
      line_with_data.index('};<', shared_data_start)
    end
  end
end
@@ -0,0 +1,20 @@
1
module InstaScraper
  # Scraper for the JSON media listing of an Instagram account.
  class JSON::AccountMedia < InstaScraper::JSON
    attr_reader :username

    # @param username [String] account name used to build #url
    def initialize(username)
      @username = username
    end

    # @return [String] address of the account's media endpoint
    def url
      "https://www.instagram.com/#{username}/media/"
    end

    # Parses #raw_json and wraps it in a Hashie::Mash extended with DeepFetch
    # and DeepFind. Memoized.
    #
    # @return [Hashie::Mash]
    def json
      @json ||= begin
        mash = Hashie::Mash.new(::JSON.parse(raw_json))
        mash.extend(Hashie::Extensions::DeepFetch)
        mash.extend(Hashie::Extensions::DeepFind)
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module InstaScraper
  # Base class for scrapers that read a JSON endpoint.
  #
  # Subclasses supply #url.
  class JSON
    # @return [String] the response body, downloaded from #url on first use
    def raw_json
      @raw_json ||= get_json
    end

    private

    # Downloads #url through open-uri.
    #
    # The string is parsed into a URI first instead of being handed to
    # Kernel#open, which would run a subprocess for input starting with "|"
    # and no longer opens URLs on Ruby 3+.
    #
    # @raise [URI::InvalidURIError] when #url is not a valid URI
    def get_json
      URI.parse(url).read
    end
  end
end
@@ -0,0 +1,3 @@
1
module InstaScraper
  # Version string of the gem; read by the gemspec.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,13 @@
1
+ require 'insta_scraper/version'
2
+
3
+ require 'open-uri'
4
+ require 'json'
5
+
6
+ require 'hashie'
7
+
8
+ require 'insta_scraper/html'
9
+ require 'insta_scraper/html/account'
10
+ require 'insta_scraper/html/media'
11
+
12
+ require 'insta_scraper/json'
13
+ require 'insta_scraper/json/account_media'
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: insta_scraper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - preciz
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-06-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: hashie
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 3.4.4
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 3.4.4
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.12'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.12'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '5.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '5.0'
69
+ description: Scrapes Instagram accounts public data
70
+ email:
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - ".gitignore"
76
+ - ".travis.yml"
77
+ - Gemfile
78
+ - LICENSE.txt
79
+ - README.md
80
+ - Rakefile
81
+ - bin/console
82
+ - bin/setup
83
+ - insta_scraper.gemspec
84
+ - lib/insta_scraper.rb
85
+ - lib/insta_scraper/html.rb
86
+ - lib/insta_scraper/html/account.rb
87
+ - lib/insta_scraper/html/media.rb
88
+ - lib/insta_scraper/json.rb
89
+ - lib/insta_scraper/json/account_media.rb
90
+ - lib/insta_scraper/version.rb
91
+ homepage:
92
+ licenses:
93
+ - MIT
94
+ metadata: {}
95
+ post_install_message:
96
+ rdoc_options: []
97
+ require_paths:
98
+ - lib
99
+ required_ruby_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ required_rubygems_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: '0'
109
+ requirements: []
110
+ rubyforge_project:
111
+ rubygems_version: 2.4.6
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: Scrapes Instagram accounts
115
+ test_files: []