instagram-crawler 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e460402bed3e496955845e5920100d7b216edda7a94e5f0635a0f5770fc34a44
4
- data.tar.gz: 2f51f10e8c9f4f1ff485232169b4b375f9c307b9b95b762fe24e4d8f048a875c
3
+ metadata.gz: d477137a184a3ac845344d26e54118733ea6d2f0e3bf3aacbd743b5f11db5e04
4
+ data.tar.gz: e1f145d7032addca16e3eba2654e35a7348a45e8e99c1a4c4d380d784cd83940
5
5
  SHA512:
6
- metadata.gz: 91a28b2486726b71a6dd011845af1c7e2d0f09cbea26f50c86e8b59a285aded39606b98f083c3d43913c894fc0c97e465e6650780652ec5d8e32472cca98a970
7
- data.tar.gz: 47b5d98b9d12b30fe466e86140ab96dbf3e3359616b7bfd73b9e60e3616145516f45b9e6b962c2f4c95700d2647bb08c481dd3cb3acc54e6d3e7e8b20e311506
6
+ metadata.gz: 65893d3b488960667d1ea31157a74e5785eae92550fcac8ae6cd28f6b6203bba12e87d04a9d0456ff4e2d86f8a3494f3dac05456d1a92c2111f44816828463df
7
+ data.tar.gz: 576b2455b75c6317ffbd9a6e5973e1c4ebbf362825946860d39fcc14f12176ab46724709814e61b625969f05aa50d6e4b0cc8c74962e6f76aaccc6f2c8f81c71
data/README.md CHANGED
@@ -48,6 +48,14 @@ instagram-crawler -u <user_name>
48
48
  instagram-crawler -u <user_name> -d -a 20181120
49
49
  ```
50
50
 
51
+ ### Download files before this date (YYYYMMDD)
52
+
53
+ `-b || --before `
54
+
55
+ ```ruby
56
+ instagram-crawler -u <user_name> -d -b 20181120
57
+ ```
58
+
51
59
  ### Generate log file
52
60
 
53
61
  `-l || --log `
@@ -77,6 +85,7 @@ options:
77
85
  -u, --username USERNAME Instagram username
78
86
  -d, --download Download files
79
87
  -a, --after DATE Download files after this date (YYYYMMDD)
88
+ -b, --before DATE Download files before this date (YYYYMMDD)
80
89
  -l, --log Generate a log file in the current directory
81
90
  -P, --proxyname PROXYNAME Specify proxyname of your proxy server
82
91
  -p, --port PORT Specify port of your proxy server (default port: 8080)
@@ -98,6 +107,15 @@ docker pull mgleon08/instagram-crawler
98
107
  docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
99
108
  ```
100
109
 
110
+
111
+ ## Terms of Use
112
+
113
+ [Instagram Terms of Use](https://www.instagram.com/about/legal/terms/before-january-19-2013/)
114
+
115
+ > 9. You must not access Instagram's private API by any other means other than the Instagram application itself.
116
+ > 10. You must not crawl, scrape, or otherwise cache any content from Instagram including but not limited to user profiles and photos.
117
+
118
+
101
119
  ## Contributing
102
120
 
103
121
  Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
@@ -3,7 +3,7 @@ module InstagramCrawler
3
3
  @default_url = "https://www.instagram.com".freeze
4
4
  class << self
5
5
  attr_reader :default_url, :user_name, :base_url, :base_path,
6
- :log_path, :after_date, :parse_date
6
+ :log_path, :after_date, :before_date, :parse_after_date, :parse_before_date
7
7
  attr_accessor :download, :proxyname
8
8
  attr_writer :port
9
9
 
@@ -16,7 +16,12 @@ module InstagramCrawler
16
16
 
17
17
  def after_date=(after_date)
18
18
  @after_date = after_date
19
- @parse_date = Time.parse(after_date).to_i
19
+ @parse_after_date = Time.parse(after_date).to_i
20
+ end
21
+
22
+ def before_date=(before_date)
23
+ @before_date = before_date
24
+ @parse_before_date = Time.parse(before_date).to_i
20
25
  end
21
26
 
22
27
  def port
@@ -19,6 +19,7 @@ module InstagramCrawler
19
19
  opts.on('-u', '--username USERNAME', 'Instagram username') { |user_name| Config.user_name = user_name }
20
20
  opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
21
21
  opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
22
+ opts.on('-b', '--before DATE', 'Download files before this date (YYYYMMDD)') { |before_date| Config.before_date = before_date }
22
23
  opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
23
24
  opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
24
25
  opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
@@ -22,8 +22,8 @@ module InstagramCrawler
22
22
  Time.at(ts).strftime('%Y-%m-%dT%H:%M')
23
23
  end
24
24
 
25
- def check_time(time)
26
- if Config.after_date && (Config.parse_date > time)
25
+ def check_after_time(time)
26
+ if Config.after_date && (Config.parse_after_date > time)
27
27
  Logger.info "\nSuccess, the files after #{Config.after_date} have been downloaded!".light_green
28
28
  exit
29
29
  end
@@ -45,7 +45,8 @@ module InstagramCrawler
45
45
  def loop_edges(edges)
46
46
  edges.each do |edge|
47
47
  node = edge["node"]
48
- check_time(node["taken_at_timestamp"])
48
+ next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
49
+ check_after_time(node["taken_at_timestamp"])
49
50
  time = parse_to_date(node["taken_at_timestamp"])
50
51
  page_url = "https://www.instagram.com/p/#{node["shortcode"]}/"
51
52
 
@@ -26,7 +26,8 @@ module InstagramCrawler
26
26
  def loop_edges(edges)
27
27
  edges.each do |edge|
28
28
  node = edge["node"]
29
- check_time(node["taken_at_timestamp"])
29
+ next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
30
+ check_after_time(node["taken_at_timestamp"])
30
31
  time = parse_to_date(node["taken_at_timestamp"])
31
32
 
32
33
  if node["is_video"]
@@ -1,3 +1,3 @@
1
1
  module InstagramCrawler
2
- VERSION = "0.2.1".freeze
2
+ VERSION = "0.3.0".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: instagram-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Ji
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-27 00:00:00.000000000 Z
11
+ date: 2019-04-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler