instagram-crawler 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e460402bed3e496955845e5920100d7b216edda7a94e5f0635a0f5770fc34a44
4
- data.tar.gz: 2f51f10e8c9f4f1ff485232169b4b375f9c307b9b95b762fe24e4d8f048a875c
3
+ metadata.gz: d477137a184a3ac845344d26e54118733ea6d2f0e3bf3aacbd743b5f11db5e04
4
+ data.tar.gz: e1f145d7032addca16e3eba2654e35a7348a45e8e99c1a4c4d380d784cd83940
5
5
  SHA512:
6
- metadata.gz: 91a28b2486726b71a6dd011845af1c7e2d0f09cbea26f50c86e8b59a285aded39606b98f083c3d43913c894fc0c97e465e6650780652ec5d8e32472cca98a970
7
- data.tar.gz: 47b5d98b9d12b30fe466e86140ab96dbf3e3359616b7bfd73b9e60e3616145516f45b9e6b962c2f4c95700d2647bb08c481dd3cb3acc54e6d3e7e8b20e311506
6
+ metadata.gz: 65893d3b488960667d1ea31157a74e5785eae92550fcac8ae6cd28f6b6203bba12e87d04a9d0456ff4e2d86f8a3494f3dac05456d1a92c2111f44816828463df
7
+ data.tar.gz: 576b2455b75c6317ffbd9a6e5973e1c4ebbf362825946860d39fcc14f12176ab46724709814e61b625969f05aa50d6e4b0cc8c74962e6f76aaccc6f2c8f81c71
data/README.md CHANGED
@@ -48,6 +48,14 @@ instagram-crawler -u <user_name>
48
48
  instagram-crawler -u <user_name> -d -a 20181120
49
49
  ```
50
50
 
51
+ ### Download files before this date (YYYYMMDD)
52
+
53
+ `-b || --before `
54
+
55
+ ```ruby
56
+ instagram-crawler -u <user_name> -d -b 20181120
57
+ ```
58
+
51
59
  ### Generate log file
52
60
 
53
61
  `-l || --log `
@@ -77,6 +85,7 @@ options:
77
85
  -u, --username USERNAME Instagram username
78
86
  -d, --download Download files
79
87
  -a, --after DATE Download files after this date (YYYYMMDD)
88
+ -b, --before DATE Download files before this date (YYYYMMDD)
80
89
  -l, --log Generate a log file in the current directory
81
90
  -P, --proxyname PROXYNAME Specify proxyname of your proxy server
82
91
  -p, --port PORT Specify port of your proxy server (default port: 8080)
@@ -98,6 +107,15 @@ docker pull mgleon08/instagram-crawler
98
107
  docker run -it --rm -v $PWD/instagram-crawler:/instagram-crawler -e sessionid=$sessionid --name marvel mgleon08/instagram-crawler -u marvel -a 20181124 -d -l
99
108
  ```
100
109
 
110
+
111
+ ## Terms of Use
112
+
113
+ [Instagram Terms of Use](https://www.instagram.com/about/legal/terms/before-january-19-2013/)
114
+
115
+ > 9.You must not access Instagram's private API by any other means other than the Instagram application itself.
116
+ 10.You must not crawl, scrape, or otherwise cache any content from Instagram including but not limited to user profiles and photos.
117
+
118
+
101
119
  ## Contributing
102
120
 
103
121
  Bug reports and pull requests are welcome on GitHub at [`https://github.com/mgleon08/instagram-crawler/pulls`](https://github.com/mgleon08/instagram-crawler/pulls)
@@ -3,7 +3,7 @@ module InstagramCrawler
3
3
  @default_url = "https://www.instagram.com".freeze
4
4
  class << self
5
5
  attr_reader :default_url, :user_name, :base_url, :base_path,
6
- :log_path, :after_date, :parse_date
6
+ :log_path, :after_date, :before_date, :parse_after_date, :parse_before_date
7
7
  attr_accessor :download, :proxyname
8
8
  attr_writer :port
9
9
 
@@ -16,7 +16,12 @@ module InstagramCrawler
16
16
 
17
17
  def after_date=(after_date)
18
18
  @after_date = after_date
19
- @parse_date = Time.parse(after_date).to_i
19
+ @parse_after_date = Time.parse(after_date).to_i
20
+ end
21
+
22
+ def before_date=(before_date)
23
+ @before_date = before_date
24
+ @parse_before_date = Time.parse(before_date).to_i
20
25
  end
21
26
 
22
27
  def port
@@ -19,6 +19,7 @@ module InstagramCrawler
19
19
  opts.on('-u', '--username USERNAME', 'Instagram username') { |user_name| Config.user_name = user_name }
20
20
  opts.on('-d', '--download', 'Download files') { |download| Config.download = true }
21
21
  opts.on('-a', '--after DATE', 'Download files after this date (YYYYMMDD)') { |after_date| Config.after_date = after_date }
22
+ opts.on('-b', '--before DATE', 'Download files before this date (YYYYMMDD)') { |before_date| Config.before_date = before_date }
22
23
  opts.on('-l', '--log', 'Generate a log file in the current directory') { self.log = true }
23
24
  opts.on('-P', '--proxyname PROXYNAME', 'Specify proxyname of your proxy server') { |proxyname| Config.proxyname = proxyname }
24
25
  opts.on('-p', '--port PORT', 'Specify port of your proxy server (default port: 8080)') { |port| Config.port = port }
@@ -22,8 +22,8 @@ module InstagramCrawler
22
22
  Time.at(ts).strftime('%Y-%m-%dT%H:%M')
23
23
  end
24
24
 
25
- def check_time(time)
26
- if Config.after_date && (Config.parse_date > time)
25
+ def check_after_time(time)
26
+ if Config.after_date && (Config.parse_after_date > time)
27
27
  Logger.info "\nSuccess, the files after #{Config.after_date} have been downloaded!".light_green
28
28
  exit
29
29
  end
@@ -45,7 +45,8 @@ module InstagramCrawler
45
45
  def loop_edges(edges)
46
46
  edges.each do |edge|
47
47
  node = edge["node"]
48
- check_time(node["taken_at_timestamp"])
48
+ next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
49
+ check_after_time(node["taken_at_timestamp"])
49
50
  time = parse_to_date(node["taken_at_timestamp"])
50
51
  page_url = "https://www.instagram.com/p/#{node["shortcode"]}/"
51
52
 
@@ -26,7 +26,8 @@ module InstagramCrawler
26
26
  def loop_edges(edges)
27
27
  edges.each do |edge|
28
28
  node = edge["node"]
29
- check_time(node["taken_at_timestamp"])
29
+ next if Config.before_date && (Config.parse_before_date < node["taken_at_timestamp"])
30
+ check_after_time(node["taken_at_timestamp"])
30
31
  time = parse_to_date(node["taken_at_timestamp"])
31
32
 
32
33
  if node["is_video"]
@@ -1,3 +1,3 @@
1
1
  module InstagramCrawler
2
- VERSION = "0.2.1".freeze
2
+ VERSION = "0.3.0".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: instagram-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leon Ji
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-03-27 00:00:00.000000000 Z
11
+ date: 2019-04-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler