pork_sandwich 0.4.10 → 0.4.11

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.10
1
+ 0.4.11
@@ -9,6 +9,7 @@ module Pork
9
9
  @from_user = options[:from_user]
10
10
  @db_ids_created = []
11
11
  @collect_users = options[:collect_users]
12
+ @pulls_per_hour = options[:pulls_per_hour]? options[:pulls_per_hour] : 1500
12
13
  end
13
14
 
14
15
  def historical_pull
@@ -16,24 +17,31 @@ module Pork
16
17
  @search_params.from(@from_user) if @from_user
17
18
  begin
18
19
  loop do
20
+ time_at_start = Time.now
19
21
  if $PORK_LOG
20
22
  $PORK_LOG.write("historical pull, query = #{@query}, max_id = #{@search_params.query[:max_id].to_s}")
21
23
  end
22
- @tweets_pulled = @search_params.dup.fetch.results
24
+ @return_data = @search_params.dup.fetch
25
+ if @return_data.error == "You have been rate limited. Enhance your calm."
26
+ raise Pork::RateLimitExceeded
27
+ end
28
+ @tweets_pulled = @return_data.results
23
29
  @tweets_pulled.each do |tweet|
24
30
  tweet.status_id = tweet.id
25
- @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
31
+ # @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
26
32
  # $CRAWLER.append(tweet.from_user) if @collect_users
27
33
  @current_count += 1
28
34
  if reached_desired_count?
29
35
  break
30
36
  end
31
37
  end
38
+
32
39
  if reached_desired_count? or @search_params.query[:max_id] == @tweets_pulled.last.id
33
40
  break
34
41
  else
35
42
  @search_params.query[:max_id] = @tweets_pulled.last.id
36
43
  end
44
+ manage_pull_rate(time_at_start)
37
45
  end
38
46
  rescue Twitter::Unavailable
39
47
  if $PORK_LOG
@@ -50,7 +58,8 @@ module Pork
50
58
  if $PORK_LOG
51
59
  $PORK_LOG.write("Error: JSON Parsing error, trying to skip past problem tweet")
52
60
  end
53
- @search_params.query[:max_id] -= 1000
61
+ @search_params.query[:max_id] -= 1000 if @search_params.query[:max_id]
62
+ manage_pull_rate
54
63
  retry
55
64
  rescue Errno::ETIMEDOUT
56
65
  if $PORK_LOG
@@ -64,10 +73,13 @@ module Pork
64
73
  end
65
74
  sleep 30
66
75
  retry
67
- # rescue NoMethodError
68
- # p "Rate limited; holding off for a bit then trying again"
69
- # sleep 600
70
- # retry
76
+ rescue Pork::RateLimitExceeded
77
+ if $PORK_LOG
78
+ $PORK_LOG.write("ERROR: Rate limit exceeded; holding off for a bit then trying again")
79
+ end
80
+ sleep 300
81
+ reduce_pull_rate
82
+ retry
71
83
  end
72
84
  return true
73
85
  end
@@ -80,5 +92,22 @@ module Pork
80
92
  end
81
93
  end
82
94
 
95
# Paces successive pulls so that no more than @pulls_per_hour requests are
# issued per hour: sleeps for whatever is left of the per-pull time budget
# after subtracting the time the pull itself already took.
#
# @param time_at_start [Time] moment the current pull began. Defaults to the
#   time of the call, so a caller that has no start time (this file invokes
#   the method with no argument from a retry path) waits one full interval
#   instead of raising ArgumentError.
# @return [Integer] whatever Kernel#sleep returns (seconds actually slept).
def manage_pull_rate(time_at_start = Time.now)
  # Seconds each pull is allowed to occupy at the configured hourly rate.
  seconds_per_pull = 3600.0 / @pulls_per_hour
  elapsed = Time.now - time_at_start
  remaining = seconds_per_pull - elapsed
  # Never pass a negative duration to sleep; a slow pull simply gets no pause.
  sleep(remaining > 0 ? remaining : 0)
end
105
+
106
# Backs the hourly pull budget off by 100 after a rate-limit response.
# Leaves the rate untouched (and returns nil) once it is 100 or lower;
# otherwise returns the newly lowered rate.
def reduce_pull_rate
  return unless @pulls_per_hour > 100

  @pulls_per_hour -= 100
end
111
+
83
112
  end
84
113
  end
data/lib/pork_sandwich.rb CHANGED
@@ -12,6 +12,9 @@ module Pork
12
12
  #Object.send :undef_method, :id
13
13
  require 'twitter'
14
14
  $SAVER = Pork::Saver.new
15
+
16
# Raised when the search API reports that the client has been rate limited.
class RateLimitExceeded < StandardError; end
15
18
  end
16
19
 
17
20
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pork_sandwich
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.10
4
+ version: 0.4.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Gilbert