pork_sandwich 0.4.10 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.10
1
+ 0.4.11
@@ -9,6 +9,7 @@ module Pork
9
9
  @from_user = options[:from_user]
10
10
  @db_ids_created = []
11
11
  @collect_users = options[:collect_users]
12
+ @pulls_per_hour = options[:pulls_per_hour]? options[:pulls_per_hour] : 1500
12
13
  end
13
14
 
14
15
  def historical_pull
@@ -16,24 +17,31 @@ module Pork
16
17
  @search_params.from(@from_user) if @from_user
17
18
  begin
18
19
  loop do
20
+ time_at_start = Time.now
19
21
  if $PORK_LOG
20
22
  $PORK_LOG.write("historical pull, query = #{@query}, max_id = #{@search_params.query[:max_id].to_s}")
21
23
  end
22
- @tweets_pulled = @search_params.dup.fetch.results
24
+ @return_data = @search_params.dup.fetch
25
+ if @return_data.error == "You have been rate limited. Enhance your calm."
26
+ raise Pork::RateLimitExceeded
27
+ end
28
+ @tweets_pulled = @return_data.results
23
29
  @tweets_pulled.each do |tweet|
24
30
  tweet.status_id = tweet.id
25
- @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
31
+ # @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
26
32
  # $CRAWLER.append(tweet.from_user) if @collect_users
27
33
  @current_count += 1
28
34
  if reached_desired_count?
29
35
  break
30
36
  end
31
37
  end
38
+
32
39
  if reached_desired_count? or @search_params.query[:max_id] == @tweets_pulled.last.id
33
40
  break
34
41
  else
35
42
  @search_params.query[:max_id] = @tweets_pulled.last.id
36
43
  end
44
+ manage_pull_rate(time_at_start)
37
45
  end
38
46
  rescue Twitter::Unavailable
39
47
  if $PORK_LOG
@@ -50,7 +58,8 @@ module Pork
50
58
  if $PORK_LOG
51
59
  $PORK_LOG.write("Error: JSON Parsing error, trying to skip past problem tweet")
52
60
  end
53
- @search_params.query[:max_id] -= 1000
61
+ @search_params.query[:max_id] -= 1000 if @search_params.query[:max_id]
62
+ manage_pull_rate
54
63
  retry
55
64
  rescue Errno::ETIMEDOUT
56
65
  if $PORK_LOG
@@ -64,10 +73,13 @@ module Pork
64
73
  end
65
74
  sleep 30
66
75
  retry
67
- # rescue NoMethodError
68
- # p "Rate limited; holding off for a bit then trying again"
69
- # sleep 600
70
- # retry
76
+ rescue Pork::RateLimitExceeded
77
+ if $PORK_LOG
78
+ $PORK_LOG.write("ERROR: Rate limit exceeded; holding off for a bit then trying again")
79
+ end
80
+ sleep 300
81
+ reduce_pull_rate
82
+ retry
71
83
  end
72
84
  return true
73
85
  end
@@ -80,5 +92,22 @@ module Pork
80
92
  end
81
93
  end
82
94
 
95
# Throttles the caller so pulls happen no faster than @pulls_per_hour.
#
# The time slot one pull may occupy is 3600 seconds divided by
# @pulls_per_hour. Whatever part of that slot has not already been used
# since +time_at_start+ is slept away; a pull that overran its slot is
# not delayed further.
#
# @param time_at_start [Time] moment the current pull began. Defaults to
#   the current time so the method can be invoked with no argument (one
#   rescue branch of historical_pull does exactly that); in that case
#   the whole slot is slept.
# @return [Integer] the value returned by Kernel#sleep
def manage_pull_rate(time_at_start = Time.now)
  desired_pause = 3600.0 / @pulls_per_hour
  pull_duration = Time.now - time_at_start
  # Never hand sleep a negative duration: clamp at zero.
  sleep [desired_pause - pull_duration, 0].max
end
105
+
106
# Backs the pull rate off by 100 pulls per hour.
#
# The reduction is only applied while @pulls_per_hour is above 100, so
# the rate always stays positive.
#
# @return [Integer, nil] the lowered rate, or nil when nothing changed
def reduce_pull_rate
  return unless @pulls_per_hour > 100

  @pulls_per_hour -= 100
end
111
+
83
112
  end
84
113
  end
data/lib/pork_sandwich.rb CHANGED
@@ -12,6 +12,9 @@ module Pork
12
12
  #Object.send :undef_method, :id
13
13
  require 'twitter'
14
14
  $SAVER = Pork::Saver.new
15
+
16
# Error raised by the historical pull when the search response reports
# that the client has been rate limited; it is rescued there to pause
# and retry at a lower pull rate.
class RateLimitExceeded < StandardError; end
15
18
  end
16
19
 
17
20
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pork_sandwich
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.10
4
+ version: 0.4.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sam Gilbert