pork_sandwich 0.4.10 → 0.4.11
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/pork_sandwich/search.rb +36 -7
- data/lib/pork_sandwich.rb +3 -0
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.11
|
data/lib/pork_sandwich/search.rb
CHANGED
@@ -9,6 +9,7 @@ module Pork
|
|
9
9
|
@from_user = options[:from_user]
|
10
10
|
@db_ids_created = []
|
11
11
|
@collect_users = options[:collect_users]
|
12
|
+
@pulls_per_hour = options[:pulls_per_hour]? options[:pulls_per_hour] : 1500
|
12
13
|
end
|
13
14
|
|
14
15
|
def historical_pull
|
@@ -16,24 +17,31 @@ module Pork
|
|
16
17
|
@search_params.from(@from_user) if @from_user
|
17
18
|
begin
|
18
19
|
loop do
|
20
|
+
time_at_start = Time.now
|
19
21
|
if $PORK_LOG
|
20
22
|
$PORK_LOG.write("historical pull, query = #{@query}, max_id = #{@search_params.query[:max_id].to_s}")
|
21
23
|
end
|
22
|
-
@
|
24
|
+
@return_data = @search_params.dup.fetch
|
25
|
+
if @return_data.error == "You have been rate limited. Enhance your calm."
|
26
|
+
raise Pork::RateLimitExceeded
|
27
|
+
end
|
28
|
+
@tweets_pulled = @return_data.results
|
23
29
|
@tweets_pulled.each do |tweet|
|
24
30
|
tweet.status_id = tweet.id
|
25
|
-
@db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
|
31
|
+
# @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
|
26
32
|
# $CRAWLER.append(tweet.from_user) if @collect_users
|
27
33
|
@current_count += 1
|
28
34
|
if reached_desired_count?
|
29
35
|
break
|
30
36
|
end
|
31
37
|
end
|
38
|
+
|
32
39
|
if reached_desired_count? or @search_params.query[:max_id] == @tweets_pulled.last.id
|
33
40
|
break
|
34
41
|
else
|
35
42
|
@search_params.query[:max_id] = @tweets_pulled.last.id
|
36
43
|
end
|
44
|
+
manage_pull_rate(time_at_start)
|
37
45
|
end
|
38
46
|
rescue Twitter::Unavailable
|
39
47
|
if $PORK_LOG
|
@@ -50,7 +58,8 @@ module Pork
|
|
50
58
|
if $PORK_LOG
|
51
59
|
$PORK_LOG.write("Error: JSON Parsing error, trying to skip past problem tweet")
|
52
60
|
end
|
53
|
-
@search_params.query[:max_id] -= 1000
|
61
|
+
@search_params.query[:max_id] -= 1000 if @search_params.query[:max_id]
|
62
|
+
manage_pull_rate
|
54
63
|
retry
|
55
64
|
rescue Errno::ETIMEDOUT
|
56
65
|
if $PORK_LOG
|
@@ -64,10 +73,13 @@ module Pork
|
|
64
73
|
end
|
65
74
|
sleep 30
|
66
75
|
retry
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
76
|
+
rescue Pork::RateLimitExceeded
|
77
|
+
if $PORK_LOG
|
78
|
+
$PORK_LOG.write("ERROR: Rate limit exceeded; holding off for a bit then trying again")
|
79
|
+
end
|
80
|
+
sleep 300
|
81
|
+
reduce_pull_rate
|
82
|
+
retry
|
71
83
|
end
|
72
84
|
return true
|
73
85
|
end
|
@@ -80,5 +92,22 @@ module Pork
|
|
80
92
|
end
|
81
93
|
end
|
82
94
|
|
95
|
+
def manage_pull_rate(time_at_start)
|
96
|
+
desired_pause = 1.0 / (@pulls_per_hour / 60.0 / 60.0)
|
97
|
+
pull_duration = Time.now - time_at_start
|
98
|
+
if desired_pause - pull_duration > 0
|
99
|
+
actual_pause = desired_pause - pull_duration
|
100
|
+
else
|
101
|
+
actual_pause = 0
|
102
|
+
end
|
103
|
+
sleep actual_pause
|
104
|
+
end
|
105
|
+
|
106
|
+
def reduce_pull_rate
|
107
|
+
if @pulls_per_hour > 100
|
108
|
+
@pulls_per_hour -= 100
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
83
112
|
end
|
84
113
|
end
|
data/lib/pork_sandwich.rb
CHANGED