pork_sandwich 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/pork_sandwich/search.rb +36 -7
- data/lib/pork_sandwich.rb +3 -0
- metadata +1 -1
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.4.10
|
1
|
+
0.4.11
|
data/lib/pork_sandwich/search.rb
CHANGED
@@ -9,6 +9,7 @@ module Pork
|
|
9
9
|
@from_user = options[:from_user]
|
10
10
|
@db_ids_created = []
|
11
11
|
@collect_users = options[:collect_users]
|
12
|
+
@pulls_per_hour = options[:pulls_per_hour]? options[:pulls_per_hour] : 1500
|
12
13
|
end
|
13
14
|
|
14
15
|
def historical_pull
|
@@ -16,24 +17,31 @@ module Pork
|
|
16
17
|
@search_params.from(@from_user) if @from_user
|
17
18
|
begin
|
18
19
|
loop do
|
20
|
+
time_at_start = Time.now
|
19
21
|
if $PORK_LOG
|
20
22
|
$PORK_LOG.write("historical pull, query = #{@query}, max_id = #{@search_params.query[:max_id].to_s}")
|
21
23
|
end
|
22
|
-
@
|
24
|
+
@return_data = @search_params.dup.fetch
|
25
|
+
if @return_data.error == "You have been rate limited. Enhance your calm."
|
26
|
+
raise Pork::RateLimitExceeded
|
27
|
+
end
|
28
|
+
@tweets_pulled = @return_data.results
|
23
29
|
@tweets_pulled.each do |tweet|
|
24
30
|
tweet.status_id = tweet.id
|
25
|
-
@db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
|
31
|
+
# @db_ids_created << $SAVER.save(tweet, &TWEET_SAVE).id
|
26
32
|
# $CRAWLER.append(tweet.from_user) if @collect_users
|
27
33
|
@current_count += 1
|
28
34
|
if reached_desired_count?
|
29
35
|
break
|
30
36
|
end
|
31
37
|
end
|
38
|
+
|
32
39
|
if reached_desired_count? or @search_params.query[:max_id] == @tweets_pulled.last.id
|
33
40
|
break
|
34
41
|
else
|
35
42
|
@search_params.query[:max_id] = @tweets_pulled.last.id
|
36
43
|
end
|
44
|
+
manage_pull_rate(time_at_start)
|
37
45
|
end
|
38
46
|
rescue Twitter::Unavailable
|
39
47
|
if $PORK_LOG
|
@@ -50,7 +58,8 @@ module Pork
|
|
50
58
|
if $PORK_LOG
|
51
59
|
$PORK_LOG.write("Error: JSON Parsing error, trying to skip past problem tweet")
|
52
60
|
end
|
53
|
-
@search_params.query[:max_id] -= 1000
|
61
|
+
@search_params.query[:max_id] -= 1000 if @search_params.query[:max_id]
|
62
|
+
manage_pull_rate
|
54
63
|
retry
|
55
64
|
rescue Errno::ETIMEDOUT
|
56
65
|
if $PORK_LOG
|
@@ -64,10 +73,13 @@ module Pork
|
|
64
73
|
end
|
65
74
|
sleep 30
|
66
75
|
retry
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
76
|
+
rescue Pork::RateLimitExceeded
|
77
|
+
if $PORK_LOG
|
78
|
+
$PORK_LOG.write("ERROR: Rate limit exceeded; holding off for a bit then trying again")
|
79
|
+
end
|
80
|
+
sleep 300
|
81
|
+
reduce_pull_rate
|
82
|
+
retry
|
71
83
|
end
|
72
84
|
return true
|
73
85
|
end
|
@@ -80,5 +92,22 @@ module Pork
|
|
80
92
|
end
|
81
93
|
end
|
82
94
|
|
95
|
+
# Throttles the search loop so successive pulls are spaced to match
# @pulls_per_hour.
#
# The target spacing between pulls is 3600.0 / @pulls_per_hour seconds. Time
# already spent since +time_at_start+ counts toward that spacing; only the
# remainder (never a negative amount) is slept.
#
# time_at_start - Time at which the current pull began. Defaults to the moment
#                 of the call, so a bare `manage_pull_rate` (as issued from the
#                 JSON-parsing-error rescue path before `retry`) sleeps for one
#                 full interval instead of raising ArgumentError.
#
# Returns whatever Kernel#sleep returns.
def manage_pull_rate(time_at_start = Time.now)
  desired_pause = 3600.0 / @pulls_per_hour
  pull_duration = Time.now - time_at_start
  # Clamp at zero: a pull that already took longer than the interval needs no
  # extra delay, and sleep rejects negative durations.
  actual_pause = [desired_pause - pull_duration, 0].max
  sleep actual_pause
end
|
105
|
+
|
106
|
+
# Lowers the pull-rate target by 100 pulls per hour, but only while the current
# value is still above 100; otherwise @pulls_per_hour is left unchanged.
def reduce_pull_rate
  return unless @pulls_per_hour > 100

  @pulls_per_hour -= 100
end
|
111
|
+
|
83
112
|
end
|
84
113
|
end
|
data/lib/pork_sandwich.rb
CHANGED