pork_sandwich 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +0 -0
- data/Rakefile +79 -0
- data/VERSION +1 -0
- data/generators/pork_sandwich_migration/pork_sandwich_migration_generator.rb +7 -0
- data/generators/pork_sandwich_migration/templates/pork_sandwich_migration.rb +147 -0
- data/lib/pork/auth.rb +11 -0
- data/lib/pork/config.rb +3 -0
- data/lib/pork/crawler.rb +91 -0
- data/lib/pork/log.rb +16 -0
- data/lib/pork/puller.rb +141 -0
- data/lib/pork/reaction_processor.rb +140 -0
- data/lib/pork/saver.rb +211 -0
- data/lib/pork/search.rb +69 -0
- data/lib/pork/twitter_user.rb +83 -0
- data/lib/pork.rb +28 -0
- data/test/auth_test.rb +16 -0
- data/test/crawler_test.rb +52 -0
- data/test/factories.rb +56 -0
- data/test/fakewebs.rb +66 -0
- data/test/log_test.rb +14 -0
- data/test/puller_test.rb +51 -0
- data/test/reaction_processor_test.rb +46 -0
- data/test/saver_test.rb +110 -0
- data/test/schema.rb +120 -0
- data/test/search_test.rb +50 -0
- data/test/test_helper.rb +32 -0
- data/test/twitter_user_test.rb +66 -0
- metadata +100 -0
data/README
ADDED
File without changes
|
data/Rakefile
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Rakefile for the pork_sandwich gem.
#
# Provides the test task (default), database tasks (:connect, :environment,
# :migrate), the MetricFu code-metrics configuration and, when Jeweler is
# installed, the gem packaging/release tasks.
require 'rake/testtask'
require 'yaml'
require 'logger'
require 'active_record'
require 'rcov'
require 'metric_fu'
require 'acts-as-taggable-on'
require 'twitter'

task :default => :test

Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

desc "Migrate the database through scripts in db/migrate. Target specific version with VERSION=x"
task :migrate => :connect do
  ActiveRecord::Migrator.migrate('db/migrate', ENV["VERSION"] ? ENV["VERSION"].to_i : nil )
end

desc "Loads the database environment"
task :environment => :connect do
  ActiveRecord::Base.send :include, ActiveRecord::Acts::TaggableOn
  ActiveRecord::Base.send :include, ActiveRecord::Acts::Tagger

  # Resolve relative to this Rakefile instead of relying on "." being in
  # $LOAD_PATH (it no longer is on Ruby 1.9.2 and later).
  require File.expand_path('lib/pork', File.dirname(__FILE__))
end

# Connects ActiveRecord to the 'production' entry of config/database.yml and
# sends ActiveRecord logging to database.log (appended to, not truncated).
task :connect do
  # YAML.load_file closes the file itself; the previous File.open handle was
  # never closed.
  db_config = YAML.load_file('config/database.yml')['production']
  ActiveRecord::Base.establish_connection(db_config)
  # Given a filename, Logger opens the file in append mode and owns the handle.
  ActiveRecord::Base.logger = Logger.new('database.log')
end

MetricFu::Configuration.run do |config|
  # Define which metrics you want to use.
  # "stats" is left out because it is specific to Rails.
  config.metrics = [:churn, :saikuro, :flog, :flay, :reek, :roodi, :rcov]
  config.graphs  = [:flog, :flay, :reek, :roodi, :rcov]
  config.flay    = { :dirs_to_flay => ['lib'] }
  config.flog    = { :dirs_to_flog => ['lib'] }
  config.reek    = { :dirs_to_reek => ['lib'] }
  config.roodi   = { :dirs_to_roodi => ['lib'] }
  config.saikuro = { :output_directory => 'scratch_directory/saikuro',
                     :input_directory => ['lib'],
                     :cyclo => "",
                     :filter_cyclo => "0",
                     :warn_cyclo => "5",
                     :error_cyclo => "7",
                     :formater => "text"}
  config.churn   = { :start_date => "1 year ago", :minimum_churn_count => 10}
  config.rcov    = { :test_files => ['test/*_test.rb'],
                     :rcov_opts => ["--sort coverage",
                                    "--no-html",
                                    "--text-coverage",
                                    "--no-color",
                                    "--profile"]}
end

# Gem packaging tasks are optional: without Jeweler the remaining tasks still
# load and only a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "pork_sandwich"
    gemspec.summary = "A tool for pulling and storing delicious, delicious Twitter data"
    gemspec.description = "Ideal for pulling Twitter search tweets, tweets from a twitter account, twitter account info, twitter relationship data, and trends. All data is stored in a handy schema for easy access."
    gemspec.email = "sam.o.gilbert@gmail.com"
    gemspec.homepage = "http://github.com/sam1vp/pork_sandwich"
    gemspec.authors = ["Sam Gilbert", "Evan Burchard"]
    gemspec.add_dependency('acts-as-taggable-on', '>= 1.0.12')
    gemspec.add_dependency('twitter', '>= 0.7.9')
    gemspec.files = FileList['lib/pork/*.rb', 'lib/pork.rb', 'lib/table_classes/*.rb', 'generators/pork_sandwich_migration/*.rb', 'generators/pork_sandwich_migration/templates/*.rb', 'Rakefile', 'README', 'VERSION' ]
    # Expand the glob like gemspec.files does; a bare 'test/*.rb' string is
    # recorded as a literal (non-existent) file name.
    gemspec.test_files = FileList['test/*.rb']
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler not available. Install it with: gem install jeweler"
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# Creates (and drops) every table used by pork_sandwich: Twitter accounts,
# tweets, trends, tagging tables, follower/friend relationships, the
# trend join tables and the reaction tables.
class PorkSandwichMigration < ActiveRecord::Migration
  # Builds the schema, converts the time columns to time-zone-aware
  # timestamps and seeds the three reaction types.
  #
  # The original read `self.up` without `def`, which never defined the method
  # and left the class with an unbalanced `end`.
  def self.up
    create_table :twitter_accounts do |t|
      t.integer :twitter_id, :limit => 8
      t.integer :twitter_id_for_search, :limit => 8
      t.string :screen_name
      t.string :name
      t.string :location
      t.datetime :time_of_user_creation
      t.text :description
      t.string :profile_image_url
      t.string :url
      t.boolean :protected
      t.integer :followers_count
      t.string :profile_background_color
      t.string :profile_text_color
      t.string :profile_link_color
      t.string :profile_sidebar_fill_color
      t.string :profile_sidebar_border_color
      t.integer :friends_count
      t.integer :favourites_count
      t.integer :utc_offset
      t.string :time_zone
      t.string :profile_background_image_url
      t.boolean :profile_background_tile
      t.integer :statuses_count
      t.boolean :notifications
      t.boolean :verified
      t.timestamps
    end

    create_table :tweets do |t|
      t.string :text, :null => false
      t.datetime :time_of_tweet
      t.string :from_user, :null => false
      # NOTE(review): these two id columns have no :limit => 8, unlike
      # from_user_id_search below -- confirm whether that is intentional.
      t.integer :to_user_id
      t.integer :to_user_id_search
      t.string :to_user
      t.integer :status_id, :null => false, :limit => 8
      t.integer :from_user_id_search, :limit => 8
      t.string :iso_language_code
      t.string :source
      t.string :profile_image_url
      t.integer :twitter_account_id, :limit => 8
      t.boolean :truncated
      t.timestamps
    end

    create_table :trends do |t|
      t.string :topic
      t.timestamps
    end

    create_table :tags do |t|
      t.column :name, :string
    end

    create_table :taggings do |t|
      t.column :tag_id, :integer
      t.column :taggable_id, :integer
      t.column :tagger_id, :integer
      t.column :tagger_type, :string
      t.column :taggable_type, :string
      t.column :context, :string
      t.column :created_at, :datetime
    end

    # NOTE(review): :dependent => :destroy is normally an association option,
    # not a column option; it is kept on the columns below exactly as written.
    create_table :twitter_relationships do |t|
      t.integer :follower_id, :null => false, :dependent => :destroy, :limit => 8
      t.integer :friend_id, :null => false, :dependent => :destroy, :limit => 8
      t.boolean :current
      t.boolean :complete_follower_set
      t.boolean :complete_friend_set
      t.timestamps
    end

    create_table :trends_tweets, :id => false do |t|
      t.integer :tweet_id, :null => false, :dependent => :destroy, :limit => 8
      t.integer :trend_id, :null => false, :dependent => :destroy, :limit => 8
    end

    create_table :trends_twitter_accounts, :id => false do |t|
      t.integer :trend_id, :null => false, :dependent => :destroy, :limit => 8
      t.integer :twitter_account_id, :null => false, :dependent => :destroy, :limit => 8
    end

    create_table :reactions do |t|
      t.string :reaction_type, :limit => 20
      t.float :value
    end

    create_table :tweet_reactions do |t|
      t.integer :initiator_id, :null => false, :dependent => :destroy, :limit => 8
      t.integer :responder_id, :null => false, :dependent => :destroy, :limit => 8
      t.integer :tweet_id, :null => false, :dependent => :destroy, :limit => 8
      t.references :reaction
      t.boolean :current
      t.timestamps
    end

    # Convert time columns to "timestamp with time zone" using raw SQL
    # (PostgreSQL-style ALTER COLUMN ... TYPE syntax). An ordered list of
    # pairs keeps the statements in their original order.
    timestamptz_columns = [
      ['tweets',           'time_of_tweet'],
      ['tweets',           'created_at'],
      ['tweets',           'updated_at'],
      ['twitter_accounts', 'time_of_user_creation'],
      ['twitter_accounts', 'created_at'],
      ['twitter_accounts', 'updated_at'],
      ['taggings',         'created_at'],
      ['trends',           'created_at'],
      ['trends',           'updated_at'],
      ['tweet_reactions',  'created_at'],
      ['tweet_reactions',  'updated_at']
    ]
    timestamptz_columns.each do |table, column|
      execute "alter table #{table} " +
              "alter column #{column} type timestamp with time zone;"
    end

    # Seed the reaction types, skipping any that already exist.
    # Requires the Reaction model to be loaded when the migration runs.
    reaction_types = ['retweet', 'mention', 'reply']
    reaction_types.each do |r|
      unless Reaction.find_by_reaction_type(r)
        Reaction.create(:reaction_type => r)
      end
    end
  end

  # Drops every table created by +up+.
  def self.down
    drop_table :twitter_accounts
    drop_table :tweets
    drop_table :trends
    drop_table :tags
    drop_table :taggings
    drop_table :twitter_relationships
    drop_table :trends_tweets
    drop_table :trends_twitter_accounts
    drop_table :reactions
    drop_table :tweet_reactions
  end
end
|
data/lib/pork/auth.rb
ADDED
data/lib/pork/config.rb
ADDED
data/lib/pork/crawler.rb
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
module Pork
  # Runs a crawl lambda over a growing list of users for a number of passes.
  #
  # +users+ holds objects that respond to +search+, +crawled?+ and
  # +crawled=+ (Pork::TwitterUser instances are what +append+ creates).
  class Crawler
    attr_accessor :users, :depth, :crawl_type

    # users      - initial list of user objects (may be empty)
    # depth      - number of passes +crawl+ makes over the user list
    # crawl_type - one of 'followers', 'friends', 'follower_ids',
    #              'friend_ids'; any other value leaves crawl_type nil
    # count      - passed through untouched to the crawl lambda
    def initialize(users = [], depth = 0, crawl_type = nil, count = nil)
      @depth = depth
      @crawl_type = crawl_type_str_to_proc(crawl_type)
      @users = users
      @count = count
    end

    # Maps a crawl type name to its crawl lambda; returns nil for unknown names.
    #
    # Author's note carried over from the original: crawl type =
    # RT_TO_USER_CRAWL, RT_FROM_USER_CRAWL, 'mention_to_user',
    # 'mention_from_user', 'reply_to_user', 'reply_from_user'
    # (none of these are mapped here).
    def crawl_type_str_to_proc(str)
      {
        'followers'    => FOLLOWERS_CRAWL,
        'friends'      => FRIENDS_CRAWL,
        'follower_ids' => FOLLOWER_IDS_CRAWL,
        'friend_ids'   => FRIEND_IDS_CRAWL
      }[str]
    end

    # With users present: for each remaining depth level, calls the crawl
    # lambda on every user not yet crawled and marks it crawled. @depth is
    # decremented in place, so it is 0 after a full crawl.
    # With no users: calls the crawl lambda once with a nil user.
    #
    # Returns the (possibly grown) user list.
    def crawl(search_query = nil)
      if @users.empty?
        @crawl_type.call(nil, search_query, @count)
      else
        while @depth > 0
          # Iterate over a snapshot: the crawl lambda may append new users.
          @users.dup.each do |user|
            next if user.crawled?
            @crawl_type.call(user, search_query, @count)
            mark_crawled(user)
          end
          @depth -= 1
        end
      end
      @users
    end

    # Adds a new, uncrawled Pork::TwitterUser for +user+ unless a user with
    # the same search key is already listed. A String is treated as a screen
    # name, anything else as a Twitter id.
    #
    # Returns the user list when a user was added, nil otherwise.
    def append(user, user_info = nil)
      return if @users.any? { |u| u.search == user }

      key = user.is_a?(String) ? :twitter_screen_name : :twitter_id
      new_user = Pork::TwitterUser.new(:crawled => false, key => user)
      new_user.user_info = user_info if user_info
      @users << new_user
    end

    private

    # Flags every listed user sharing +user+'s search key as crawled.
    # The original compared each search key against the user object itself,
    # which does not match a plain id/screen-name key, so users were crawled
    # again on every depth pass. That comparison is kept as a fallback.
    def mark_crawled(user)
      @users.each do |u|
        u.crawled = true if u.search == user.search || u.search == user
      end
    end
  end
end
|
59
|
+
|
60
|
+
# Crawl strategies. Each lambda takes (user, search_query, count), logs its
# name through $LOG and delegates to the global $TWITERATOR or $PULLER.

# Runs a search iteration for +search_query+ and returns the query.
# +user+ and +count+ are accepted but not used.
SEARCH_CRAWL = lambda do |user, search_query, count|
  $LOG.info "SEARCH CRAWL"
  iterator_options = {:collect_users => true}
  search_options = {:search_query => search_query}
  $TWITERATOR.twiterate(iterator_options, search_options, &SEARCH_ITER)
  # @users.keys
  search_query
end

# Pulls the follower ids of +user+; returns whatever $PULLER.pull returns.
FOLLOWER_IDS_CRAWL = lambda do |user, search_query, count|
  $LOG.info "FOLLOWER IDS CRAWL"
  pull_options = {:user_id => user, :collect_users => true}
  $PULLER.pull(pull_options, &FOLLOWER_IDS_PULL)
end

# Pulls the friend ids of +user+; returns whatever $PULLER.pull returns.
FRIEND_IDS_CRAWL = lambda do |user, search_query, count|
  $LOG.info "FRIEND IDS CRAWL"
  pull_options = {:user_id => user, :collect_users => true}
  $PULLER.pull(pull_options, &FRIEND_IDS_PULL)
end

# Makes sure +user+ has a db_object (pulling it when missing), then iterates
# over the user's followers, passing +count+ to the iterator.
FOLLOWERS_CRAWL = lambda do |user, search_query, count|
  $LOG.info "FOLLOWERS CRAWL"
  user.db_object ||= $PULLER.pull({:user => user}, &USER_PULL)
  iterator_options = {:count => count}
  crawl_options = {:collect_users => true, :user => user}
  $TWITERATOR.twiterate(iterator_options, crawl_options, &FOLLOWERS_ITER)
end

# Makes sure +user+ has a db_object (pulling it when missing), then iterates
# over the user's friends, passing +count+ to the iterator.
FRIENDS_CRAWL = lambda do |user, search_query, count|
  $LOG.info "FRIENDS CRAWL"
  user.db_object ||= $PULLER.pull({:user => user}, &USER_PULL)
  iterator_options = {:count => count}
  crawl_options = {:collect_users => true, :user => user}
  $TWITERATOR.twiterate(iterator_options, crawl_options, &FRIENDS_ITER)
end
|
data/lib/pork/log.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module Pork
  # Thin wrapper around Logger that prefixes every message with the project,
  # the researcher and the current time.
  class Log
    attr_accessor :log, :researcher, :project, :output

    # output  - log destination handed to Logger.new (an IO or a file name)
    # options - optional :researcher and :project values used in the prefix
    def initialize(output, options = {})
      # The accessor existed but was never populated before.
      @output = output
      @log = Logger.new(output)
      # Use the Logger constant: assigning the String "info" is stored as-is
      # by older Logger versions, which then fail comparing it to a severity.
      @log.level = Logger::INFO
      @researcher = options[:researcher] if options[:researcher]
      @project = options[:project] if options[:project]
    end

    # Logs +message+ at INFO level as "<project>, <researcher>, <time>: <message>".
    # Interpolation lets non-String messages through; `+ message` raised
    # TypeError for them.
    def write(message)
      @log.info("#{@project}, #{@researcher}, #{Time.now}: #{message}")
    end
  end
end
|
data/lib/pork/puller.rb
ADDED
@@ -0,0 +1,141 @@
|
|
1
|
+
module Pork
  # Runs a "pull" block against the Twitter client, waiting and retrying on
  # the transient failures listed in +pull+.
  class Puller
    attr_accessor :auth_object

    # auth_object - the Twitter client handed to every pull block (may be nil)
    def initialize(auth_object = nil)
      @auth_object = auth_object
    end

    # Calls +pull_type+ with (user_object, auth_object) and returns its result.
    #
    # Twitter::Unavailable   - logs, sleeps 60 seconds, retries
    # Errno::ETIMEDOUT       - logs, sleeps 10 seconds, retries
    # Twitter::InformTwitter - logs, sleeps 30 seconds, retries
    # Twitter::NotFound      - logs and gives up; the return value is then
    #                          whatever $LOG.error returned
    #
    # Retries are unbounded. Twitter::Unauthorized and Crack::ParseError are
    # not rescued (their handlers were commented out by the original author).
    def pull(user_object, &pull_type)
      @user = user_object
      pull_type.call(@user, @auth_object)
    rescue Twitter::Unavailable
      $LOG.error "ERROR: Twitter unavailable, trying in 60"
      sleep 60
      retry
    rescue Twitter::NotFound
      $LOG.error "ERROR: Info target not found, trying to skip"
    rescue Errno::ETIMEDOUT
      $LOG.error "ERROR: Puller timed out, retrying in 10"
      sleep 10
      retry
    rescue Twitter::InformTwitter
      $LOG.error "ERROR: Twitter internal error, retrying in 30"
      sleep 30
      retry
    end
  end
end
|
35
|
+
|
36
|
+
|
37
|
+
|
38
|
+
|
39
|
+
# Pulls the account information for +user+ (looked up by user.search) and
# saves it through $SAVER.
#
# Returns {:pull_data => <raw API result>, :db_object => <saved record>}.
ACCOUNT_INFO = lambda do |user, auth_object|
  $LOG.info "USER PULL"
  # A local is used instead of the original @pull_data, which lived on the
  # top-level object and was shared by every pull lambda in this file.
  pull_data = auth_object.user(user.search)
  {:pull_data => pull_data, :db_object => $SAVER.save(pull_data, &TWITTER_ACCOUNT_SAVE)}
end
|
44
|
+
|
45
|
+
# Pulls every follower of +user+ page by page (cursor based), saving each
# follower account and a follower -> user relationship.
#
# Returns {:follower_relationship_db_ids => [ids of the saved relationships]}.
FOLLOWERS = lambda do |user, auth_object|
  user.pull_account_info
  # -1 is the cursor value sent for the first page.
  rules = {:user_id => user.twitter_id, :cursor => -1}
  follower_relationship_db_ids = []
  # unless user.desired_follower_count
  #   $SAVER.rules[:complete_follower_set] = true
  # end
  loop do
    page = auth_object.followers(rules)
    page.users.each do |follower_mash|
      db_user_object = $SAVER.save(follower_mash, &TWITTER_ACCOUNT_SAVE)
      follower = Pork::TwitterUser.new(:twitter_id => follower_mash.id,
                                       :twitter_screen_name => follower_mash.screen_name,
                                       :db_object => db_user_object)
      follower_relationship_db_ids << $SAVER.save({:friend => user, :follower => follower}, &RELATIONSHIP_SAVE).id
    end
    next_cursor = page.next_cursor
    # A cursor of 0 marks the last page. A missing (nil) cursor also stops
    # the loop: previously it reset the cursor to -1 and started over from
    # the first page forever.
    break if next_cursor.nil? || next_cursor == 0
    rules[:cursor] = next_cursor
  end
  {:follower_relationship_db_ids => follower_relationship_db_ids}
end
|
67
|
+
|
68
|
+
# Pulls the ids of every follower of +user+ in one call, saving an account
# stub and a follower -> user relationship for each id.
# $SAVER.rules[:complete_follower_set] is true while the relationships are
# saved and is reset to false afterwards.
#
# Returns {:follower_relationship_db_ids => [results of the relationship saves]}.
# NOTE(review): unlike FOLLOWERS, the saved relationship objects themselves
# (not their .id) are collected here -- confirm which one callers expect.
FOLLOWER_IDS = lambda do |user, auth_object|
  user.pull_account_info
  # rules[:user_id] = user.twitter_id
  $SAVER.rules[:complete_follower_set] = true
  begin
    follower_ids = auth_object.follower_ids({:user_id => user.twitter_id})
    follower_relationship_db_ids = follower_ids.map do |user_id|
      db_user_object = $SAVER.save(Pork::TwitterUser.new(:twitter_id => user_id), &TWITTER_ACCOUNT_SAVE)
      follower = Pork::TwitterUser.new(:twitter_id => user_id, :db_object => db_user_object)
      $SAVER.save({:friend => user, :follower => follower}, &RELATIONSHIP_SAVE)
    end
  ensure
    # Reset even when the pull or a save raises; otherwise later saves would
    # keep being flagged as part of a complete follower set.
    $SAVER.rules[:complete_follower_set] = false
  end
  {:follower_relationship_db_ids => follower_relationship_db_ids}
end
|
81
|
+
|
82
|
+
# Pulls every friend of +user+ page by page (cursor based), saving each
# friend account and a user -> friend relationship.
#
# Returns {:friend_relationship_db_ids => [ids of the saved relationships]}.
FRIENDS = lambda do |user, auth_object|
  user.pull_account_info
  # -1 is the cursor value sent for the first page.
  rules = {:user_id => user.twitter_id, :cursor => -1}
  friend_relationship_db_ids = []
  # unless user.desired_friend_count
  #   $SAVER.rules[:complete_friend_set] = true
  # end
  loop do
    page = auth_object.friends(rules)
    page.users.each do |friend_mash|
      db_user_object = $SAVER.save(friend_mash, &TWITTER_ACCOUNT_SAVE)
      friend = Pork::TwitterUser.new(:twitter_id => friend_mash.id,
                                     :twitter_screen_name => friend_mash.screen_name,
                                     :db_object => db_user_object)
      friend_relationship_db_ids << $SAVER.save({:friend => friend, :follower => user}, &RELATIONSHIP_SAVE).id
    end
    next_cursor = page.next_cursor
    # A cursor of 0 marks the last page. A missing (nil) cursor also stops
    # the loop: previously it reset the cursor to -1 and started over from
    # the first page forever.
    break if next_cursor.nil? || next_cursor == 0
    rules[:cursor] = next_cursor
  end
  {:friend_relationship_db_ids => friend_relationship_db_ids}
end
|
104
|
+
|
105
|
+
# Pulls the ids of every friend of +user+ in one call, saving an account
# stub and a user -> friend relationship for each id.
# $SAVER.rules[:complete_friend_set] is true while the relationships are
# saved and is reset to false afterwards.
#
# Returns {:friend_relationship_db_ids => [results of the relationship saves]}.
# NOTE(review): unlike FRIENDS, the saved relationship objects themselves
# (not their .id) are collected here -- confirm which one callers expect.
FRIEND_IDS = lambda do |user, auth_object|
  user.pull_account_info
  # rules[:user_id] = user.twitter_id
  $SAVER.rules[:complete_friend_set] = true
  begin
    friend_ids = auth_object.friend_ids({:user_id => user.twitter_id})
    friend_relationship_db_ids = friend_ids.map do |user_id|
      db_user_object = $SAVER.save(Pork::TwitterUser.new(:twitter_id => user_id), &TWITTER_ACCOUNT_SAVE)
      friend = Pork::TwitterUser.new(:twitter_id => user_id, :db_object => db_user_object)
      $SAVER.save({:follower => user, :friend => friend}, &RELATIONSHIP_SAVE)
    end
  ensure
    # Reset even when the pull or a save raises; otherwise later saves would
    # keep being flagged as part of a complete friend set.
    $SAVER.rules[:complete_friend_set] = false
  end
  {:friend_relationship_db_ids => friend_relationship_db_ids}
end
|
118
|
+
|
119
|
+
# Pulls one page (count 200) of +user+'s timeline and saves every tweet.
# The user is addressed by Twitter id when known, by screen name otherwise.
#
# Returns {:db_ids => [ids of the saved tweets]}.
TWEETS = lambda do |user, auth_object|
  $LOG.info "USER TWEETS PULL"
  rules = {:count => 200}
  if user.twitter_id
    rules[:user_id] = user.twitter_id
  else
    # Elsewhere in this code base the user's name accessor is
    # twitter_screen_name; the original called user.screen_name here.
    # Prefer the former and fall back for objects that only expose the latter.
    rules[:screen_name] =
      user.respond_to?(:twitter_screen_name) ? user.twitter_screen_name : user.screen_name
  end
  # Locals replace the original @pull_data / @tweet_db_ids, which lived on
  # the top-level object and were shared by every pull lambda in this file.
  tweet_db_ids = auth_object.user_timeline(rules).map do |result|
    $SAVER.save(result, &USER_TWEET_SAVE).id
  end
  # rules[:reactions] ? $REACTION_PROCESSOR.process_reactions(@tweet_db_objects) : nil
  {:db_ids => tweet_db_ids}
end
|
135
|
+
|
136
|
+
# Saves every current Twitter trend (name and query) through $SAVER.
# Both lambda arguments are accepted but not used.
#
# Returns the collection that Twitter::Trends.current returned.
TRENDS_PULL = lambda do |rules, auth_object|
  $LOG.info "TRENDS PULL"
  current_trends = Twitter::Trends.current
  current_trends.each do |trend|
    trend_attributes = {:name => trend.name, :query => trend.query}
    $SAVER.save(trend_attributes, &TREND_SAVE)
  end
end
|
@@ -0,0 +1,140 @@
|
|
1
|
+
module Pork
|
2
|
+
class ReactionProcessor
|
3
|
+
attr_accessor :rules, :user
|
4
|
+
def initialize(user, rules = {:pull_secondary_influence_user_info => false})
|
5
|
+
@rules = rules
|
6
|
+
@user = user
|
7
|
+
fill_in_user_info
|
8
|
+
end
|
9
|
+
|
10
|
+
def process_reactions()
|
11
|
+
if rules[:mention_from] or rules[:reply_from] or rules[:rt_from]
|
12
|
+
if user.twitter_id
|
13
|
+
tweet_ids = $TWITERATOR.twiterate({}, {:user_id => user.twitter_id}, &USER_TWEETS_ITER)[:tweets]
|
14
|
+
else
|
15
|
+
tweet_ids = $TWITERATOR.twiterate({}, {:screen_name => user.twitter_screen_name}, &USER_TWEETS_ITER)[:tweets]
|
16
|
+
end
|
17
|
+
if rules[:rt_from]
|
18
|
+
$TWITERATOR.twiterate({}, {:from => user.twitter_screen_name, :search_query => "RT"}, &SEARCH_ITER)[:tweets].each do |tweet_id|
|
19
|
+
tweet_ids << tweet_id
|
20
|
+
end
|
21
|
+
end
|
22
|
+
tweets_from_tweet_ids(tweet_ids).each do |tweet|
|
23
|
+
influentials = parse_tweet_for_influentials(tweet)
|
24
|
+
if rules[:mention_from]
|
25
|
+
influentials[:mention_screen_names].each do |screen_name|
|
26
|
+
save_from(screen_name, tweet, 'mention')
|
27
|
+
end
|
28
|
+
end
|
29
|
+
if rules[:reply_from]
|
30
|
+
save_from(influentials[:reply_screen_name], tweet, 'reply')
|
31
|
+
end
|
32
|
+
if rules[:rt_from]
|
33
|
+
influentials[:rt_screen_names].each do |screen_name|
|
34
|
+
save_from(screen_name, tweet, 'retweet')
|
35
|
+
end
|
36
|
+
pull_and_save_rts_from
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
if rules[:mention_to] or rules[:reply_to] or rules[:rt_to]
|
41
|
+
tweet_ids = []
|
42
|
+
$TWITERATOR.twiterate({}, {:search_query => "#{user.twitter_screen_name}"}, &SEARCH_ITER)[:tweets].each do |tweet_id|
|
43
|
+
tweet_ids << tweet_id
|
44
|
+
end
|
45
|
+
tweets_from_tweet_ids(tweet_ids).each do |tweet|
|
46
|
+
source_user_screen_name = tweet.from_user
|
47
|
+
influentials = parse_tweet_for_influentials(tweet)
|
48
|
+
if rules[:mention_to]
|
49
|
+
influentials[:mention_screen_names].each do |screen_name|
|
50
|
+
if screen_name == user.twitter_screen_name
|
51
|
+
save_to(source_user_screen_name, tweet, 'mention')
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
if rules[:reply_to]
|
56
|
+
if influentials[:reply_screen_name] == user.twitter_screen_name
|
57
|
+
save_to(source_user_screen_name, tweet, 'reply')
|
58
|
+
end
|
59
|
+
end
|
60
|
+
if rules[:rt_to]
|
61
|
+
influentials[:rt_screen_names].each do |screen_name|
|
62
|
+
if screen_name == user.twitter_screen_name
|
63
|
+
save_to(source_user_screen_name, tweet, 'retweet')
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
def save_from(secondary_screen_name, tweet, type)
|
73
|
+
$SAVER.save({:initiator => user.db_object, :responder => find_or_create_secondary_account(secondary_screen_name), :tweet => tweet, :type => type}, &REACTION_SAVE)
|
74
|
+
end
|
75
|
+
|
76
|
+
def save_to(secondary_screen_name, tweet, type)
|
77
|
+
$SAVER.save({:initiator => find_or_create_secondary_account(secondary_screen_name), :responder => user.db_object, :tweet => tweet, :type => type}, &REACTION_SAVE)
|
78
|
+
end
|
79
|
+
|
80
|
+
def tweets_from_tweet_ids(tweet_ids)
|
81
|
+
tweets = tweet_ids.map do |tweet_id| Tweet.find(tweet_id) end
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
|
86
|
+
def parse_tweet_for_influentials(tweet)
|
87
|
+
mention_regex = /[@]\w+/
|
88
|
+
reply_regex = /^[@]\w+/
|
89
|
+
rt_regex = /(^[Rr][Tt] ?[@]\w+(:?)| [Rr][Tt] ?[@]\w+(:?))/
|
90
|
+
t_copy = tweet.text.dup
|
91
|
+
reply_screen_name = t_copy.scan(reply_regex)
|
92
|
+
reply_screen_name.each do |ru|
|
93
|
+
t_copy.slice!(/^#{ru}/)
|
94
|
+
end
|
95
|
+
rt_screen_names = t_copy.scan(rt_regex)
|
96
|
+
unless rt_screen_names.empty?
|
97
|
+
rt_screen_names.map! do |rt| rt.first end
|
98
|
+
end
|
99
|
+
rt_screen_names.each do |rtu|
|
100
|
+
t_copy.slice!(/^#{rtu}| #{rtu}/)
|
101
|
+
end
|
102
|
+
mention_screen_names = t_copy.scan(mention_regex)
|
103
|
+
sanitize_screen_names(reply_screen_name.first, rt_screen_names, mention_screen_names)
|
104
|
+
end
|
105
|
+
|
106
|
+
def sanitize_screen_names(reply_screen_name, rt_screen_names, mention_screen_names)
|
107
|
+
sanitizing_regex = / ?([rR][tT])? ?@/
|
108
|
+
reply_screen_name ? reply_screen_name.slice!(sanitizing_regex) : nil
|
109
|
+
rt_screen_names.each do |rtu|
|
110
|
+
rtu.slice!(sanitizing_regex)
|
111
|
+
end
|
112
|
+
mention_screen_names.each do |mu|
|
113
|
+
mu.slice!(sanitizing_regex)
|
114
|
+
end
|
115
|
+
{:reply_screen_name => reply_screen_name, :rt_screen_names => rt_screen_names, :mention_screen_names => mention_screen_names}
|
116
|
+
end
|
117
|
+
|
118
|
+
def fill_in_user_info
|
119
|
+
if not user.db_object
|
120
|
+
user.db_object = $PULLER.pull({:user=>user}, &USER_PULL)[:db_object]
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
def find_or_create_secondary_account(screen_name)
|
125
|
+
account = TwitterAccount.find_by_screen_name(screen_name)
|
126
|
+
unless account
|
127
|
+
if rules[:pull_secondary_influence_user_info]
|
128
|
+
account = $PULLER.pull({:user => Pork::TwitterUser.new(:twitter_screen_name => screen_name)})[:db_object]
|
129
|
+
else
|
130
|
+
account = TwitterAccount.create(:screen_name => screen_name)
|
131
|
+
end
|
132
|
+
end
|
133
|
+
account
|
134
|
+
end
|
135
|
+
|
136
|
+
def pull_and_save_rts_from
|
137
|
+
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|