wuclan 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,443 @@
1
+ #!/usr/bin/env ruby
2
+ require 'date'
3
+ # KLUDGE Executes only if run from command line
4
+ if __FILE__ == $0 then $: << File.dirname(__FILE__)+'/../..'; require 'wukong'; end
5
+
6
+ #
7
+ # Sampling Multiplier
8
+ #
9
+ TWEETS_SAMPLED_FRACTION = 2.62
10
+
11
+
12
+ require 'wukong/datatypes/enum'; include Wukong::Datatypes
13
#
# Bins for follower counts. The bin edges are handed to the `enumerates`
# macro that Binned provides.
#
class FoBin < Binned
  enumerates 0, 0, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10_000, 20_000, Infinity
end

# Friend counts are binned with the same edges as follower counts.
class FrBin < FoBin
end

# Neighborhood sizes are binned with the same edges as follower counts.
class NbhdSizeBin < FoBin
end
16
+
17
#
# Round +f+ (anything responding to #to_f) to one decimal place.
#
def round1 f
  tenths = (10 * f.to_f).round
  tenths / 10.0
end
20
#
# 10 raised to (i / 3), with the exponent first rounded to one decimal
# place by round1 -- so every three steps of +i+ is one decade.
#
def octave3 i
  exponent = round1(i.to_f / 3.0)
  10.0 ** exponent
end
23
+
24
#
# Bins for tweets-per-day rates.
#
# Edges are expressed in tweets/day: the slow end is written as fractions
# of a month (MO days) or week (WK days); the rest come from octave3 at
# half-steps. There are 17 edges and 16 names.
#
class TwDayBin < Binned
  MO = 30.4368499
  WK = 7.0

  slow_edges   = [-Infinity, 1.0 / MO, 2.0 / MO, 1 / WK, ]
  octave_edges = (-2 .. 9).map { |i| octave3(i + 0.5) }
  enumerates(*(slow_edges + octave_edges + [Infinity]))

  slow_names  = ['< 1/mo', ' 1-2/mo', ' 2/mo-1/wk', ' 1-2/wk', '~ 4/wk' ]
  daily_names = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000].map { |i| "~ %4d/day"%i }
  self.write_inheritable_attribute :names, (slow_names + daily_names + ['> 1500/day'])
end

# Recent tweets/day uses the same edges and names as TwDayBin.
class TwDayRecentBin < TwDayBin
end
37
+
38
+ # < 1/mo 0.032854911177914
39
+ # 1-2/mo 0.065709822355828
40
+ # 2/mo-1/wk 0.142857142857143
41
+ # 1-2/wk 0.316227766016838
42
+ # ~ 4/wk 0.630957344480193
43
+ # ~ 1/day 1.58489319246111
44
+ # ~ 2/day 3.16227766016838
45
+ # ~ 5/day 6.30957344480193
46
+ # ~ 10/day 15.8489319246111
47
+ # ~ 20/day 31.6227766016838
48
+ # ~ 50/day 63.0957344480193
49
+ # ~ 100/day 158.489319246111
50
+ # ~ 200/day 316.227766016838
51
+ # ~ 500/day 630.957344480193
52
+ # ~ 1000/day 1584.89319246111
53
+ # > 1500/day Infinity
54
+
55
+
56
# Marker type used for the `active` field of UserMetrics.
class TinyInt < Integer
end

#
# Bins for neighborhood balance.
#
# Inner edges run from 0.125 in steps of 1/20 (16 values), bracketed by
# -Infinity and Infinity. The names are 'Few followers', the numbers
# 15, 20, ... 85, and 'Mostly Followers'; entry 8 is then renamed 'Balanced'.
#
class NbhdBalBin < Binned
  inner_edges = (0..15).map { |n| (0.125 + (n.to_f / 20)) }
  enumerates(*([-Infinity] + inner_edges + [Infinity]))

  numeric_names = (0..14).map { |n| (15 + (5 * n)) }
  self.write_inheritable_attribute :names,
    (['Few followers'] + numeric_names + ['Mostly Followers'])
  self.names[8] = 'Balanced'
end
63
+
64
+ #
65
+ #
66
+ #
67
+ module Wuclan
68
+ module Models
69
#
# Per-user counts taken from sampled tweets: how many were sampled, when the
# latest one was seen, and how many are "recent".
#
# NOTE(review): Bignum was removed from modern Ruby; this file presumably
# targets an older interpreter -- confirm before upgrading.
#
class BaseUserTweetMetrics < TypedStruct.new(
    [:id,         Integer],
    [:tw_sampled, Integer],
    [:last_tw_at, Bignum],
    [:tw_recent,  Integer]
    )
end

# Same layout, one class per kind of sampled object, so that
# adopt_tweet_metrics can dispatch on the class.
class UserTweetMetrics < BaseUserTweetMetrics
end

class UserTweetUrlMetrics < BaseUserTweetMetrics
end

class UserHashtagMetrics < BaseUserTweetMetrics
end
79
+
80
+ # 1 rsrc 2 id 3 screen_name 4protect 5 age 6duratio 7age_use 8age_las 9 fo
81
+ # 10fr 11 tw 12 fv 13 fr_fo 14 fo_week 15 fr_week 16 tw_day 17 fv_mo 18fo_sampled
82
+ # 19fr_sampled 20tw_sampled 21tw_rece 22tw_day_ 23at_in_s 24at_out_ 25rt_in_s
83
+ # 26rt_out_ 27fv_in_s 28fv_out_ 29any_wit 30any_out 31any_in_ 32at_in_w
84
+ # 33at_out_ 34rt_in_w 35rt_out_ 36fv_in_w 37fv_out_ 38at_tw_o 39rt_tw_o
85
+ # 40rt_at_o 41at_in_t 42rt_in_t 43rt_at_i 44 reach 45fo_cove 46fr_cove
86
+ # 47tw_cove 48fv_cove 49scrape_ 50scrape_ 51scrape_ 52 scraped_at
87
+ # 53part_scraped_at 54 created_at 55 last_tw_at
88
+
89
#
# Class-level helper (mixed in with `extend`) that describes a struct's
# fields as SQL column definitions.
#
# The extending class must respond to +members+, +mtypes+ and +mnames+,
# which are walked in parallel.
#
module StructToSQL
  #
  # Prints one column definition per member to stdout, in the form
  #   `attr`  TYPE,  -- human-readable name
  # and returns a one-element array holding the comma-separated,
  # backtick-quoted column list.
  #
  # String-like types become an ASCII VARCHAR(255), Enum subclasses supply
  # their own SQL via +to_sql_str+, and anything else uses the upcased
  # class name.
  #
  def to_sql_str
    members.zip(mtypes, mnames).each do |attr, type, name|
      type_str = case
                 when type <= String then 'VARCHAR(255) CHARACTER SET ASCII'
                 when type <= Enum   then type.to_sql_str
                 else                     type.to_s.upcase
                 end
      puts " %-23s\t%-23s\t-- %s" %["`#{attr}`", type_str+',', name]
    end
    # Use the extending class's own members rather than a hard-coded
    # UserMetrics, so the column list matches the columns printed above.
    [ members.map{|attr| "`#{attr}`" }.join(", ") ]
  end
end
102
+
103
#
# A TypedStruct whose field specs may carry a third element: a
# human-readable name for the field.
#
class NamedTypedStruct < TypedStruct
  #
  # Each argument is a [member, type] or [member, type, name] triple.
  # The struct itself is built from the [member, type] pairs; when names are
  # present they are stored on the new class as +mnames+ and the class is
  # extended with StructToSQL.
  #
  def self.new *args
    field_names, field_types, field_labels = args.transpose
    klass = super *[field_names, field_types].transpose
    return klass unless field_labels
    klass.class_eval do
      cattr_accessor :mnames
      self.mnames = field_labels
      extend StructToSQL
    end
    klass
  end
end
118
+
119
+
120
+ class UserMetrics < NamedTypedStruct.new(
121
+ [:id, Integer , "User ID" ],
122
+ [:screen_name, String , "Twitter Name" ],
123
+ # 4
124
+ [:created_on, Date , "Created On Date" ],
125
+ [:created_at, DateTime , "Created At Date-Time" ],
126
+ [:protected, Integer , "Protected?" ],
127
+ [:active, TinyInt , "Active?" ],
128
+ # 8
129
+ [:fo, Integer , "# Followers" ],
130
+ [:fr, Integer , "# Friends" ],
131
+ [:tw, Integer , "# Tweets Sent" ],
132
+ [:fv, Integer , "# Favorites Out" ],
133
+ # 12
134
+ [:fo_week, Float , "Followers accumulated / week", ], # x
135
+ [:fr_week, Float , "Friends accumulated / week", ], # x
136
+ [:tw_day, Float , "Tweets sent / day", ], # x
137
+ [:fv_mo, Float , "Favorites accumulated / month", ], # x
138
+ # 16
139
+ [:tw_recent, Integer , "Tweets sampled 2008 Dec 1", ], # t
140
+ [:tw_day_recent, Float , "Recent Tweets / day (*)", ], # t
141
+ [:last_tw_at, Date , "Last Tweet Date (*)", ], # t
142
+ [:last_tw_age, Integer , "Age of most recent tweet (*)", ], # x
143
+ # 20
144
+ [:at_in_sampled, Integer , "# @atsigns to (*)", ], # g
145
+ [:at_out_sampled, Integer , "# @atsigns from (*)" ], # g
146
+ [:rt_in_sampled, Integer , "# ReTweets of (*)", ], # g
147
+ [:rt_out_sampled, Integer , "# ReTweets by (*)" ], # g
148
+ [:fv_in_sampled, Integer , "# favorites of (*)", ], # g
149
+ [:fv_out_sampled, Integer , "# favorites by (*)", ], # c
150
+ # 26
151
+ [:any_with, Integer , "Users linked to or from", ], # A
152
+ [:any_in_with, Integer , "Distinct At+RT+Fv users in", ], # A
153
+ [:any_out_with, Integer , "Distinct At+RT+Fv users out", ], # A
154
+ # 29
155
+ [:at_in_with, Integer , "Users @atsigned in (*)", ], # g
156
+ [:at_out_with, Integer , "Users @atsigned out (*)" ], # g",
157
+ [:rt_in_with, Integer , "Users Retweeted in (*)", ], # g
158
+ [:rt_out_with, Integer , "Users Retweeted out (*)" ], # g
159
+ [:fv_in_with, Integer , "Users Favorited in (*)", ], # g
160
+ [:fv_out_with, Integer , "Users Favorited out (*)", ], # c
161
+ # 35
162
+ [:tw_sampled, Integer , "How many tweets have we sampled", ], # t
163
+ [:hashtag_sampled, Integer, "How many hashtags have we seen"],
164
+ [:tweet_url_sampled, Integer, "How many tweet_url's have we seen"],
165
+ [:at_tw_out, Float , "Atsigns out per tweet out (*)", ], # g
166
+ [:rt_tw_out, Float , "Retweets out per tweet out (*)", ], # g
167
+ [:rt_at_out, Float , "Retweets out per atsign out (*)", ], # g
168
+ [:at_in_tw_out, Float , "Atsigns in per tweet out (*)", ], # g
169
+ [:rt_in_tw_out, Float , "Retweets in per tweet out (*)", ], # g
170
+ [:rt_at_in, Float , "Retweets in per atsign in (*)", ], # g
171
+ # 40
172
+ [:nbhd_bal, Float , "Neighborhood Balance", ], #
173
+ [:nbhd_size, Integer , "Neighborhood Size", ], #
174
+ [:reach, Integer , "Reach: (tweets/day) * |followers|", ], # x
175
+ # 43
176
+ [:fo_bin, FoBin , "# Followers grp"],
177
+ [:fr_bin, FrBin , "# Friends grp"],
178
+ [:nbhd_size_bin, NbhdSizeBin , "Neighborhood Size grp"],
179
+ [:nbhd_bal_bin, NbhdBalBin , "Neighborhood Balance grp"],
180
+ [:tw_day_bin, TwDayBin , "Tweets / day grp (*)"],
181
+ [:tw_day_recent_bin, TwDayRecentBin , "Recent Tweets / day grp (*)"],
182
+ #
183
+ [:part_scraped_at, Date , "User partial Scrape Date", ], #
184
+ [:scraped_at, Date , "User Scrape Date" ]
185
+ # #
186
+ # [:fo_sampled, Integer , "How many followers have we sampled", ], # c
187
+ # [:fr_sampled, Integer , "How many friend have we sampled", ], # c
188
+ # [:fo_coverage, Float , "Friends sampled / known to exist", ], # c
189
+ # [:fr_coverage, Float , "Followers sampled / known to exist", ], # c
190
+ # [:tw_coverage, Float , "Tweets sampled / known to exist", ], # c
191
+ # [:fv_coverage, Float , "Favorites sampled / known to exist", ], # c
192
+ # #
193
+ # [:scrape_age_fo, Integer , "How long since your followers graph record was scraped", ], # c
194
+ # [:scrape_age_fr, Integer , "How long since your friends graph record was scraped", ], # c
195
+ # [:scrape_age_fv, Integer , "How long since your favorites graph record was scraped", ], # c
196
+ #
197
+ # [:age, Integer , "Days between creation and now", ], # x
198
+ # [:duration, Integer , "Days between last scrape and creation", ], # x
199
+ # [:age_user_scrape, Integer , "How long since your user record was scraped", ], # x
200
+ )
201
+ SCRAPING_DAY_ZERO_STR = 20081201000000
202
+ SCRAPING_DAY_ZERO = DateTime.parse_safely(SCRAPING_DAY_ZERO_STR.to_s)
203
+ SINCE_DAY_ZERO = DateTime.now.utc - SCRAPING_DAY_ZERO
204
+
205
#
# Recompute every bin field, then the active flag, from the values
# currently on the record. Returns whatever get_active returns.
#
def fix!
  [
    :get_tw_day_bin, :get_tw_day_recent_bin,
    :get_fo_bin, :get_fr_bin,
    :get_nbhd_size_bin, :get_nbhd_bal_bin,
  ].each { |getter| send(getter) }
  get_active
end
214
+
215
+ #
216
+ # Fix formatting as we flatten
217
+ #
218
#
# Flatten to an array of display values, one per member, in member order.
#
# nil stays nil; :id is zero-padded to 10 digits; Float and Integer fields
# get fixed formats; DateTime and Date fields are rendered with slashes.
# Any other type is passed through untouched.
#
def to_a
  members.zip(mtypes).map do |member, type|
    val = self[member]
    if    val.nil?              then nil
    elsif member.to_sym == :id  then format("%010d", val.to_i)
    elsif type == Float         then format("%f", val.to_f)
    elsif type == Integer       then format("%7d", val.to_i)
    elsif type == DateTime      then val.strftime("%Y/%m/%d %H:%M:%S")
    elsif type == Date          then val.strftime("%Y/%m/%d")
    else                             val
    end
  end
end
232
# True when the +protected+ field, coerced to an integer, is 1.
def protected?
  flag = protected.to_i
  flag == 1
end
235
+
236
+ #
237
+ #
238
+ #
239
#
# Merge a full user record into this one, remember that we did so, and
# re-parse created_at and scraped_at through DateTime.parse_safely.
#
def adopt_user user
  @user_adopted = true
  self.merge!(user)
  parsed_created_at = DateTime.parse_safely(created_at)
  self.created_at   = parsed_created_at
  parsed_scraped_at = DateTime.parse_safely(scraped_at)
  self.scraped_at   = parsed_scraped_at
end
245
# Whether adopt_user has been called on this record (nil if it has not).
def user_adopted?
  return @user_adopted
end
248
+
249
+ #
250
+ #
251
+ #
252
#
# Merge a partial user record into this one.
#
# The merge may overwrite scraped_at, so the value held before the merge is
# put back afterwards; the partial's own scrape date goes to part_scraped_at.
#
def adopt_user_partial user_partial
  full_scrape_date = self.scraped_at
  self.merge!(user_partial)
  self.part_scraped_at = DateTime.parse_safely(user_partial.scraped_at)
  self.scraped_at      = full_scrape_date
end
259
+
260
+ #
261
+ #
262
+ #
263
#
# For each scraping context (friends ids, followers ids, favorites), store
# how long ago that context was last scraped.
#
# A context is skipped when it has no successes or no scrape date. Dates
# that sort before SCRAPING_DAY_ZERO_STR are treated as bogus and replaced
# by SCRAPING_DAY_ZERO.
#
# NOTE(review): the :scrape_age_* fields are commented out of the
# UserMetrics member list in this file, so the final assignment presumably
# fails as written -- confirm whether this method is still in use.
#
def adopt_scraping_metrics usm
  age_field_for = [
    [:friends_ids,   :scrape_age_fr],
    [:followers_ids, :scrape_age_fo],
    [:favorites,     :scrape_age_fv],
  ]
  age_field_for.each do |context, attr|
    raw_date = usm.get(context, :scraped_at)
    next if (usm.get(context, :successes).to_i < 1) || (raw_date.blank?)
    # fudge bogus date records
    scraped = if raw_date.to_i < SCRAPING_DAY_ZERO_STR
                SCRAPING_DAY_ZERO
              else
                DateTime.parse_safely(raw_date)
              end
    next if scraped.blank?
    self[attr] = now - scraped
  end
end
277
+
278
+ #
279
+ # From simple graph metrics
280
+ #
281
# Merge the simple graph metrics straight into this record.
def adopt_graph_metrics user_graph_metrics
  self.merge!(user_graph_metrics)
end
284
+
285
+ #
286
+ # User's Tweets -- from UserTweetMetrics
287
+ #
288
#
# Copy sampled-tweet counts onto this record; which fields are filled
# depends on the class of +metrics+:
#
# * UserTweetMetrics    -- tw_sampled, last_tw_at (parsed), tw_recent
# * UserHashtagMetrics  -- hashtag_sampled
# * UserTweetUrlMetrics -- tweet_url_sampled
#
# Anything else raises.
#
def adopt_tweet_metrics metrics
  if metrics.is_a?(UserTweetMetrics)
    self.tw_sampled = metrics.tw_sampled
    self.last_tw_at = DateTime.parse_safely(metrics.last_tw_at)
    self.tw_recent  = metrics.tw_recent
  elsif metrics.is_a?(UserHashtagMetrics)
    self.hashtag_sampled = metrics.tw_sampled
  elsif metrics.is_a?(UserTweetUrlMetrics)
    self.tweet_url_sampled = metrics.tw_sampled
  else
    raise "Can't adopt #{metrics}"
  end
end
300
+
301
#
# Long-form names for the four basic counts, mapped onto the short
# fields fo / fr / tw / fv. Writers coerce with to_i.
#
def followers_count=(fo)
  self.fo = fo.to_i
end

def friends_count=(fr)
  self.fr = fr.to_i
end

def statuses_count=(tw)
  self.tw = tw.to_i
end

def favourites_count=(fv)
  self.fv = fv.to_i
end

def followers_count
  self.fo
end

def friends_count
  self.fr
end

def statuses_count
  self.tw
end

def favourites_count
  self.fv
end
309
+ #
310
+ # Larger of fr and fo
311
+ #
312
# Neighborhood size: the larger of fr and fo, ignoring whichever is nil.
def get_nbhd_size
  known_counts = [fr, fo].compact
  self.nbhd_size = known_counts.max
end
315
+ #
316
#
# Neighborhood balance, a number in [0, 1]:
#   more friends than followers  ->  0.5 * fo / fr         (below 0.5)
#   otherwise                    ->  1 - 0.5 * fr / fo     (0.5 and up)
# Left unset when either count is missing or both are zero.
#
def get_nbhd_bal
  return unless fr && fo
  return unless (fr.to_i > 0) || (fo.to_i > 0)
  if fr > fo
    self.nbhd_bal = (0.5 * fo.to_f / fr.to_f)
  else
    self.nbhd_bal = (1.0 - (0.5 * fr.to_f / fo.to_f))
  end
end
322
# Copy created_at into created_on.
def get_created_on
  creation_time = self.created_at
  self.created_on = creation_time
end
325
+
326
#
# Memoized shorthands for the date fields, plus a memoized "now" so that
# every age computed on one record uses the same instant.
#
def crat
  @crat = created_at unless @crat
  @crat
end

def scat
  @scat = scraped_at unless @scat
  @scat
end

def part_scat
  @part_scat = part_scraped_at unless @part_scat
  @part_scat
end

def twat
  @twat = last_tw_at unless @twat
  @twat
end

def now
  @now = DateTime.now unless @now
  @now
end
341
+
342
+
343
+ #
344
+ # duration --
345
+ #
346
#
# Whole days between creation and now; nil when the creation date is
# unknown. Memoized once computed.
#
def age
  @age ||= (( now - crat ).to_i if crat)
end
351
#
# Whole days between creation and the later of the full and partial scrape
# dates; nil when the creation date or both scrape dates are unknown.
# Memoized once computed.
#
def duration
  return @duration if @duration
  return unless crat && (scat || part_scat)
  latest_scrape = [scat, part_scat].compact.max
  days_observed = latest_scrape - crat
  @duration = days_observed.to_i
end
357
#
# Store the whole-day difference (created_at - last tweet date) in
# last_tw_age; does nothing when either date is missing.
#
# NOTE(review): for an "age of most recent tweet" one would expect
# now - twat; creation minus last tweet is presumably negative. Confirm
# the intended direction before relying on this field.
#
def get_last_tw_age
  return unless twat && crat
  days_apart = crat - twat
  self.last_tw_age = days_apart.to_i
end
361
#
# Store the whole days elapsed since the user record was scraped; does
# nothing when the scrape date is unknown.
#
# NOTE(review): :age_user_scrape is commented out of the UserMetrics member
# list in this file -- confirm this is still callable.
#
def get_age_user_scrape
  return unless scat
  days_since_scrape = now - scat
  self.age_user_scrape = days_since_scrape.to_i
end
365
+
366
+ #
367
+ # Per-day metrics
368
+ #
369
+ # Should possibly use duration but need to worry about which one.
370
+ #
371
# Followers gained per week, averaged over +duration+ days.
def get_fo_week
  return if (duration.to_i == 0) || (!fo)
  self.fo_week = 7 * (fo.to_f / duration)
end

# Friends gained per week, averaged over +age+ days.
def get_fr_week
  return if (age.to_i == 0) || (!fr)
  self.fr_week = 7 * (fr.to_f / age)
end

# Tweets sent per day, averaged over +duration+ days.
def get_tw_day
  return if (duration.to_i == 0) || (!tw)
  self.tw_day = (tw.to_f / duration)
end

# Favorites gained per (30.4368499-day) month, averaged over +age+ days.
def get_fv_mo
  return if (age.to_i == 0) || (!fv)
  self.fv_mo = 30.4368499 * (fv.to_f / age)
end
375
+ # def get_fr_fo() self.fr_fo = (fr.to_f / fo) unless (fo.to_i == 0) end
376
#
# Estimated recent tweets per day.
#
# Takes the number of tweets sampled since scraping day zero (tw_recent),
# scales it by the sampling multiplier to estimate how many were actually
# sent, and averages over the days elapsed since day zero.
#
# The count used is tw_recent, not the lifetime total tw: both the sampling
# multiplier and the day-zero window describe the sampled recent tweets.
# Left unset when tw_recent is missing.
#
def get_tw_day_recent
  return unless tw_recent
  self.tw_day_recent = TWEETS_SAMPLED_FRACTION * (tw_recent.to_f / SINCE_DAY_ZERO)
end
379
+
380
+ #
381
+ # Coverage: how many sampled vs. how many known to exist.
382
+ #
383
#
# Each coverage is (number sampled) / (number known to exist), left unset
# when the known count is zero or missing.
#
# NOTE(review): the *_coverage, fo_sampled and fr_sampled fields are
# commented out of the UserMetrics member list in this file -- confirm
# these are still callable.
#
def get_fo_coverage
  return if fo.to_i == 0
  self.fo_coverage = (fo_sampled.to_f / fo)
end

def get_fr_coverage
  return if fr.to_i == 0
  self.fr_coverage = (fr_sampled.to_f / fr)
end

def get_tw_coverage
  return if tw.to_i == 0
  self.tw_coverage = (tw_sampled.to_f / tw)
end

def get_fv_coverage
  return if fv.to_i == 0
  self.fv_coverage = (fv_out_sampled.to_f / fv)
end
387
+
388
+ #
389
+ # Conversational metrics:
390
+ # favorites, @atsigns and RT's per tweet, in and out
391
+ # RT per @atsign, in and out
392
+ #
393
# @atsigns sent per sampled tweet.
def get_at_tw_out
  self.at_tw_out = (at_out_sampled.to_f / tw_sampled.to_f) if (tw_sampled.to_i != 0) && at_out_sampled
end

# Retweets sent per sampled tweet.
def get_rt_tw_out
  self.rt_tw_out = (rt_out_sampled.to_f / tw_sampled.to_f) if (tw_sampled.to_i != 0) && rt_out_sampled
end

# @atsigns received per sampled tweet sent.
def get_at_in_tw_out
  self.at_in_tw_out = (at_in_sampled.to_f / tw_sampled.to_f) if (tw_sampled.to_i != 0) && at_in_sampled
end

# Retweets received per sampled tweet sent.
def get_rt_in_tw_out
  self.rt_in_tw_out = (rt_in_sampled.to_f / tw_sampled.to_f) if (tw_sampled.to_i != 0) && rt_in_sampled
end

# Retweets sent per @atsign sent.
def get_rt_at_out
  self.rt_at_out = (rt_out_sampled.to_f / at_out_sampled.to_f) if (at_out_sampled.to_i != 0) && rt_out_sampled
end

# Retweets received per @atsign received.
def get_rt_at_in
  self.rt_at_in = (rt_in_sampled.to_f / at_in_sampled.to_f) if (at_in_sampled.to_i != 0) && rt_in_sampled
end
401
+
402
+ #
403
+ # Reach:
404
+ #
405
+ # (your msgs/day) * |n1|
406
+ #
407
+ # How many of your messages/day might get read. Audience Share (tw_out_share)
408
+ # is a better measure of your impact.
409
+ #
410
#
# Reach: (tweets/day) * |followers|.
#
# Refreshes tw_day first via get_tw_day. Leaves reach unset -- instead of
# raising on the multiplication -- when tw_day could not be computed or the
# follower count is unknown.
#
def get_reach
  self.get_tw_day or return
  return unless fo
  self.reach = (tw_day.to_f * fo)
end
414
+
415
+
416
+ #
417
+ # Bins
418
+ #
419
#
# Each bin field is the matching Binned subclass looked up (via []) with
# the raw value.
#
def get_fo_bin
  self.fo_bin = FoBin[fo]
end

def get_fr_bin
  self.fr_bin = FrBin[fr]
end

def get_nbhd_size_bin
  self.nbhd_size_bin = NbhdSizeBin[nbhd_size]
end

def get_nbhd_bal_bin
  self.nbhd_bal_bin = NbhdBalBin[nbhd_bal]
end

def get_tw_day_bin
  self.tw_day_bin = TwDayBin[tw_day]
end

def get_tw_day_recent_bin
  self.tw_day_recent_bin = TwDayRecentBin[tw_day_recent]
end
425
#
# Set the active flag (1 or 0). A user counts as active with at least 3
# tweets and either more than 15 followers or a small neighborhood
# (>= 3 followers and >= 2 friends). Left unset when any of fo, fr, tw is
# missing.
#
def get_active
  return unless fo && fr && tw
  enough_tweets = tw >= 3
  well_followed = fo > 15
  has_neighbors = (fo >= 3) && (fr >= 2)
  self.active = (enough_tweets && (well_followed || has_neighbors)) ? 1 : 0
end
432
+ end
433
+ end
434
+ end
435
+
436
+
437
+
438
+ #
439
+ # Executes only if run from command line
440
+ #
441
# Print the tab-separated column header: "rsrc" followed by every
# UserMetrics member name.
if $0 == __FILE__
  member_columns = Wuclan::Models::UserMetrics.members.join("\t")
  puts "rsrc\t" + member_columns
end