wuclan 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,443 @@
1
+ #!/usr/bin/env ruby
2
+ require 'date'
3
+ # KLUDGE Executes only if run from command line
4
+ if __FILE__ == $0 then $: << File.dirname(__FILE__)+'/../..'; require 'wukong'; end
5
+
6
+ #
7
+ # Sampling Multiplier
8
+ #
9
+ TWEETS_SAMPLED_FRACTION = 2.62
10
+
11
+
12
+ require 'wukong/datatypes/enum'; include Wukong::Datatypes
13
# Binned type for follower counts, declared with the bin edges listed below.
class FoBin < Binned
  enumerates 0, 0, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10_000, 20_000, Infinity
end

# Friend-count bins; a plain subclass of FoBin.
class FrBin < FoBin
end

# Neighborhood-size bins; a plain subclass of FoBin.
class NbhdSizeBin < FoBin
end
16
+
17
# Rounds +f+ (converted with #to_f) to one decimal place.
def round1(f)
  tenths = (10 * f.to_f).round
  tenths / 10.0
end
20
# Returns 10 raised to (i / 3), with the exponent first rounded to one
# decimal place by round1.
def octave3(i)
  exponent = round1(i.to_f / 3.0)
  10.0 ** exponent
end
23
+
24
# Binned type for tweets-per-day rates. The edges run from sub-monthly rates
# up through octave3-spaced daily rates; the :names attribute holds one label
# per bin.
class TwDayBin < Binned
  MO = 30.4368499
  WK = 7.0

  slow_edges   = [-Infinity, 1.0 / MO, 2.0 / MO, 1 / WK]
  octave_edges = (-2..9).map { |step| octave3(step + 0.5) }
  enumerates(*(slow_edges + octave_edges + [Infinity]))

  slow_names  = ['< 1/mo', ' 1-2/mo', ' 2/mo-1/wk', ' 1-2/wk', '~ 4/wk']
  daily_names = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000].map { |per_day| "~ %4d/day" % per_day }
  self.write_inheritable_attribute :names, (slow_names + daily_names + ['> 1500/day'])
end

# Bins for the recent tweets-per-day rate; a plain subclass of TwDayBin.
class TwDayRecentBin < TwDayBin
end
37
+
38
+ # < 1/mo 0.032854911177914
39
+ # 1-2/mo 0.065709822355828
40
+ # 2/mo-1/wk 0.142857142857143
41
+ # 1-2/wk 0.316227766016838
42
+ # ~ 4/wk 0.630957344480193
43
+ # ~ 1/day 1.58489319246111
44
+ # ~ 2/day 3.16227766016838
45
+ # ~ 5/day 6.30957344480193
46
+ # ~ 10/day 15.8489319246111
47
+ # ~ 20/day 31.6227766016838
48
+ # ~ 50/day 63.0957344480193
49
+ # ~ 100/day 158.489319246111
50
+ # ~ 200/day 316.227766016838
51
+ # ~ 500/day 630.957344480193
52
+ # ~ 1000/day 1584.89319246111
53
+ # > 1500/day Infinity
54
+
55
+
56
# Marker subclass of Integer, used as the declared type of the :active field.
class TinyInt < Integer
end

# Binned type for neighborhood balance: sixteen inner edges spaced 0.05 apart
# starting at 0.125, bracketed by -Infinity and Infinity.
class NbhdBalBin < Binned
  inner_edges = (0..15).map { |n| 0.125 + (n.to_f / 20) }
  enumerates(*([-Infinity] + inner_edges + [Infinity]))

  middle_labels = (0..14).map { |n| 15 + (5 * n) }
  self.write_inheritable_attribute :names,
    (['Few followers'] + middle_labels + ['Mostly Followers'])
  # Overwrite the label at index 8 (otherwise the number 50).
  self.names[8] = 'Balanced'
end
63
+
64
+ #
65
+ #
66
+ #
67
+ module Wuclan
68
+ module Models
69
# Per-user tweet counters: user id, number of sampled tweets, timestamp of the
# last tweet and the recent tweet count.
class BaseUserTweetMetrics < TypedStruct.new(
    [:id,         Integer],
    [:tw_sampled, Integer],
    [:last_tw_at, Bignum],
    [:tw_recent,  Integer]
  )
end

# The three concrete flavours share the base layout; UserMetrics#adopt_tweet_metrics
# dispatches on which class it receives.
class UserTweetMetrics < BaseUserTweetMetrics
end

class UserTweetUrlMetrics < BaseUserTweetMetrics
end

class UserHashtagMetrics < BaseUserTweetMetrics
end
79
+
80
+ # 1 rsrc 2 id 3 screen_name 4protect 5 age 6duratio 7age_use 8age_las 9 fo
81
+ # 10fr 11 tw 12 fv 13 fr_fo 14 fo_week 15 fr_week 16 tw_day 17 fv_mo 18fo_sampled
82
+ # 19fr_sampled 20tw_sampled 21tw_rece 22tw_day_ 23at_in_s 24at_out_ 25rt_in_s
83
+ # 26rt_out_ 27fv_in_s 28fv_out_ 29any_wit 30any_out 31any_in_ 32at_in_w
84
+ # 33at_out_ 34rt_in_w 35rt_out_ 36fv_in_w 37fv_out_ 38at_tw_o 39rt_tw_o
85
+ # 40rt_at_o 41at_in_t 42rt_in_t 43rt_at_i 44 reach 45fo_cove 46fr_cove
86
+ # 47tw_cove 48fv_cove 49scrape_ 50scrape_ 51scrape_ 52 scraped_at
87
+ # 53part_scraped_at 54 created_at 55 last_tw_at
88
+
89
# Mixin for struct classes that respond to +members+, +mtypes+ and +mnames+
# (NamedTypedStruct extends its generated classes with it).
module StructToSQL
  # Prints one SQL column-definition line per member to stdout
  # (backquoted name, SQL type, descriptive name as a trailing comment).
  #
  # @return [Array<String>] one-element array holding the backquoted,
  #   comma-separated list of the receiver's members
  def to_sql_str
    members.zip(mtypes, mnames).each do |attr, type, name|
      type_str = case
                 when type <= String then 'VARCHAR(255) CHARACTER SET ASCII'
                 when type <= Enum   then type.to_sql_str
                 else                     type.to_s.upcase
                 end
      puts " %-23s\t%-23s\t-- %s" % ["`#{attr}`", type_str + ',', name]
    end
    # List the receiver's own members. This was hard-coded to UserMetrics,
    # which returned the wrong column list for any other extending class.
    [members.map { |attr| "`#{attr}`" }.join(", ")]
  end
end
102
+
103
# TypedStruct whose field rows may carry a third element: a descriptive name.
class NamedTypedStruct < TypedStruct
  # Takes rows of [member, type, name]. The member/type pairs are handed to
  # TypedStruct.new; when names are present they are stored on the resulting
  # class as +mnames+ and the class is extended with StructToSQL.
  def self.new(*args)
    members, mtypes, mnames = args.transpose
    klass = super(*[members, mtypes].transpose)
    return klass unless mnames
    klass.class_eval do
      cattr_accessor :mnames
      self.mnames = mnames
      extend StructToSQL
    end
    klass
  end
end
118
+
119
+
120
+ class UserMetrics < NamedTypedStruct.new(
121
+ [:id, Integer , "User ID" ],
122
+ [:screen_name, String , "Twitter Name" ],
123
+ # 4
124
+ [:created_on, Date , "Created On Date" ],
125
+ [:created_at, DateTime , "Created At Date-Time" ],
126
+ [:protected, Integer , "Protected?" ],
127
+ [:active, TinyInt , "Active?" ],
128
+ # 8
129
+ [:fo, Integer , "# Followers" ],
130
+ [:fr, Integer , "# Friends" ],
131
+ [:tw, Integer , "# Tweets Sent" ],
132
+ [:fv, Integer , "# Favorites Out" ],
133
+ # 12
134
+ [:fo_week, Float , "Followers accumulated / week", ], # x
135
+ [:fr_week, Float , "Friends accumulated / week", ], # x
136
+ [:tw_day, Float , "Tweets sent / day", ], # x
137
+ [:fv_mo, Float , "Favorites accumulated / month", ], # x
138
+ # 16
139
+ [:tw_recent, Integer , "Tweets sampled 2008 Dec 1", ], # t
140
+ [:tw_day_recent, Float , "Recent Tweets / day (*)", ], # t
141
+ [:last_tw_at, Date , "Last Tweet Date (*)", ], # t
142
+ [:last_tw_age, Integer , "Age of most recent tweet (*)", ], # x
143
+ # 20
144
+ [:at_in_sampled, Integer , "# @atsigns to (*)", ], # g
145
+ [:at_out_sampled, Integer , "# @atsigns from (*)" ], # g
146
+ [:rt_in_sampled, Integer , "# ReTweets of (*)", ], # g
147
+ [:rt_out_sampled, Integer , "# ReTweets by (*)" ], # g
148
+ [:fv_in_sampled, Integer , "# favorites of (*)", ], # g
149
+ [:fv_out_sampled, Integer , "# favorites by (*)", ], # c
150
+ # 26
151
+ [:any_with, Integer , "Users linked to or from", ], # A
152
+ [:any_in_with, Integer , "Distinct At+RT+Fv users in", ], # A
153
+ [:any_out_with, Integer , "Distinct At+RT+Fv users out", ], # A
154
+ # 29
155
+ [:at_in_with, Integer , "Users @atsigned in (*)", ], # g
156
+ [:at_out_with, Integer , "Users @atsigned out (*)" ], # g",
157
+ [:rt_in_with, Integer , "Users Retweeted in (*)", ], # g
158
+ [:rt_out_with, Integer , "Users Retweeted out (*)" ], # g
159
+ [:fv_in_with, Integer , "Users Favorited in (*)", ], # g
160
+ [:fv_out_with, Integer , "Users Favorited out (*)", ], # c
161
+ # 35
162
+ [:tw_sampled, Integer , "How many tweets have we sampled", ], # t
163
+ [:hashtag_sampled, Integer, "How many hashtags have we seen"],
164
+ [:tweet_url_sampled, Integer, "How many tweet_url's have we seen"],
165
+ [:at_tw_out, Float , "Atsigns out per tweet out (*)", ], # g
166
+ [:rt_tw_out, Float , "Retweets out per tweet out (*)", ], # g
167
+ [:rt_at_out, Float , "Retweets out per atsign out (*)", ], # g
168
+ [:at_in_tw_out, Float , "Atsigns in per tweet out (*)", ], # g
169
+ [:rt_in_tw_out, Float , "Retweets in per tweet out (*)", ], # g
170
+ [:rt_at_in, Float , "Retweets in per atsign in (*)", ], # g
171
+ # 40
172
+ [:nbhd_bal, Float , "Neighborhood Balance", ], #
173
+ [:nbhd_size, Integer , "Neighborhood Size", ], #
174
+ [:reach, Integer , "Reach: (tweets/day) * |followers|", ], # x
175
+ # 43
176
+ [:fo_bin, FoBin , "# Followers grp"],
177
+ [:fr_bin, FrBin , "# Friends grp"],
178
+ [:nbhd_size_bin, NbhdSizeBin , "Neighborhood Size grp"],
179
+ [:nbhd_bal_bin, NbhdBalBin , "Neighborhood Balance grp"],
180
+ [:tw_day_bin, TwDayBin , "Tweets / day grp (*)"],
181
+ [:tw_day_recent_bin, TwDayRecentBin , "Recent Tweets / day grp (*)"],
182
+ #
183
+ [:part_scraped_at, Date , "User partial Scrape Date", ], #
184
+ [:scraped_at, Date , "User Scrape Date" ]
185
+ # #
186
+ # [:fo_sampled, Integer , "How many followers have we sampled", ], # c
187
+ # [:fr_sampled, Integer , "How many friend have we sampled", ], # c
188
+ # [:fo_coverage, Float , "Friends sampled / known to exist", ], # c
189
+ # [:fr_coverage, Float , "Followers sampled / known to exist", ], # c
190
+ # [:tw_coverage, Float , "Tweets sampled / known to exist", ], # c
191
+ # [:fv_coverage, Float , "Favorites sampled / known to exist", ], # c
192
+ # #
193
+ # [:scrape_age_fo, Integer , "How long since your followers graph record was scraped", ], # c
194
+ # [:scrape_age_fr, Integer , "How long since your friends graph record was scraped", ], # c
195
+ # [:scrape_age_fv, Integer , "How long since your favorites graph record was scraped", ], # c
196
+ #
197
+ # [:age, Integer , "Days between creation and now", ], # x
198
+ # [:duration, Integer , "Days between last scrape and creation", ], # x
199
+ # [:age_user_scrape, Integer , "How long since your user record was scraped", ], # x
200
+ )
201
+ SCRAPING_DAY_ZERO_STR = 20081201000000
202
+ SCRAPING_DAY_ZERO = DateTime.parse_safely(SCRAPING_DAY_ZERO_STR.to_s)
203
+ SINCE_DAY_ZERO = DateTime.now.utc - SCRAPING_DAY_ZERO
204
+
205
# Runs every bin getter and then get_active, in that order, and returns the
# result of get_active.
def fix!
  steps = %w[
    get_tw_day_bin
    get_tw_day_recent_bin
    get_fo_bin
    get_fr_bin
    get_nbhd_size_bin
    get_nbhd_bal_bin
    get_active
  ]
  steps.map { |step| send(step) }.last
end
214
+
215
+ #
216
+ # Fix formatting as we flatten
217
+ #
218
# Flattens the record into an array of formatted values, one per member.
# nil values stay nil; :id is zero-padded to 10 digits; Float, Integer,
# DateTime and Date fields get fixed formats; anything else passes through.
def to_a
  members.zip(mtypes).map do |field, klass|
    value = self[field]
    if    value.nil?          then nil
    elsif field.to_sym == :id then "%010d" % value.to_i
    elsif klass == Float      then "%f" % value.to_f
    elsif klass == Integer    then "%7d" % value.to_i
    elsif klass == DateTime   then value.strftime("%Y/%m/%d %H:%M:%S")
    elsif klass == Date       then value.strftime("%Y/%m/%d")
    else                           value
    end
  end
end
232
# True when the +protected+ field, converted with #to_i, equals 1.
def protected?
  flag = protected.to_i
  flag == 1
end
235
+
236
+ #
237
+ #
238
+ #
239
# Merges +user+ into this record, flags the record as having adopted a full
# user, then re-parses created_at and scraped_at with DateTime.parse_safely.
def adopt_user(user)
  @user_adopted = true
  merge!(user)
  parsed_created = DateTime.parse_safely(created_at)
  self.created_at = parsed_created
  parsed_scraped = DateTime.parse_safely(scraped_at)
  self.scraped_at = parsed_scraped
end

# Truthy once adopt_user has been called on this record.
def user_adopted?
  @user_adopted
end
248
+
249
+ #
250
+ #
251
+ #
252
# Merges a partial user record. The partial's scraped_at is recorded as
# part_scraped_at, and the scraped_at this record held before the merge is
# put back afterwards.
def adopt_user_partial(user_partial)
  full_scrape_stamp = scraped_at
  merge!(user_partial)
  self.part_scraped_at = DateTime.parse_safely(user_partial.scraped_at)
  self.scraped_at = full_scrape_stamp
end
259
+
260
+ #
261
+ #
262
+ #
263
# For each scrape context (friends ids, followers ids, favorites) with at
# least one success and a non-blank scraped_at, stores "now - scrape time"
# in the matching scrape_age_* attribute.
#
# NOTE(review): scrape_age_fr / scrape_age_fo / scrape_age_fv are commented
# out of the UserMetrics field list in this file -- confirm self[attr]= is
# valid for them.
def adopt_scraping_metrics(usm)
  context_attrs = [
    [:friends_ids,   :scrape_age_fr],
    [:followers_ids, :scrape_age_fo],
    [:favorites,     :scrape_age_fv],
  ]
  context_attrs.each do |context, attr|
    stamp = usm.get(context, :scraped_at)
    succeeded = usm.get(context, :successes).to_i >= 1
    next unless succeeded && !stamp.blank?
    # fudge bogus date records: anything before day zero is pinned to day zero
    stamp = if stamp.to_i < SCRAPING_DAY_ZERO_STR
              SCRAPING_DAY_ZERO
            else
              DateTime.parse_safely(stamp)
            end
    next if stamp.blank?
    self[attr] = now - stamp
  end
end
277
+
278
+ #
279
+ # From simple graph metrics
280
+ #
281
# Folds the simple graph metrics into this record via merge!.
def adopt_graph_metrics(user_graph_metrics)
  merge!(user_graph_metrics)
end
284
+
285
+ #
286
+ # User's Tweets -- from UserTweetMetrics
287
+ #
288
# Copies counters from one of the per-user tweet metric records:
#   UserTweetMetrics    -> tw_sampled, last_tw_at (parsed), tw_recent
#   UserHashtagMetrics  -> hashtag_sampled
#   UserTweetUrlMetrics -> tweet_url_sampled
# Raises a RuntimeError for any other argument.
def adopt_tweet_metrics(metrics)
  if UserTweetMetrics === metrics
    self.tw_sampled = metrics.tw_sampled
    self.last_tw_at = DateTime.parse_safely(metrics.last_tw_at)
    self.tw_recent  = metrics.tw_recent
  elsif UserHashtagMetrics === metrics
    self.hashtag_sampled = metrics.tw_sampled
  elsif UserTweetUrlMetrics === metrics
    self.tweet_url_sampled = metrics.tw_sampled
  else
    raise "Can't adopt #{metrics}"
  end
end
300
+
301
# Long-named writers that store into the short fields (fo, fr, tw, fv),
# converting the value with #to_i.
def followers_count=(fo)
  self.fo = fo.to_i
end

def friends_count=(fr)
  self.fr = fr.to_i
end

def statuses_count=(tw)
  self.tw = tw.to_i
end

def favourites_count=(fv)
  self.fv = fv.to_i
end

# Long-named readers for the same short fields.
def followers_count
  fo
end

def friends_count
  fr
end

def statuses_count
  tw
end

def favourites_count
  fv
end
309
+ #
310
+ # Larger of fr and fo
311
+ #
312
# Sets nbhd_size to the larger of fr and fo, ignoring whichever is nil.
def get_nbhd_size
  known_counts = [fr, fo].reject { |count| count.nil? }
  self.nbhd_size = known_counts.max
end
315
+ #
316
# Sets nbhd_bal to a value in 0..1 derived from the fr/fo ratio:
#   fr >  fo : 0.5 * fo / fr        (below 0.5)
#   fr <= fo : 1 - 0.5 * fr / fo    (0.5 and above)
# Does nothing unless both counts are present and at least one is positive.
def get_nbhd_bal
  return unless fr && fo
  return unless (fr.to_i > 0) || (fo.to_i > 0)
  balance = if fr > fo
              0.5 * fo.to_f / fr.to_f
            else
              1.0 - (0.5 * fr.to_f / fo.to_f)
            end
  self.nbhd_bal = balance
end
322
# Copies created_at into created_on.
def get_created_on
  stamp = created_at
  self.created_on = stamp
end
325
+
326
# Memoized short readers: each caches the first truthy value it sees.

# created_at
def crat
  @crat || (@crat = created_at)
end

# scraped_at
def scat
  @scat || (@scat = scraped_at)
end

# part_scraped_at
def part_scat
  @part_scat || (@part_scat = part_scraped_at)
end

# last_tw_at
def twat
  @twat || (@twat = last_tw_at)
end

# DateTime.now, captured once per record
def now
  @now || (@now = DateTime.now)
end
341
+
342
+
343
+ #
344
+ # duration --
345
+ #
346
# Whole number of days between crat and now, memoized; nil when crat is
# missing.
def age
  unless @age
    return unless crat
    @age = (now - crat).to_i
  end
  @age
end
351
# Whole number of days between crat and the later of scat / part_scat,
# memoized; nil when crat or both scrape times are missing.
def duration
  unless @duration
    return unless crat
    return unless scat || part_scat
    latest_scrape = [scat, part_scat].compact.max
    @duration = (latest_scrape - crat).to_i
  end
  @duration
end
357
# Sets last_tw_age to the whole number of days between the most recent tweet
# (twat) and now. Does nothing when there is no last-tweet time.
#
# The previous code computed crat - twat (account creation minus last tweet),
# which is not the tweet's age and is negative whenever the tweet postdates
# the account. This now measures from now, the same way age and
# get_age_user_scrape do.
def get_last_tw_age
  return unless twat
  self.last_tw_age = (now - twat).to_i
end
361
# Sets age_user_scrape to the whole number of days between scat and now;
# does nothing when scat is missing.
#
# NOTE(review): :age_user_scrape is commented out of the UserMetrics field
# list in this file -- confirm the writer exists before relying on this.
def get_age_user_scrape
  return if !scat
  elapsed = now - scat
  self.age_user_scrape = elapsed.to_i
end
365
+
366
+ #
367
+ # Per-day metrics
368
+ #
369
+ # Should possibly use duration but need to worry about which one.
370
+ #
371
# Rate getters. Each leaves its field untouched (and returns nil) when the
# time span is zero/nil or the underlying count is missing.

# Followers per week, over duration.
def get_fo_week
  return if (duration.to_i == 0) || !fo
  self.fo_week = 7 * (fo.to_f / duration)
end

# Friends per week, over age.
def get_fr_week
  return if (age.to_i == 0) || !fr
  self.fr_week = 7 * (fr.to_f / age)
end

# Tweets per day, over duration.
def get_tw_day
  return if (duration.to_i == 0) || !tw
  self.tw_day = (tw.to_f / duration)
end

# Favorites per month (30.4368499 days), over age.
def get_fv_mo
  return if (age.to_i == 0) || !fv
  self.fv_mo = 30.4368499 * (fv.to_f / age)
end
375
+ # def get_fr_fo() self.fr_fo = (fr.to_f / fo) unless (fo.to_i == 0) end
376
# Estimates recent tweets per day: the count of recently sampled tweets
# (tw_recent), scaled by the sampling multiplier TWEETS_SAMPLED_FRACTION and
# divided by the days elapsed since scraping day zero (SINCE_DAY_ZERO).
# Does nothing when tw_recent is missing.
#
# The previous code used tw (the lifetime tweet total) here. That applied the
# sampling multiplier and the since-day-zero window to a number that is
# neither sampled nor limited to that window.
def get_tw_day_recent
  return unless tw_recent
  self.tw_day_recent = TWEETS_SAMPLED_FRACTION * (tw_recent.to_f / SINCE_DAY_ZERO)
end
379
+
380
+ #
381
+ # Coverage: how many sampled vs. how many known to exist.
382
+ #
383
# Coverage getters: sampled count divided by the known total, skipped when
# the total is zero or nil.
#
# NOTE(review): fo_sampled, fr_sampled and the *_coverage fields are
# commented out of the UserMetrics field list in this file -- confirm they
# exist before calling these.

def get_fo_coverage
  return if fo.to_i == 0
  self.fo_coverage = (fo_sampled.to_f / fo)
end

def get_fr_coverage
  return if fr.to_i == 0
  self.fr_coverage = (fr_sampled.to_f / fr)
end

def get_tw_coverage
  return if tw.to_i == 0
  self.tw_coverage = (tw_sampled.to_f / tw)
end

def get_fv_coverage
  return if fv.to_i == 0
  self.fv_coverage = (fv_out_sampled.to_f / fv)
end
387
+
388
+ #
389
+ # Conversational metrics:
390
+ # favorites, @atsigns and RT's per tweet, in and out
391
+ # RT per @atsign, in and out
392
+ #
393
# Ratio getters over the sampled counters. Each is skipped (returns nil,
# field untouched) when the denominator is zero/nil or the numerator is nil.

# @atsigns out per sampled tweet
def get_at_tw_out
  return if (tw_sampled.to_i == 0) || !at_out_sampled
  self.at_tw_out = (at_out_sampled.to_f / tw_sampled.to_f)
end

# retweets out per sampled tweet
def get_rt_tw_out
  return if (tw_sampled.to_i == 0) || !rt_out_sampled
  self.rt_tw_out = (rt_out_sampled.to_f / tw_sampled.to_f)
end

# @atsigns in per sampled tweet
def get_at_in_tw_out
  return if (tw_sampled.to_i == 0) || !at_in_sampled
  self.at_in_tw_out = (at_in_sampled.to_f / tw_sampled.to_f)
end

# retweets in per sampled tweet
def get_rt_in_tw_out
  return if (tw_sampled.to_i == 0) || !rt_in_sampled
  self.rt_in_tw_out = (rt_in_sampled.to_f / tw_sampled.to_f)
end

# retweets out per @atsign out
def get_rt_at_out
  return if (at_out_sampled.to_i == 0) || !rt_out_sampled
  self.rt_at_out = (rt_out_sampled.to_f / at_out_sampled.to_f)
end

# retweets in per @atsign in
def get_rt_at_in
  return if (at_in_sampled.to_i == 0) || !rt_in_sampled
  self.rt_at_in = (rt_in_sampled.to_f / at_in_sampled.to_f)
end
401
+
402
+ #
403
+ # Reach:
404
+ #
405
+ # (your msgs/day) * |n1|
406
+ #
407
+ # How many of your messages/day might get read. Audience Share (tw_out_share)
408
+ # is a better measure of your impact.
409
+ #
410
# Sets reach to (tweets per day) * (follower count).
#
# Runs get_tw_day first and gives up when it yields nothing. Also gives up
# when fo is missing: previously a nil fo reached the multiplication and
# raised TypeError.
def get_reach
  return unless get_tw_day && fo
  self.reach = (tw_day.to_f * fo)
end
414
+
415
+
416
+ #
417
+ # Bins
418
+ #
419
# Bin getters: look each raw metric up in its bin class (via Class[value])
# and store the result in the matching *_bin field.

def get_fo_bin
  self.fo_bin = FoBin[fo]
end

def get_fr_bin
  self.fr_bin = FrBin[fr]
end

def get_nbhd_size_bin
  self.nbhd_size_bin = NbhdSizeBin[nbhd_size]
end

def get_nbhd_bal_bin
  self.nbhd_bal_bin = NbhdBalBin[nbhd_bal]
end

def get_tw_day_bin
  self.tw_day_bin = TwDayBin[tw_day]
end

def get_tw_day_recent_bin
  self.tw_day_recent_bin = TwDayRecentBin[tw_day_recent]
end
425
# Sets active to 1 when the user has at least 3 tweets and either more than
# 15 followers or a small neighborhood (>= 3 followers and >= 2 friends);
# otherwise 0. Does nothing when fo, fr or tw is missing.
def get_active
  counts_present = (fo && fr && tw)
  return unless counts_present
  enough_tweets    = tw >= 3
  many_followers   = fo > 15
  has_neighborhood = (fo >= 3) && (fr >= 2)
  is_active = counts_present && enough_tweets && (many_followers || has_neighborhood)
  self.active = is_active ? 1 : 0
end
432
+ end
433
+ end
434
+ end
435
+
436
+
437
+
438
+ #
439
+ # Executes only if run from command line
440
+ #
441
# When run as a script, print a tab-separated header line: "rsrc" followed by
# every UserMetrics member name.
if __FILE__ == $0
  member_columns = Wuclan::Models::UserMetrics.members.join("\t")
  puts "rsrc\t" + member_columns
end