wuclan 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,154 @@
1
+ require 'monkeyshines/scrape_request/raw_json_contents'
2
+ module Wuclan
3
+ module Lastfm
4
+ module Scrape
5
+
6
+ #
7
+ # Simple requestables
8
+ #
9
+
10
# Reopens LastfmArtistInfoRequest to set its class-level +requestables+ list.
# NOTE(review): presumably these are the follow-up request classes issued for
# each artist -- confirm against the +requestables+ definition in scrape/base.rb.
class LastfmArtistInfoRequest
  # Left out on purpose (were commented out in the original list):
  #   LastfmArtistTopTagsRequest, LastfmArtistImagesRequest, LastfmArtistPodcastRequest
  self.requestables = [
    LastfmArtistSimilarRequest, LastfmArtistTopAlbumsRequest, LastfmArtistTopTracksRequest,
    LastfmArtistShoutsRequest,  LastfmArtistEventsRequest,    LastfmArtistTopFansRequest,
  ]
end
21
+
22
# Reopens LastfmTrackInfoRequest to set its class-level +requestables+ list.
class LastfmTrackInfoRequest
  self.requestables = [
    LastfmTrackSimilarRequest,
    LastfmTrackTopFansRequest,
    LastfmTrackTopTagsRequest,
  ]
end
25
# Reopens LastfmEventInfoRequest to set its class-level +requestables+ list.
class LastfmEventInfoRequest
  self.requestables = [
    LastfmEventAttendeesRequest,
    LastfmEventShoutsRequest,
  ]
end
28
# Reopens LastfmUserTopTagsRequest to set its class-level +requestables+ list.
# The original author tagged this class with "LastfmUserInfoRequest"; the
# user-info request itself is listed below under "needs auth".
#
# Deliberately omitted from the list:
#   * LastfmUserTopTagsRequest (this class itself)
#   * uninteresting(?): LastfmUserPlaylistsRequest, LastfmUserWeeklyAlbumChartRequest,
#     LastfmUserWeeklyArtistChartRequest, LastfmUserWeeklyChartListRequest,
#     LastfmUserWeeklyTrackChartRequest
#   * needs auth: LastfmUserInfoRequest, LastfmUserRecentStationsRequest,
#     LastfmUserRecommendedArtistsRequest, LastfmUserRecommendedEventsRequest
class LastfmUserTopTagsRequest # LastfmUserInfoRequest
  self.requestables = [
    LastfmUserEventsRequest,
    LastfmUserPastEventsRequest,
    LastfmUserFriendsRequest,       # recenttracks
    LastfmUserNeighboursRequest,
    LastfmUserLovedTracksRequest,
    LastfmUserRecentTracksRequest,
    LastfmUserShoutsRequest,
    # The three below take an optional period:
    #   overall | 7day | 3month | 6month | 12month
    LastfmUserTopAlbumsRequest,
    LastfmUserTopArtistsRequest,
    LastfmUserTopTracksRequest,
  ]
end
45
+
46
+ #
47
+ # Recursive requests based on contents
48
+ #
49
+
50
# Mixin for request classes whose identifier may carry a "&period=" suffix.
module LastfmTimeWindowed
  # Runs the inherited recursive_requests, then -- only when this request's
  # identifier has no "&period=" part yet -- yields one sibling request of the
  # same class per period in 7day / 3month / 6month.
  #
  # The siblings keep this request's generation (no +1, unlike the
  # LastfmContains* mixins in this file).
  def recursive_requests *args, &block
    super(*args, &block)
    return if identifier =~ /&period=/
    %w[7day 3month 6month].each do |window|
      windowed = self.class.new(identifier + "&period=#{window}")
      windowed.generation = generation.to_i
      yield windowed
    end
  end
end
62
# Mix LastfmTimeWindowed into each of the user "top ..." request classes.
[
  LastfmUserTopArtistsRequest,
  LastfmUserTopAlbumsRequest,
  LastfmUserTopTracksRequest,
].each { |request_class| request_class.send(:include, LastfmTimeWindowed) }
66
+
67
# Mixin for requests whose +items+ are artist entries.
module LastfmContainsArtists
  # Runs the inherited recursive_requests, then yields a
  # LastfmArtistInfoRequest for every entry in +items+, built from the
  # url-encoded 'name' field and placed one generation after this request.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |artist_entry|
      encoded_name = url_encode(artist_entry['name'])
      follow_up    = LastfmArtistInfoRequest.new(encoded_name)
      follow_up.generation = generation.to_i + 1
      yield follow_up
    end
  end
end
77
# Mix LastfmContainsArtists into every request class that returns artists.
[
  LastfmArtistSimilarRequest,
  LastfmGeoTopArtistsRequest,
  LastfmTagTopArtistsRequest,
  LastfmUserRecommendedArtistsRequest,
  LastfmUserTopArtistsRequest,
].each { |request_class| request_class.send(:include, LastfmContainsArtists) }
82
+
83
# Mixin for requests whose +items+ are album entries.
module LastfmContainsAlbums
  # Runs the inherited recursive_requests, then yields a
  # LastfmAlbumInfoRequest for every entry in +items+, one generation after
  # this request.
  #
  # The artist is read from the entry's 'artist' sub-hash ('name', falling
  # back to '#text'); any StandardError raised while digging for it leaves
  # the artist as nil.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |album|
      artist_name =
        begin
          album['artist']['name'] || album['artist']['#text']
        rescue StandardError
          nil
        end
      follow_up = LastfmAlbumInfoRequest.from_identifier_hash(
        album['name'],
        :artist => artist_name,
        :mbid   => album['mbid']
      )
      follow_up.generation = generation.to_i + 1
      yield follow_up
    end
  end
end
95
# Mix LastfmContainsAlbums into every request class that returns albums.
[
  LastfmArtistTopAlbumsRequest,
  LastfmTagTopAlbumsRequest,
  LastfmUserTopAlbumsRequest,
].each { |request_class| request_class.send(:include, LastfmContainsAlbums) }
99
+
100
# Mixin for requests whose +items+ are track entries.
module LastfmContainsTracks
  # Runs the inherited recursive_requests, then yields a
  # LastfmTrackInfoRequest for every entry in +items+, one generation after
  # this request.
  #
  # The artist is read from the entry's 'artist' sub-hash ('name', falling
  # back to '#text'); any StandardError raised while digging for it leaves
  # the artist as nil.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |entry|
      artist_name =
        begin
          entry['artist']['name'] || entry['artist']['#text']
        rescue StandardError
          nil
        end
      follow_up = LastfmTrackInfoRequest.from_identifier_hash(
        entry['name'],
        :artist => artist_name,
        :mbid   => entry['mbid']
      )
      follow_up.generation = generation.to_i + 1
      yield follow_up
    end
  end
end
112
# Mix LastfmContainsTracks into every request class that returns tracks.
[
  LastfmArtistTopTracksRequest,
  LastfmGeoTopTracksRequest,
  LastfmTagTopTracksRequest,
  LastfmTrackSimilarRequest,
  LastfmUserLovedTracksRequest,
  LastfmUserRecentTracksRequest,
  LastfmUserTopTracksRequest,
].each { |request_class| request_class.send(:include, LastfmContainsTracks) }
118
+
119
# Mixin for requests whose +items+ are event entries.
module LastfmContainsEvents
  # Runs the inherited recursive_requests, then yields a
  # LastfmEventInfoRequest for every entry in +items+, keyed by the entry's
  # 'id' and placed one generation after this request.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |event_entry|
      follow_up = LastfmEventInfoRequest.new(event_entry['id'])
      follow_up.generation = generation.to_i + 1
      yield follow_up
    end
  end
end
129
# Mix LastfmContainsEvents into every request class that returns events.
[
  LastfmArtistEventsRequest,
  LastfmGeoEventsRequest,
  LastfmUserEventsRequest,
  LastfmUserPastEventsRequest,
  LastfmUserRecommendedEventsRequest,
  LastfmVenueEventsRequest,
  LastfmVenuePastEventsRequest,
].each { |request_class| request_class.send(:include, LastfmContainsEvents) }
135
+
136
# Mixin for requests whose +items+ are user entries.
module LastfmContainsUsers
  # Runs the inherited recursive_requests, then yields a
  # LastfmUserTopTagsRequest for every entry in +items+, built from the
  # url-encoded 'name' field and placed one generation after this request.
  def recursive_requests *args, &block
    super(*args, &block)
    items.each do |user_entry|
      encoded_name = url_encode(user_entry['name'])
      follow_up    = LastfmUserTopTagsRequest.new(encoded_name)
      follow_up.generation = generation.to_i + 1
      yield follow_up
    end
  end
end
146
# Mix LastfmContainsUsers into every request class that returns users.
[
  LastfmArtistTopFansRequest,
  LastfmEventAttendeesRequest,
  LastfmGroupMembersRequest,
  LastfmTrackTopFansRequest,
  LastfmUserFriendsRequest,
  LastfmUserNeighboursRequest,
].each { |request_class| request_class.send(:include, LastfmContainsUsers) }
151
+
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,12 @@
1
+ require 'monkeyshines/scrape_request/raw_json_contents'
2
+ require 'wuclan/lastfm/scrape/base.rb'
3
+ require 'wuclan/lastfm/scrape/concrete.rb'
4
+
5
module Wuclan
  module Lastfm
    # Scraping namespace for Last.fm. The constants below are registered for
    # lazy loading: each file is only required the first time its constant
    # is referenced.
    module Scrape
      [
        [:LastfmJob,           'wuclan/lastfm/scrape/lastfm_job.rb'],
        [:LastfmRequestStream, 'wuclan/lastfm/scrape/lastfm_request_stream.rb'],
      ].each do |const_name, path|
        autoload(const_name, path)
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'monkeyshines/scrape_request/raw_json_contents'
2
module Wuclan
  # Top-level Last.fm namespace; sub-namespaces are registered for lazy
  # loading on first reference.
  module Lastfm
    autoload(:Scrape, 'wuclan/lastfm/scrape.rb')
    # NOTE(review): the 0.2.0 gem file list shows no lib/wuclan/lastfm/parse.rb,
    # so referencing Wuclan::Lastfm::Parse presumably fails with LoadError --
    # confirm, then either ship the file or drop this registration.
    autoload(:Parse,  'wuclan/lastfm/parse.rb')
  end
end
@@ -0,0 +1,99 @@
1
+ module Wuclan
2
+ module Models
3
#
# Per-user graph metrics record.
#
# The Struct members are the user +id+ followed by "*_with" and "*_sampled"
# fields grouped by a two-letter prefix.
# NOTE(review): the prefixes presumably mean fo=followers, fr=friends,
# re=replies, at=@-mentions, rt=retweets, fv=favorites -- confirm.
#
# The derived measures written by the get_* methods (tw_out_share,
# n1i_fv_share, n1o_strong, n1i_strong) are plain accessors, NOT Struct
# members, so the member list, #to_a and positional construction are
# unaffected by them.
#
class UserGraphMetrics < Struct.new(
    :id,
    :any_with,
    :any_out_with,
    :any_in_with,
    #
    :fo_sampled,
    :fr_sampled,
    #
    :re_out_sampled,
    :re_in_sampled,
    #
    :at_out_sampled,
    :at_in_sampled,
    :at_out_with,
    :at_in_with,
    #
    :rt_out_sampled,
    :rt_in_sampled,
    :rt_out_with,
    :rt_in_with,
    #
    :fv_out_sampled,
    :fv_in_sampled,
    :fv_out_with,
    :fv_in_with
    )

  # Targets of the get_* methods below. Without these accessors every
  # `self.<field> = ...` assignment in this class raised NoMethodError,
  # because none of the four names is a Struct member.
  attr_accessor :tw_out_share, :n1i_fv_share, :n1o_strong, :n1i_strong

  # ===========================================================================
  #
  # Graph Measures
  #

  #
  # Influx:
  #
  #   (messages/day) from all your n1
  #
  # This says how many messages you see go by in a day.
  #
  # A person with a massive influx is either not reading any tweets (uses
  # twitter as a podium), is dipping into twitter as a news river (we should
  # discount follow links), or is using a tool such as TweetDeck to fake
  # follow (we should more aggressively segregate their strong links)
  #
  # Not implemented yet: the body is empty and the method returns nil.
  #
  def get_influx()
    #
  end

  #
  # tw_out_share -- Audience Share:
  #
  #   (your msgs out/day) / (Sum over n1o of msgs in / day)
  #
  # This says how much of your followers' attention is occupied by your tweet
  # stream
  #
  # Copies +tw_out_share+ from +twitter_user+ onto this record and returns it.
  #
  # +twitter_user+ is optional only for backward compatibility: the original
  # signature took no argument yet read `twitter_user`. When omitted (or nil)
  # we fall back to a +twitter_user+ method on this object, which raises
  # NoMethodError (a NameError, as before) if no such method exists.
  #
  def get_tw_out_share(twitter_user = nil)
    twitter_user ||= send(:twitter_user)
    self.tw_out_share = twitter_user.tw_out_share
  end

  #
  # n1i_fv_share -- Sum, for all that favorite you, of
  #
  #   (favs to you) / max[20, number faved]
  #
  # if I have 12 faves and four are to you you get (4/20)favshare ; if I have
  # twenty-four, and four are to you, that makes a (1/6)favshare contribution.
  #
  # Copies +n1i_fv_share+ from +twitter_user+ onto this record and returns it.
  #
  def get_n1i_fv_share(twitter_user)
    self.n1i_fv_share = twitter_user.n1i_fv_share
  end

  #
  # n1o_strong -- Strong links out
  #
  # Copies +n1o_strong+ from +twitter_user+ onto this record and returns it.
  #
  def get_n1o_strong(twitter_user)
    self.n1o_strong = twitter_user.n1o_strong
  end

  #
  # n1i_strong -- Strong links in
  #
  # Copies +n1i_strong+ from +twitter_user+ onto this record and returns it.
  #
  def get_n1i_strong(twitter_user)
    self.n1i_strong = twitter_user.n1i_strong
  end

  #
  # cluster_coeff -- Strong links between members of n1 over number of possible
  # links between members of n1
  #
  # Not implemented yet: the body is empty and the method returns nil.
  #
  def cluster_coeff(twitter_user, multi_edge)
  end
end
97
+
98
+ end
99
+ end