wuclan 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111)
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
require 'set'

#
# Streams twitter API requests: for each seed identifier it fetches the user
# record first, then issues any requested follow-on requests (followers,
# friends, ids, ...) sized from the user's own counts.
#
class TwitterRequestStream < Monkeyshines::RequestStream::SimpleRequestStream
  # Namespace searched when turning option strings into request classes.
  DEFAULT_REQUEST_SCOPE = Wuclan::Twitter::Scrape
  TwitterRequestStream::DEFAULT_OPTIONS = { :klass => TwitterUserRequest, }

  # @param _options [Hash] passed through to SimpleRequestStream;
  #   options[:fetches] names the follow-on request classes.
  def initialize _options={}
    super _options
    self.request_klasses = options[:fetches]
  end

  # Set the list of follow-on requests from a spec string such as
  # 'followers_ids,friends_ids'.  The user request is always issued first
  # on its own, so it is removed from the follow-on set.
  def request_klasses=(klass_names)
    klasses = FactoryModule.list_of_classes(DEFAULT_REQUEST_SCOPE, klass_names, 'twitter', 'request')
    @request_klasses = klasses.to_set
    @request_klasses.delete TwitterUserRequest
  end

  # Get the user and then yield all other requested classes.
  #
  # The user's parameters (followers_count, etc.) fix the number of items to
  # request, and the user's numeric ID replaces the supplied identifier (the
  # first request may use a screen_name, but followers requests etc. need
  # the numeric ID).
  def each_request twitter_user_id, *args
    user_req = TwitterUserRequest.new(twitter_user_id)
    yield(user_req)
    return unless user_req.healthy?
    numeric_id = user_req.parsed_contents['id'].to_i
    twitter_user_id = numeric_id if numeric_id > 0
    @request_klasses.each do |request_klass|
      follow_on = request_klass.new(twitter_user_id)
      follow_on.set_total_items user_req.parsed_contents
      yield follow_on
    end
  end

  #
  # For each job in the request store: gets the user, and then each of the
  # requests in the follow-on request classes.
  #
  def each *args, &block
    request_store.each{|*raw_job_args| each_request(*raw_job_args, &block) }
  end
end

#
# Stand-in fetcher for the twitter Search API: fabricates a plausible JSON
# response instead of hitting the network, for exercising request scheduling.
#
class TwitterSearchFakeFetcher < Monkeyshines::Fetcher::FakeFetcher
  # Simulated rate at which items arrive.
  cattr_accessor :items_rate

  # Fake timestamp for the given item offset: counts backwards from base
  # (default 86_400, one day) at items_rate items per unit time.
  def self.fake_time item_on_page, base=nil
    base ||= 86_400
    base - (item_on_page.to_f / items_rate)
  end

  # Build a fake parsed-search-response hash for the given request.
  # The magic query terms '_no_results' and '_one_result' trigger those
  # special cases; any other term yields a full page of 100 results.
  def fake_contents req
    page_start_time = self.class.fake_time((req.page - 1) * 105)
    max_id = page_start_time.to_i
    case req.query_term
    when '_no_results' then return { :max_id => -1, :results => [],}
    when '_one_result' then result_count = 1
    else                    result_count = 100
    end
    results = (0 ... result_count).map do |idx|
      { :text       => "%s-%04d-%03d"%[req.query_term, req.page, idx],
        :created_at => Time.now - (86_400 - self.class.fake_time(idx, page_start_time)),
        # NOTE(review): :id is seeded from the truncated max_id while
        # :created_at uses the un-truncated page start time -- confirm intended.
        :id         => (self.class.fake_time(idx, max_id)*100).to_i }
    end
    { :max_id => max_id,
      :results => results }
  end

  # Fetch hook: run the normal FakeFetcher bookkeeping, then attach the
  # fabricated contents to the request as a JSON string.
  def get req
    super req
    req.contents = fake_contents(req).to_json
    req
  end
end
34
+
35
+ # TwitterSearchRequestStream.class_eval do
36
+ # def do_faking job
37
+ # TwitterSearchFakeFetcher.items_rate = (1 / job.scheduling.delay) || 1
38
+ # # job.scheduling.prev_max = (TwitterSearchFakeFetcher.fake_time(rand(15) * 105)*100).to_i
39
+ # p [
40
+ # job.scheduling.prev_max,
41
+ # TwitterSearchFakeFetcher.fake_time(0).to_i
42
+ # ]
43
+ # end
44
+ # end
@@ -0,0 +1,30 @@
1
+ #
2
+ # #
3
+ # #
4
+ # # query terms must be URL-encoded
5
+ # # (use '+' for space; %23 # %27 ' etc)
6
+ # #
7
+ # def initialize *args
8
+ # super *args
9
+ # raise "Query term missing" if self.query_term.blank?
10
+ # self[:query_term].strip!
11
+ # [:priority, :prev_items, :prev_span_min, :prev_span_max].each{|attr| self[attr] = self[attr].to_i if self[attr] }
12
+ # self[:prev_rate] = self[:prev_rate].to_f
13
+ # self[:priority] = DEFAULT_PRIORITY if (self[:priority] == 0)
14
+ # self[:prev_rate] = nil if (self[:prev_rate] < 1e-6)
15
+ # end
16
+ #
17
+ # class TwitterSearchStream < Monkeyshines::RequestStream::SimpleRequestStream
18
+ # #
19
+ # # for the given user_id,
20
+ # # gets the user
21
+ # # and then each of the requests in more_request_klasses
22
+ # #
23
+ # def each *args, &block
24
+ # request_store.each do |*raw_job_args|
25
+ # job = klass.new(*raw_job_args)
26
+ # # do_faking(job)
27
+ # job.each_request(*args, &block)
28
+ # end
29
+ # end
30
+ # end
# Edamame job wrapping a twitter search scrape: pages through search results
# and reschedules itself based on the observed item rate.
class TwitterSearchJob < Edamame::Job
  #
  # Pagination
  #
  include Monkeyshines::ScrapeRequestCore::Paginating
  include Monkeyshines::ScrapeRequestCore::PaginatedWithLimit
  include Monkeyshines::ScrapeRequestCore::PaginatedWithRate

  # API max pages
  self.hard_request_limit = 15

  # Items to get on each re-visit.  With up to 50 items per page, a target
  # of 1000 reschedules so that a return visit makes about twenty page
  # requests.  Twitter caps total results at 1500, so 1000 leaves a margin.
  self.target_items_per_job = 1000

  # Creates the paginated request for the given page number, tacking on the
  # per-page count and (when a previous session is known) the max_id bound.
  def request_for_page page, info=nil
    page_req = TwitterSearchRequest.new(obj[:key], page)
    page_req.url << "&rpp=#{page_req.max_items}"
    page_req.url << "&max_id=#{sess_span.min - 1}" if sess_span.min
    page_req
  end
end
module Wuclan
  module Twitter
    module Scrape
      #
      # ScrapeRequest for the twitter Search API.
      #
      # Examines the parsed contents to describe the result: the items
      # themselves, their id span, and their created_at timespan.
      #
      class TwitterSearchRequest < Monkeyshines::ScrapeRequest
        # Contents are JSON
        include Monkeyshines::RawJsonContents
        # Pagination
        include Monkeyshines::ScrapeRequestCore::Paginated
        # API max items per response
        self.max_items = 100
        # API max pages
        self.hard_request_limit = 15

        # Accepts either a bare (URL-encoded) query term or a full search
        # URL; in the latter case the q= parameter becomes the identifier.
        def initialize *args
          if args.first =~ %r{\Ahttp://.*q=([^&]+)&}
            super $1, nil, {}, *args
          else
            super *args
          end
        end

        # Request URL built from the query term.
        def make_url
          "http://search.twitter.com/search.json?q=#{query_term}"
        end

        # The identifier doubles as the query term ...
        def query_term
          identifier
        end

        # ... and as the job key.
        def key
          identifier
        end

        # Checks that the response parses and has the right data structure.
        # if healthy? is true things should generally work
        def healthy?
          items && items.is_a?(Array)
        end

        #
        # Rescheduling
        #

        # Extract the actual search items returned
        def items
          parsed_contents['results'] if parsed_contents
        end

        # Span of IDs as [oldest, newest].  Assumes the response lists ids in
        # sort order newest-first (which the twitter API provides); nil when
        # items are absent or malformed.
        def span
          [items.last['id'], items.first['id']] rescue nil
        end

        # Span of created_at times (UTC) covered by this request; useful for
        # rate estimation.  nil when items are absent or malformed.
        def timespan
          [Time.parse(items.last['created_at']).utc, Time.parse(items.first['created_at']).utc] rescue nil
        end

      end
    end
  end
end
#
# Request stream for the twitter Search API.
#
# Pulls TwitterSearchJobs off an edamame queue; each job manages a series of
# paginated requests from the first result back to the last item seen by the
# previous scrape of that query.
#
class TwitterSearchRequestStream < Monkeyshines::RequestStream::EdamameQueue
  # priority for search jobs if not otherwise given
  QUEUE_PRIORITY = 65536

  # Reserve jobs from the queue and run each job's paginated requests,
  # handing every request to the supplied block.
  def each *args, &block
    work(queue_request_timeout, TwitterSearchJob) do |search_job|
      # do_faking(qjob)
      search_job.each_request(&block)
    end
  end
end
module Wuclan
  module Twitter
    module Scrape

      # Shared behavior for timeline requests: the extracted JSON is an
      # array of tweet hashes, each carrying its author.
      class TimelineRequest < Wuclan::Twitter::Scrape::Base

        # Extracted JSON should be an array
        def healthy?()
          parsed_contents && parsed_contents.is_a?(Array)
        end

        #
        # unpacks the raw API response, yielding all the interesting objects
        # and relationships within.
        #
        def parse *args, &block
          return unless healthy?
          parsed_contents.each do |tweet_hash|
            json_obj = JsonTweetWithUser.new(tweet_hash, 'scraped_at' => scraped_at)
            next unless json_obj && json_obj.healthy?
            # Extract user, tweet and relationship
            json_obj.each(&block)
          end
        end
      end

      #
      # API request for a user's status timeline.
      # Maximum 16 pages, 200 a pop.
      #
      # Produces up to 200 Tweets per request.
      #
      # http://apiwiki.twitter.com/Twitter-REST-API-Method%3A-statuses-user_timeline
      #
      class TwitterUserTimelineRequest < Wuclan::Twitter::Scrape::TimelineRequest
        self.resource_path      = 'statuses/user_timeline'
        self.hard_request_limit = 16
        self.max_items          = 200

        # How many items this user could supply in total.
        def items_count(thing) thing.status_count end

        # Url from properties
        def make_url
          "http://twitter.com/#{resource_path}/#{twitter_user_id}.json?page=#{page}&count=#{max_items}"
        end

        # set max_total_items from the statuses_count.
        def set_total_items twitter_user_info
          self.max_total_items = twitter_user_info['statuses_count'].to_i rescue nil
        end
      end

      #
      # API request for public timeline
      #
      # Not available any more after May 2009 -- use Hosebird
      #
      class TwitterPublicTimelineRequest < Wuclan::Twitter::Scrape::TimelineRequest
        self.resource_path      = 'statuses/public_timeline'
        self.hard_request_limit = 1
        self.max_items          = 600

        # Each public-timeline grab counts as a single item.
        def items_count(thing) 1 end

        def make_url() "http://twitter.com/#{resource_path}.json" end
      end

      # class HosebirdRequest < Wuclan::Twitter::Scrape::Base
      #   #self.resource_path = 'statuses/public_timeline'
      # end
    end

  end
end
module Wuclan
  module Twitter
    module Scrape

      #
      # API request for a user profile.
      #
      # Produces a TwitterUser,Profile,Style
      #
      # http://apiwiki.twitter.com/Twitter-REST-API-Method%3A-users%C2%A0show
      #
      class TwitterUserRequest < Wuclan::Twitter::Scrape::Base
        self.resource_path = 'users/show'
        self.hard_request_limit = 1
        self.max_items = 1
        # A user request always accounts for exactly one item.
        def items_count(thing) 1 end

        # Extracted JSON should be a single user_with_tweet hash
        def healthy?()
          parsed_contents && parsed_contents.is_a?(Hash)
        end

        # Generate request URL
        def make_url
          "http://twitter.com/#{resource_path}/#{twitter_user_id}.json"
        end

        # Jobs are keyed on the twitter user id.
        def key
          twitter_user_id
        end

        #
        # unpacks the raw API response, yielding all the interesting objects
        # and relationships within.
        #
        def parse *args, &block
          return unless healthy?
          json_obj = JsonUserWithTweet.new(parsed_contents, 'scraped_at' => scraped_at)
          # BUGFIX: was `next`, which is only valid inside a block and raises
          # LocalJumpError in a plain method body; `return` is what was meant.
          return unless json_obj && json_obj.healthy?
          # Extract user and tweet
          json_obj.each(&block)
        end

      end
    end
  end
end

50
+ # language: http://en.wikipedia.org/wiki/ISO_639-1
51
+ #
52
+ # * Find tweets containing a word: http://search.twitter.com/search.atom?q=twitter
53
+ # * Find tweets from a user: http://search.twitter.com/search.atom?q=from%3Aalexiskold
54
+ # * Find tweets to a user: http://search.twitter.com/search.atom?q=to%3Atechcrunch
55
+ # * Find tweets referencing a user: http://search.twitter.com/search.atom?q=%40mashable
56
+ # * Find tweets containing a hashtag: http://search.twitter.com/search.atom?q=%23haiku
57
+ # * Combine any of the operators together: http://search.twitter.com/search.atom?q=movie+%3A%29
58
+ #
59
+ # * lang: restricts tweets to the given language, given by an ISO 639-1 code. Ex: http://search.twitter.com/search.atom?lang=en&q=devo
60
+ # * rpp: the number of tweets to return per page, up to a max of 100. Ex: http://search.twitter.com/search.atom?lang=en&q=devo&rpp=15
61
+ # * page: the page number (starting at 1) to return, up to a max of roughly 1500 results (based on rpp * page)
62
+ # * since_id: returns tweets with status ids greater than the given id.
63
+ # * geocode: returns tweets by users located within a given radius of the given latitude/longitude, where the user's location is taken from their Twitter profile. The parameter value is specified by "latitide,longitude,radius", where radius units must be specified as either "mi" (miles) or "km" (kilometers). Ex: http://search.twitter.com/search.atom?geocode=40.757929%2C-73.985506%2C25km. Note that you cannot use the near operator via the API to geocode arbitrary locations; however you can use this geocode parameter to search near geocodes directly.
64
+ # * show_user: when "true", adds "<user>:" to the beginning of the tweet. This is useful for readers that do not display Atom's author field. The default is "false".
module Wuclan
  module Twitter
    # Lazily-loaded request/stream classes for scraping the twitter APIs.
    module Scrape
      # Search API
      autoload :TwitterSearchRequest,         'wuclan/twitter/scrape/twitter_search_request'
      autoload :TwitterSearchJob,             'wuclan/twitter/scrape/twitter_search_job'
      # Main API
      autoload :Base,                         'wuclan/twitter/scrape/base'
      autoload :TwitterUserRequest,           'wuclan/twitter/scrape/twitter_user_request'
      autoload :TwitterFollowersRequest,      'wuclan/twitter/scrape/twitter_followers_request'
      # several related request classes share a single source file
      autoload :TwitterFriendsRequest,        'wuclan/twitter/scrape/twitter_followers_request'
      autoload :TwitterFavoritesRequest,      'wuclan/twitter/scrape/twitter_followers_request'
      autoload :TwitterFollowersIdsRequest,   'wuclan/twitter/scrape/twitter_ff_ids_request'
      autoload :TwitterFriendsIdsRequest,     'wuclan/twitter/scrape/twitter_ff_ids_request'
      autoload :TwitterUserTimelineRequest,   'wuclan/twitter/scrape/twitter_timeline_request'
      autoload :TwitterPublicTimelineRequest, 'wuclan/twitter/scrape/twitter_timeline_request'
      autoload :JsonUserWithTweet,            'wuclan/twitter/scrape/twitter_json_response'
      autoload :JsonTweetWithUser,            'wuclan/twitter/scrape/twitter_json_response'

    end
  end
end
# NOTE(review): the autoloads below are registered at the top level (on
# Object), not inside Wuclan::Twitter::Scrape -- the referenced files define
# top-level classes.  Confirm this is intentional before moving them.
autoload :TwitterRequestStream,       'wuclan/twitter/scrape/twitter_request_stream'
autoload :TwitterFakeFetcher,         'wuclan/twitter/scrape/twitter_fake_fetcher'
autoload :TwitterSearchRequestStream, 'wuclan/twitter/scrape/twitter_search_request_stream'
autoload :TwitterSearchFakeFetcher,   'wuclan/twitter/scrape/twitter_search_fake_fetcher'
autoload :TwitterSearchJob,           'wuclan/twitter/scrape/twitter_search_job'
# Namespace stub: lazily loads the twitter scraping and modeling subsystems.
module Wuclan
  module Twitter
    # Request classes and streams for the twitter APIs
    autoload :Scrape, 'wuclan/twitter/scrape'
    # Parsed-object model (users, tweets, relationships, ...)
    autoload :Model,  'wuclan/twitter/model'
  end
end

data/lib/wuclan.rb ADDED
@@ -0,0 +1 @@
1
+
require 'spec'

# Make spec/ and lib/ requireable so specs can `require 'wuclan'` directly.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
require 'wuclan'

# RSpec 1.x global configuration (none needed yet).
Spec::Runner.configure do |config|

end
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Placeholder spec generated with the project skeleton; fails on purpose
# until real specs are written.
describe "Wuclan" do
  it "fails" do
    fail "hey buddy, you should probably rename this file and start specing for real"
  end
end
data/wuclan.gemspec ADDED
# Generated by jeweler
# DO NOT EDIT THIS FILE
# Instead, edit Jeweler::Tasks in Rakefile, and run `rake gemspec`
# -*- encoding: utf-8 -*-
# NOTE(review): removed a duplicated s.files entry
# ("lib/wuclan/twitter/api_response_examples.textile" appeared twice);
# the real fix belongs in the Rakefile's Jeweler::Tasks file glob.

Gem::Specification.new do |s|
  s.name = %q{wuclan}
  s.version = "0.2.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Philip (flip) Kromer"]
  s.date = %q{2009-10-12}
  s.description = %q{Massive-scale social network analysis. Nothing to f with.}
  s.email = %q{flip@infochimps.org}
  s.extra_rdoc_files = [
    "LICENSE.textile",
    "README.textile"
  ]
  s.files = [
    "LICENSE.textile",
    "README.textile",
    "examples/analyze/strong_links/gen_multi_edge.rb",
    "examples/analyze/strong_links/main.rb",
    "examples/analyze/word_count/dump_schema.rb",
    "examples/analyze/word_count/freq_user.rb",
    "examples/analyze/word_count/freq_whole_corpus.rb",
    "examples/analyze/word_count/word_count.pig",
    "examples/analyze/word_count/word_count.rb",
    "examples/lastfm/scrape/load_lastfm.rb",
    "examples/lastfm/scrape/scrape_lastfm.rb",
    "examples/lastfm/scrape/seed.tsv",
    "examples/twitter/old/load_twitter_search_jobs.rb",
    "examples/twitter/old/scrape_twitter_api.rb",
    "examples/twitter/old/scrape_twitter_search.rb",
    "examples/twitter/old/scrape_twitter_trending.rb",
    "examples/twitter/parse/parse_twitter_requests.rb",
    "examples/twitter/parse/parse_twitter_search_requests.rb",
    "examples/twitter/scrape_twitter_api/scrape_twitter_api.rb",
    "examples/twitter/scrape_twitter_api/seed.tsv",
    "examples/twitter/scrape_twitter_api/start_cache_twitter.sh",
    "examples/twitter/scrape_twitter_api/support/make_request_stats.rb",
    "examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb",
    "examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig",
    "examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv",
    "examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv",
    "examples/twitter/scrape_twitter_hosebird/edamame-killall",
    "examples/twitter/scrape_twitter_hosebird/foo.rb",
    "examples/twitter/scrape_twitter_hosebird/ps_emulation.rb",
    "examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb",
    "examples/twitter/scrape_twitter_hosebird/test_spewer.rb",
    "examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml",
    "examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb",
    "examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb",
    "examples/twitter/scrape_twitter_search/scrape_twitter_search.rb",
    "examples/twitter/scrape_twitter_search/twitter_search_daemons.god",
    "lib/old/twitter_api.rb",
    "lib/wuclan.rb",
    "lib/wuclan/delicious/delicious_html_request.rb",
    "lib/wuclan/delicious/delicious_models.rb",
    "lib/wuclan/delicious/delicious_request.rb",
    "lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb",
    "lib/wuclan/friendster.rb",
    "lib/wuclan/lastfm.rb",
    "lib/wuclan/lastfm/model/base.rb",
    "lib/wuclan/lastfm/model/sample_responses.txt",
    "lib/wuclan/lastfm/scrape.rb",
    "lib/wuclan/lastfm/scrape/base.rb",
    "lib/wuclan/lastfm/scrape/concrete.rb",
    "lib/wuclan/lastfm/scrape/lastfm_job.rb",
    "lib/wuclan/lastfm/scrape/lastfm_request_stream.rb",
    "lib/wuclan/lastfm/scrape/recursive_requests.rb",
    "lib/wuclan/metrics.rb",
    "lib/wuclan/metrics/user_graph_metrics.rb",
    "lib/wuclan/metrics/user_metrics.rb",
    "lib/wuclan/metrics/user_metrics_basic.rb",
    "lib/wuclan/metrics/user_scraping_metrics.rb",
    "lib/wuclan/myspace.rb",
    "lib/wuclan/open_social.rb",
    "lib/wuclan/open_social/model/base.rb",
    "lib/wuclan/open_social/scrape/base.rb",
    "lib/wuclan/open_social/scrape_request.rb",
    "lib/wuclan/rdf_output/relationship_rdf.rb",
    "lib/wuclan/rdf_output/text_element_rdf.rb",
    "lib/wuclan/rdf_output/tweet_rdf.rb",
    "lib/wuclan/rdf_output/twitter_rdf.rb",
    "lib/wuclan/rdf_output/twitter_user_rdf.rb",
    "lib/wuclan/shorturl/shorturl_request.rb",
    "lib/wuclan/twitter.rb",
    "lib/wuclan/twitter/api_response_examples.textile",
    "lib/wuclan/twitter/model.rb",
    "lib/wuclan/twitter/model/base.rb",
    "lib/wuclan/twitter/model/multi_edge.rb",
    "lib/wuclan/twitter/model/relationship.rb",
    "lib/wuclan/twitter/model/text_element.rb",
    "lib/wuclan/twitter/model/text_element/extract_info_tests.rb",
    "lib/wuclan/twitter/model/text_element/grok_tweets.rb",
    "lib/wuclan/twitter/model/text_element/more_regexes.rb",
    "lib/wuclan/twitter/model/tweet.rb",
    "lib/wuclan/twitter/model/tweet/tokenize.rb",
    "lib/wuclan/twitter/model/tweet/tweet_regexes.rb",
    "lib/wuclan/twitter/model/tweet/tweet_token.rb",
    "lib/wuclan/twitter/model/twitter_user.rb",
    "lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb",
    "lib/wuclan/twitter/parse/ff_ids_parser.rb",
    "lib/wuclan/twitter/parse/friends_followers_parser.rb",
    "lib/wuclan/twitter/parse/generic_json_parser.rb",
    "lib/wuclan/twitter/parse/json_tweet.rb",
    "lib/wuclan/twitter/parse/json_twitter_user.rb",
    "lib/wuclan/twitter/parse/public_timeline_parser.rb",
    "lib/wuclan/twitter/parse/twitter_search_parse.rb",
    "lib/wuclan/twitter/parse/user_parser.rb",
    "lib/wuclan/twitter/scrape.rb",
    "lib/wuclan/twitter/scrape/base.rb",
    "lib/wuclan/twitter/scrape/old_skool_request_classes.rb",
    "lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb",
    "lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb",
    "lib/wuclan/twitter/scrape/twitter_followers_request.rb",
    "lib/wuclan/twitter/scrape/twitter_json_response.rb",
    "lib/wuclan/twitter/scrape/twitter_request_stream.rb",
    "lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb",
    "lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb",
    "lib/wuclan/twitter/scrape/twitter_search_job.rb",
    "lib/wuclan/twitter/scrape/twitter_search_request.rb",
    "lib/wuclan/twitter/scrape/twitter_search_request_stream.rb",
    "lib/wuclan/twitter/scrape/twitter_timeline_request.rb",
    "lib/wuclan/twitter/scrape/twitter_user_request.rb",
    "spec/spec_helper.rb",
    "spec/wuclan_spec.rb",
    "wuclan.gemspec"
  ]
  s.homepage = %q{http://github.com/mrflip/wuclan}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Massive-scale social network analysis. Nothing to f with.}
  s.test_files = [
    "spec/spec_helper.rb",
    "spec/wuclan_spec.rb",
    "examples/analyze/strong_links/gen_multi_edge.rb",
    "examples/analyze/strong_links/main.rb",
    "examples/analyze/word_count/dump_schema.rb",
    "examples/analyze/word_count/freq_user.rb",
    "examples/analyze/word_count/freq_whole_corpus.rb",
    "examples/analyze/word_count/word_count.rb",
    "examples/lastfm/scrape/load_lastfm.rb",
    "examples/lastfm/scrape/scrape_lastfm.rb",
    "examples/twitter/old/load_twitter_search_jobs.rb",
    "examples/twitter/old/scrape_twitter_api.rb",
    "examples/twitter/old/scrape_twitter_search.rb",
    "examples/twitter/old/scrape_twitter_trending.rb",
    "examples/twitter/parse/parse_twitter_requests.rb",
    "examples/twitter/parse/parse_twitter_search_requests.rb",
    "examples/twitter/scrape_twitter_api/scrape_twitter_api.rb",
    "examples/twitter/scrape_twitter_api/support/make_request_stats.rb",
    "examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb",
    "examples/twitter/scrape_twitter_hosebird/foo.rb",
    "examples/twitter/scrape_twitter_hosebird/ps_emulation.rb",
    "examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb",
    "examples/twitter/scrape_twitter_hosebird/test_spewer.rb",
    "examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb",
    "examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb",
    "examples/twitter/scrape_twitter_search/scrape_twitter_search.rb"
  ]

  if s.respond_to? :specification_version then
    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
    s.specification_version = 3

    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<wukong>, [">= 0"])
      s.add_runtime_dependency(%q<monkeyshines>, [">= 0"])
      s.add_runtime_dependency(%q<edamame>, [">= 0"])
    else
      s.add_dependency(%q<wukong>, [">= 0"])
      s.add_dependency(%q<monkeyshines>, [">= 0"])
      s.add_dependency(%q<edamame>, [">= 0"])
    end
  else
    s.add_dependency(%q<wukong>, [">= 0"])
    s.add_dependency(%q<monkeyshines>, [">= 0"])
    s.add_dependency(%q<edamame>, [">= 0"])
  end
end