wuclan 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,291 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'wukong'
4
+ require 'monkeyshines'
5
+ require 'wuclan/twitter'
6
+ $: << '/home/flip/ics/rubygems/json-1.1.7/lib'
7
+ include Wuclan::Twitter::Scrape
8
+ include Wuclan::Twitter::Model
9
+
10
+
11
+ require 'wukong/schema'
12
+
13
+
14
+ Wuclan::Twitter::Scrape::Base.class_eval do
15
+ extend Wukong::Schema
16
+ end
17
+
18
+ p Wuclan::Twitter::Scrape::TwitterUserRequest.pig_load
19
+
20
+ # Requests = LOAD 'ripd/com.tw/com.twitter/*' AS ( rsrc:chararray, priority:int, twitter_user_id: int, page: int, moreinfo: chararray, url: chararray, scraped_at: long, response_code: int, response_message: chararray, contents: chararray );
21
+ # request_classes = FOREACH Requests GENERATE rsrc, (int) ((double)scraped_at / 1000000.0) AS scon, response_code ;
22
+ # rc_grp = GROUP request_classes BY (rsrc, scon, response_code) ;
23
+ # rc_count = FOREACH rc_grp GENERATE COUNT(request_classes) AS freq, group.scon AS scraped_on, group.rsrc AS rsrc , group.response_code AS response_code ;
24
+ # rc_count_1 = ORDER rc_count BY scraped_on, rsrc, response_code ;
25
+ # rmf tmp/rc_count
26
+ # STORE rc_count_1 INTO 'tmp/rc_count' ;
27
+
28
+
29
+ # 20090304152029 bad utf8
30
+ # 20090308
31
+
32
+
33
+
34
+ # 1 9999999 20081207052456 1 old_scraper 20081207052456 200 old_scraper [{"user":{"followers_count":23,"description":"","url":"","profile_image_url":"http:\/\/s3.amazonaws.com /twi
35
+ # 1 9999999 20081207055023 1 old_scraper 20081207055023 200 old_scraper [{"user":{"followers_count":32,"description":"","url":"http:\/\/www.mychurch.org /gervis","profile_image_url
36
+ # 1 9999999 20081209041619 1 old_scraper 20081209041619 200 old_scraper [{"user":{"followers_count":80,"description":"1983, Amersfoort, audio-producer @ NPS 3FM","url":"http:\/\/ww
37
+ # 1 9999999 20081209115725 1 old_scraper 20081209115725 200 old_scraper [{"user":{"followers_count":19,"description":"Pozzo e Luck não acessavam a net.","url":"","profile_image_ur
38
+ # 1 9999999 20081209232718 1 old_scraper 20081209232718 200 old_scraper [{"user":{"followers_count":105,"description":"札幌←→東京頻繁。IT系,広告系,プランナー,<E5><85>
39
+ # 1 9999999 20081210061628 1 old_scraper 20081210061628 200 old_scraper [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
40
+ # 1 9999999 20081210185703 1 old_scraper 20081210185703 200 old_scraper [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
41
+ # 1 9999999 20081211095702 1 old_scraper 20081211095702 200 old_scraper [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":"
42
+ # 1 9999999 20081213073636 1 old_scraper 20081213073636 200 old_scraper [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
43
+ # 1 9999999 20081214100003 1 old_scraper 20081214100003 200 old_scraper [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
44
+ # 1 9999999 20081215105211 1 old_scraper 20081215105211 200 old_scraper [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
45
+ # 1 9999999 20081218075108 1 old_scraper 20081218075108 200 old_scraper [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
46
+ # 1 9999999 20081219065853 1 old_scraper 20081219065853 200 old_scraper [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
47
+ # 1 9999999 20081220010525 1 old_scraper 20081220010525 200 old_scraper [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
48
+ # 1 9999999 20081220113649 1 old_scraper 20081220113649 200 old_scraper [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
49
+ # 1 9999999 20081221083623 1 old_scraper 20081221083623 200 old_scraper [{"user":{"followers_count":565,"description":"癒し系プログラマ","url":"http:\/\/polog.org /","profi
50
+ # 1 9999999 20081224110505 1 old_scraper 20081224110505 200 old_scraper [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
51
+ # 1 9999999 20081225055913 1 old_scraper 20081225055913 200 old_scraper [{"user":{"followers_count":213,"description":"同人ゲーム作ってます。 お気軽にフォロー<E3>
52
+ # 1 9999999 20081229072914 1 old_scraper 20081229072914 200 old_scraper [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
53
+ # 1 9999999 20081229084830 1 old_scraper 20081229084830 200 old_scraper [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
54
+ # 1 9999999 20090102103315 1 old_scraper 20090102103315 200 old_scraper [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
55
+ # 1 9999999 20090104084017 1 old_scraper 20090104084017 200 old_scraper [{"user":{"followers_count":299,"description":"スウィーツ(呪)。12時過ぎるとたいてい就<E5><AF>
56
+ # 1 9999999 20090105101608 1 old_scraper 20090105101608 200 old_scraper [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
57
+ # 1 9999999 20090105103520 1 old_scraper 20090105103520 200 old_scraper [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
58
+ # 1 9999999 20090106165730 1 old_scraper 20090106165730 200 old_scraper [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
59
+ # 1 9999999 20090112091101 1 old_scraper 20090112091101 200 old_scraper [{"user":{"followers_count":25,"description":"Ostravak je stav duše, i když člověk žije v Praze.","url"
60
+ # 1 9999999 20090117090748 1 old_scraper 20090117090748 200 old_scraper [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
61
+ # 1 9999999 20090418173317 1 old_scraper 20090418173317 200 old_scraper [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
62
+ # 1 9999999 20090418231828 1 old_scraper 20090418231828 200 old_scraper [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
63
+ # 1 9999999 20090419014909 1 old_scraper 20090419014909 200 old_scraper [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
64
+ # 1 9999999 20090419052121 1 old_scraper 20090419052121 200 old_scraper [{"user":{"followers_count":743,"description":"ゆるい感じで参加してみました。野球と温泉<E3>
65
+ # 1 9999999 20090419233942 1 old_scraper 20090419233942 200 old_scraper [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
66
+ # 1 9999999 20090420033745 1 old_scraper 20090420033745 200 old_scraper [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
67
+ # 1 9999999 20090420112345 1 old_scraper 20090420112345 200 old_scraper [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
68
+ # 1 9999999 20090421010538 1 old_scraper 20090421010538 200 old_scraper [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
69
+ # 1 9999999 20090421084441 1 old_scraper 20090421084441 200 old_scraper [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
70
+ # 1 9999999 20090421101818 1 old_scraper 20090421101818 200 old_scraper [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
71
+ # 1 9999999 20090421232814 1 old_scraper 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
72
+ # 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
73
+ # 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
74
+ # 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
75
+ # 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
76
+ # 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
77
+ # 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiやってます。「ふじしょう」で検索してみ<E3>
78
+ # 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
79
+ # 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
80
+ # 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
81
+ # 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
82
+ # 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
83
+ # 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
84
+ # 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
85
+ # 1 9999999 20081209232718 1 old_fetcher 20081209232718 200 old_fetcher [{"user":{"followers_count":105,"description":"札幌←→東京頻繁。IT系,広告系,プランナー,<E5><85>1 9999999 20081210061628 1 old_fetcher 20081210061628 200 old_fetcher [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
86
+ # 1 9999999 20081210185703 1 old_fetcher 20081210185703 200 old_fetcher [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
87
+ # 1 9999999 20081211095702 1 old_fetcher 20081211095702 200 old_fetcher [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":" 1 9999999 20081213073636 1 old_fetcher 20081213073636 200 old_fetcher [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
88
+ # 1 9999999 20081214100003 1 old_fetcher 20081214100003 200 old_fetcher [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
89
+ # 1 9999999 20081215105211 1 old_fetcher 20081215105211 200 old_fetcher [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
90
+ # 1 9999999 20081218075108 1 old_fetcher 20081218075108 200 old_fetcher [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
91
+ # 1 9999999 20081219065853 1 old_fetcher 20081219065853 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
92
+ # /im
93
+ # 1 9999999 20081220010525 1 old_fetcher 20081220010525 200 old_fetcher [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
94
+ # 1 9999999 20081220113649 1 old_fetcher 20081220113649 200 old_fetcher [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
95
+ # 1 9999999 20081221083623 1 old_fetcher 20081221083623 200 old_fetcher [{"user":{"followers_count":565,"description":"癒し系プログラマ","url":"http:\/\/polog.org /","profi 1 9999999 20081224110505 1 old_fetcher 20081224110505 200 old_fetcher [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
96
+ # 1 9999999 20081225055913 1 old_fetcher 20081225055913 200 old_fetcher [{"user":{"followers_count":213,"description":"同人ゲーム作ってます。 お気軽にフォロー<E3> 1 9999999 20081229072914 1 old_fetcher 20081229072914 200 old_fetcher [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
97
+ # /
98
+ # 1 9999999 20081229084830 1 old_fetcher 20081229084830 200 old_fetcher [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
99
+ # 1 9999999 20090102103315 1 old_fetcher 20090102103315 200 old_fetcher [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
100
+ # 1 9999999 20090104084017 1 old_fetcher 20090104084017 200 old_fetcher [{"user":{"followers_count":299,"description":"スウィーツ(呪)。12時過ぎるとたいてい就<E5><AF>1 9999999 20090105101608 1 old_fetcher 20090105101608 200 old_fetcher [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
101
+ # 1 9999999 20090105103520 1 old_fetcher 20090105103520 200 old_fetcher [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
102
+ # 1 9999999 20090106165730 1 old_fetcher 20090106165730 200 old_fetcher [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
103
+ # 1 9999999 20090112091101 1 old_fetcher 20090112091101 200 old_fetcher [{"user":{"followers_count":25,"description":"Ostravak je stav duše, i když člověk žije v Praze.","url"
104
+ # 1 9999999 20090117090748 1 old_fetcher 20090117090748 200 old_fetcher [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
105
+ # 1 9999999 20090418173317 1 old_fetcher 20090418173317 200 old_fetcher [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
106
+ # 1 9999999 20090418231828 1 old_fetcher 20090418231828 200 old_fetcher [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
107
+ # 1 9999999 20090419014909 1 old_fetcher 20090419014909 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
108
+ # 1 9999999 20090419052121 1 old_fetcher 20090419052121 200 old_fetcher [{"user":{"followers_count":743,"description":"ゆるい感じで参加してみました。野球と温泉<E3>
109
+ # 1 9999999 20090419233942 1 old_fetcher 20090419233942 200 old_fetcher [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
110
+ # 1 9999999 20090420033745 1 old_fetcher 20090420033745 200 old_fetcher [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
111
+ # 1 9999999 20090420112345 1 old_fetcher 20090420112345 200 old_fetcher [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
112
+ # 1 9999999 20090421010538 1 old_fetcher 20090421010538 200 old_fetcher [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
113
+ # 1 9999999 20090421084441 1 old_fetcher 20090421084441 200 old_fetcher [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
114
+ # 1 9999999 20090421101818 1 old_fetcher 20090421101818 200 old_fetcher [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
115
+ # 1 9999999 20090421232814 1 old_fetcher 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
116
+ # 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
117
+ # 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
118
+ # 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
119
+ # 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
120
+ # 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
121
+ # 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiやってます。「ふじしょう」で検索してみ<E3>
122
+ # 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
123
+ # 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
124
+ # 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
125
+ # 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
126
+ # 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
127
+ # 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
128
+ # 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
129
+ # 1 9999999 20090504045337 1 old_fetcher 20090504045337 200 old_fetcher [{"user":{"followers_count":171,"description":"i'm a bboy and a multimedia designer","url":"http:\/\/pitiscm
130
+ # 1 9999999 20090507112755 1 old_fetcher 20090507112755 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
131
+ # 1 9999999 20090509222228 1 old_fetcher 20090509222228 200 old_fetcher [{"user":{"followers_count":256,"description":"representing NJ\/NY","url":"http:\/ /www.myspace.com\/darknes
132
+ # 1 9999999 20090512052820 1 old_fetcher 20090512052820 200 old_fetcher [{"user":{"followers_count":0,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
133
+ # 1 9999999 20090512101017 1 old_fetcher 20090512101017 200 old_fetcher [{"user":{"followers_count":685,"description":"Gainfully employed doing Linux 'stuff', part-time fitness fan
134
+ # 1 9999999 20090513062843 1 old_fetcher 20090513062843 200 old_fetcher [{"user":{"followers_count":7,"description":"Non-commercial radio for everyone who loves original music and
135
+ #
136
+ # 1 20090427 followers_ids 200 /images\/themes\/theme1\/bg.gif","profile_link_color":"0000ff","time_zone":"Cairo","created_at":"Sun Sep 02 13:44:12 +0000 2007","profile_sidebar_fill_c
137
+ # 1 20090427 followers_ids 200
138
+ # 1 20090427 followers_ids 209 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>","created_at":"Sun Feb 08 08:17:30 +0000 2009"},{"user":{"profile_background_image_url":"http:\
139
+ # 1 20090427 followers_ids 20 /twitter_production\/profile_images\/62248324\/086_copy_3_normal.jpg","statuses_count":16,"profile_text_color":"666666","screen_name":"cheekydonkey","pr
140
+ # 1 20090427 followers_ids 20 /images\/themes\/theme1\/bg.gif","created_at":"Fri Apr 18 19:34:26 +0000 2008","profile_text_color":"000000","location":null,"id":14436644,"time_zone":"
141
+ # 1 20090427 followers_ids 242
142
+ # 1 20090427 followers_ids 24479801
143
+ # 1 20090427 followers_ids 2 /images\/default_profile_normal.png","statuses_count":0,"profile_text_color":"000000","screen_name":"kaylazastrow","profile_background_tile":false,"prof
144
+ # 3256 20090427 followers_ids 400
145
+ #
146
+ # 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
147
+ # 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
148
+ # 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
149
+ # 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
150
+ # 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
151
+ # 1 0 6}
152
+ # 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
153
+ # 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
154
+ #
155
+ # 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
156
+ # 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
157
+ # 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
158
+ # 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
159
+ # 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
160
+ # 1 0 6}
161
+ # 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
162
+ # 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
163
+ # 572 0 bogus-all_numeric-favorites 200
164
+ # 8 0 bogus-bad_chars-favorites 200
165
+ # 11 0 bogus-bad_chars-followers 200
166
+ # 3 0 bogus-bad_chars-friends 200
167
+ # 1867 0 bogus-missing_id-favorites 200
168
+ # 1 0 eply_to_status_id":null,"source":"web"},"notifications":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
169
+ # 1 0 f.com/friends/ids/17799430.json
170
+ # 1 0 f49.json
171
+ # 1 0 f81852492\/Bread__normal.jpg","status":{"truncated":false,"in_reply_to_status_id":1625610632,"text":"@podcasthelper oh yes yes i do still need help. It is ok to call upon your expertis
172
+ # 1 0 fat":"Tue Aug 12 15:27:32 +0000 2008","friends_count":87,"profile_background_color":"FF6699","location":"Newcastle, UK","id":15823576,"time_zone":"Hawaii","favourites_count":0,"profile
173
+ # 890016 0 favorites 200
174
+ # 1 0 file_image_url":"http:\/\/static.twitter.com 0
175
+ # 1 0 fo:null,"name":"THE_REAL_SHAQ","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
176
+ # 1 0 foll3183,14885034,17824762,25320311,26651936,5520952,16092530,15466712,18414465,20019951,22151420,26332254,7096192,13434972,26275705,27923225,15770739,19900326,15654216,20486512,167358
177
+ # 1 0 folleply_to_screen_name":null,"id":1618358723,"source":"<a href=\"http:\/ 0
178
+ # 1 0 follo54:45 +0000 2009"}]
179
+ # 1 0 follos\/71101463\/LegalTimes_1651_normal.jpg","status":{"truncated":false,"in_reply_to_status_id":null,"text":"The Morning Wrap http: 0
180
+ # 1 0 followeada)","favourites_count":1,"profile_text_color":"666666"},{"description":"Writer, Pick-up Artist, Social Mastermind, and Traveler","profile_background_image_url":"http:\/\/stati
181
+ # 1 0 followekground_tile":false,"description":"Gamer\/Skier\/Drummer 0
182
+ # 1 0 followers,"profile_sidebar_border_color":"87bc44","time_zone":"London","profile_image_url":"http:\/\/s3.amazonaws.com 0
183
+ # 1 0 followers,12836312,18993475,16860914,16142878,18504804,17810432,18661758,17356420,17901504,15535360,19240090,16180026,14614833,18264863,17807744,19459418,19356460,8112832,18637695,1925
184
+ # 1 0 followers.json
185
+ # 1 0 followers_":"need coffee","favorited":false,"in_reply_to_screen_name":null,"created_at":"Mon Apr 27 14:11:52 +0000 2009","truncated":false,"id":1629096949,"in_reply_to_status_id":null,
186
+ # 1 0 followers__Close_normal.JPG","status":{"truncated":false,"in_reply_to_status_id":null,"text":"is finally home and going to bed. Have to get up for work in about 4.5 hours.","in_reply_t
187
+ # 1 0 followers_id 0
188
+ # 1 0 followers_id,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
189
+ # 1 0 followers_id13165892,15131310,6970122,13838022,15136098,14590445,15184346,6264392,12650292,16159919,16725668,16816616,15984607,16895930,12228062,15224867,859221,12364022,15316113,15624
190
+ # 1 0 followers_id2009","truncated":false,"id":1625606751,"in_reply_to_status_id":1623299478,"source":"web"},"notifications":false,"time_zone":"Pacific Time (US & Canada)","favourites_count"
191
+ # 1 0 followers_idbackground_tile":false,"followers_count":78,"url":"http:\/\/danfitek.com","screen_name":"fitekker","name":"Dan Fitek","friends_count":100,"profile_background_color":"9ae4e8
192
+ # 149 0 followers_ids
193
+ # 1 0 followers_ids 1
194
+ # 1 0 followers_ids 17975054
195
+ # 1 0 followers_ids 200
196
+ # 1 0 followers_ids 20090412070434
197
+ # 4 0 followers_ids 0
198
+ # 1 0 followers_ids 0
199
+ # 1 0 followers_ids 0
200
+ # 1 0 followers_ids"in_reply_to_screen_name":null,"created_at":"Fri Apr 24 19:22:24 +0000 2009","truncated":false,"id":1606556267,"in_reply_to_status_id":null,"source":"<a href=\"http:\/ 0
201
+ # 1 0 followers_ids00,"profile_link_color":"0000ff","profile_image_url":"http:\/\/static.twitter.com 0
202
+ # 1 0 followers_idsC2EF","location":"San Diego, CA","id":9628922,"time_zone":"Pacific Time (US & Canada)","created_at":"Tue Oct 23 17:30:49 +0000 2007"}]
203
+ # 1 0 followers_idst":"Mon Jan 28 03:48:51 +0000 2008","screen_name":"siolanthe"},{"description":"","profile_background_image_url":"http:\/\/static.twitter.com 0
204
+ # 1 0 followers_ilocation":null,"id":15311449,"time_zone":"Greenland"},{"description":"","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
205
+ # 1 0 followerwing":false,"profile_link_color":"CD0033","url":"http:\/\/foodfeed.us","name":"FoodFeed","notifications":false,"profile_sidebar_fill_color":"fafaf5","followers_count":4399,"pro
206
+ # 1 0 followetp://twitter.com/followers/ids/15737773.json
207
+ # 1 0 fri:5,"profile_sidebar_border_color":"87bc44","url":null,"screen_name":"seniorpoopypant","name":"seniorpoopypant","favourites_count":0,"protected":false,"status":{"truncated":false,"in
208
+ # 1 0 frie:"Pirate LadyZebra. (also know as Zoaea)","utc_offset":-18000,"profile_sidebar_fill_color":"e0ff92","followers_count":19,"favourites_count":0,"profile_image_url":"http:\/\/s3.amazo
209
+ # 1 0 friend219,14213042,29736155,27530456,18755292]
210
+ # 1 0 friends":"web"},"profile_background_image_url":"http:\/\/static.twitter.com 0
211
+ # 1 0 friends\/\/s3.amazonaws.com 0
212
+ # 1 0 friends_3,27039226,29988381,35486899,18900303,16044047]
213
+ # 1 0 friends__count":0,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
214
+ # 1 0 friends_i12809262,12767592,13084172,12803292,12775072,12129872,14198789,29866309]
215
+ # 1 0 friends_i1355,"source":"web","created_at":"Fri Jun 27 20:51:55 +0000 2008"},{"truncated":false,"user":{"description":"Recently married! Work for Victory - vc.tv - lovin' life!","utc_o
216
+ # 1 0 friends_id":null,"text":"One Laptop per Child Lands in Indiahttp:\/\/tinyurl.com 0
217
+ # 1 0 friends_id,17213487,20820391,1050851,23817210,15117375,14790735,16069532,14634720,23306376,14470037,24754635,18666525,16798949,17118708,17492127,16563598,22731226,20253928,17139092,240
218
+ # 1 0 friends_idile_background_images\/3476247\/BJMendelson_388_twitbacks.jpg","profile_link_color":"0084B4","location":"Glens Falls, New York","id":12687952,"time_zone":"Indiana (East)","cr
219
+ # 161 0 friends_ids
220
+ # 1 0 friends_ids 18706826
221
+ # 1 0 friends_ids 16624466
222
+ # 1 0 friends_ids 20090427091534
223
+ # 1 0 friends_ids 20090427094351
224
+ # 1 0 friends_ids 0
225
+ # 1 0 friends_ids 0
226
+ # 1 0 friends_ids 0
227
+ # 1 0 friends_ids 0
228
+ # 1 0 friends_ids 0
229
+ # 1 0 friends_ids 0
230
+ # 1 0 friends_ids/\/orangatame.com\/products 0
231
+ # 1 0 friends_idsl_color":"F3F3F3","followers_count":25,"location":"St. Louis","id":14708168,"notifications":false,"friends_count":23,"profile_sidebar_border_color":"DFDFDF"},"text":"@Raptor
232
+ # 1 0 friends_iound_images\/4821472\/Mississippi_River_TypeMap2.jpg","profile_link_color":"1F98C7","location":"San Francisco","id":18257438,"time_zone":"Pacific Time (US & Canada)","created_
233
+ # 1 0 friprofile_background_color":"1A1B1F","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
234
+ # 1 0 frmusings of a young Catholic in Yorkshire, England","utc_offset":0,"notifications":false,"profile_sidebar_fill_color":"e0ff92","followers_count":89,"profile_image_url":"http:\/\/s3.am
235
+ # 1 0 frollowing":false,"statuses_count":468,"profile_link_color":"2FC2EF","url":"http:\/\/myspace.com 0
236
+ # 1 0 fws.com\/twitter_production\/profile_background_images 0
237
+ # 1 0 hu Mar 05 07:45:42 +0000 2009","id":1282474011,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"C6E2EE","notifications":false,"created_at":"Thu Mar 05 07:36
238
+ # 1 0 ile_image_url":"http:\/\/static.twitter.com 0
239
+ # 1 0 imit exceeded. Clients may not make more than 20000 requests per hour."}
240
+ # 1 0 location":null,"id":22893663,"profile_link_color":"0000ff"}
241
+ # 1 0 nk faudrait demander \u00e0 Michel Bergeron ,on aurait du fun pour 30 minutes","in_reply_to_user_id":21818830,"created_at":"Wed Mar 04 19:18:53 +0000 2009","truncated":false,"id":12798
242
+ # 1 0 oz","profile_background_image_url":"http:\/\/static.twitter.com 0
243
+ # 1 0 s football, beer, and technology! Tweet away!","statuses_count":1444,"utc_offset":-21600,"profile_sidebar_border_color":"87bc44","profile_background_tile":true,"following":false,"prof
244
+ # 24228 0 timeline 200
245
+ # 1 0 u"created_at":"Wed Mar 04 05:45:20 +0000 2009","in_reply_to_user_id":null,"in_reply_to_status_id":null,"truncated":false,"id":1277431048,"source":"<a href=\"http:\/ 0
246
+ # 1 0 u128
247
+ # 1 0 u88,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"F2E195","notifications":false,"created_at":"Wed Mar 04 05:32:57 +0000 2009","profile_background_image_u
248
+ # 1 0 uat":"Thu Mar 05 09:07:06 +0000 2009","id":1282640319,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 0
249
+ # 1 0 ul":null,"name":"kimberly luzier","profile_background_tile":false,"protected":false,"status":{"in_reply_to_user_id":null,"text":"pictures for ebayy","created_at":"Thu Mar 05 17:21:37 +
250
+ # 1 0 us":"http:\/\/s3.amazonaws.com 0
251
+ # 1 0 us":false,"location":null,"id":22740024}
252
+ # 1 0 us0308222713
253
+ # 1 0 use":2,"url":"http:\/\/www.pat-bach.com","name":"Tim Bach","profile_background_tile":false,"protected":false,"status":{"truncated":false,"favorited":false,"text":"Setting up my Twitter
254
+ # 1 0 use,"id":1288256641,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 05 08:33:22 +0000 2009","profile_ba
255
+ # 1 0 usefalse,"favorited":false,"text":"Wondering what twitter is all about and if I am missing out!","in_reply_to_user_id":null,"created_at":"Tue Mar 03 13:21:30 +0000 2009","id":127364277
256
+ # 1 0 useme":"magic 93.1 Radio","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
257
+ # 1 0 useprofile_text_color":"000000","description":null,"screen_name":"JohnNMiller","utc_offset":null,"profile_link_color":"0000ff","time_zone":null,"profile_sidebar_fill_color":"e0ff92","f
258
+ #
259
+ # 474 0 user
260
+ # 1 0 user 20104991
261
+ # 2 0 user 1
262
+ # 1 0 user 14686512
263
+ # 80 0 user 200
264
+ # 1 0 user 20090308201437
265
+ # 1 0 user 20090308201710
266
+ # 1 0 user 20090308201901
267
+ # 1 0 user 20090308202228
268
+ # 1 0 user 20090308204043
269
+ # 1 0 user 20090308214100
270
+ # 8 0 user 0
271
+ # 1 0 user 0
272
+ # 1 0 user 0
273
+ # 1 0 user 0
274
+ # 1 0 user 0
275
+ # 1 0 user 0
276
+ # 1 0 user 0
277
+ # 1 0 user 0
278
+ # 1 0 user 0 /lovechelle","name":"nichellemicole","profile_background_tile":false,"protected":true,"profile_sidebar_border_color":"D9B17E","notifications":false,"cre
279
+ # 1 0 user 0 /images\/themes\/theme1\/bg.gif","statuses_count":1,"profile_text_color":"000000","time_zone":null,"url":null,"name":"Ben Pitz","friends_count":10,"prof
280
+ # 1 0 user 0 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>"},"notifications":false,"profile_image_url":"http:\/\/static.twitter.com\/images\/default_profi
281
+ # 1 0 user Ocean from my office window.","favorited":false,"created_at":"Wed Feb 25 15:55:00 +0000 2009","in_reply_to_user_id":null,"id":1249633057,"source":"web"},"time_zone":null,"profile_
282
+ # 5 0 user_timeline
283
+ # 1 0 user_timeline 0
284
+ # 1 0 userr.com\/images\/default_profile_normal.png","followers_count":3,"location":null,"id":21311967,"created_at":"Thu Feb 19 16:16:04 +0000 2009","profile_sidebar_border_color":"87bc44","
285
+ # 1 0 usertp:\/\/orangatame.com 0
286
+ # 1 0 usertter_production\/profile_images\/87717775 0
287
+ # 1 0 usm/users/show/0022897534.json?page=1
288
+ # 1 0 usmtwittercom.html\">mobile web<\/a>"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Tue Sep 02 07:53:34 +0000 2008","profile_background_image_url":"http:
289
+ # 8 20081218 bogus-all_numeric-followers 200
290
+ # 12 20081218 bogus-all_numeric-friends 200
291
+ # :
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'wukong'
4
+ require 'monkeyshines'
5
+ require 'wuclan/twitter'
6
+ # un-namespace request classes.
7
+ include Wuclan::Twitter::Scrape
8
+ include Wuclan::Twitter::Model
9
+ # If you're anyone but the original author, this next require is useless but harmless.
10
+ require 'wuclan/twitter/scrape/old_skool_request_classes'
11
+
12
#
# Shell command used in place of a Ruby mapper: `cut` with a tab delimiter,
# keeping fields 1, 3, 4, 7 and 8 of every input line. Downstream these are
# read as
#
#   req, id, page, scraped_at, response_code
#
# The trailing space is part of the original string and is kept as-is.
# Frozen so the command string cannot be mutated after definition.
#
REQUEST_MAPPER_COMMAND = "/usr/bin/cut -d\"\t\" -f1,3,4,7,8 ".freeze
16
+
17
# json is not referenced by this class; the require is retained from the
# original in case code loaded afterwards depends on it being present.
require 'json'

#
# Reducer over records of the form
#
#   req, id, page, scraped_at, response_code
#
# keyed on (req, id). For each key it tallies how often each tracked response
# code was seen and emits one record built from the superclass's final_value
# (presumably the last record accumulated for the key -- confirm against
# Wukong::Streamer::UniqByLastReducer):
#
#   id (zero-padded to 10 digits), req, page, scraped_at,
#   count_200, count_400, count_401, count_403, count_404
#
class TwitterRequestUniqer < Wukong::Streamer::UniqByLastReducer
  # Response codes that get their own count column, in output column order.
  TRACKED_RESPONSE_CODES = [200, 400, 401, 403, 404].freeze

  # A usable scraped_at is exactly 14 digits, e.g. 20090412070434.
  # Anchored on both ends so a field that merely *contains* 14 consecutive
  # digits (a corrupted or shifted column) is rejected.
  SCRAPED_AT_FORMAT = /\A\d{14}\z/

  # Hash of response code (Integer) => number of records seen with that code.
  attr_accessor :response_codes

  # Grouping key for a record: the request type and the id.
  # The page and any further fields are accepted but do not affect the key.
  def get_key req=nil, id=nil, pg=nil, *args
    [req, id]
  end

  # Zeroes the per-code tallies, then defers to the superclass.
  def start! *args
    self.response_codes = Hash[TRACKED_RESPONSE_CODES.map { |code| [code, 0] }]
    super(*args)
  end

  # Takes one record (req, id, page, scraped_at, response_code).
  # Records whose scraped_at is not a 14-digit timestamp are dropped entirely:
  # they are neither tallied nor handed to the superclass.
  def accumulate *args
    _req, _id, _page, scraped_at, resp = args
    resp = resp.to_i
    return unless scraped_at =~ SCRAPED_AT_FORMAT
    # Codes outside TRACKED_RESPONSE_CODES are passed on but not counted.
    response_codes[resp] += 1 if response_codes.key?(resp)
    super(*args)
  end

  # Emits the output record for the current key, or nothing when no record
  # survived accumulate (final_value is blank).
  def finalize *args
    return if final_value.blank?
    req, id, page, scraped_at, _resp = final_value
    padded_id = format('%010d', id.to_i)
    yield([padded_id, req, page, scraped_at] + response_codes.values_at(*TRACKED_RESPONSE_CODES))
  end
end
44
+
45
+
46
# Build and launch the job.
# The first argument is nil (presumably the mapper-class slot, since mapping is
# delegated to the shell command given as :map_command); TwitterRequestUniqer
# is passed as the second argument.
# NOTE(review): :partition_fields => 2 and :sort_fields => 3 presumably mean
# "partition on the first two fields, sort on the first three" -- confirm
# against the Wukong::Script option documentation.
script = Wukong::Script.new(nil,
                            TwitterRequestUniqer,
                            :map_command      => REQUEST_MAPPER_COMMAND,
                            :partition_fields => 2,
                            :sort_fields      => 3)
script.run
52
+
53
+
54
+ # 49522
55
+ # 74975395 200
56
+ # 84 302
57
+ # 277786 400
58
+ # 972881 401
59
+ # 94647 403
60
+ # 178105 404
61
+ # 9710 500
62
+ # 23134 502
63
+ # 1588 503
64
+ # 2479 504
65
+
66
+ # Wuclan::Twitter::Scrape::Base.class_eval do class_inheritable_accessor :req_code ; end
67
+ # TwitterUserRequest.class_eval do self.req_code = :tw_user ; end
68
+ # TwitterFollowersRequest.class_eval do self.req_code = :tw_foll ; end
69
+ # TwitterFriendsRequest.class_eval do self.req_code = :tw_frnd ; end
70
+ # TwitterFollowersIdsRequest.class_eval do self.req_code = :tw_foid ; end
71
+ # TwitterFriendsIdsRequest.class_eval do self.req_code = :tw_frid ; end
72
+ # TwitterUserTimelineRequest.class_eval do self.req_code = :tw_ustl ; end
73
+ #
74
+ # REQ_CODES = {
75
+ # 'followers' => :tw_fo, 'twitter_followers_request' => :tw_fo,
76
+ # 'friends' => :tw_fr, 'twitter_friends_request' => :tw_fr,
77
+ # 'followers_ids' => :tw_fi, 'twitter_followers_ids_request' => :tw_fi,
78
+ # 'friends_ids' => :tw_ri, 'twitter_friends_ids_request' => :tw_ri,
79
+ # 'user' => :tw_us, 'twitter_user_request' => :tw_us,
80
+ # 'user_timeline' => :tw_ut, 'twitter_user_timeline_request' => :tw_ut,
81
+ # }
82
+
83
+ # #
84
+ # #
85
+ # #
86
+ # class TwitterRequestParser < Wukong::Streamer::StructStreamer
87
+ #
88
+ # def process request, *args, &block
89
+ # next if request.page.to_i > 1
90
+ # next if request.response_code != '200'
91
+ # req_code = REQ_CODES[request]
92
+ # case request
93
+ # when TwitterUserRequest, TwitterFollowersRequest, TwitterFriendsRequest,
94
+ # TwitterFollowersIdsRequest, TwitterFriendsIdsRequest, TwitterUserTimelineRequest
95
+ # yield [request.twitter_user_id, request.req_code, request.scraped_at]
96
+ # end
97
+ # end
98
+ # end
@@ -0,0 +1,4 @@
1
-- Selects the 'user' rows from tmp/last_requests_and_codes and stores them
-- ordered by datetime ascending (oldest first).
--
-- The LOAD schema must be enclosed in parentheses; without them the AS clause
-- is not valid Pig Latin.
Scrapes = LOAD 'tmp/last_requests_and_codes' AS (
    user_id:int,
    rsrc:chararray,
    page:int,
    datetime:long,
    r200:int,
    r400:int,
    r401:int,
    r403:int,
    r404:int
  ) ;
UserScrapes = FILTER Scrapes BY rsrc == 'user' ;
UserScrapesOrdered = ORDER UserScrapes BY datetime ASC ;
STORE UserScrapesOrdered INTO 'twmeta/scrape_requests/users_by_staleness-20090730.tsv' ;
@@ -0,0 +1,6 @@
1
+ love 65536 3.36366494448618 6698 2686693027 2688691633
2
+ red+sox 65536 0.0113457581992013 1500 2661001994 2688059232
3
+ britney+spears 65536 0.00866753886170806 184 2685103763 2688130850
4
+ hadoop 65536 0.000661831916251315 614 2501794487 2687967783
5
+ infochimps 65536 2.24964286919452e-05 16 2541533220 2683708276
6
+ hapaxlegomenon 65536 0.0 1 2646535741 2646535741