wuclan 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. data/LICENSE.textile +20 -0
  2. data/README.textile +28 -0
  3. data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
  4. data/examples/analyze/strong_links/main.rb +51 -0
  5. data/examples/analyze/word_count/dump_schema.rb +13 -0
  6. data/examples/analyze/word_count/freq_user.rb +31 -0
  7. data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
  8. data/examples/analyze/word_count/word_count.pig +43 -0
  9. data/examples/analyze/word_count/word_count.rb +34 -0
  10. data/examples/lastfm/scrape/load_lastfm.rb +31 -0
  11. data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
  12. data/examples/lastfm/scrape/seed.tsv +147 -0
  13. data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
  14. data/examples/twitter/old/scrape_twitter_api.rb +104 -0
  15. data/examples/twitter/old/scrape_twitter_search.rb +57 -0
  16. data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
  17. data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
  18. data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
  19. data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
  20. data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
  21. data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
  22. data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
  23. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
  24. data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
  25. data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
  26. data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
  27. data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
  28. data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
  29. data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
  30. data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
  31. data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
  32. data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
  33. data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
  34. data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
  35. data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
  36. data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
  37. data/lib/old/twitter_api.rb +88 -0
  38. data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
  39. data/lib/wuclan/delicious/delicious_models.rb +26 -0
  40. data/lib/wuclan/delicious/delicious_request.rb +65 -0
  41. data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
  42. data/lib/wuclan/friendster.rb +7 -0
  43. data/lib/wuclan/lastfm/model/base.rb +49 -0
  44. data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
  45. data/lib/wuclan/lastfm/scrape/base.rb +195 -0
  46. data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
  47. data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
  48. data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
  49. data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
  50. data/lib/wuclan/lastfm/scrape.rb +12 -0
  51. data/lib/wuclan/lastfm.rb +7 -0
  52. data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
  53. data/lib/wuclan/metrics/user_metrics.rb +443 -0
  54. data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
  55. data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
  56. data/lib/wuclan/metrics.rb +0 -0
  57. data/lib/wuclan/myspace.rb +21 -0
  58. data/lib/wuclan/open_social/model/base.rb +0 -0
  59. data/lib/wuclan/open_social/scrape/base.rb +111 -0
  60. data/lib/wuclan/open_social/scrape_request.rb +6 -0
  61. data/lib/wuclan/open_social.rb +0 -0
  62. data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
  63. data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
  64. data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
  65. data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
  66. data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
  67. data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
  68. data/lib/wuclan/twitter/api_response_examples.textile +300 -0
  69. data/lib/wuclan/twitter/model/base.rb +72 -0
  70. data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
  71. data/lib/wuclan/twitter/model/relationship.rb +176 -0
  72. data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
  73. data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
  74. data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
  75. data/lib/wuclan/twitter/model/text_element.rb +38 -0
  76. data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
  77. data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
  78. data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
  79. data/lib/wuclan/twitter/model/tweet.rb +74 -0
  80. data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
  81. data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
  82. data/lib/wuclan/twitter/model.rb +21 -0
  83. data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
  84. data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
  85. data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
  86. data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
  87. data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
  88. data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
  89. data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
  90. data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
  91. data/lib/wuclan/twitter/scrape/base.rb +97 -0
  92. data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
  93. data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
  94. data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
  95. data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
  96. data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
  97. data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
  98. data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
  99. data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
  100. data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
  101. data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
  102. data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
  103. data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
  104. data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
  105. data/lib/wuclan/twitter/scrape.rb +27 -0
  106. data/lib/wuclan/twitter.rb +7 -0
  107. data/lib/wuclan.rb +1 -0
  108. data/spec/spec_helper.rb +9 -0
  109. data/spec/wuclan_spec.rb +7 -0
  110. data/wuclan.gemspec +184 -0
  111. metadata +219 -0
@@ -0,0 +1,291 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- coding: utf-8 -*-
3
+ require 'wukong'
4
+ require 'monkeyshines'
5
+ require 'wuclan/twitter'
6
+ $: << '/home/flip/ics/rubygems/json-1.1.7/lib'
7
+ include Wuclan::Twitter::Scrape
8
+ include Wuclan::Twitter::Model
9
+
10
+
11
+ require 'wukong/schema'
12
+
13
+
14
+ Wuclan::Twitter::Scrape::Base.class_eval do
15
+ extend Wukong::Schema
16
+ end
17
+
18
+ p Wuclan::Twitter::Scrape::TwitterUserRequest.pig_load
19
+
20
+ # Requests = LOAD 'ripd/com.tw/com.twitter/*' AS ( rsrc:chararray, priority:int, twitter_user_id: int, page: int, moreinfo: chararray, url: chararray, scraped_at: long, response_code: int, response_message: chararray, contents: chararray );
21
+ # request_classes = FOREACH Requests GENERATE rsrc, (int) ((double)scraped_at / 1000000.0) AS scon, response_code ;
22
+ # rc_grp = GROUP request_classes BY (rsrc, scon, response_code) ;
23
+ # rc_count = FOREACH rc_grp GENERATE COUNT(request_classes) AS freq, group.scon AS scraped_on, group.rsrc AS rsrc , group.response_code AS response_code ;
24
+ # rc_count_1 = ORDER rc_count BY scraped_on, rsrc, response_code ;
25
+ # rmf tmp/rc_count
26
+ # STORE rc_count_1 INTO 'tmp/rc_count' ;
27
+
28
+
29
+ # 20090304152029 bad utf8
30
+ # 20090308
31
+
32
+
33
+
34
+ # 1 9999999 20081207052456 1 old_scraper 20081207052456 200 old_scraper [{"user":{"followers_count":23,"description":"","url":"","profile_image_url":"http:\/\/s3.amazonaws.com /twi
35
+ # 1 9999999 20081207055023 1 old_scraper 20081207055023 200 old_scraper [{"user":{"followers_count":32,"description":"","url":"http:\/\/www.mychurch.org /gervis","profile_image_url
36
+ # 1 9999999 20081209041619 1 old_scraper 20081209041619 200 old_scraper [{"user":{"followers_count":80,"description":"1983, Amersfoort, audio-producer @ NPS 3FM","url":"http:\/\/ww
37
+ # 1 9999999 20081209115725 1 old_scraper 20081209115725 200 old_scraper [{"user":{"followers_count":19,"description":"Pozzo e Luck não acessavam a net.","url":"","profile_image_ur
38
+ # 1 9999999 20081209232718 1 old_scraper 20081209232718 200 old_scraper [{"user":{"followers_count":105,"description":"札幌←→東京頻繁。IT系,広告系,プランナー,<E5><85>
39
+ # 1 9999999 20081210061628 1 old_scraper 20081210061628 200 old_scraper [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
40
+ # 1 9999999 20081210185703 1 old_scraper 20081210185703 200 old_scraper [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
41
+ # 1 9999999 20081211095702 1 old_scraper 20081211095702 200 old_scraper [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":"
42
+ # 1 9999999 20081213073636 1 old_scraper 20081213073636 200 old_scraper [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
43
+ # 1 9999999 20081214100003 1 old_scraper 20081214100003 200 old_scraper [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
44
+ # 1 9999999 20081215105211 1 old_scraper 20081215105211 200 old_scraper [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
45
+ # 1 9999999 20081218075108 1 old_scraper 20081218075108 200 old_scraper [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
46
+ # 1 9999999 20081219065853 1 old_scraper 20081219065853 200 old_scraper [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
47
+ # 1 9999999 20081220010525 1 old_scraper 20081220010525 200 old_scraper [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
48
+ # 1 9999999 20081220113649 1 old_scraper 20081220113649 200 old_scraper [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
49
+ # 1 9999999 20081221083623 1 old_scraper 20081221083623 200 old_scraper [{"user":{"followers_count":565,"description":"癒し系プログラマ","url":"http:\/\/polog.org /","profi
50
+ # 1 9999999 20081224110505 1 old_scraper 20081224110505 200 old_scraper [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
51
+ # 1 9999999 20081225055913 1 old_scraper 20081225055913 200 old_scraper [{"user":{"followers_count":213,"description":"同人ゲーム作ってます。 お気軽にフォロー<E3>
52
+ # 1 9999999 20081229072914 1 old_scraper 20081229072914 200 old_scraper [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
53
+ # 1 9999999 20081229084830 1 old_scraper 20081229084830 200 old_scraper [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
54
+ # 1 9999999 20090102103315 1 old_scraper 20090102103315 200 old_scraper [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
55
+ # 1 9999999 20090104084017 1 old_scraper 20090104084017 200 old_scraper [{"user":{"followers_count":299,"description":"スウィーツ(呪)。12時過ぎるとたいてい就<E5><AF>
56
+ # 1 9999999 20090105101608 1 old_scraper 20090105101608 200 old_scraper [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
57
+ # 1 9999999 20090105103520 1 old_scraper 20090105103520 200 old_scraper [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
58
+ # 1 9999999 20090106165730 1 old_scraper 20090106165730 200 old_scraper [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
59
+ # 1 9999999 20090112091101 1 old_scraper 20090112091101 200 old_scraper [{"user":{"followers_count":25,"description":"Ostravak je stav duše, i když člověk žije v Praze.","url"
60
+ # 1 9999999 20090117090748 1 old_scraper 20090117090748 200 old_scraper [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
61
+ # 1 9999999 20090418173317 1 old_scraper 20090418173317 200 old_scraper [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
62
+ # 1 9999999 20090418231828 1 old_scraper 20090418231828 200 old_scraper [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
63
+ # 1 9999999 20090419014909 1 old_scraper 20090419014909 200 old_scraper [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
64
+ # 1 9999999 20090419052121 1 old_scraper 20090419052121 200 old_scraper [{"user":{"followers_count":743,"description":"ゆるい感じで参加してみました。野球と温泉<E3>
65
+ # 1 9999999 20090419233942 1 old_scraper 20090419233942 200 old_scraper [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
66
+ # 1 9999999 20090420033745 1 old_scraper 20090420033745 200 old_scraper [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
67
+ # 1 9999999 20090420112345 1 old_scraper 20090420112345 200 old_scraper [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
68
+ # 1 9999999 20090421010538 1 old_scraper 20090421010538 200 old_scraper [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
69
+ # 1 9999999 20090421084441 1 old_scraper 20090421084441 200 old_scraper [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
70
+ # 1 9999999 20090421101818 1 old_scraper 20090421101818 200 old_scraper [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
71
+ # 1 9999999 20090421232814 1 old_scraper 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
72
+ # 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
73
+ # 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
74
+ # 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
75
+ # 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
76
+ # 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
77
+ # 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiやってます。「ふじしょう」で検索してみ<E3>
78
+ # 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
79
+ # 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
80
+ # 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
81
+ # 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
82
+ # 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
83
+ # 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
84
+ # 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
85
+ # 1 9999999 20081209232718 1 old_fetcher 20081209232718 200 old_fetcher [{"user":{"followers_count":105,"description":"札幌←→東京頻繁。IT系,広告系,プランナー,<E5><85>1 9999999 20081210061628 1 old_fetcher 20081210061628 200 old_fetcher [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
86
+ # 1 9999999 20081210185703 1 old_fetcher 20081210185703 200 old_fetcher [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
87
+ # 1 9999999 20081211095702 1 old_fetcher 20081211095702 200 old_fetcher [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":" 1 9999999 20081213073636 1 old_fetcher 20081213073636 200 old_fetcher [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
88
+ # 1 9999999 20081214100003 1 old_fetcher 20081214100003 200 old_fetcher [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
89
+ # 1 9999999 20081215105211 1 old_fetcher 20081215105211 200 old_fetcher [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
90
+ # 1 9999999 20081218075108 1 old_fetcher 20081218075108 200 old_fetcher [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
91
+ # 1 9999999 20081219065853 1 old_fetcher 20081219065853 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
92
+ # /im
93
+ # 1 9999999 20081220010525 1 old_fetcher 20081220010525 200 old_fetcher [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
94
+ # 1 9999999 20081220113649 1 old_fetcher 20081220113649 200 old_fetcher [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
95
+ # 1 9999999 20081221083623 1 old_fetcher 20081221083623 200 old_fetcher [{"user":{"followers_count":565,"description":"癒し系プログラマ","url":"http:\/\/polog.org /","profi 1 9999999 20081224110505 1 old_fetcher 20081224110505 200 old_fetcher [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
96
+ # 1 9999999 20081225055913 1 old_fetcher 20081225055913 200 old_fetcher [{"user":{"followers_count":213,"description":"同人ゲーム作ってます。 お気軽にフォロー<E3> 1 9999999 20081229072914 1 old_fetcher 20081229072914 200 old_fetcher [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
97
+ # /
98
+ # 1 9999999 20081229084830 1 old_fetcher 20081229084830 200 old_fetcher [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
99
+ # 1 9999999 20090102103315 1 old_fetcher 20090102103315 200 old_fetcher [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
100
+ # 1 9999999 20090104084017 1 old_fetcher 20090104084017 200 old_fetcher [{"user":{"followers_count":299,"description":"スウィーツ(呪)。12時過ぎるとたいてい就<E5><AF>1 9999999 20090105101608 1 old_fetcher 20090105101608 200 old_fetcher [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
101
+ # 1 9999999 20090105103520 1 old_fetcher 20090105103520 200 old_fetcher [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
102
+ # 1 9999999 20090106165730 1 old_fetcher 20090106165730 200 old_fetcher [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
103
+ # 1 9999999 20090112091101 1 old_fetcher 20090112091101 200 old_fetcher [{"user":{"followers_count":25,"description":"Ostravak je stav duše, i když člověk žije v Praze.","url"
104
+ # 1 9999999 20090117090748 1 old_fetcher 20090117090748 200 old_fetcher [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
105
+ # 1 9999999 20090418173317 1 old_fetcher 20090418173317 200 old_fetcher [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
106
+ # 1 9999999 20090418231828 1 old_fetcher 20090418231828 200 old_fetcher [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
107
+ # 1 9999999 20090419014909 1 old_fetcher 20090419014909 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
108
+ # 1 9999999 20090419052121 1 old_fetcher 20090419052121 200 old_fetcher [{"user":{"followers_count":743,"description":"ゆるい感じで参加してみました。野球と温泉<E3>
109
+ # 1 9999999 20090419233942 1 old_fetcher 20090419233942 200 old_fetcher [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
110
+ # 1 9999999 20090420033745 1 old_fetcher 20090420033745 200 old_fetcher [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
111
+ # 1 9999999 20090420112345 1 old_fetcher 20090420112345 200 old_fetcher [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
112
+ # 1 9999999 20090421010538 1 old_fetcher 20090421010538 200 old_fetcher [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
113
+ # 1 9999999 20090421084441 1 old_fetcher 20090421084441 200 old_fetcher [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
114
+ # 1 9999999 20090421101818 1 old_fetcher 20090421101818 200 old_fetcher [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
115
+ # 1 9999999 20090421232814 1 old_fetcher 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
116
+ # 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
117
+ # 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
118
+ # 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
119
+ # 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
120
+ # 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
121
+ # 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiやってます。「ふじしょう」で検索してみ<E3>
122
+ # 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
123
+ # 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
124
+ # 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
125
+ # 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
126
+ # 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
127
+ # 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
128
+ # 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
129
+ # 1 9999999 20090504045337 1 old_fetcher 20090504045337 200 old_fetcher [{"user":{"followers_count":171,"description":"i'm a bboy and a multimedia designer","url":"http:\/\/pitiscm
130
+ # 1 9999999 20090507112755 1 old_fetcher 20090507112755 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
131
+ # 1 9999999 20090509222228 1 old_fetcher 20090509222228 200 old_fetcher [{"user":{"followers_count":256,"description":"representing NJ\/NY","url":"http:\/ /www.myspace.com\/darknes
132
+ # 1 9999999 20090512052820 1 old_fetcher 20090512052820 200 old_fetcher [{"user":{"followers_count":0,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
133
+ # 1 9999999 20090512101017 1 old_fetcher 20090512101017 200 old_fetcher [{"user":{"followers_count":685,"description":"Gainfully employed doing Linux 'stuff', part-time fitness fan
134
+ # 1 9999999 20090513062843 1 old_fetcher 20090513062843 200 old_fetcher [{"user":{"followers_count":7,"description":"Non-commercial radio for everyone who loves original music and
135
+ #
136
+ # 1 20090427 followers_ids 200 /images\/themes\/theme1\/bg.gif","profile_link_color":"0000ff","time_zone":"Cairo","created_at":"Sun Sep 02 13:44:12 +0000 2007","profile_sidebar_fill_c
137
+ # 1 20090427 followers_ids 200
138
+ # 1 20090427 followers_ids 209 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>","created_at":"Sun Feb 08 08:17:30 +0000 2009"},{"user":{"profile_background_image_url":"http:\
139
+ # 1 20090427 followers_ids 20 /twitter_production\/profile_images\/62248324\/086_copy_3_normal.jpg","statuses_count":16,"profile_text_color":"666666","screen_name":"cheekydonkey","pr
140
+ # 1 20090427 followers_ids 20 /images\/themes\/theme1\/bg.gif","created_at":"Fri Apr 18 19:34:26 +0000 2008","profile_text_color":"000000","location":null,"id":14436644,"time_zone":"
141
+ # 1 20090427 followers_ids 242
142
+ # 1 20090427 followers_ids 24479801
143
+ # 1 20090427 followers_ids 2 /images\/default_profile_normal.png","statuses_count":0,"profile_text_color":"000000","screen_name":"kaylazastrow","profile_background_tile":false,"prof
144
+ # 3256 20090427 followers_ids 400
145
+ #
146
+ # 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
147
+ # 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
148
+ # 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
149
+ # 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
150
+ # 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
151
+ # 1 0 6}
152
+ # 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
153
+ # 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
154
+ #
155
+ # 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
156
+ # 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
157
+ # 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
158
+ # 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
159
+ # 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
160
+ # 1 0 6}
161
+ # 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
162
+ # 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
163
+ # 572 0 bogus-all_numeric-favorites 200
164
+ # 8 0 bogus-bad_chars-favorites 200
165
+ # 11 0 bogus-bad_chars-followers 200
166
+ # 3 0 bogus-bad_chars-friends 200
167
+ # 1867 0 bogus-missing_id-favorites 200
168
+ # 1 0 eply_to_status_id":null,"source":"web"},"notifications":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
169
+ # 1 0 f.com/friends/ids/17799430.json
170
+ # 1 0 f49.json
171
+ # 1 0 f81852492\/Bread__normal.jpg","status":{"truncated":false,"in_reply_to_status_id":1625610632,"text":"@podcasthelper oh yes yes i do still need help. It is ok to call upon your expertis
172
+ # 1 0 fat":"Tue Aug 12 15:27:32 +0000 2008","friends_count":87,"profile_background_color":"FF6699","location":"Newcastle, UK","id":15823576,"time_zone":"Hawaii","favourites_count":0,"profile
173
+ # 890016 0 favorites 200
174
+ # 1 0 file_image_url":"http:\/\/static.twitter.com 0
175
+ # 1 0 fo:null,"name":"THE_REAL_SHAQ","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
176
+ # 1 0 foll3183,14885034,17824762,25320311,26651936,5520952,16092530,15466712,18414465,20019951,22151420,26332254,7096192,13434972,26275705,27923225,15770739,19900326,15654216,20486512,167358
177
+ # 1 0 folleply_to_screen_name":null,"id":1618358723,"source":"<a href=\"http:\/ 0
178
+ # 1 0 follo54:45 +0000 2009"}]
179
+ # 1 0 follos\/71101463\/LegalTimes_1651_normal.jpg","status":{"truncated":false,"in_reply_to_status_id":null,"text":"The Morning Wrap http: 0
180
+ # 1 0 followeada)","favourites_count":1,"profile_text_color":"666666"},{"description":"Writer, Pick-up Artist, Social Mastermind, and Traveler","profile_background_image_url":"http:\/\/stati
181
+ # 1 0 followekground_tile":false,"description":"Gamer\/Skier\/Drummer 0
182
+ # 1 0 followers,"profile_sidebar_border_color":"87bc44","time_zone":"London","profile_image_url":"http:\/\/s3.amazonaws.com 0
183
+ # 1 0 followers,12836312,18993475,16860914,16142878,18504804,17810432,18661758,17356420,17901504,15535360,19240090,16180026,14614833,18264863,17807744,19459418,19356460,8112832,18637695,1925
184
+ # 1 0 followers.json
185
+ # 1 0 followers_":"need coffee","favorited":false,"in_reply_to_screen_name":null,"created_at":"Mon Apr 27 14:11:52 +0000 2009","truncated":false,"id":1629096949,"in_reply_to_status_id":null,
186
+ # 1 0 followers__Close_normal.JPG","status":{"truncated":false,"in_reply_to_status_id":null,"text":"is finally home and going to bed. Have to get up for work in about 4.5 hours.","in_reply_t
187
+ # 1 0 followers_id 0
188
+ # 1 0 followers_id,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
189
+ # 1 0 followers_id13165892,15131310,6970122,13838022,15136098,14590445,15184346,6264392,12650292,16159919,16725668,16816616,15984607,16895930,12228062,15224867,859221,12364022,15316113,15624
190
+ # 1 0 followers_id2009","truncated":false,"id":1625606751,"in_reply_to_status_id":1623299478,"source":"web"},"notifications":false,"time_zone":"Pacific Time (US & Canada)","favourites_count"
191
+ # 1 0 followers_idbackground_tile":false,"followers_count":78,"url":"http:\/\/danfitek.com","screen_name":"fitekker","name":"Dan Fitek","friends_count":100,"profile_background_color":"9ae4e8
192
+ # 149 0 followers_ids
193
+ # 1 0 followers_ids 1
194
+ # 1 0 followers_ids 17975054
195
+ # 1 0 followers_ids 200
196
+ # 1 0 followers_ids 20090412070434
197
+ # 4 0 followers_ids 0
198
+ # 1 0 followers_ids 0
199
+ # 1 0 followers_ids 0
200
+ # 1 0 followers_ids"in_reply_to_screen_name":null,"created_at":"Fri Apr 24 19:22:24 +0000 2009","truncated":false,"id":1606556267,"in_reply_to_status_id":null,"source":"<a href=\"http:\/ 0
201
+ # 1 0 followers_ids00,"profile_link_color":"0000ff","profile_image_url":"http:\/\/static.twitter.com 0
202
+ # 1 0 followers_idsC2EF","location":"San Diego, CA","id":9628922,"time_zone":"Pacific Time (US & Canada)","created_at":"Tue Oct 23 17:30:49 +0000 2007"}]
203
+ # 1 0 followers_idst":"Mon Jan 28 03:48:51 +0000 2008","screen_name":"siolanthe"},{"description":"","profile_background_image_url":"http:\/\/static.twitter.com 0
204
+ # 1 0 followers_ilocation":null,"id":15311449,"time_zone":"Greenland"},{"description":"","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
205
+ # 1 0 followerwing":false,"profile_link_color":"CD0033","url":"http:\/\/foodfeed.us","name":"FoodFeed","notifications":false,"profile_sidebar_fill_color":"fafaf5","followers_count":4399,"pro
206
+ # 1 0 followetp://twitter.com/followers/ids/15737773.json
207
+ # 1 0 fri:5,"profile_sidebar_border_color":"87bc44","url":null,"screen_name":"seniorpoopypant","name":"seniorpoopypant","favourites_count":0,"protected":false,"status":{"truncated":false,"in
208
+ # 1 0 frie:"Pirate LadyZebra. (also know as Zoaea)","utc_offset":-18000,"profile_sidebar_fill_color":"e0ff92","followers_count":19,"favourites_count":0,"profile_image_url":"http:\/\/s3.amazo
209
+ # 1 0 friend219,14213042,29736155,27530456,18755292]
210
+ # 1 0 friends":"web"},"profile_background_image_url":"http:\/\/static.twitter.com 0
211
+ # 1 0 friends\/\/s3.amazonaws.com 0
212
+ # 1 0 friends_3,27039226,29988381,35486899,18900303,16044047]
213
+ # 1 0 friends__count":0,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
214
+ # 1 0 friends_i12809262,12767592,13084172,12803292,12775072,12129872,14198789,29866309]
215
+ # 1 0 friends_i1355,"source":"web","created_at":"Fri Jun 27 20:51:55 +0000 2008"},{"truncated":false,"user":{"description":"Recently married! Work for Victory - vc.tv - lovin' life!","utc_o
216
+ # 1 0 friends_id":null,"text":"One Laptop per Child Lands in Indiahttp:\/\/tinyurl.com 0
217
+ # 1 0 friends_id,17213487,20820391,1050851,23817210,15117375,14790735,16069532,14634720,23306376,14470037,24754635,18666525,16798949,17118708,17492127,16563598,22731226,20253928,17139092,240
218
+ # 1 0 friends_idile_background_images\/3476247\/BJMendelson_388_twitbacks.jpg","profile_link_color":"0084B4","location":"Glens Falls, New York","id":12687952,"time_zone":"Indiana (East)","cr
219
+ # 161 0 friends_ids
220
+ # 1 0 friends_ids 18706826
221
+ # 1 0 friends_ids 16624466
222
+ # 1 0 friends_ids 20090427091534
223
+ # 1 0 friends_ids 20090427094351
224
+ # 1 0 friends_ids 0
225
+ # 1 0 friends_ids 0
226
+ # 1 0 friends_ids 0
227
+ # 1 0 friends_ids 0
228
+ # 1 0 friends_ids 0
229
+ # 1 0 friends_ids 0
230
+ # 1 0 friends_ids/\/orangatame.com\/products 0
231
+ # 1 0 friends_idsl_color":"F3F3F3","followers_count":25,"location":"St. Louis","id":14708168,"notifications":false,"friends_count":23,"profile_sidebar_border_color":"DFDFDF"},"text":"@Raptor
232
+ # 1 0 friends_iound_images\/4821472\/Mississippi_River_TypeMap2.jpg","profile_link_color":"1F98C7","location":"San Francisco","id":18257438,"time_zone":"Pacific Time (US & Canada)","created_
233
+ # 1 0 friprofile_background_color":"1A1B1F","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
234
+ # 1 0 frmusings of a young Catholic in Yorkshire, England","utc_offset":0,"notifications":false,"profile_sidebar_fill_color":"e0ff92","followers_count":89,"profile_image_url":"http:\/\/s3.am
235
+ # 1 0 frollowing":false,"statuses_count":468,"profile_link_color":"2FC2EF","url":"http:\/\/myspace.com 0
236
+ # 1 0 fws.com\/twitter_production\/profile_background_images 0
237
+ # 1 0 hu Mar 05 07:45:42 +0000 2009","id":1282474011,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"C6E2EE","notifications":false,"created_at":"Thu Mar 05 07:36
238
+ # 1 0 ile_image_url":"http:\/\/static.twitter.com 0
239
+ # 1 0 imit exceeded. Clients may not make more than 20000 requests per hour."}
240
+ # 1 0 location":null,"id":22893663,"profile_link_color":"0000ff"}
241
+ # 1 0 nk faudrait demander \u00e0 Michel Bergeron ,on aurait du fun pour 30 minutes","in_reply_to_user_id":21818830,"created_at":"Wed Mar 04 19:18:53 +0000 2009","truncated":false,"id":12798
242
+ # 1 0 oz","profile_background_image_url":"http:\/\/static.twitter.com 0
243
+ # 1 0 s football, beer, and technology! Tweet away!","statuses_count":1444,"utc_offset":-21600,"profile_sidebar_border_color":"87bc44","profile_background_tile":true,"following":false,"prof
244
+ # 24228 0 timeline 200
245
+ # 1 0 u"created_at":"Wed Mar 04 05:45:20 +0000 2009","in_reply_to_user_id":null,"in_reply_to_status_id":null,"truncated":false,"id":1277431048,"source":"<a href=\"http:\/ 0
246
+ # 1 0 u128
247
+ # 1 0 u88,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"F2E195","notifications":false,"created_at":"Wed Mar 04 05:32:57 +0000 2009","profile_background_image_u
248
+ # 1 0 uat":"Thu Mar 05 09:07:06 +0000 2009","id":1282640319,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 0
249
+ # 1 0 ul":null,"name":"kimberly luzier","profile_background_tile":false,"protected":false,"status":{"in_reply_to_user_id":null,"text":"pictures for ebayy","created_at":"Thu Mar 05 17:21:37 +
250
+ # 1 0 us":"http:\/\/s3.amazonaws.com 0
251
+ # 1 0 us":false,"location":null,"id":22740024}
252
+ # 1 0 us0308222713
253
+ # 1 0 use":2,"url":"http:\/\/www.pat-bach.com","name":"Tim Bach","profile_background_tile":false,"protected":false,"status":{"truncated":false,"favorited":false,"text":"Setting up my Twitter
254
+ # 1 0 use,"id":1288256641,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 05 08:33:22 +0000 2009","profile_ba
255
+ # 1 0 usefalse,"favorited":false,"text":"Wondering what twitter is all about and if I am missing out!","in_reply_to_user_id":null,"created_at":"Tue Mar 03 13:21:30 +0000 2009","id":127364277
256
+ # 1 0 useme":"magic 93.1 Radio","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
257
+ # 1 0 useprofile_text_color":"000000","description":null,"screen_name":"JohnNMiller","utc_offset":null,"profile_link_color":"0000ff","time_zone":null,"profile_sidebar_fill_color":"e0ff92","f
258
+ #
259
+ # 474 0 user
260
+ # 1 0 user 20104991
261
+ # 2 0 user 1
262
+ # 1 0 user 14686512
263
+ # 80 0 user 200
264
+ # 1 0 user 20090308201437
265
+ # 1 0 user 20090308201710
266
+ # 1 0 user 20090308201901
267
+ # 1 0 user 20090308202228
268
+ # 1 0 user 20090308204043
269
+ # 1 0 user 20090308214100
270
+ # 8 0 user 0
271
+ # 1 0 user 0
272
+ # 1 0 user 0
273
+ # 1 0 user 0
274
+ # 1 0 user 0
275
+ # 1 0 user 0
276
+ # 1 0 user 0
277
+ # 1 0 user 0
278
+ # 1 0 user 0 /lovechelle","name":"nichellemicole","profile_background_tile":false,"protected":true,"profile_sidebar_border_color":"D9B17E","notifications":false,"cre
279
+ # 1 0 user 0 /images\/themes\/theme1\/bg.gif","statuses_count":1,"profile_text_color":"000000","time_zone":null,"url":null,"name":"Ben Pitz","friends_count":10,"prof
280
+ # 1 0 user 0 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>"},"notifications":false,"profile_image_url":"http:\/\/static.twitter.com\/images\/default_profi
281
+ # 1 0 user Ocean from my office window.","favorited":false,"created_at":"Wed Feb 25 15:55:00 +0000 2009","in_reply_to_user_id":null,"id":1249633057,"source":"web"},"time_zone":null,"profile_
282
+ # 5 0 user_timeline
283
+ # 1 0 user_timeline 0
284
+ # 1 0 userr.com\/images\/default_profile_normal.png","followers_count":3,"location":null,"id":21311967,"created_at":"Thu Feb 19 16:16:04 +0000 2009","profile_sidebar_border_color":"87bc44","
285
+ # 1 0 usertp:\/\/orangatame.com 0
286
+ # 1 0 usertter_production\/profile_images\/87717775 0
287
+ # 1 0 usm/users/show/0022897534.json?page=1
288
+ # 1 0 usmtwittercom.html\">mobile web<\/a>"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Tue Sep 02 07:53:34 +0000 2008","profile_background_image_url":"http:
289
+ # 8 20081218 bogus-all_numeric-followers 200
290
+ # 12 20081218 bogus-all_numeric-friends 200
291
+ # :
@@ -0,0 +1,98 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rubygems'
3
+ require 'wukong'
4
+ require 'monkeyshines'
5
+ require 'wuclan/twitter'
6
+ # un-namespace request classes.
7
+ include Wuclan::Twitter::Scrape
8
+ include Wuclan::Twitter::Model
9
+ # If you're anyone but the original author, this next require is useless but harmless.
10
+ require 'wuclan/twitter/scrape/old_skool_request_classes'
11
+
12
#
# The map phase is a plain shell command rather than a Ruby mapper: `cut`
# keeps tab-separated input fields 1, 3, 4, 7 and 8, which are, in order,
#
#   req, id, page, scraped_at, response_code
#
# The "\t" below is expanded by Ruby, so the shell sees a literal tab
# character as the delimiter. The trailing space is part of the command.
#
REQUEST_MAPPER_COMMAND = "/usr/bin/cut -d\"\t\" -f1,3,4,7,8 "
16
+
17
#
# Reducer that emits one record per [req, id] key, built from the reducer's
# final_value, together with a tally of how many accepted records carried
# each tracked HTTP response code.
#
# Incoming records are expected to be
#   req, id, page, scraped_at, response_code
# and each emitted record is
#   id (zero-padded to 10 digits), req, page, scraped_at,
#   count_200, count_400, count_401, count_403, count_404
#
class TwitterRequestUniqer < Wukong::Streamer::UniqByLastReducer
  # Response codes that get their own count column, in output order.
  TRACKED_RESPONSE_CODES = [200, 400, 401, 403, 404]

  # A well-formed scraped_at is exactly 14 digits (e.g. 20090308201437).
  # Anchored so that a corrupted field which merely *contains* a 14-digit
  # run is rejected instead of being counted.
  SCRAPED_AT_RE = /\A\d{14}\z/

  # Hash of response code => number of accepted records seen with that code.
  attr_accessor :response_codes

  # Records are grouped on the request type and the user id; page and any
  # remaining fields do not take part in the key.
  def get_key(req=nil, id=nil, pg=nil, *args)
    [req, id]
  end

  # Zero the response-code tally, then defer to the superclass.
  # NOTE(review): presumably invoked by the superclass at the start of each
  # key group -- confirm against Wukong's reducer lifecycle.
  def start!(*args)
    self.response_codes = TRACKED_RESPONSE_CODES.inject({}) do |counts, code|
      counts[code] = 0
      counts
    end
    super(*args)
  end

  # NOTE(review): nothing in this class uses JSON directly; the require is
  # kept in case code loaded elsewhere relies on it.
  require 'json'

  # Count the record's response code (if it is one of the tracked codes) and
  # hand the record to the superclass. Records whose scraped_at is not a
  # 14-digit timestamp are dropped entirely: not counted and not accumulated.
  def accumulate(*args)
    _req, _id, _page, scraped_at, resp = args
    return unless scraped_at =~ SCRAPED_AT_RE
    code = resp.to_i
    response_codes[code] += 1 if response_codes.include?(code)
    super(*args)
  end

  # Emit the surviving record followed by the per-code counts. Nothing is
  # emitted when no record was accepted for this key.
  def finalize(*args)
    return if final_value.blank?
    req, id, page, scraped_at, _resp = final_value
    padded_id = "%010d" % id.to_i
    yield([padded_id, req, page, scraped_at] + response_codes.values_at(*TRACKED_RESPONSE_CODES))
  end
end
44
+
45
+
46
# Make the script go.
#
# No Ruby mapper class is given (nil): the map phase is the shell command in
# REQUEST_MAPPER_COMMAND, and TwitterRequestUniqer is the reducer.
# NOTE(review): partitioning on 2 fields lines up with the [req, id] reducer
# key; sorting on 3 presumably adds page as a secondary sort -- confirm
# against Wukong's option semantics.
Wukong::Script.new(
  nil,
  TwitterRequestUniqer,
  :map_command      => REQUEST_MAPPER_COMMAND,
  :partition_fields => 2,
  :sort_fields      => 3
).run
52
+
53
+
54
+ # 49522
55
+ # 74975395 200
56
+ # 84 302
57
+ # 277786 400
58
+ # 972881 401
59
+ # 94647 403
60
+ # 178105 404
61
+ # 9710 500
62
+ # 23134 502
63
+ # 1588 503
64
+ # 2479 504
65
+
66
+ # Wuclan::Twitter::Scrape::Base.class_eval do class_inheritable_accessor :req_code ; end
67
+ # TwitterUserRequest.class_eval do self.req_code = :tw_user ; end
68
+ # TwitterFollowersRequest.class_eval do self.req_code = :tw_foll ; end
69
+ # TwitterFriendsRequest.class_eval do self.req_code = :tw_frnd ; end
70
+ # TwitterFollowersIdsRequest.class_eval do self.req_code = :tw_foid ; end
71
+ # TwitterFriendsIdsRequest.class_eval do self.req_code = :tw_frid ; end
72
+ # TwitterUserTimelineRequest.class_eval do self.req_code = :tw_ustl ; end
73
+ #
74
+ # REQ_CODES = {
75
+ # 'followers' => :tw_fo, 'twitter_followers_request' => :tw_fo,
76
+ # 'friends' => :tw_fr, 'twitter_friends_request' => :tw_fr,
77
+ # 'followers_ids' => :tw_fi, 'twitter_followers_ids_request' => :tw_fi,
78
+ # 'friends_ids' => :tw_ri, 'twitter_friends_ids_request' => :tw_ri,
79
+ # 'user' => :tw_us, 'twitter_user_request' => :tw_us,
80
+ # 'user_timeline' => :tw_ut, 'twitter_user_timeline_request' => :tw_ut,
81
+ # }
82
+
83
+ # #
84
+ # #
85
+ # #
86
+ # class TwitterRequestParser < Wukong::Streamer::StructStreamer
87
+ #
88
+ # def process request, *args, &block
89
+ # next if request.page.to_i > 1
90
+ # next if request.response_code != '200'
91
+ # req_code = REQ_CODES[request]
92
+ # case request
93
+ # when TwitterUserRequest, TwitterFollowersRequest, TwitterFriendsRequest,
94
+ # TwitterFollowersIdsRequest, TwitterFriendsIdsRequest, TwitterUserTimelineRequest
95
+ # yield [request.twitter_user_id, request.req_code, request.scraped_at]
96
+ # end
97
+ # end
98
+ # end
@@ -0,0 +1,4 @@
1
-- Load the request records with their response-code tallies. The AS schema
-- is written as a parenthesized field list, as the LOAD syntax requires for
-- more than one field.
Scrapes            = LOAD 'tmp/last_requests_and_codes' AS (user_id:int, rsrc:chararray, page:int, datetime:long, r200:int, r400:int, r401:int, r403:int, r404:int) ;
-- Keep only rows whose resource is 'user'.
UserScrapes        = FILTER Scrapes BY rsrc == 'user' ;
-- Smallest datetime value first.
UserScrapesOrdered = ORDER UserScrapes BY datetime ASC ;
STORE UserScrapesOrdered INTO 'twmeta/scrape_requests/users_by_staleness-20090730.tsv' ;
@@ -0,0 +1,6 @@
1
+ love 65536 3.36366494448618 6698 2686693027 2688691633
2
+ red+sox 65536 0.0113457581992013 1500 2661001994 2688059232
3
+ britney+spears 65536 0.00866753886170806 184 2685103763 2688130850
4
+ hadoop 65536 0.000661831916251315 614 2501794487 2687967783
5
+ infochimps 65536 2.24964286919452e-05 16 2541533220 2683708276
6
+ hapaxlegomenon 65536 0.0 1 2646535741 2646535741