wuclan 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.textile +20 -0
- data/README.textile +28 -0
- data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
- data/examples/analyze/strong_links/main.rb +51 -0
- data/examples/analyze/word_count/dump_schema.rb +13 -0
- data/examples/analyze/word_count/freq_user.rb +31 -0
- data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
- data/examples/analyze/word_count/word_count.pig +43 -0
- data/examples/analyze/word_count/word_count.rb +34 -0
- data/examples/lastfm/scrape/load_lastfm.rb +31 -0
- data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
- data/examples/lastfm/scrape/seed.tsv +147 -0
- data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
- data/examples/twitter/old/scrape_twitter_api.rb +104 -0
- data/examples/twitter/old/scrape_twitter_search.rb +57 -0
- data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
- data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
- data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
- data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
- data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
- data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
- data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
- data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
- data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
- data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
- data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
- data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
- data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
- data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
- data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
- data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
- data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
- data/lib/old/twitter_api.rb +88 -0
- data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
- data/lib/wuclan/delicious/delicious_models.rb +26 -0
- data/lib/wuclan/delicious/delicious_request.rb +65 -0
- data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
- data/lib/wuclan/friendster.rb +7 -0
- data/lib/wuclan/lastfm/model/base.rb +49 -0
- data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
- data/lib/wuclan/lastfm/scrape/base.rb +195 -0
- data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
- data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
- data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
- data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
- data/lib/wuclan/lastfm/scrape.rb +12 -0
- data/lib/wuclan/lastfm.rb +7 -0
- data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
- data/lib/wuclan/metrics/user_metrics.rb +443 -0
- data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
- data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
- data/lib/wuclan/metrics.rb +0 -0
- data/lib/wuclan/myspace.rb +21 -0
- data/lib/wuclan/open_social/model/base.rb +0 -0
- data/lib/wuclan/open_social/scrape/base.rb +111 -0
- data/lib/wuclan/open_social/scrape_request.rb +6 -0
- data/lib/wuclan/open_social.rb +0 -0
- data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
- data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
- data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
- data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
- data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
- data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
- data/lib/wuclan/twitter/api_response_examples.textile +300 -0
- data/lib/wuclan/twitter/model/base.rb +72 -0
- data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
- data/lib/wuclan/twitter/model/relationship.rb +176 -0
- data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
- data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
- data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
- data/lib/wuclan/twitter/model/text_element.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
- data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
- data/lib/wuclan/twitter/model/tweet.rb +74 -0
- data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
- data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
- data/lib/wuclan/twitter/model.rb +21 -0
- data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
- data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
- data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
- data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
- data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
- data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
- data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
- data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
- data/lib/wuclan/twitter/scrape/base.rb +97 -0
- data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
- data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
- data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
- data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
- data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
- data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
- data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
- data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
- data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
- data/lib/wuclan/twitter/scrape.rb +27 -0
- data/lib/wuclan/twitter.rb +7 -0
- data/lib/wuclan.rb +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/wuclan_spec.rb +7 -0
- data/wuclan.gemspec +184 -0
- metadata +219 -0
@@ -0,0 +1,291 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
require 'wukong'
|
4
|
+
require 'monkeyshines'
|
5
|
+
require 'wuclan/twitter'
|
6
|
+
$: << '/home/flip/ics/rubygems/json-1.1.7/lib'
|
7
|
+
include Wuclan::Twitter::Scrape
|
8
|
+
include Wuclan::Twitter::Model
|
9
|
+
|
10
|
+
|
11
|
+
require 'wukong/schema'
|
12
|
+
|
13
|
+
|
14
|
+
Wuclan::Twitter::Scrape::Base.class_eval do
|
15
|
+
extend Wukong::Schema
|
16
|
+
end
|
17
|
+
|
18
|
+
p Wuclan::Twitter::Scrape::TwitterUserRequest.pig_load
|
19
|
+
|
20
|
+
# Requests = LOAD 'ripd/com.tw/com.twitter/*' AS ( rsrc:chararray, priority:int, twitter_user_id: int, page: int, moreinfo: chararray, url: chararray, scraped_at: long, response_code: int, response_message: chararray, contents: chararray );
|
21
|
+
# request_classes = FOREACH Requests GENERATE rsrc, (int) ((double)scraped_at / 1000000.0) AS scon, response_code ;
|
22
|
+
# rc_grp = GROUP request_classes BY (rsrc, scon, response_code) ;
|
23
|
+
# rc_count = FOREACH rc_grp GENERATE COUNT(request_classes) AS freq, group.scon AS scraped_on, group.rsrc AS rsrc , group.response_code AS response_code ;
|
24
|
+
# rc_count_1 = ORDER rc_count BY scraped_on, rsrc, response_code ;
|
25
|
+
# rmf tmp/rc_count
|
26
|
+
# STORE rc_count_1 INTO 'tmp/rc_count' ;
|
27
|
+
|
28
|
+
|
29
|
+
# 20090304152029 bad utf8
|
30
|
+
# 20090308
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
# 1 9999999 20081207052456 1 old_scraper 20081207052456 200 old_scraper [{"user":{"followers_count":23,"description":"","url":"","profile_image_url":"http:\/\/s3.amazonaws.com /twi
|
35
|
+
# 1 9999999 20081207055023 1 old_scraper 20081207055023 200 old_scraper [{"user":{"followers_count":32,"description":"","url":"http:\/\/www.mychurch.org /gervis","profile_image_url
|
36
|
+
# 1 9999999 20081209041619 1 old_scraper 20081209041619 200 old_scraper [{"user":{"followers_count":80,"description":"1983, Amersfoort, audio-producer @ NPS 3FM","url":"http:\/\/ww
|
37
|
+
# 1 9999999 20081209115725 1 old_scraper 20081209115725 200 old_scraper [{"user":{"followers_count":19,"description":"Pozzo e Luck não acessavam a net.","url":"","profile_image_ur
|
38
|
+
# 1 9999999 20081209232718 1 old_scraper 20081209232718 200 old_scraper [{"user":{"followers_count":105,"description":"æå¹ââæ±äº¬é »ç¹ãITç³»,åºåç³»,ãã©ã³ãã¼,<E5><85>
|
39
|
+
# 1 9999999 20081210061628 1 old_scraper 20081210061628 200 old_scraper [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
|
40
|
+
# 1 9999999 20081210185703 1 old_scraper 20081210185703 200 old_scraper [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
|
41
|
+
# 1 9999999 20081211095702 1 old_scraper 20081211095702 200 old_scraper [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":"
|
42
|
+
# 1 9999999 20081213073636 1 old_scraper 20081213073636 200 old_scraper [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
|
43
|
+
# 1 9999999 20081214100003 1 old_scraper 20081214100003 200 old_scraper [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
|
44
|
+
# 1 9999999 20081215105211 1 old_scraper 20081215105211 200 old_scraper [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
|
45
|
+
# 1 9999999 20081218075108 1 old_scraper 20081218075108 200 old_scraper [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
|
46
|
+
# 1 9999999 20081219065853 1 old_scraper 20081219065853 200 old_scraper [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
47
|
+
# 1 9999999 20081220010525 1 old_scraper 20081220010525 200 old_scraper [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
|
48
|
+
# 1 9999999 20081220113649 1 old_scraper 20081220113649 200 old_scraper [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
|
49
|
+
# 1 9999999 20081221083623 1 old_scraper 20081221083623 200 old_scraper [{"user":{"followers_count":565,"description":"çãç³»ããã°ã©ã","url":"http:\/\/polog.org /","profi
|
50
|
+
# 1 9999999 20081224110505 1 old_scraper 20081224110505 200 old_scraper [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
|
51
|
+
# 1 9999999 20081225055913 1 old_scraper 20081225055913 200 old_scraper [{"user":{"followers_count":213,"description":"å人ã²ã¼ã ä½ã£ã¦ã¾ããããæ°è»½ã«ãã©ãã¼<E3>
|
52
|
+
# 1 9999999 20081229072914 1 old_scraper 20081229072914 200 old_scraper [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
|
53
|
+
# 1 9999999 20081229084830 1 old_scraper 20081229084830 200 old_scraper [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
|
54
|
+
# 1 9999999 20090102103315 1 old_scraper 20090102103315 200 old_scraper [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
|
55
|
+
# 1 9999999 20090104084017 1 old_scraper 20090104084017 200 old_scraper [{"user":{"followers_count":299,"description":"ã¹ã¦ã£ã¼ãï¼åªï¼ã12æéããã¨ããã¦ãå°±<E5><AF>
|
56
|
+
# 1 9999999 20090105101608 1 old_scraper 20090105101608 200 old_scraper [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
|
57
|
+
# 1 9999999 20090105103520 1 old_scraper 20090105103520 200 old_scraper [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
|
58
|
+
# 1 9999999 20090106165730 1 old_scraper 20090106165730 200 old_scraper [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
|
59
|
+
# 1 9999999 20090112091101 1 old_scraper 20090112091101 200 old_scraper [{"user":{"followers_count":25,"description":"Ostravak je stav duÅ¡e, i když ÄlovÄk žije v Praze.","url"
|
60
|
+
# 1 9999999 20090117090748 1 old_scraper 20090117090748 200 old_scraper [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
|
61
|
+
# 1 9999999 20090418173317 1 old_scraper 20090418173317 200 old_scraper [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
|
62
|
+
# 1 9999999 20090418231828 1 old_scraper 20090418231828 200 old_scraper [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
|
63
|
+
# 1 9999999 20090419014909 1 old_scraper 20090419014909 200 old_scraper [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
64
|
+
# 1 9999999 20090419052121 1 old_scraper 20090419052121 200 old_scraper [{"user":{"followers_count":743,"description":"ãããæãã§åå ãã¦ã¿ã¾ãããéçã¨æ¸©æ³<E3>
|
65
|
+
# 1 9999999 20090419233942 1 old_scraper 20090419233942 200 old_scraper [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
66
|
+
# 1 9999999 20090420033745 1 old_scraper 20090420033745 200 old_scraper [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
|
67
|
+
# 1 9999999 20090420112345 1 old_scraper 20090420112345 200 old_scraper [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
|
68
|
+
# 1 9999999 20090421010538 1 old_scraper 20090421010538 200 old_scraper [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
|
69
|
+
# 1 9999999 20090421084441 1 old_scraper 20090421084441 200 old_scraper [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
|
70
|
+
# 1 9999999 20090421101818 1 old_scraper 20090421101818 200 old_scraper [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
|
71
|
+
# 1 9999999 20090421232814 1 old_scraper 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
72
|
+
# 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
|
73
|
+
# 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
|
74
|
+
# 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
|
75
|
+
# 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
|
76
|
+
# 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
|
77
|
+
# 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiãã£ã¦ã¾ããããµãããããã§æ¤ç´¢ãã¦ã¿<E3>
|
78
|
+
# 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
|
79
|
+
# 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
|
80
|
+
# 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
|
81
|
+
# 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
82
|
+
# 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
|
83
|
+
# 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
|
84
|
+
# 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
|
85
|
+
# 1 9999999 20081209232718 1 old_fetcher 20081209232718 200 old_fetcher [{"user":{"followers_count":105,"description":"æå¹ââæ±äº¬é »ç¹ãITç³»,åºåç³»,ãã©ã³ãã¼,<E5><85>1 9999999 20081210061628 1 old_fetcher 20081210061628 200 old_fetcher [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
|
86
|
+
# 1 9999999 20081210185703 1 old_fetcher 20081210185703 200 old_fetcher [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
|
87
|
+
# 1 9999999 20081211095702 1 old_fetcher 20081211095702 200 old_fetcher [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":" 1 9999999 20081213073636 1 old_fetcher 20081213073636 200 old_fetcher [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
|
88
|
+
# 1 9999999 20081214100003 1 old_fetcher 20081214100003 200 old_fetcher [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
|
89
|
+
# 1 9999999 20081215105211 1 old_fetcher 20081215105211 200 old_fetcher [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
|
90
|
+
# 1 9999999 20081218075108 1 old_fetcher 20081218075108 200 old_fetcher [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
|
91
|
+
# 1 9999999 20081219065853 1 old_fetcher 20081219065853 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
92
|
+
# /im
|
93
|
+
# 1 9999999 20081220010525 1 old_fetcher 20081220010525 200 old_fetcher [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
|
94
|
+
# 1 9999999 20081220113649 1 old_fetcher 20081220113649 200 old_fetcher [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
|
95
|
+
# 1 9999999 20081221083623 1 old_fetcher 20081221083623 200 old_fetcher [{"user":{"followers_count":565,"description":"çãç³»ããã°ã©ã","url":"http:\/\/polog.org /","profi 1 9999999 20081224110505 1 old_fetcher 20081224110505 200 old_fetcher [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
|
96
|
+
# 1 9999999 20081225055913 1 old_fetcher 20081225055913 200 old_fetcher [{"user":{"followers_count":213,"description":"å人ã²ã¼ã ä½ã£ã¦ã¾ããããæ°è»½ã«ãã©ãã¼<E3> 1 9999999 20081229072914 1 old_fetcher 20081229072914 200 old_fetcher [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
|
97
|
+
# /
|
98
|
+
# 1 9999999 20081229084830 1 old_fetcher 20081229084830 200 old_fetcher [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
|
99
|
+
# 1 9999999 20090102103315 1 old_fetcher 20090102103315 200 old_fetcher [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
|
100
|
+
# 1 9999999 20090104084017 1 old_fetcher 20090104084017 200 old_fetcher [{"user":{"followers_count":299,"description":"ã¹ã¦ã£ã¼ãï¼åªï¼ã12æéããã¨ããã¦ãå°±<E5><AF>1 9999999 20090105101608 1 old_fetcher 20090105101608 200 old_fetcher [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
|
101
|
+
# 1 9999999 20090105103520 1 old_fetcher 20090105103520 200 old_fetcher [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
|
102
|
+
# 1 9999999 20090106165730 1 old_fetcher 20090106165730 200 old_fetcher [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
|
103
|
+
# 1 9999999 20090112091101 1 old_fetcher 20090112091101 200 old_fetcher [{"user":{"followers_count":25,"description":"Ostravak je stav duÅ¡e, i když ÄlovÄk žije v Praze.","url"
|
104
|
+
# 1 9999999 20090117090748 1 old_fetcher 20090117090748 200 old_fetcher [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
|
105
|
+
# 1 9999999 20090418173317 1 old_fetcher 20090418173317 200 old_fetcher [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
|
106
|
+
# 1 9999999 20090418231828 1 old_fetcher 20090418231828 200 old_fetcher [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
|
107
|
+
# 1 9999999 20090419014909 1 old_fetcher 20090419014909 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
108
|
+
# 1 9999999 20090419052121 1 old_fetcher 20090419052121 200 old_fetcher [{"user":{"followers_count":743,"description":"ãããæãã§åå ãã¦ã¿ã¾ãããéçã¨æ¸©æ³<E3>
|
109
|
+
# 1 9999999 20090419233942 1 old_fetcher 20090419233942 200 old_fetcher [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
110
|
+
# 1 9999999 20090420033745 1 old_fetcher 20090420033745 200 old_fetcher [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
|
111
|
+
# 1 9999999 20090420112345 1 old_fetcher 20090420112345 200 old_fetcher [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
|
112
|
+
# 1 9999999 20090421010538 1 old_fetcher 20090421010538 200 old_fetcher [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
|
113
|
+
# 1 9999999 20090421084441 1 old_fetcher 20090421084441 200 old_fetcher [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
|
114
|
+
# 1 9999999 20090421101818 1 old_fetcher 20090421101818 200 old_fetcher [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
|
115
|
+
# 1 9999999 20090421232814 1 old_fetcher 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
116
|
+
# 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
|
117
|
+
# 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
|
118
|
+
# 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
|
119
|
+
# 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
|
120
|
+
# 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
|
121
|
+
# 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiãã£ã¦ã¾ããããµãããããã§æ¤ç´¢ãã¦ã¿<E3>
|
122
|
+
# 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
|
123
|
+
# 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
|
124
|
+
# 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
|
125
|
+
# 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
126
|
+
# 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
|
127
|
+
# 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
|
128
|
+
# 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
|
129
|
+
# 1 9999999 20090504045337 1 old_fetcher 20090504045337 200 old_fetcher [{"user":{"followers_count":171,"description":"i'm a bboy and a multimedia designer","url":"http:\/\/pitiscm
|
130
|
+
# 1 9999999 20090507112755 1 old_fetcher 20090507112755 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
131
|
+
# 1 9999999 20090509222228 1 old_fetcher 20090509222228 200 old_fetcher [{"user":{"followers_count":256,"description":"representing NJ\/NY","url":"http:\/ /www.myspace.com\/darknes
|
132
|
+
# 1 9999999 20090512052820 1 old_fetcher 20090512052820 200 old_fetcher [{"user":{"followers_count":0,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
133
|
+
# 1 9999999 20090512101017 1 old_fetcher 20090512101017 200 old_fetcher [{"user":{"followers_count":685,"description":"Gainfully employed doing Linux 'stuff', part-time fitness fan
|
134
|
+
# 1 9999999 20090513062843 1 old_fetcher 20090513062843 200 old_fetcher [{"user":{"followers_count":7,"description":"Non-commercial radio for everyone who loves original music and
|
135
|
+
#
|
136
|
+
# 1 20090427 followers_ids 200 /images\/themes\/theme1\/bg.gif","profile_link_color":"0000ff","time_zone":"Cairo","created_at":"Sun Sep 02 13:44:12 +0000 2007","profile_sidebar_fill_c
|
137
|
+
# 1 20090427 followers_ids 200
|
138
|
+
# 1 20090427 followers_ids 209 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>","created_at":"Sun Feb 08 08:17:30 +0000 2009"},{"user":{"profile_background_image_url":"http:\
|
139
|
+
# 1 20090427 followers_ids 20 /twitter_production\/profile_images\/62248324\/086_copy_3_normal.jpg","statuses_count":16,"profile_text_color":"666666","screen_name":"cheekydonkey","pr
|
140
|
+
# 1 20090427 followers_ids 20 /images\/themes\/theme1\/bg.gif","created_at":"Fri Apr 18 19:34:26 +0000 2008","profile_text_color":"000000","location":null,"id":14436644,"time_zone":"
|
141
|
+
# 1 20090427 followers_ids 242
|
142
|
+
# 1 20090427 followers_ids 24479801
|
143
|
+
# 1 20090427 followers_ids 2 /images\/default_profile_normal.png","statuses_count":0,"profile_text_color":"000000","screen_name":"kaylazastrow","profile_background_tile":false,"prof
|
144
|
+
# 3256 20090427 followers_ids 400
|
145
|
+
#
|
146
|
+
# 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
|
147
|
+
# 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
|
148
|
+
# 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
|
149
|
+
# 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
|
150
|
+
# 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
|
151
|
+
# 1 0 6}
|
152
|
+
# 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
|
153
|
+
# 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
|
154
|
+
#
|
155
|
+
# 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
|
156
|
+
# 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
|
157
|
+
# 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
|
158
|
+
# 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
|
159
|
+
# 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
|
160
|
+
# 1 0 6}
|
161
|
+
# 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
|
162
|
+
# 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
|
163
|
+
# 572 0 bogus-all_numeric-favorites 200
|
164
|
+
# 8 0 bogus-bad_chars-favorites 200
|
165
|
+
# 11 0 bogus-bad_chars-followers 200
|
166
|
+
# 3 0 bogus-bad_chars-friends 200
|
167
|
+
# 1867 0 bogus-missing_id-favorites 200
|
168
|
+
# 1 0 eply_to_status_id":null,"source":"web"},"notifications":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
169
|
+
# 1 0 f.com/friends/ids/17799430.json
|
170
|
+
# 1 0 f49.json
|
171
|
+
# 1 0 f81852492\/Bread__normal.jpg","status":{"truncated":false,"in_reply_to_status_id":1625610632,"text":"@podcasthelper oh yes yes i do still need help. It is ok to call upon your expertis
|
172
|
+
# 1 0 fat":"Tue Aug 12 15:27:32 +0000 2008","friends_count":87,"profile_background_color":"FF6699","location":"Newcastle, UK","id":15823576,"time_zone":"Hawaii","favourites_count":0,"profile
|
173
|
+
# 890016 0 favorites 200
|
174
|
+
# 1 0 file_image_url":"http:\/\/static.twitter.com 0
|
175
|
+
# 1 0 fo:null,"name":"THE_REAL_SHAQ","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
176
|
+
# 1 0 foll3183,14885034,17824762,25320311,26651936,5520952,16092530,15466712,18414465,20019951,22151420,26332254,7096192,13434972,26275705,27923225,15770739,19900326,15654216,20486512,167358
|
177
|
+
# 1 0 folleply_to_screen_name":null,"id":1618358723,"source":"<a href=\"http:\/ 0
|
178
|
+
# 1 0 follo54:45 +0000 2009"}]
|
179
|
+
# 1 0 follos\/71101463\/LegalTimes_1651_normal.jpg","status":{"truncated":false,"in_reply_to_status_id":null,"text":"The Morning Wrap http: 0
|
180
|
+
# 1 0 followeada)","favourites_count":1,"profile_text_color":"666666"},{"description":"Writer, Pick-up Artist, Social Mastermind, and Traveler","profile_background_image_url":"http:\/\/stati
|
181
|
+
# 1 0 followekground_tile":false,"description":"Gamer\/Skier\/Drummer 0
|
182
|
+
# 1 0 followers,"profile_sidebar_border_color":"87bc44","time_zone":"London","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
183
|
+
# 1 0 followers,12836312,18993475,16860914,16142878,18504804,17810432,18661758,17356420,17901504,15535360,19240090,16180026,14614833,18264863,17807744,19459418,19356460,8112832,18637695,1925
|
184
|
+
# 1 0 followers.json
|
185
|
+
# 1 0 followers_":"need coffee","favorited":false,"in_reply_to_screen_name":null,"created_at":"Mon Apr 27 14:11:52 +0000 2009","truncated":false,"id":1629096949,"in_reply_to_status_id":null,
|
186
|
+
# 1 0 followers__Close_normal.JPG","status":{"truncated":false,"in_reply_to_status_id":null,"text":"is finally home and going to bed. Have to get up for work in about 4.5 hours.","in_reply_t
|
187
|
+
# 1 0 followers_id 0
|
188
|
+
# 1 0 followers_id,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
189
|
+
# 1 0 followers_id13165892,15131310,6970122,13838022,15136098,14590445,15184346,6264392,12650292,16159919,16725668,16816616,15984607,16895930,12228062,15224867,859221,12364022,15316113,15624
|
190
|
+
# 1 0 followers_id2009","truncated":false,"id":1625606751,"in_reply_to_status_id":1623299478,"source":"web"},"notifications":false,"time_zone":"Pacific Time (US & Canada)","favourites_count"
|
191
|
+
# 1 0 followers_idbackground_tile":false,"followers_count":78,"url":"http:\/\/danfitek.com","screen_name":"fitekker","name":"Dan Fitek","friends_count":100,"profile_background_color":"9ae4e8
|
192
|
+
# 149 0 followers_ids
|
193
|
+
# 1 0 followers_ids 1
|
194
|
+
# 1 0 followers_ids 17975054
|
195
|
+
# 1 0 followers_ids 200
|
196
|
+
# 1 0 followers_ids 20090412070434
|
197
|
+
# 4 0 followers_ids 0
|
198
|
+
# 1 0 followers_ids 0
|
199
|
+
# 1 0 followers_ids 0
|
200
|
+
# 1 0 followers_ids"in_reply_to_screen_name":null,"created_at":"Fri Apr 24 19:22:24 +0000 2009","truncated":false,"id":1606556267,"in_reply_to_status_id":null,"source":"<a href=\"http:\/ 0
|
201
|
+
# 1 0 followers_ids00,"profile_link_color":"0000ff","profile_image_url":"http:\/\/static.twitter.com 0
|
202
|
+
# 1 0 followers_idsC2EF","location":"San Diego, CA","id":9628922,"time_zone":"Pacific Time (US & Canada)","created_at":"Tue Oct 23 17:30:49 +0000 2007"}]
|
203
|
+
# 1 0 followers_idst":"Mon Jan 28 03:48:51 +0000 2008","screen_name":"siolanthe"},{"description":"","profile_background_image_url":"http:\/\/static.twitter.com 0
|
204
|
+
# 1 0 followers_ilocation":null,"id":15311449,"time_zone":"Greenland"},{"description":"","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
|
205
|
+
# 1 0 followerwing":false,"profile_link_color":"CD0033","url":"http:\/\/foodfeed.us","name":"FoodFeed","notifications":false,"profile_sidebar_fill_color":"fafaf5","followers_count":4399,"pro
|
206
|
+
# 1 0 followetp://twitter.com/followers/ids/15737773.json
|
207
|
+
# 1 0 fri:5,"profile_sidebar_border_color":"87bc44","url":null,"screen_name":"seniorpoopypant","name":"seniorpoopypant","favourites_count":0,"protected":false,"status":{"truncated":false,"in
|
208
|
+
# 1 0 frie:"Pirate LadyZebra. (also know as Zoaea)","utc_offset":-18000,"profile_sidebar_fill_color":"e0ff92","followers_count":19,"favourites_count":0,"profile_image_url":"http:\/\/s3.amazo
|
209
|
+
# 1 0 friend219,14213042,29736155,27530456,18755292]
|
210
|
+
# 1 0 friends":"web"},"profile_background_image_url":"http:\/\/static.twitter.com 0
|
211
|
+
# 1 0 friends\/\/s3.amazonaws.com 0
|
212
|
+
# 1 0 friends_3,27039226,29988381,35486899,18900303,16044047]
|
213
|
+
# 1 0 friends__count":0,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
214
|
+
# 1 0 friends_i12809262,12767592,13084172,12803292,12775072,12129872,14198789,29866309]
|
215
|
+
# 1 0 friends_i1355,"source":"web","created_at":"Fri Jun 27 20:51:55 +0000 2008"},{"truncated":false,"user":{"description":"Recently married! Work for Victory - vc.tv - lovin' life!","utc_o
|
216
|
+
# 1 0 friends_id":null,"text":"One Laptop per Child Lands in Indiahttp:\/\/tinyurl.com 0
|
217
|
+
# 1 0 friends_id,17213487,20820391,1050851,23817210,15117375,14790735,16069532,14634720,23306376,14470037,24754635,18666525,16798949,17118708,17492127,16563598,22731226,20253928,17139092,240
|
218
|
+
# 1 0 friends_idile_background_images\/3476247\/BJMendelson_388_twitbacks.jpg","profile_link_color":"0084B4","location":"Glens Falls, New York","id":12687952,"time_zone":"Indiana (East)","cr
|
219
|
+
# 161 0 friends_ids
|
220
|
+
# 1 0 friends_ids 18706826
|
221
|
+
# 1 0 friends_ids 16624466
|
222
|
+
# 1 0 friends_ids 20090427091534
|
223
|
+
# 1 0 friends_ids 20090427094351
|
224
|
+
# 1 0 friends_ids 0
|
225
|
+
# 1 0 friends_ids 0
|
226
|
+
# 1 0 friends_ids 0
|
227
|
+
# 1 0 friends_ids 0
|
228
|
+
# 1 0 friends_ids 0
|
229
|
+
# 1 0 friends_ids 0
|
230
|
+
# 1 0 friends_ids/\/orangatame.com\/products 0
|
231
|
+
# 1 0 friends_idsl_color":"F3F3F3","followers_count":25,"location":"St. Louis","id":14708168,"notifications":false,"friends_count":23,"profile_sidebar_border_color":"DFDFDF"},"text":"@Raptor
|
232
|
+
# 1 0 friends_iound_images\/4821472\/Mississippi_River_TypeMap2.jpg","profile_link_color":"1F98C7","location":"San Francisco","id":18257438,"time_zone":"Pacific Time (US & Canada)","created_
|
233
|
+
# 1 0 friprofile_background_color":"1A1B1F","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
234
|
+
# 1 0 frmusings of a young Catholic in Yorkshire, England","utc_offset":0,"notifications":false,"profile_sidebar_fill_color":"e0ff92","followers_count":89,"profile_image_url":"http:\/\/s3.am
|
235
|
+
# 1 0 frollowing":false,"statuses_count":468,"profile_link_color":"2FC2EF","url":"http:\/\/myspace.com 0
|
236
|
+
# 1 0 fws.com\/twitter_production\/profile_background_images 0
|
237
|
+
# 1 0 hu Mar 05 07:45:42 +0000 2009","id":1282474011,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"C6E2EE","notifications":false,"created_at":"Thu Mar 05 07:36
|
238
|
+
# 1 0 ile_image_url":"http:\/\/static.twitter.com 0
|
239
|
+
# 1 0 imit exceeded. Clients may not make more than 20000 requests per hour."}
|
240
|
+
# 1 0 location":null,"id":22893663,"profile_link_color":"0000ff"}
|
241
|
+
# 1 0 nk faudrait demander \u00e0 Michel Bergeron ,on aurait du fun pour 30 minutes","in_reply_to_user_id":21818830,"created_at":"Wed Mar 04 19:18:53 +0000 2009","truncated":false,"id":12798
|
242
|
+
# 1 0 oz","profile_background_image_url":"http:\/\/static.twitter.com 0
|
243
|
+
# 1 0 s football, beer, and technology! Tweet away!","statuses_count":1444,"utc_offset":-21600,"profile_sidebar_border_color":"87bc44","profile_background_tile":true,"following":false,"prof
|
244
|
+
# 24228 0 timeline 200
|
245
|
+
# 1 0 u"created_at":"Wed Mar 04 05:45:20 +0000 2009","in_reply_to_user_id":null,"in_reply_to_status_id":null,"truncated":false,"id":1277431048,"source":"<a href=\"http:\/ 0
|
246
|
+
# 1 0 u128
|
247
|
+
# 1 0 u88,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"F2E195","notifications":false,"created_at":"Wed Mar 04 05:32:57 +0000 2009","profile_background_image_u
|
248
|
+
# 1 0 uat":"Thu Mar 05 09:07:06 +0000 2009","id":1282640319,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 0
|
249
|
+
# 1 0 ul":null,"name":"kimberly luzier","profile_background_tile":false,"protected":false,"status":{"in_reply_to_user_id":null,"text":"pictures for ebayy","created_at":"Thu Mar 05 17:21:37 +
|
250
|
+
# 1 0 us":"http:\/\/s3.amazonaws.com 0
|
251
|
+
# 1 0 us":false,"location":null,"id":22740024}
|
252
|
+
# 1 0 us0308222713
|
253
|
+
# 1 0 use":2,"url":"http:\/\/www.pat-bach.com","name":"Tim Bach","profile_background_tile":false,"protected":false,"status":{"truncated":false,"favorited":false,"text":"Setting up my Twitter
|
254
|
+
# 1 0 use,"id":1288256641,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 05 08:33:22 +0000 2009","profile_ba
|
255
|
+
# 1 0 usefalse,"favorited":false,"text":"Wondering what twitter is all about and if I am missing out!","in_reply_to_user_id":null,"created_at":"Tue Mar 03 13:21:30 +0000 2009","id":127364277
|
256
|
+
# 1 0 useme":"magic 93.1 Radio","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
|
257
|
+
# 1 0 useprofile_text_color":"000000","description":null,"screen_name":"JohnNMiller","utc_offset":null,"profile_link_color":"0000ff","time_zone":null,"profile_sidebar_fill_color":"e0ff92","f
|
258
|
+
#
|
259
|
+
# 474 0 user
|
260
|
+
# 1 0 user 20104991
|
261
|
+
# 2 0 user 1
|
262
|
+
# 1 0 user 14686512
|
263
|
+
# 80 0 user 200
|
264
|
+
# 1 0 user 20090308201437
|
265
|
+
# 1 0 user 20090308201710
|
266
|
+
# 1 0 user 20090308201901
|
267
|
+
# 1 0 user 20090308202228
|
268
|
+
# 1 0 user 20090308204043
|
269
|
+
# 1 0 user 20090308214100
|
270
|
+
# 8 0 user 0
|
271
|
+
# 1 0 user 0
|
272
|
+
# 1 0 user 0
|
273
|
+
# 1 0 user 0
|
274
|
+
# 1 0 user 0
|
275
|
+
# 1 0 user 0
|
276
|
+
# 1 0 user 0
|
277
|
+
# 1 0 user 0
|
278
|
+
# 1 0 user 0 /lovechelle","name":"nichellemicole","profile_background_tile":false,"protected":true,"profile_sidebar_border_color":"D9B17E","notifications":false,"cre
|
279
|
+
# 1 0 user 0 /images\/themes\/theme1\/bg.gif","statuses_count":1,"profile_text_color":"000000","time_zone":null,"url":null,"name":"Ben Pitz","friends_count":10,"prof
|
280
|
+
# 1 0 user 0 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>"},"notifications":false,"profile_image_url":"http:\/\/static.twitter.com\/images\/default_profi
|
281
|
+
# 1 0 user Ocean from my office window.","favorited":false,"created_at":"Wed Feb 25 15:55:00 +0000 2009","in_reply_to_user_id":null,"id":1249633057,"source":"web"},"time_zone":null,"profile_
|
282
|
+
# 5 0 user_timeline
|
283
|
+
# 1 0 user_timeline 0
|
284
|
+
# 1 0 userr.com\/images\/default_profile_normal.png","followers_count":3,"location":null,"id":21311967,"created_at":"Thu Feb 19 16:16:04 +0000 2009","profile_sidebar_border_color":"87bc44","
|
285
|
+
# 1 0 usertp:\/\/orangatame.com 0
|
286
|
+
# 1 0 usertter_production\/profile_images\/87717775 0
|
287
|
+
# 1 0 usm/users/show/0022897534.json?page=1
|
288
|
+
# 1 0 usmtwittercom.html\">mobile web<\/a>"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Tue Sep 02 07:53:34 +0000 2008","profile_background_image_url":"http:
|
289
|
+
# 8 20081218 bogus-all_numeric-followers 200
|
290
|
+
# 12 20081218 bogus-all_numeric-friends 200
|
291
|
+
# :
|
@@ -0,0 +1,98 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require 'rubygems'
|
3
|
+
require 'wukong'
|
4
|
+
require 'monkeyshines'
|
5
|
+
require 'wuclan/twitter'
|
6
|
+
# un-namespace request classes.
|
7
|
+
include Wuclan::Twitter::Scrape
|
8
|
+
include Wuclan::Twitter::Model
|
9
|
+
# If you're anyone but the original author, this next require is useless but harmless.
|
10
|
+
require 'wuclan/twitter/scrape/old_skool_request_classes'
|
11
|
+
|
12
|
+
#
|
13
|
+
# req, id, page, scraped_at, response_code
|
14
|
+
#
|
15
|
+
# Shell command handed to Wukong::Script below as its :map_command: `cut` with a
# literal tab as the delimiter, keeping columns 1, 3, 4, 7 and 8 of each input
# line. The trailing space is part of the original command string and is kept.
# Frozen so the shared constant cannot be mutated in place.
REQUEST_MAPPER_COMMAND = "/usr/bin/cut -d\"\t\" -f1,3,4,7,8 ".freeze
|
16
|
+
|
17
|
+
#
# Reducer that emits one row per (req, id) key, built from +final_value+, with
# tallies of the 200/400/401/403/404 response codes appended.
#
# +final_value+ and the base +start!+ / +accumulate+ behaviour come from
# Wukong::Streamer::UniqByLastReducer, which is not visible in this file.
# NOTE(review): presumably +start!+ runs at the start of each key group, so the
# tallies are per-key -- confirm against the parent class.
#
class TwitterRequestUniqer < Wukong::Streamer::UniqByLastReducer
  # Hash of Integer response code => count of accepted records with that code.
  attr_accessor :response_codes

  # Grouping key: only the first two fields (+req+, +id+) are used; +pg+ and
  # any further fields are ignored.
  def get_key(req = nil, id = nil, pg = nil, *args)
    [req, id]
  end

  # Resets every tracked response code to zero, then defers to the parent.
  def start!(*args)
    self.response_codes = {
      200 => 0,
      400 => 0,
      401 => 0,
      403 => 0,
      404 => 0
    }
    super(*args)
  end

  # Nothing in this class references JSON; the require is retained so that
  # load-time behaviour is unchanged.
  require 'json'

  # Takes one record laid out as (req, id, page, scraped_at, response_code).
  #
  # Records whose scraped_at does not contain 14 consecutive digits are dropped
  # (the pattern is unanchored). For the rest, the response code is tallied if
  # it is one of the tracked codes, and the record is passed on to the parent.
  def accumulate(*args)
    scraped_at = args[3]
    code       = args[4].to_i
    return unless scraped_at =~ /\d{14}/
    response_codes[code] += 1 if response_codes.key?(code)
    super(*args)
  end

  # Yields a single array: [zero-padded id, req, page, scraped_at] followed by
  # the 200, 400, 401, 403 and 404 tallies, in that order. Yields nothing when
  # +final_value+ is blank.
  def finalize(*args)
    return if final_value.blank?
    req, raw_id, page, scraped_at = final_value
    padded_id = format('%010d', raw_id.to_i) # ids become fixed-width, 10 digits
    tallies   = response_codes.values_at(200, 400, 401, 403, 404)
    yield([padded_id, req, page, scraped_at] + tallies)
  end
end
|
44
|
+
|
45
|
+
|
46
|
+
# Make the script go.
|
47
|
+
# Build the job, then run it. The first argument is nil and a shell command is
# supplied through :map_command instead; TwitterRequestUniqer is the second
# argument.
# NOTE(review): the positional arguments are presumably (mapper class, reducer
# class) -- confirm against Wukong::Script.
request_uniq_script = Wukong::Script.new(
  nil,
  TwitterRequestUniqer,
  :map_command      => REQUEST_MAPPER_COMMAND,
  :partition_fields => 2,
  :sort_fields      => 3
)
request_uniq_script.run
|
52
|
+
|
53
|
+
|
54
|
+
# 49522
|
55
|
+
# 74975395 200
|
56
|
+
# 84 302
|
57
|
+
# 277786 400
|
58
|
+
# 972881 401
|
59
|
+
# 94647 403
|
60
|
+
# 178105 404
|
61
|
+
# 9710 500
|
62
|
+
# 23134 502
|
63
|
+
# 1588 503
|
64
|
+
# 2479 504
|
65
|
+
|
66
|
+
# Wuclan::Twitter::Scrape::Base.class_eval do class_inheritable_accessor :req_code ; end
|
67
|
+
# TwitterUserRequest.class_eval do self.req_code = :tw_user ; end
|
68
|
+
# TwitterFollowersRequest.class_eval do self.req_code = :tw_foll ; end
|
69
|
+
# TwitterFriendsRequest.class_eval do self.req_code = :tw_frnd ; end
|
70
|
+
# TwitterFollowersIdsRequest.class_eval do self.req_code = :tw_foid ; end
|
71
|
+
# TwitterFriendsIdsRequest.class_eval do self.req_code = :tw_frid ; end
|
72
|
+
# TwitterUserTimelineRequest.class_eval do self.req_code = :tw_ustl ; end
|
73
|
+
#
|
74
|
+
# REQ_CODES = {
|
75
|
+
# 'followers' => :tw_fo, 'twitter_followers_request' => :tw_fo,
|
76
|
+
# 'friends' => :tw_fr, 'twitter_friends_request' => :tw_fr,
|
77
|
+
# 'followers_ids' => :tw_fi, 'twitter_followers_ids_request' => :tw_fi,
|
78
|
+
# 'friends_ids' => :tw_ri, 'twitter_friends_ids_request' => :tw_ri,
|
79
|
+
# 'user' => :tw_us, 'twitter_user_request' => :tw_us,
|
80
|
+
# 'user_timeline' => :tw_ut, 'twitter_user_timeline_request' => :tw_ut,
|
81
|
+
# }
|
82
|
+
|
83
|
+
# #
|
84
|
+
# #
|
85
|
+
# #
|
86
|
+
# class TwitterRequestParser < Wukong::Streamer::StructStreamer
|
87
|
+
#
|
88
|
+
# def process request, *args, &block
|
89
|
+
# next if request.page.to_i > 1
|
90
|
+
# next if request.response_code != '200'
|
91
|
+
# req_code = REQ_CODES[request]
|
92
|
+
# case request
|
93
|
+
# when TwitterUserRequest, TwitterFollowersRequest, TwitterFriendsRequest,
|
94
|
+
# TwitterFollowersIdsRequest, TwitterFriendsIdsRequest, TwitterUserTimelineRequest
|
95
|
+
# yield [request.twitter_user_id, request.req_code, request.scraped_at]
|
96
|
+
# end
|
97
|
+
# end
|
98
|
+
# end
|
@@ -0,0 +1,4 @@
|
|
1
|
+
-- Load the request summary rows. A multi-field schema after AS must be wrapped
-- in parentheses; the original listed the fields bare.
Scrapes            = LOAD 'tmp/last_requests_and_codes' AS (user_id:int, rsrc:chararray, page:int, datetime:long, r200:int, r400:int, r401:int, r403:int, r404:int) ;
-- Keep only rows whose rsrc is 'user'.
UserScrapes        = FILTER Scrapes BY rsrc == 'user' ;
-- Order by ascending datetime, so the smallest datetime values come first.
UserScrapesOrdered = ORDER UserScrapes BY datetime ASC ;
STORE UserScrapesOrdered INTO 'twmeta/scrape_requests/users_by_staleness-20090730.tsv' ;
|
@@ -0,0 +1,6 @@
|
|
1
|
+
love 65536 3.36366494448618 6698 2686693027 2688691633
|
2
|
+
red+sox 65536 0.0113457581992013 1500 2661001994 2688059232
|
3
|
+
britney+spears 65536 0.00866753886170806 184 2685103763 2688130850
|
4
|
+
hadoop 65536 0.000661831916251315 614 2501794487 2687967783
|
5
|
+
infochimps 65536 2.24964286919452e-05 16 2541533220 2683708276
|
6
|
+
hapaxlegomenon 65536 0.0 1 2646535741 2646535741
|