wuclan 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.textile +20 -0
- data/README.textile +28 -0
- data/examples/analyze/strong_links/gen_multi_edge.rb +103 -0
- data/examples/analyze/strong_links/main.rb +51 -0
- data/examples/analyze/word_count/dump_schema.rb +13 -0
- data/examples/analyze/word_count/freq_user.rb +31 -0
- data/examples/analyze/word_count/freq_whole_corpus.rb +27 -0
- data/examples/analyze/word_count/word_count.pig +43 -0
- data/examples/analyze/word_count/word_count.rb +34 -0
- data/examples/lastfm/scrape/load_lastfm.rb +31 -0
- data/examples/lastfm/scrape/scrape_lastfm.rb +47 -0
- data/examples/lastfm/scrape/seed.tsv +147 -0
- data/examples/twitter/old/load_twitter_search_jobs.rb +157 -0
- data/examples/twitter/old/scrape_twitter_api.rb +104 -0
- data/examples/twitter/old/scrape_twitter_search.rb +57 -0
- data/examples/twitter/old/scrape_twitter_trending.rb +73 -0
- data/examples/twitter/parse/parse_twitter_requests.rb +81 -0
- data/examples/twitter/parse/parse_twitter_search_requests.rb +28 -0
- data/examples/twitter/scrape_twitter_api/scrape_twitter_api.rb +61 -0
- data/examples/twitter/scrape_twitter_api/seed.tsv +4 -0
- data/examples/twitter/scrape_twitter_api/start_cache_twitter.sh +2 -0
- data/examples/twitter/scrape_twitter_api/support/make_request_stats.rb +291 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_1.rb +98 -0
- data/examples/twitter/scrape_twitter_api/support/make_requests_by_id_and_date_2.pig +4 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_search_jobs.tsv +6 -0
- data/examples/twitter/scrape_twitter_api/support/twitter_trending_seed.tsv +725 -0
- data/examples/twitter/scrape_twitter_hosebird/edamame-killall +4 -0
- data/examples/twitter/scrape_twitter_hosebird/foo.rb +19 -0
- data/examples/twitter/scrape_twitter_hosebird/ps_emulation.rb +111 -0
- data/examples/twitter/scrape_twitter_hosebird/scrape_twitter_hosebird.rb +110 -0
- data/examples/twitter/scrape_twitter_hosebird/test_spewer.rb +20 -0
- data/examples/twitter/scrape_twitter_hosebird/twitter_hosebird_god.yaml +10 -0
- data/examples/twitter/scrape_twitter_search/dump_twitter_search_jobs.rb +38 -0
- data/examples/twitter/scrape_twitter_search/load_twitter_search_jobs.rb +63 -0
- data/examples/twitter/scrape_twitter_search/scrape_twitter_search.rb +44 -0
- data/examples/twitter/scrape_twitter_search/twitter_search_daemons.god +25 -0
- data/lib/old/twitter_api.rb +88 -0
- data/lib/wuclan/delicious/delicious_html_request.rb +31 -0
- data/lib/wuclan/delicious/delicious_models.rb +26 -0
- data/lib/wuclan/delicious/delicious_request.rb +65 -0
- data/lib/wuclan/friendfeed/scrape/friendfeed_search_request.rb +60 -0
- data/lib/wuclan/friendster.rb +7 -0
- data/lib/wuclan/lastfm/model/base.rb +49 -0
- data/lib/wuclan/lastfm/model/sample_responses.txt +16 -0
- data/lib/wuclan/lastfm/scrape/base.rb +195 -0
- data/lib/wuclan/lastfm/scrape/concrete.rb +143 -0
- data/lib/wuclan/lastfm/scrape/lastfm_job.rb +12 -0
- data/lib/wuclan/lastfm/scrape/lastfm_request_stream.rb +17 -0
- data/lib/wuclan/lastfm/scrape/recursive_requests.rb +154 -0
- data/lib/wuclan/lastfm/scrape.rb +12 -0
- data/lib/wuclan/lastfm.rb +7 -0
- data/lib/wuclan/metrics/user_graph_metrics.rb +99 -0
- data/lib/wuclan/metrics/user_metrics.rb +443 -0
- data/lib/wuclan/metrics/user_metrics_basic.rb +277 -0
- data/lib/wuclan/metrics/user_scraping_metrics.rb +64 -0
- data/lib/wuclan/metrics.rb +0 -0
- data/lib/wuclan/myspace.rb +21 -0
- data/lib/wuclan/open_social/model/base.rb +0 -0
- data/lib/wuclan/open_social/scrape/base.rb +111 -0
- data/lib/wuclan/open_social/scrape_request.rb +6 -0
- data/lib/wuclan/open_social.rb +0 -0
- data/lib/wuclan/rdf_output/relationship_rdf.rb +47 -0
- data/lib/wuclan/rdf_output/text_element_rdf.rb +64 -0
- data/lib/wuclan/rdf_output/tweet_rdf.rb +10 -0
- data/lib/wuclan/rdf_output/twitter_rdf.rb +84 -0
- data/lib/wuclan/rdf_output/twitter_user_rdf.rb +12 -0
- data/lib/wuclan/shorturl/shorturl_request.rb +271 -0
- data/lib/wuclan/twitter/api_response_examples.textile +300 -0
- data/lib/wuclan/twitter/model/base.rb +72 -0
- data/lib/wuclan/twitter/model/multi_edge.rb +31 -0
- data/lib/wuclan/twitter/model/relationship.rb +176 -0
- data/lib/wuclan/twitter/model/text_element/extract_info_tests.rb +83 -0
- data/lib/wuclan/twitter/model/text_element/grok_tweets.rb +96 -0
- data/lib/wuclan/twitter/model/text_element/more_regexes.rb +370 -0
- data/lib/wuclan/twitter/model/text_element.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tokenize.rb +38 -0
- data/lib/wuclan/twitter/model/tweet/tweet_regexes.rb +202 -0
- data/lib/wuclan/twitter/model/tweet/tweet_token.rb +79 -0
- data/lib/wuclan/twitter/model/tweet.rb +74 -0
- data/lib/wuclan/twitter/model/twitter_user/style/color_to_hsv.rb +57 -0
- data/lib/wuclan/twitter/model/twitter_user.rb +145 -0
- data/lib/wuclan/twitter/model.rb +21 -0
- data/lib/wuclan/twitter/parse/ff_ids_parser.rb +27 -0
- data/lib/wuclan/twitter/parse/friends_followers_parser.rb +52 -0
- data/lib/wuclan/twitter/parse/generic_json_parser.rb +26 -0
- data/lib/wuclan/twitter/parse/json_tweet.rb +63 -0
- data/lib/wuclan/twitter/parse/json_twitter_user.rb +122 -0
- data/lib/wuclan/twitter/parse/public_timeline_parser.rb +54 -0
- data/lib/wuclan/twitter/parse/twitter_search_parse.rb +60 -0
- data/lib/wuclan/twitter/parse/user_parser.rb +30 -0
- data/lib/wuclan/twitter/scrape/base.rb +97 -0
- data/lib/wuclan/twitter/scrape/old_skool_request_classes.rb +40 -0
- data/lib/wuclan/twitter/scrape/twitter_fake_fetcher.rb +31 -0
- data/lib/wuclan/twitter/scrape/twitter_ff_ids_request.rb +75 -0
- data/lib/wuclan/twitter/scrape/twitter_followers_request.rb +135 -0
- data/lib/wuclan/twitter/scrape/twitter_json_response.rb +124 -0
- data/lib/wuclan/twitter/scrape/twitter_request_stream.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_fake_fetcher.rb +44 -0
- data/lib/wuclan/twitter/scrape/twitter_search_flat_stream.rb +30 -0
- data/lib/wuclan/twitter/scrape/twitter_search_job.rb +25 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request.rb +70 -0
- data/lib/wuclan/twitter/scrape/twitter_search_request_stream.rb +19 -0
- data/lib/wuclan/twitter/scrape/twitter_timeline_request.rb +72 -0
- data/lib/wuclan/twitter/scrape/twitter_user_request.rb +64 -0
- data/lib/wuclan/twitter/scrape.rb +27 -0
- data/lib/wuclan/twitter.rb +7 -0
- data/lib/wuclan.rb +1 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/wuclan_spec.rb +7 -0
- data/wuclan.gemspec +184 -0
- metadata +219 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
require 'wukong'
|
|
4
|
+
require 'monkeyshines'
|
|
5
|
+
require 'wuclan/twitter'
|
|
6
|
+
$: << '/home/flip/ics/rubygems/json-1.1.7/lib'
|
|
7
|
+
include Wuclan::Twitter::Scrape
|
|
8
|
+
include Wuclan::Twitter::Model
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
require 'wukong/schema'
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
Wuclan::Twitter::Scrape::Base.class_eval do
|
|
15
|
+
extend Wukong::Schema
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
p Wuclan::Twitter::Scrape::TwitterUserRequest.pig_load
|
|
19
|
+
|
|
20
|
+
# Requests = LOAD 'ripd/com.tw/com.twitter/*' AS ( rsrc:chararray, priority:int, twitter_user_id: int, page: int, moreinfo: chararray, url: chararray, scraped_at: long, response_code: int, response_message: chararray, contents: chararray );
|
|
21
|
+
# request_classes = FOREACH Requests GENERATE rsrc, (int) ((double)scraped_at / 1000000.0) AS scon, response_code ;
|
|
22
|
+
# rc_grp = GROUP request_classes BY (rsrc, scon, response_code) ;
|
|
23
|
+
# rc_count = FOREACH rc_grp GENERATE COUNT(request_classes) AS freq, group.scon AS scraped_on, group.rsrc AS rsrc , group.response_code AS response_code ;
|
|
24
|
+
# rc_count_1 = ORDER rc_count BY scraped_on, rsrc, response_code ;
|
|
25
|
+
# rmf tmp/rc_count
|
|
26
|
+
# STORE rc_count_1 INTO 'tmp/rc_count' ;
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# 20090304152029 bad utf8
|
|
30
|
+
# 20090308
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# 1 9999999 20081207052456 1 old_scraper 20081207052456 200 old_scraper [{"user":{"followers_count":23,"description":"","url":"","profile_image_url":"http:\/\/s3.amazonaws.com /twi
|
|
35
|
+
# 1 9999999 20081207055023 1 old_scraper 20081207055023 200 old_scraper [{"user":{"followers_count":32,"description":"","url":"http:\/\/www.mychurch.org /gervis","profile_image_url
|
|
36
|
+
# 1 9999999 20081209041619 1 old_scraper 20081209041619 200 old_scraper [{"user":{"followers_count":80,"description":"1983, Amersfoort, audio-producer @ NPS 3FM","url":"http:\/\/ww
|
|
37
|
+
# 1 9999999 20081209115725 1 old_scraper 20081209115725 200 old_scraper [{"user":{"followers_count":19,"description":"Pozzo e Luck não acessavam a net.","url":"","profile_image_ur
|
|
38
|
+
# 1 9999999 20081209232718 1 old_scraper 20081209232718 200 old_scraper [{"user":{"followers_count":105,"description":"æå¹ââæ±äº¬é »ç¹ãITç³»,åºåç³»,ãã©ã³ãã¼,<E5><85>
|
|
39
|
+
# 1 9999999 20081210061628 1 old_scraper 20081210061628 200 old_scraper [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
|
|
40
|
+
# 1 9999999 20081210185703 1 old_scraper 20081210185703 200 old_scraper [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
|
|
41
|
+
# 1 9999999 20081211095702 1 old_scraper 20081211095702 200 old_scraper [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":"
|
|
42
|
+
# 1 9999999 20081213073636 1 old_scraper 20081213073636 200 old_scraper [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
|
|
43
|
+
# 1 9999999 20081214100003 1 old_scraper 20081214100003 200 old_scraper [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
|
|
44
|
+
# 1 9999999 20081215105211 1 old_scraper 20081215105211 200 old_scraper [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
|
|
45
|
+
# 1 9999999 20081218075108 1 old_scraper 20081218075108 200 old_scraper [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
|
|
46
|
+
# 1 9999999 20081219065853 1 old_scraper 20081219065853 200 old_scraper [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
47
|
+
# 1 9999999 20081220010525 1 old_scraper 20081220010525 200 old_scraper [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
|
|
48
|
+
# 1 9999999 20081220113649 1 old_scraper 20081220113649 200 old_scraper [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
|
|
49
|
+
# 1 9999999 20081221083623 1 old_scraper 20081221083623 200 old_scraper [{"user":{"followers_count":565,"description":"çãç³»ããã°ã©ã","url":"http:\/\/polog.org /","profi
|
|
50
|
+
# 1 9999999 20081224110505 1 old_scraper 20081224110505 200 old_scraper [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
|
|
51
|
+
# 1 9999999 20081225055913 1 old_scraper 20081225055913 200 old_scraper [{"user":{"followers_count":213,"description":"å人ã²ã¼ã ä½ã£ã¦ã¾ããããæ°è»½ã«ãã©ãã¼<E3>
|
|
52
|
+
# 1 9999999 20081229072914 1 old_scraper 20081229072914 200 old_scraper [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
|
|
53
|
+
# 1 9999999 20081229084830 1 old_scraper 20081229084830 200 old_scraper [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
|
|
54
|
+
# 1 9999999 20090102103315 1 old_scraper 20090102103315 200 old_scraper [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
|
|
55
|
+
# 1 9999999 20090104084017 1 old_scraper 20090104084017 200 old_scraper [{"user":{"followers_count":299,"description":"ã¹ã¦ã£ã¼ãï¼åªï¼ã12æéããã¨ããã¦ãå°±<E5><AF>
|
|
56
|
+
# 1 9999999 20090105101608 1 old_scraper 20090105101608 200 old_scraper [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
|
|
57
|
+
# 1 9999999 20090105103520 1 old_scraper 20090105103520 200 old_scraper [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
|
|
58
|
+
# 1 9999999 20090106165730 1 old_scraper 20090106165730 200 old_scraper [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
|
|
59
|
+
# 1 9999999 20090112091101 1 old_scraper 20090112091101 200 old_scraper [{"user":{"followers_count":25,"description":"Ostravak je stav duÅ¡e, i když ÄlovÄk žije v Praze.","url"
|
|
60
|
+
# 1 9999999 20090117090748 1 old_scraper 20090117090748 200 old_scraper [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
|
|
61
|
+
# 1 9999999 20090418173317 1 old_scraper 20090418173317 200 old_scraper [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
|
|
62
|
+
# 1 9999999 20090418231828 1 old_scraper 20090418231828 200 old_scraper [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
|
|
63
|
+
# 1 9999999 20090419014909 1 old_scraper 20090419014909 200 old_scraper [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
64
|
+
# 1 9999999 20090419052121 1 old_scraper 20090419052121 200 old_scraper [{"user":{"followers_count":743,"description":"ãããæãã§åå ãã¦ã¿ã¾ãããéçã¨æ¸©æ³<E3>
|
|
65
|
+
# 1 9999999 20090419233942 1 old_scraper 20090419233942 200 old_scraper [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
66
|
+
# 1 9999999 20090420033745 1 old_scraper 20090420033745 200 old_scraper [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
|
|
67
|
+
# 1 9999999 20090420112345 1 old_scraper 20090420112345 200 old_scraper [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
|
|
68
|
+
# 1 9999999 20090421010538 1 old_scraper 20090421010538 200 old_scraper [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
|
|
69
|
+
# 1 9999999 20090421084441 1 old_scraper 20090421084441 200 old_scraper [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
|
|
70
|
+
# 1 9999999 20090421101818 1 old_scraper 20090421101818 200 old_scraper [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
|
|
71
|
+
# 1 9999999 20090421232814 1 old_scraper 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
|
72
|
+
# 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
|
|
73
|
+
# 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
|
|
74
|
+
# 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
|
|
75
|
+
# 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
|
|
76
|
+
# 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
|
|
77
|
+
# 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiãã£ã¦ã¾ããããµãããããã§æ¤ç´¢ãã¦ã¿<E3>
|
|
78
|
+
# 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
|
|
79
|
+
# 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
|
|
80
|
+
# 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
|
|
81
|
+
# 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
|
82
|
+
# 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
|
|
83
|
+
# 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
|
|
84
|
+
# 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
|
|
85
|
+
# 1 9999999 20081209232718 1 old_fetcher 20081209232718 200 old_fetcher [{"user":{"followers_count":105,"description":"æå¹ââæ±äº¬é »ç¹ãITç³»,åºåç³»,ãã©ã³ãã¼,<E5><85>1 9999999 20081210061628 1 old_fetcher 20081210061628 200 old_fetcher [{"user":{"followers_count":736,"description":"I AM","url":"http:\/\/www.frankvandun.nl","profile_image_url"
|
|
86
|
+
# 1 9999999 20081210185703 1 old_fetcher 20081210185703 200 old_fetcher [{"user":{"followers_count":644,"description":"Noticias de Chile actualizadas cada hora","url":"http:\/\/www
|
|
87
|
+
# 1 9999999 20081211095702 1 old_fetcher 20081211095702 200 old_fetcher [{"user":{"followers_count":64,"description":"","url":"http:\/\/tautin.blogspot.com /","profile_image_url":" 1 9999999 20081213073636 1 old_fetcher 20081213073636 200 old_fetcher [{"user":{"followers_count":178,"description":"","url":"http:\/\/www.gazetadopovo.com.br","profile_image_url
|
|
88
|
+
# 1 9999999 20081214100003 1 old_fetcher 20081214100003 200 old_fetcher [{"user":{"followers_count":7,"description":"ResearchBlogging.org feeds in Deutsch","url":"http:\/\/research
|
|
89
|
+
# 1 9999999 20081215105211 1 old_fetcher 20081215105211 200 old_fetcher [{"user":{"followers_count":165,"description":"I am a stay-at-home mother of two, with one on the way! I am
|
|
90
|
+
# 1 9999999 20081218075108 1 old_fetcher 20081218075108 200 old_fetcher [{"user":{"followers_count":17,"description":"","url":"http:\/\/hard-hitting-news.blogspot.com /","profile_i
|
|
91
|
+
# 1 9999999 20081219065853 1 old_fetcher 20081219065853 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
92
|
+
# /im
|
|
93
|
+
# 1 9999999 20081220010525 1 old_fetcher 20081220010525 200 old_fetcher [{"user":{"followers_count":202,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.c
|
|
94
|
+
# 1 9999999 20081220113649 1 old_fetcher 20081220113649 200 old_fetcher [{"user":{"followers_count":42,"description":"Contrary to popular belief, I am in fact a robot.","url":"http
|
|
95
|
+
# 1 9999999 20081221083623 1 old_fetcher 20081221083623 200 old_fetcher [{"user":{"followers_count":565,"description":"çãç³»ããã°ã©ã","url":"http:\/\/polog.org /","profi 1 9999999 20081224110505 1 old_fetcher 20081224110505 200 old_fetcher [{"user":{"followers_count":304,"description":" A group of green women bloggers, uniting our voices to hel
|
|
96
|
+
# 1 9999999 20081225055913 1 old_fetcher 20081225055913 200 old_fetcher [{"user":{"followers_count":213,"description":"å人ã²ã¼ã ä½ã£ã¦ã¾ããããæ°è»½ã«ãã©ãã¼<E3> 1 9999999 20081229072914 1 old_fetcher 20081229072914 200 old_fetcher [{"user":{"followers_count":15,"description":"","url":"http:\/\/www.rodia.info","profile_image_url":"http:
|
|
97
|
+
# /
|
|
98
|
+
# 1 9999999 20081229084830 1 old_fetcher 20081229084830 200 old_fetcher [{"user":{"followers_count":191,"description":"3rd Generation Real Estate Investor and Author","url":"http:\
|
|
99
|
+
# 1 9999999 20090102103315 1 old_fetcher 20090102103315 200 old_fetcher [{"user":{"followers_count":21,"description":"takin over one city at a time","url":"","profile_image_url":"h
|
|
100
|
+
# 1 9999999 20090104084017 1 old_fetcher 20090104084017 200 old_fetcher [{"user":{"followers_count":299,"description":"ã¹ã¦ã£ã¼ãï¼åªï¼ã12æéããã¨ããã¦ãå°±<E5><AF>1 9999999 20090105101608 1 old_fetcher 20090105101608 200 old_fetcher [{"user":{"followers_count":2171,"description":"LIVE wildlife 24\/7 from Djuma in South Africa. LIVE safari.
|
|
101
|
+
# 1 9999999 20090105103520 1 old_fetcher 20090105103520 200 old_fetcher [{"user":{"followers_count":19,"description":"Learning to dance like no one is watching","url":"","profile_i
|
|
102
|
+
# 1 9999999 20090106165730 1 old_fetcher 20090106165730 200 old_fetcher [{"user":{"followers_count":10,"description":"Live.Love.Laugh.","url":"","profile_image_url":"http:\/\/s3.am
|
|
103
|
+
# 1 9999999 20090112091101 1 old_fetcher 20090112091101 200 old_fetcher [{"user":{"followers_count":25,"description":"Ostravak je stav duÅ¡e, i když ÄlovÄk žije v Praze.","url"
|
|
104
|
+
# 1 9999999 20090117090748 1 old_fetcher 20090117090748 200 old_fetcher [{"user":{"followers_count":58,"description":"Moving on up.","url":"http:\/\/sarah-dear.blogspot.com","profi
|
|
105
|
+
# 1 9999999 20090418173317 1 old_fetcher 20090418173317 200 old_fetcher [{"user":{"followers_count":69,"description":"The Fail Whale is my spirit animal","url":"","profile_image_ur
|
|
106
|
+
# 1 9999999 20090418231828 1 old_fetcher 20090418231828 200 old_fetcher [{"user":{"followers_count":125,"description":"Cre@t!ve T!r@de","url":"","profile_image_url":"http:\/\/stati
|
|
107
|
+
# 1 9999999 20090419014909 1 old_fetcher 20090419014909 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
108
|
+
# 1 9999999 20090419052121 1 old_fetcher 20090419052121 200 old_fetcher [{"user":{"followers_count":743,"description":"ãããæãã§åå ãã¦ã¿ã¾ãããéçã¨æ¸©æ³<E3>
|
|
109
|
+
# 1 9999999 20090419233942 1 old_fetcher 20090419233942 200 old_fetcher [{"user":{"followers_count":36,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
110
|
+
# 1 9999999 20090420033745 1 old_fetcher 20090420033745 200 old_fetcher [{"user":{"followers_count":1815,"description":"Doing it for the girls baby, chicks, , ladies, women, Its ok
|
|
111
|
+
# 1 9999999 20090420112345 1 old_fetcher 20090420112345 200 old_fetcher [{"user":{"followers_count":13,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.co
|
|
112
|
+
# 1 9999999 20090421010538 1 old_fetcher 20090421010538 200 old_fetcher [{"user":{"followers_count":30,"description":"There are those who think they can and those who think they ca
|
|
113
|
+
# 1 9999999 20090421084441 1 old_fetcher 20090421084441 200 old_fetcher [{"user":{"followers_count":119,"description":"web designer, photographer, musical genius","url":"","profile
|
|
114
|
+
# 1 9999999 20090421101818 1 old_fetcher 20090421101818 200 old_fetcher [{"user":{"followers_count":10,"description":"An eternal learner. Master student in education : can wiki in
|
|
115
|
+
# 1 9999999 20090421232814 1 old_fetcher 20090421232814 200 old_fetcher [{"user":{"followers_count":1,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
|
116
|
+
# 1 9999999 20090422065945 1 old_fetcher 20090422065945 200 old_fetcher [{"user":{"followers_count":118,"description":"Curiosa, consultora de IT y madre a la vez!","url":"","profil
|
|
117
|
+
# 1 9999999 20090422083321 1 old_fetcher 20090422083321 200 old_fetcher [{"user":{"followers_count":14,"description":"","url":"http:\/\/www.myspace.com /sweetitdm","profile_image_u
|
|
118
|
+
# 1 9999999 20090423045905 1 old_fetcher 20090423045905 200 old_fetcher [{"user":{"followers_count":79,"description":"","url":"http:\/\/flickr.com /photos\/malugreen","profile_imag
|
|
119
|
+
# 1 9999999 20090423063900 1 old_fetcher 20090423063900 200 old_fetcher [{"user":{"followers_count":388,"description":"Instructional technology grad student, dog lover, optimist,an
|
|
120
|
+
# 1 9999999 20090423135519 1 old_fetcher 20090423135519 200 old_fetcher [{"user":{"followers_count":628,"description":"MsBeat runs the show at Beatblogging.org. A news-savvy mistre
|
|
121
|
+
# 1 9999999 20090425052649 1 old_fetcher 20090425052649 200 old_fetcher [{"user":{"followers_count":12,"description":"mixiãã£ã¦ã¾ããããµãããããã§æ¤ç´¢ãã¦ã¿<E3>
|
|
122
|
+
# 1 9999999 20090426061449 1 old_fetcher 20090426061449 200 old_fetcher [{"user":{"followers_count":5,"description":"im in the land of soft drugs, legal whoring, windmills and tuli
|
|
123
|
+
# 1 9999999 20090428044727 1 old_fetcher 20090428044727 200 old_fetcher [{"user":{"followers_count":290,"description":"Online and Onair radioshow for geeks only!","url":"http:\/\/w
|
|
124
|
+
# 1 9999999 20090428151030 1 old_fetcher 20090428151030 200 old_fetcher [{"user":{"followers_count":520,"description":"The official home of New Zealand Rugby on Twitter","url":"htt
|
|
125
|
+
# 1 9999999 20090428232804 1 old_fetcher 20090428232804 200 old_fetcher [{"user":{"followers_count":3,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
|
126
|
+
# 1 9999999 20090503152828 1 old_fetcher 20090503152828 200 old_fetcher [{"user":{"followers_count":49,"description":"","url":"http:\/\/www.myspace.com /silisali","profile_image_ur
|
|
127
|
+
# 1 9999999 20090503195932 1 old_fetcher 20090503195932 200 old_fetcher [{"user":{"followers_count":35,"description":"I am a national level bodybuilder working for Bodywell Nutriti
|
|
128
|
+
# 1 9999999 20090504020126 1 old_fetcher 20090504020126 200 old_fetcher [{"user":{"followers_count":35,"description":"Christian. Bass Player. Singer. Amateur Photographer. News Adv
|
|
129
|
+
# 1 9999999 20090504045337 1 old_fetcher 20090504045337 200 old_fetcher [{"user":{"followers_count":171,"description":"i'm a bboy and a multimedia designer","url":"http:\/\/pitiscm
|
|
130
|
+
# 1 9999999 20090507112755 1 old_fetcher 20090507112755 200 old_fetcher [{"user":{"followers_count":2,"description":"","url":"","profile_image_url":"http:\/\/static.twitter.com
|
|
131
|
+
# 1 9999999 20090509222228 1 old_fetcher 20090509222228 200 old_fetcher [{"user":{"followers_count":256,"description":"representing NJ\/NY","url":"http:\/ /www.myspace.com\/darknes
|
|
132
|
+
# 1 9999999 20090512052820 1 old_fetcher 20090512052820 200 old_fetcher [{"user":{"followers_count":0,"description":null,"url":null,"profile_image_url":"http:\/\/static.twitter.com
|
|
133
|
+
# 1 9999999 20090512101017 1 old_fetcher 20090512101017 200 old_fetcher [{"user":{"followers_count":685,"description":"Gainfully employed doing Linux 'stuff', part-time fitness fan
|
|
134
|
+
# 1 9999999 20090513062843 1 old_fetcher 20090513062843 200 old_fetcher [{"user":{"followers_count":7,"description":"Non-commercial radio for everyone who loves original music and
|
|
135
|
+
#
|
|
136
|
+
# 1 20090427 followers_ids 200 /images\/themes\/theme1\/bg.gif","profile_link_color":"0000ff","time_zone":"Cairo","created_at":"Sun Sep 02 13:44:12 +0000 2007","profile_sidebar_fill_c
|
|
137
|
+
# 1 20090427 followers_ids 200
|
|
138
|
+
# 1 20090427 followers_ids 209 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>","created_at":"Sun Feb 08 08:17:30 +0000 2009"},{"user":{"profile_background_image_url":"http:\
|
|
139
|
+
# 1 20090427 followers_ids 20 /twitter_production\/profile_images\/62248324\/086_copy_3_normal.jpg","statuses_count":16,"profile_text_color":"666666","screen_name":"cheekydonkey","pr
|
|
140
|
+
# 1 20090427 followers_ids 20 /images\/themes\/theme1\/bg.gif","created_at":"Fri Apr 18 19:34:26 +0000 2008","profile_text_color":"000000","location":null,"id":14436644,"time_zone":"
|
|
141
|
+
# 1 20090427 followers_ids 242
|
|
142
|
+
# 1 20090427 followers_ids 24479801
|
|
143
|
+
# 1 20090427 followers_ids 2 /images\/default_profile_normal.png","statuses_count":0,"profile_text_color":"000000","screen_name":"kaylazastrow","profile_background_tile":false,"prof
|
|
144
|
+
# 3256 20090427 followers_ids 400
|
|
145
|
+
#
|
|
146
|
+
# 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
|
|
147
|
+
# 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
|
|
148
|
+
# 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
149
|
+
# 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
|
|
150
|
+
# 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
|
|
151
|
+
# 1 0 6}
|
|
152
|
+
# 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
|
|
153
|
+
# 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
|
|
154
|
+
#
|
|
155
|
+
# 1 0 .subpage #content ol, #side ol { padding-left: 30px; } a{text-decoration:none;color: #0084b4;} #content div.desc { margin: 11px 0px 10px 0px; } a img{border:0;} ul{list
|
|
156
|
+
# 1 0 ":0,"profile_background_color":"9ae4e8","profile_background_image_url":"http:\/\/static.twitter.com 0
|
|
157
|
+
# 1 0 "profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
158
|
+
# 1 0 ,"favourites_count":0,"profile_background_color":"f8eb8b","profile_image_url":"http:\/\/static.twitter.com 0
|
|
159
|
+
# 1 0 /static.twitter.com\/images\/default_profile_normal.png","notifications":false,"statuses_count":6,"profile_sidebar_border_color":"87bc44","screen_name":"JoeLorah","profile_background_t
|
|
160
|
+
# 1 0 6}
|
|
161
|
+
# 1 0 _background_image_url":"http:\/\/s3.amazonaws.com 0
|
|
162
|
+
# 1 0 _color":"000000","url":null,"name":"Brett Speth","time_zone":null,"protected":false,"profile_link_color":"0000ff","followers_count":0,"profile_sidebar_fill_color":"e0ff92","profile_ima
|
|
163
|
+
# 572 0 bogus-all_numeric-favorites 200
|
|
164
|
+
# 8 0 bogus-bad_chars-favorites 200
|
|
165
|
+
# 11 0 bogus-bad_chars-followers 200
|
|
166
|
+
# 3 0 bogus-bad_chars-friends 200
|
|
167
|
+
# 1867 0 bogus-missing_id-favorites 200
|
|
168
|
+
# 1 0 eply_to_status_id":null,"source":"web"},"notifications":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
169
|
+
# 1 0 f.com/friends/ids/17799430.json
|
|
170
|
+
# 1 0 f49.json
|
|
171
|
+
# 1 0 f81852492\/Bread__normal.jpg","status":{"truncated":false,"in_reply_to_status_id":1625610632,"text":"@podcasthelper oh yes yes i do still need help. It is ok to call upon your expertis
|
|
172
|
+
# 1 0 fat":"Tue Aug 12 15:27:32 +0000 2008","friends_count":87,"profile_background_color":"FF6699","location":"Newcastle, UK","id":15823576,"time_zone":"Hawaii","favourites_count":0,"profile
|
|
173
|
+
# 890016 0 favorites 200
|
|
174
|
+
# 1 0 file_image_url":"http:\/\/static.twitter.com 0
|
|
175
|
+
# 1 0 fo:null,"name":"THE_REAL_SHAQ","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
176
|
+
# 1 0 foll3183,14885034,17824762,25320311,26651936,5520952,16092530,15466712,18414465,20019951,22151420,26332254,7096192,13434972,26275705,27923225,15770739,19900326,15654216,20486512,167358
|
|
177
|
+
# 1 0 folleply_to_screen_name":null,"id":1618358723,"source":"<a href=\"http:\/ 0
|
|
178
|
+
# 1 0 follo54:45 +0000 2009"}]
|
|
179
|
+
# 1 0 follos\/71101463\/LegalTimes_1651_normal.jpg","status":{"truncated":false,"in_reply_to_status_id":null,"text":"The Morning Wrap http: 0
|
|
180
|
+
# 1 0 followeada)","favourites_count":1,"profile_text_color":"666666"},{"description":"Writer, Pick-up Artist, Social Mastermind, and Traveler","profile_background_image_url":"http:\/\/stati
|
|
181
|
+
# 1 0 followekground_tile":false,"description":"Gamer\/Skier\/Drummer 0
|
|
182
|
+
# 1 0 followers,"profile_sidebar_border_color":"87bc44","time_zone":"London","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
183
|
+
# 1 0 followers,12836312,18993475,16860914,16142878,18504804,17810432,18661758,17356420,17901504,15535360,19240090,16180026,14614833,18264863,17807744,19459418,19356460,8112832,18637695,1925
|
|
184
|
+
# 1 0 followers.json
|
|
185
|
+
# 1 0 followers_":"need coffee","favorited":false,"in_reply_to_screen_name":null,"created_at":"Mon Apr 27 14:11:52 +0000 2009","truncated":false,"id":1629096949,"in_reply_to_status_id":null,
|
|
186
|
+
# 1 0 followers__Close_normal.JPG","status":{"truncated":false,"in_reply_to_status_id":null,"text":"is finally home and going to bed. Have to get up for work in about 4.5 hours.","in_reply_t
|
|
187
|
+
# 1 0 followers_id 0
|
|
188
|
+
# 1 0 followers_id,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
189
|
+
# 1 0 followers_id13165892,15131310,6970122,13838022,15136098,14590445,15184346,6264392,12650292,16159919,16725668,16816616,15984607,16895930,12228062,15224867,859221,12364022,15316113,15624
|
|
190
|
+
# 1 0 followers_id2009","truncated":false,"id":1625606751,"in_reply_to_status_id":1623299478,"source":"web"},"notifications":false,"time_zone":"Pacific Time (US & Canada)","favourites_count"
|
|
191
|
+
# 1 0 followers_idbackground_tile":false,"followers_count":78,"url":"http:\/\/danfitek.com","screen_name":"fitekker","name":"Dan Fitek","friends_count":100,"profile_background_color":"9ae4e8
|
|
192
|
+
# 149 0 followers_ids
|
|
193
|
+
# 1 0 followers_ids 1
|
|
194
|
+
# 1 0 followers_ids 17975054
|
|
195
|
+
# 1 0 followers_ids 200
|
|
196
|
+
# 1 0 followers_ids 20090412070434
|
|
197
|
+
# 4 0 followers_ids 0
|
|
198
|
+
# 1 0 followers_ids 0
|
|
199
|
+
# 1 0 followers_ids 0
|
|
200
|
+
# 1 0 followers_ids"in_reply_to_screen_name":null,"created_at":"Fri Apr 24 19:22:24 +0000 2009","truncated":false,"id":1606556267,"in_reply_to_status_id":null,"source":"<a href=\"http:\/ 0
|
|
201
|
+
# 1 0 followers_ids00,"profile_link_color":"0000ff","profile_image_url":"http:\/\/static.twitter.com 0
|
|
202
|
+
# 1 0 followers_idsC2EF","location":"San Diego, CA","id":9628922,"time_zone":"Pacific Time (US & Canada)","created_at":"Tue Oct 23 17:30:49 +0000 2007"}]
|
|
203
|
+
# 1 0 followers_idst":"Mon Jan 28 03:48:51 +0000 2008","screen_name":"siolanthe"},{"description":"","profile_background_image_url":"http:\/\/static.twitter.com 0
|
|
204
|
+
# 1 0 followers_ilocation":null,"id":15311449,"time_zone":"Greenland"},{"description":"","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
|
|
205
|
+
# 1 0 followerwing":false,"profile_link_color":"CD0033","url":"http:\/\/foodfeed.us","name":"FoodFeed","notifications":false,"profile_sidebar_fill_color":"fafaf5","followers_count":4399,"pro
|
|
206
|
+
# 1 0 followetp://twitter.com/followers/ids/15737773.json
|
|
207
|
+
# 1 0 fri:5,"profile_sidebar_border_color":"87bc44","url":null,"screen_name":"seniorpoopypant","name":"seniorpoopypant","favourites_count":0,"protected":false,"status":{"truncated":false,"in
|
|
208
|
+
# 1 0 frie:"Pirate LadyZebra. (also know as Zoaea)","utc_offset":-18000,"profile_sidebar_fill_color":"e0ff92","followers_count":19,"favourites_count":0,"profile_image_url":"http:\/\/s3.amazo
|
|
209
|
+
# 1 0 friend219,14213042,29736155,27530456,18755292]
|
|
210
|
+
# 1 0 friends":"web"},"profile_background_image_url":"http:\/\/static.twitter.com 0
|
|
211
|
+
# 1 0 friends\/\/s3.amazonaws.com 0
|
|
212
|
+
# 1 0 friends_3,27039226,29988381,35486899,18900303,16044047]
|
|
213
|
+
# 1 0 friends__count":0,"profile_background_color":"9ae4e8","profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
214
|
+
# 1 0 friends_i12809262,12767592,13084172,12803292,12775072,12129872,14198789,29866309]
|
|
215
|
+
# 1 0 friends_i1355,"source":"web","created_at":"Fri Jun 27 20:51:55 +0000 2008"},{"truncated":false,"user":{"description":"Recently married! Work for Victory - vc.tv - lovin' life!","utc_o
|
|
216
|
+
# 1 0 friends_id":null,"text":"One Laptop per Child Lands in Indiahttp:\/\/tinyurl.com 0
|
|
217
|
+
# 1 0 friends_id,17213487,20820391,1050851,23817210,15117375,14790735,16069532,14634720,23306376,14470037,24754635,18666525,16798949,17118708,17492127,16563598,22731226,20253928,17139092,240
|
|
218
|
+
# 1 0 friends_idile_background_images\/3476247\/BJMendelson_388_twitbacks.jpg","profile_link_color":"0084B4","location":"Glens Falls, New York","id":12687952,"time_zone":"Indiana (East)","cr
|
|
219
|
+
# 161 0 friends_ids
|
|
220
|
+
# 1 0 friends_ids 18706826
|
|
221
|
+
# 1 0 friends_ids 16624466
|
|
222
|
+
# 1 0 friends_ids 20090427091534
|
|
223
|
+
# 1 0 friends_ids 20090427094351
|
|
224
|
+
# 1 0 friends_ids 0
|
|
225
|
+
# 1 0 friends_ids 0
|
|
226
|
+
# 1 0 friends_ids 0
|
|
227
|
+
# 1 0 friends_ids 0
|
|
228
|
+
# 1 0 friends_ids 0
|
|
229
|
+
# 1 0 friends_ids 0
|
|
230
|
+
# 1 0 friends_ids/\/orangatame.com\/products 0
|
|
231
|
+
# 1 0 friends_idsl_color":"F3F3F3","followers_count":25,"location":"St. Louis","id":14708168,"notifications":false,"friends_count":23,"profile_sidebar_border_color":"DFDFDF"},"text":"@Raptor
|
|
232
|
+
# 1 0 friends_iound_images\/4821472\/Mississippi_River_TypeMap2.jpg","profile_link_color":"1F98C7","location":"San Francisco","id":18257438,"time_zone":"Pacific Time (US & Canada)","created_
|
|
233
|
+
# 1 0 friprofile_background_color":"1A1B1F","protected":false,"profile_image_url":"http:\/\/s3.amazonaws.com 0
|
|
234
|
+
# 1 0 frmusings of a young Catholic in Yorkshire, England","utc_offset":0,"notifications":false,"profile_sidebar_fill_color":"e0ff92","followers_count":89,"profile_image_url":"http:\/\/s3.am
|
|
235
|
+
# 1 0 frollowing":false,"statuses_count":468,"profile_link_color":"2FC2EF","url":"http:\/\/myspace.com 0
|
|
236
|
+
# 1 0 fws.com\/twitter_production\/profile_background_images 0
|
|
237
|
+
# 1 0 hu Mar 05 07:45:42 +0000 2009","id":1282474011,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"C6E2EE","notifications":false,"created_at":"Thu Mar 05 07:36
|
|
238
|
+
# 1 0 ile_image_url":"http:\/\/static.twitter.com 0
|
|
239
|
+
# 1 0 imit exceeded. Clients may not make more than 20000 requests per hour."}
|
|
240
|
+
# 1 0 location":null,"id":22893663,"profile_link_color":"0000ff"}
|
|
241
|
+
# 1 0 nk faudrait demander \u00e0 Michel Bergeron ,on aurait du fun pour 30 minutes","in_reply_to_user_id":21818830,"created_at":"Wed Mar 04 19:18:53 +0000 2009","truncated":false,"id":12798
|
|
242
|
+
# 1 0 oz","profile_background_image_url":"http:\/\/static.twitter.com 0
|
|
243
|
+
# 1 0 s football, beer, and technology! Tweet away!","statuses_count":1444,"utc_offset":-21600,"profile_sidebar_border_color":"87bc44","profile_background_tile":true,"following":false,"prof
|
|
244
|
+
# 24228 0 timeline 200
|
|
245
|
+
# 1 0 u"created_at":"Wed Mar 04 05:45:20 +0000 2009","in_reply_to_user_id":null,"in_reply_to_status_id":null,"truncated":false,"id":1277431048,"source":"<a href=\"http:\/ 0
|
|
246
|
+
# 1 0 u128
|
|
247
|
+
# 1 0 u88,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"F2E195","notifications":false,"created_at":"Wed Mar 04 05:32:57 +0000 2009","profile_background_image_u
|
|
248
|
+
# 1 0 uat":"Thu Mar 05 09:07:06 +0000 2009","id":1282640319,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 0
|
|
249
|
+
# 1 0 ul":null,"name":"kimberly luzier","profile_background_tile":false,"protected":false,"status":{"in_reply_to_user_id":null,"text":"pictures for ebayy","created_at":"Thu Mar 05 17:21:37 +
|
|
250
|
+
# 1 0 us":"http:\/\/s3.amazonaws.com 0
|
|
251
|
+
# 1 0 us":false,"location":null,"id":22740024}
|
|
252
|
+
# 1 0 us0308222713
|
|
253
|
+
# 1 0 use":2,"url":"http:\/\/www.pat-bach.com","name":"Tim Bach","profile_background_tile":false,"protected":false,"status":{"truncated":false,"favorited":false,"text":"Setting up my Twitter
|
|
254
|
+
# 1 0 use,"id":1288256641,"in_reply_to_status_id":null,"source":"web"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Thu Mar 05 08:33:22 +0000 2009","profile_ba
|
|
255
|
+
# 1 0 usefalse,"favorited":false,"text":"Wondering what twitter is all about and if I am missing out!","in_reply_to_user_id":null,"created_at":"Tue Mar 03 13:21:30 +0000 2009","id":127364277
|
|
256
|
+
# 1 0 useme":"magic 93.1 Radio","profile_background_image_url":"http:\/\/s3.amazonaws.com 0
|
|
257
|
+
# 1 0 useprofile_text_color":"000000","description":null,"screen_name":"JohnNMiller","utc_offset":null,"profile_link_color":"0000ff","time_zone":null,"profile_sidebar_fill_color":"e0ff92","f
|
|
258
|
+
#
|
|
259
|
+
# 474 0 user
|
|
260
|
+
# 1 0 user 20104991
|
|
261
|
+
# 2 0 user 1
|
|
262
|
+
# 1 0 user 14686512
|
|
263
|
+
# 80 0 user 200
|
|
264
|
+
# 1 0 user 20090308201437
|
|
265
|
+
# 1 0 user 20090308201710
|
|
266
|
+
# 1 0 user 20090308201901
|
|
267
|
+
# 1 0 user 20090308202228
|
|
268
|
+
# 1 0 user 20090308204043
|
|
269
|
+
# 1 0 user 20090308214100
|
|
270
|
+
# 8 0 user 0
|
|
271
|
+
# 1 0 user 0
|
|
272
|
+
# 1 0 user 0
|
|
273
|
+
# 1 0 user 0
|
|
274
|
+
# 1 0 user 0
|
|
275
|
+
# 1 0 user 0
|
|
276
|
+
# 1 0 user 0
|
|
277
|
+
# 1 0 user 0
|
|
278
|
+
# 1 0 user 0 /lovechelle","name":"nichellemicole","profile_background_tile":false,"protected":true,"profile_sidebar_border_color":"D9B17E","notifications":false,"cre
|
|
279
|
+
# 1 0 user 0 /images\/themes\/theme1\/bg.gif","statuses_count":1,"profile_text_color":"000000","time_zone":null,"url":null,"name":"Ben Pitz","friends_count":10,"prof
|
|
280
|
+
# 1 0 user 0 /help.twitter.com\/index.php?pg=kb.page&id=75\">txt<\/a>"},"notifications":false,"profile_image_url":"http:\/\/static.twitter.com\/images\/default_profi
|
|
281
|
+
# 1 0 user Ocean from my office window.","favorited":false,"created_at":"Wed Feb 25 15:55:00 +0000 2009","in_reply_to_user_id":null,"id":1249633057,"source":"web"},"time_zone":null,"profile_
|
|
282
|
+
# 5 0 user_timeline
|
|
283
|
+
# 1 0 user_timeline 0
|
|
284
|
+
# 1 0 userr.com\/images\/default_profile_normal.png","followers_count":3,"location":null,"id":21311967,"created_at":"Thu Feb 19 16:16:04 +0000 2009","profile_sidebar_border_color":"87bc44","
|
|
285
|
+
# 1 0 usertp:\/\/orangatame.com 0
|
|
286
|
+
# 1 0 usertter_production\/profile_images\/87717775 0
|
|
287
|
+
# 1 0 usm/users/show/0022897534.json?page=1
|
|
288
|
+
# 1 0 usmtwittercom.html\">mobile web<\/a>"},"profile_sidebar_border_color":"87bc44","notifications":false,"created_at":"Tue Sep 02 07:53:34 +0000 2008","profile_background_image_url":"http:
|
|
289
|
+
# 8 20081218 bogus-all_numeric-followers 200
|
|
290
|
+
# 12 20081218 bogus-all_numeric-friends 200
|
|
291
|
+
# :
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
require 'rubygems'
|
|
3
|
+
require 'wukong'
|
|
4
|
+
require 'monkeyshines'
|
|
5
|
+
require 'wuclan/twitter'
|
|
6
|
+
# un-namespace request classes.
|
|
7
|
+
include Wuclan::Twitter::Scrape
|
|
8
|
+
include Wuclan::Twitter::Model
|
|
9
|
+
# If you are anyone but the original author, this next require is useless but harmless.
|
|
10
|
+
require 'wuclan/twitter/scrape/old_skool_request_classes'
|
|
11
|
+
|
|
12
|
+
#
|
|
13
|
+
# req, id, page, scraped_at, response_code
|
|
14
|
+
#
|
|
15
|
+
# Shell command handed to Wukong::Script as :map_command (see the bottom of
# this file). cut(1) keeps tab-separated fields 1, 3, 4, 7 and 8 of each input
# line. Inside this double-quoted Ruby string \t is turned into a literal tab,
# so cut receives -d"<TAB>". The trailing space is part of the string.
REQUEST_MAPPER_COMMAND = "/usr/bin/cut -d\"\t\" -f1,3,4,7,8 "
|
|
16
|
+
|
|
17
|
+
#
# Reducer that boils all records sharing a [req, id] key down to one output
# row: the superclass' final_value for that key plus a tally of how many
# records carried each tracked HTTP response code.
#
# Input records are expected as
#   req, id, page, scraped_at, response_code
# Output rows are
#   id (zero-padded to 10 digits), req, page, scraped_at, n200, n400, n401, n403, n404
#
# NOTE(review): this relies on Wukong::Streamer::UniqByLastReducer calling
# start! / accumulate / finalize around each key group and exposing
# final_value -- confirm against the Wukong version in use.
#
class TwitterRequestUniqer < Wukong::Streamer::UniqByLastReducer
  # Hash of response code (Integer) => number of records seen with that code.
  attr_accessor :response_codes

  # Records are grouped on request type and id; page and the remaining
  # fields do not take part in the key.
  def get_key(req=nil, id=nil, pg=nil, *args)
    [req, id]
  end

  # Resets the per-code tallies, then defers to the superclass.
  def start!(*args)
    self.response_codes = { 200=>0, 400=>0, 401=>0, 403=>0, 404=>0 }
    super(*args)
  end

  # Kept from the original file; nothing in this class calls JSON directly.
  require 'json'

  # Tallies the record's response code and passes the record to the
  # superclass. Records whose scraped_at field is not exactly a 14-digit
  # timestamp are treated as corrupt: they are neither counted nor forwarded.
  def accumulate(*args)
    scraped_at, resp = args.values_at(3, 4)
    # Anchored on purpose: an unanchored /\d{14}/ would also accept junk
    # fields that merely contain a 14-digit run somewhere inside them.
    return unless scraped_at =~ /\A\d{14}\z/
    resp = resp.to_i
    response_codes[resp] += 1 if response_codes.key?(resp)
    super(*args)
  end

  # Yields the summary row for the current key, or nothing when no valid
  # record was accumulated (final_value is blank).
  def finalize(*args)
    return if final_value.blank?
    req, id, page, scraped_at = final_value
    padded_id = "%010d" % id.to_i
    yield([padded_id, req, page, scraped_at] + response_codes.values_at(200, 400, 401, 403, 404))
  end
end
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Launch the job. There is no Ruby mapper class (nil): mapping is done by the
# shell command in REQUEST_MAPPER_COMMAND, and TwitterRequestUniqer reduces.
request_uniq_script = Wukong::Script.new(
  nil,
  TwitterRequestUniqer,
  :map_command      => REQUEST_MAPPER_COMMAND,
  :partition_fields => 2,
  :sort_fields      => 3
)
request_uniq_script.run
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# 49522
|
|
55
|
+
# 74975395 200
|
|
56
|
+
# 84 302
|
|
57
|
+
# 277786 400
|
|
58
|
+
# 972881 401
|
|
59
|
+
# 94647 403
|
|
60
|
+
# 178105 404
|
|
61
|
+
# 9710 500
|
|
62
|
+
# 23134 502
|
|
63
|
+
# 1588 503
|
|
64
|
+
# 2479 504
|
|
65
|
+
|
|
66
|
+
# Wuclan::Twitter::Scrape::Base.class_eval do class_inheritable_accessor :req_code ; end
|
|
67
|
+
# TwitterUserRequest.class_eval do self.req_code = :tw_user ; end
|
|
68
|
+
# TwitterFollowersRequest.class_eval do self.req_code = :tw_foll ; end
|
|
69
|
+
# TwitterFriendsRequest.class_eval do self.req_code = :tw_frnd ; end
|
|
70
|
+
# TwitterFollowersIdsRequest.class_eval do self.req_code = :tw_foid ; end
|
|
71
|
+
# TwitterFriendsIdsRequest.class_eval do self.req_code = :tw_frid ; end
|
|
72
|
+
# TwitterUserTimelineRequest.class_eval do self.req_code = :tw_ustl ; end
|
|
73
|
+
#
|
|
74
|
+
# REQ_CODES = {
|
|
75
|
+
# 'followers' => :tw_fo, 'twitter_followers_request' => :tw_fo,
|
|
76
|
+
# 'friends' => :tw_fr, 'twitter_friends_request' => :tw_fr,
|
|
77
|
+
# 'followers_ids' => :tw_fi, 'twitter_followers_ids_request' => :tw_fi,
|
|
78
|
+
# 'friends_ids' => :tw_ri, 'twitter_friends_ids_request' => :tw_ri,
|
|
79
|
+
# 'user' => :tw_us, 'twitter_user_request' => :tw_us,
|
|
80
|
+
# 'user_timeline' => :tw_ut, 'twitter_user_timeline_request' => :tw_ut,
|
|
81
|
+
# }
|
|
82
|
+
|
|
83
|
+
# #
|
|
84
|
+
# #
|
|
85
|
+
# #
|
|
86
|
+
# class TwitterRequestParser < Wukong::Streamer::StructStreamer
|
|
87
|
+
#
|
|
88
|
+
# def process request, *args, &block
|
|
89
|
+
# next if request.page.to_i > 1
|
|
90
|
+
# next if request.response_code != '200'
|
|
91
|
+
# req_code = REQ_CODES[request]
|
|
92
|
+
# case request
|
|
93
|
+
# when TwitterUserRequest, TwitterFollowersRequest, TwitterFriendsRequest,
|
|
94
|
+
# TwitterFollowersIdsRequest, TwitterFriendsIdsRequest, TwitterUserTimelineRequest
|
|
95
|
+
# yield [request.twitter_user_id, request.req_code, request.scraped_at]
|
|
96
|
+
# end
|
|
97
|
+
# end
|
|
98
|
+
# end
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
-- Load the per-request summary rows: id, resource, page, scrape timestamp,
-- and one count column per tracked HTTP response code.
-- The multi-field schema after AS must be parenthesised.
Scrapes            = LOAD 'tmp/last_requests_and_codes' AS (user_id:int, rsrc:chararray, page:int, datetime:long, r200:int, r400:int, r401:int, r403:int, r404:int) ;
-- Keep only rows for the 'user' resource.
UserScrapes        = FILTER Scrapes BY rsrc == 'user' ;
-- Smallest datetime value first.
UserScrapesOrdered = ORDER UserScrapes BY datetime ASC ;
STORE UserScrapesOrdered INTO 'twmeta/scrape_requests/users_by_staleness-20090730.tsv' ;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
love 65536 3.36366494448618 6698 2686693027 2688691633
|
|
2
|
+
red+sox 65536 0.0113457581992013 1500 2661001994 2688059232
|
|
3
|
+
britney+spears 65536 0.00866753886170806 184 2685103763 2688130850
|
|
4
|
+
hadoop 65536 0.000661831916251315 614 2501794487 2687967783
|
|
5
|
+
infochimps 65536 2.24964286919452e-05 16 2541533220 2683708276
|
|
6
|
+
hapaxlegomenon 65536 0.0 1 2646535741 2646535741
|