twitter_to_csv 0.0.5 → 0.1.0
This diff shows the changes between publicly available versions of the package as they were released to their public registry. It is provided for informational purposes only.
- data/.gitignore +0 -2
- data/.rvmrc +1 -1
- data/Gemfile +1 -3
- data/README.markdown +34 -4
- data/bin/twitter_to_csv +60 -11
- data/lib/twitter_to_csv/afinn/AFINN-111.txt +2478 -0
- data/lib/twitter_to_csv/afinn/AFINN-96.txt +1480 -0
- data/lib/twitter_to_csv/afinn/AFINN-README.txt +43 -0
- data/lib/twitter_to_csv/csv_builder.rb +137 -17
- data/lib/twitter_to_csv/twitter_watcher.rb +24 -16
- data/lib/twitter_to_csv/version.rb +1 -1
- data/spec/csv_builder_spec.rb +253 -9
- data/twitter_to_csv.gemspec +4 -3
- metadata +53 -13
data/lib/twitter_to_csv/afinn/AFINN-README.txt ADDED

@@ -0,0 +1,43 @@
+AFINN is a list of English words rated for valence with an integer
+between minus five (negative) and plus five (positive). The words have
+been manually labeled by Finn Årup Nielsen in 2009-2011. The file
+is tab-separated. There are two versions:
+
+AFINN-111: Newest version with 2477 words and phrases.
+
+AFINN-96: 1468 unique words and phrases on 1480 lines. Note that there
+are 1480 lines, as some words are listed twice. The word list in not
+entirely in alphabetic ordering.
+
+An evaluation of the word list is available in:
+
+Finn Årup Nielsen, "A new ANEW: Evaluation of a word list for
+sentiment analysis in microblogs", http://arxiv.org/abs/1103.2903
+
+The list was used in:
+
+Lars Kai Hansen, Adam Arvidsson, Finn Årup Nielsen, Elanor Colleoni,
+Michael Etter, "Good Friends, Bad News - Affect and Virality in
+Twitter", The 2011 International Workshop on Social Computing,
+Network, and Services (SocialComNet 2011).
+
+
+This database of words is copyright protected and distributed under
+"Open Database License (ODbL) v1.0"
+http://www.opendatacommons.org/licenses/odbl/1.0/ or a similar
+copyleft license.
+
+See comments on the word list here:
+http://fnielsen.posterous.com/old-anew-a-sentiment-about-sentiment-analysis
+
+
+In Python the file may be read into a dictionary with:
+
+>>> afinn = dict(map(lambda (k,v): (k,int(v)),
+                     [ line.split('\t') for line in open("AFINN-111.txt") ]))
+>>> afinn["Good".lower()]
+3
+>>> sum(map(lambda word: afinn.get(word, 0), "Rainy day but still in a good mood".lower().split()))
+2
+
+
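For comparison, the same tab-separated file can be read in Ruby, the language the rest of this gem is written in. A minimal sketch, assuming AFINN-111.txt sits in the working directory (this is an illustration, not part of the gem):

    # Build a Hash mapping each word or phrase to its integer valence.
    afinn = File.readlines("AFINN-111.txt").each_with_object({}) do |line, scores|
      word_or_phrase, valence = line.chomp.split("\t")
      scores[word_or_phrase] = valence.to_i
    end

    afinn["Good".downcase]
    # => 3
    "Rainy day but still in a good mood".downcase.split.sum { |word| afinn.fetch(word, 0) }
    # => 2

As in the README's Python example, the per-word lookup simply ignores keys that are multi-word phrases.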
data/lib/twitter_to_csv/csv_builder.rb CHANGED

@@ -1,17 +1,18 @@
 # encoding: UTF-8
 require 'pp'
+require 'elif'
+require 'time'
 
 module TwitterToCsv
   class CsvBuilder
     attr_accessor :options, :sampled_fields
 
-    # http://daringfireball.net/2010/07/improved_regex_for_matching_urls
-    URL_REGEX = %r"\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s\(\)<>]+|\((?:[^\s\(\)<>]+|(?:\([^\s\(\)<>]+\)))*\))+(?:\((?:[^\s\(\)<>]+|(?:\([^\s\(\)<>]+\)))*\)|[^\s\`\!\(\)\[\]\{\};:'\".,<>\?«»“”‘’]))"i
-
     def initialize(options = {})
       @options = options
       @sampled_fields = {}
       @num_samples = 0
+      @retweet_counts = {}
+      @retweet_hour_counts = {}
     end
 
     def run(&block)
@@ -35,22 +36,79 @@ module TwitterToCsv
       end
     end
 
+    def within_time_window?(status)
+      if options[:start] || options[:end]
+        created_at = status['created_at'].is_a?(Time) ? status['created_at'] : Time.parse(status['created_at'])
+        return false if options[:start] && created_at < options[:start]
+        return false if options[:end] && created_at >= options[:end]
+      end
+      true
+    end
+
+    def display_rolledup_status?(status)
+      created_at = status['created_at'].is_a?(Time) ? status['created_at'] : Time.parse(status['created_at'])
+      @newest_status_at = created_at if @newest_status_at.nil?
+
+      if status['retweeted_status'] && status['retweeted_status']['id']
+        # This is a retweet.
+        original_created_at = status['retweeted_status']['created_at'].is_a?(Time) ? status['retweeted_status']['created_at'] : Time.parse(status['retweeted_status']['created_at'])
+        if !options[:retweet_window] || created_at <= original_created_at + options[:retweet_window] * 60 * 60 * 24
+          @retweet_counts[status['retweeted_status']['id']] ||= 0
+          @retweet_counts[status['retweeted_status']['id']] = status['retweeted_status']['retweet_count'] if status['retweeted_status']['retweet_count'] > @retweet_counts[status['retweeted_status']['id']]
+
+
+          if options[:retweet_counts_at]
+            @retweet_hour_counts[status['retweeted_status']['id']] ||= options[:retweet_counts_at].map { 0 }
+            options[:retweet_counts_at].each.with_index do |hour_mark, index|
+              if created_at <= original_created_at + hour_mark * 60 * 60 && status['retweeted_status']['retweet_count'] > @retweet_hour_counts[status['retweeted_status']['id']][index]
+                @retweet_hour_counts[status['retweeted_status']['id']][index] = status['retweeted_status']['retweet_count']
+              end
+            end
+          end
+        end
+        false
+      else
+        # This is an original status.
+        if (@retweet_counts[status['id']] || 0) >= (options[:retweet_threshold] || 0)
+          if !options[:retweet_window] || created_at <= @newest_status_at - options[:retweet_window] * 60 * 60 * 24
+            status['retweet_count'] = @retweet_counts[status['id']] if @retweet_counts[status['id']] && @retweet_counts[status['id']] > status['retweet_count']
+            status['_retweet_hour_counts'] = @retweet_hour_counts.delete(status['id']) if options[:retweet_counts_at]
+            true
+          else
+            false
+          end
+        else
+          false
+        end
+      end
+    end
+
     def handle_status(status, &block)
-      if
-
-
-
-
-
+      if within_time_window?(status)
+        if (options[:require_english] && is_english?(status)) || !options[:require_english]
+          if options[:retweet_mode] != :rollup || display_rolledup_status?(status)
+            log_json(status) if options[:json]
+            log_csv(status) if options[:csv]
+            yield_status(status, &block) if block
+            sample_fields(status) if options[:sample_fields]
+            analyze_gaps(status, options[:analyze_gaps]) if options[:analyze_gaps]
+            STDERR.puts "Logging: #{status['text']}" if options[:verbose]
+          end
+        end
       end
     end
 
     def log_csv_header
       header_labels = options[:fields].dup
 
-
-
-
+      header_labels += ["average_sentiment", "sentiment_words"] if options[:compute_sentiment]
+      header_labels << "word_count" if options[:compute_word_count]
+
+      options[:retweet_counts_at].each { |hours| header_labels << "retweets_at_#{hours}_hours" } if options[:retweet_counts_at]
+
+      options[:url_columns].times { |i| header_labels << "url_#{i+1}" } if options[:url_columns] && options[:url_columns] > 0
+      options[:hashtag_columns].times { |i| header_labels << "hash_tag_#{i+1}" } if options[:hashtag_columns] && options[:url_columns] > 0
+      options[:user_mention_columns].times { |i| header_labels << "user_mention_#{i+1}" } if options[:user_mention_columns] && options[:user_mention_columns] > 0
 
       options[:csv].puts header_labels.to_csv(:encoding => 'UTF-8', :force_quotes => true)
     end
@@ -70,22 +128,84 @@ module TwitterToCsv
         }.to_s
       end
 
+      row += compute_sentiment(status["text"]) if options[:compute_sentiment]
+
+      row << status["text"].split(/\s+/).length if options[:compute_word_count]
+
+      row += status["_retweet_hour_counts"] if options[:retweet_counts_at]
+
       if options[:url_columns] && options[:url_columns] > 0
-        urls = status[
+        urls = (status["entities"] && (status["entities"]["urls"] || []).map {|i| i["expanded_url"] || i["url"] }) || []
         options[:url_columns].times { |i| row << urls[i].to_s }
       end
 
+      if options[:hashtag_columns] && options[:hashtag_columns] > 0
+        hashes = (status["entities"] && (status["entities"]["hashtags"] || []).map {|i| i["text"] }) || []
+        options[:hashtag_columns].times { |i| row << hashes[i].to_s }
+      end
+
+      if options[:user_mention_columns] && options[:user_mention_columns] > 0
+        users = (status["entities"] && (status["entities"]["user_mentions"] || []).map {|i| i["screen_name"] }) || []
+        options[:user_mention_columns].times { |i| row << users[i].to_s }
+      end
+
       row
     end
 
+    def afinn
+      @afinn_cache ||= begin
+        words_or_phrases = []
+        File.read(File.expand_path(File.join(File.dirname(__FILE__), "afinn", "AFINN-111.txt"))).each_line do |line|
+          word_or_phrase, valence = line.split(/\t/)
+          pattern = Regexp::escape word_or_phrase.gsub(/-/, " ").gsub(/'/, '')
+          words_or_phrases << [/\b#{pattern}\b/i, pattern.length, valence.to_f]
+        end
+        words_or_phrases.sort {|b, a| a[1] <=> b[1] }
+      end
+    end
+
+    def compute_sentiment(original_text)
+      text = original_text.downcase.gsub(/'/, '').gsub(/[^a-z0-9]/, ' ').gsub(/\s+/, ' ').strip
+      count = 0
+      valence_sum = 0
+      afinn.each do |pattern, length, valence|
+        while text =~ pattern
+          text.sub! pattern, ''
+          valence_sum += valence
+          count += 1
+        end
+      end
+      if count > 0
+        [valence_sum / count.to_f, count]
+      else
+        [0, 0]
+      end
+    end
+
     def replay_from(filename, &block)
-
-
-
+      # If a retweet mode is being used, we read the file backwards using the Elif gem.
+      opener = options[:retweet_mode] ? Elif : File
+
+      opener.open(filename, "r") do |file|
+        file.each do |line|
           next if line =~ /\A------SEP.RATOR------\Z/i
           handle_status JSON.parse(line), &block
         end
       end
+      puts "Last status seen at #{@last_status_seen_at}." if options[:analyze_gaps] && @last_status_seen_at
+    end
+
+    def analyze_gaps(status, min_gap_size_in_minutes)
+      time = Time.parse(status['created_at'])
+      if !@last_status_seen_at
+        puts "First status seen at #{time}."
+      else
+        gap_length = (time - @last_status_seen_at) / 60
+        if gap_length > min_gap_size_in_minutes
+          puts "Gap of #{gap_length.to_i} minutes from #{@last_status_seen_at} to #{time}."
+        end
+      end
+      @last_status_seen_at = time
     end
 
     def sample_fields(status)
@@ -142,4 +262,4 @@ module TwitterToCsv
       true
     end
   end
-end
+end
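Pulling the new CsvBuilder pieces together, a usage sketch along these lines should work. It is illustrative rather than taken from the gem's documentation: the require is assumed to load CsvBuilder, out.csv and statuses.json are hypothetical file names, and the option keys are the ones introduced above.

    require 'twitter_to_csv'

    # Replay a previously captured log (one JSON-encoded status per line) and
    # write a CSV of original tweets that reached at least 2 retweets within a
    # 4-day window, with sentiment, word count, and retweets-at-hour columns.
    File.open("out.csv", "w") do |csv|
      builder = TwitterToCsv::CsvBuilder.new(
        :csv                => csv,
        :fields             => %w[id text retweet_count],
        :retweet_mode       => :rollup,
        :retweet_threshold  => 2,
        :retweet_window     => 4,        # days
        :retweet_counts_at  => [24, 48], # hours after the original tweet
        :compute_sentiment  => true,
        :compute_word_count => true
      )
      builder.log_csv_header
      builder.replay_from "statuses.json"
    end

Because :retweet_mode is set, replay_from reads the log backwards through Elif, so the rolled-up retweet counts have already been accumulated by the time each original status is reached.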
data/lib/twitter_to_csv/twitter_watcher.rb CHANGED

@@ -17,25 +17,33 @@ module TwitterToCsv
     end
 
     def run(&block)
-
-
-
-
-
-
-
-      stream.each_item do |item|
-        handle_status JSON.parse(item), block
-      end
+      while true
+        EventMachine::run do
+          stream = Twitter::JSONStream.connect(
+            :path => "/1/statuses/#{(filter && filter.length > 0) ? 'filter' : 'sample'}.json#{"?track=#{filter.join(",")}" if filter && filter.length > 0}",
+            :auth => "#{username}:#{password}",
+            :ssl => true
+          )
 
-
-
-
+          stream.each_item do |item|
+            handle_status JSON.parse(item), block
+          end
+
+          stream.on_error do |message|
+            STDERR.puts " --> Twitter error: #{message} <--"
+          end
+
+          stream.on_no_data do |message|
+            STDERR.puts " --> Got no data for awhile; trying to reconnect."
+            EventMachine::stop_event_loop
+          end
 
-
-
-
+          stream.on_max_reconnects do |timeout, retries|
+            STDERR.puts " --> Oops, tried too many times! <--"
+            EventMachine::stop_event_loop
+          end
         end
+        puts " --> Reconnecting..."
       end
     end
 
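The new run loop follows a simple pattern: EventMachine::run is wrapped in an outer loop, so any handler that calls EventMachine::stop_event_loop drops out of the reactor and the next iteration reconnects. A minimal, self-contained sketch of just that pattern (not the gem's API; a timer stands in for the stream's on_no_data / on_max_reconnects handlers):

    require 'eventmachine'

    3.times do |attempt|
      EventMachine::run do
        puts "connecting (attempt #{attempt + 1})..."
        # In TwitterWatcher#run this is where Twitter::JSONStream.connect happens;
        # stopping the event loop from any handler ends this run block.
        EventMachine::add_timer(1) { EventMachine::stop_event_loop }
      end
      puts " --> Reconnecting..."
    end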
data/spec/csv_builder_spec.rb CHANGED

@@ -1,5 +1,6 @@
 # encoding: utf-8
 require 'spec_helper'
+require 'time'
 
 describe TwitterToCsv::CsvBuilder do
   describe "#handle_status" do
@@ -15,6 +16,23 @@ describe TwitterToCsv::CsvBuilder do
       string_io.rewind
       string_io.read.should == "\"This is English\"\n\"This is still English\"\n"
     end
+
+    it "honors start_time and end_time" do
+      string_io = StringIO.new
+      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[text],
+                                                 :start => Time.parse("Mon Mar 07 07:42:22 +0000 2011"),
+                                                 :end => Time.parse("Mon Mar 08 02:00:00 +0000 2011"))
+
+      # Order shouldn't matter
+      csv_builder.handle_status('text' => "1", 'created_at' => 'Mon Mar 07 07:41:22 +0000 2011')
+      csv_builder.handle_status('text' => "6", 'created_at' => 'Mon Mar 08 02:01:00 +0000 2011')
+      csv_builder.handle_status('text' => "2", 'created_at' => 'Mon Mar 07 07:42:22 +0000 2011')
+      csv_builder.handle_status('text' => "4", 'created_at' => 'Mon Mar 08 01:41:22 +0000 2011')
+      csv_builder.handle_status('text' => "5", 'created_at' => 'Mon Mar 08 02:00:00 +0000 2011')
+      csv_builder.handle_status('text' => "3", 'created_at' => 'Mon Mar 07 10:00:00 +0000 2011')
+      string_io.rewind
+      string_io.read.should == "\"2\"\n\"4\"\n\"3\"\n"
+    end
   end
 
   describe "log_csv_header" do
@@ -33,10 +51,23 @@ describe TwitterToCsv::CsvBuilder do
       string_io.rewind
       string_io.read.should == '"something","url_1","url_2"' + "\n"
     end
+
+    it "includes columns for the retweet_counts_at entries, if present" do
+      string_io = StringIO.new
+      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io,
+                                                 :fields => %w[something],
+                                                 :retweet_mode => :rollup,
+                                                 :retweet_threshold => 1,
+                                                 :retweet_window => 4,
+                                                 :retweet_counts_at => [0.5, 24, 48])
+      csv_builder.log_csv_header
+      string_io.rewind
+      string_io.read.should == '"something","retweets_at_0.5_hours","retweets_at_24_hours","retweets_at_48_hours"' + "\n"
+    end
   end
 
   describe "logging to a CSV" do
-    it "outputs the requested fields when
+    it "outputs the requested fields when specified in dot-notation" do
       string_io = StringIO.new
       csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[something something_else.a something_else.c.d])
       csv_builder.handle_status({
@@ -54,20 +85,233 @@ describe TwitterToCsv::CsvBuilder do
       string_io.read.should == "\"hello\",\"b\",\"foo\"\n"
     end
 
-    it "can extract URLs" do
+    it "can extract URLs, hashtags, and user mentions" do
       string_io = StringIO.new
-      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[something], :url_columns => 2)
+      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[something], :url_columns => 2, :hashtag_columns => 2, :user_mention_columns => 1)
       csv_builder.handle_status({
-        'something' => "
-
+        'something' => "hello1",
+        "entities" => {
+          "hashtags" => [
+            { "text" => "AHashTag" },
+            { "text" => "AnotherHashTag" },
+            { "text" => "AThirdHashTag" }
+          ],
+          "user_mentions" => [
+            { "screen_name" => "ScreenNameOne" },
+            { "screen_name" => "ScreenNameTwo" },
+            { "screen_name" => "ScreenNameThree" }
+          ],
+          "urls" => [
+            { "url" => "http://t.co/1231" },
+            { "url" => "http://t.co/1232", "expanded_url" => "http://a.real.url2" },
+            { "url" => "http://t.co/1233", "expanded_url" => "http://a.real.url3" }
+          ]
+        },
+        'text' => 'some text'
       })
       csv_builder.handle_status({
-        'something' => "
-
+        'something' => "hello2",
+        "entities" => {
+          "hashtags" => [],
+          "user_mentions" => [],
+          "urls" => []
+        },
+        'text' => 'this is another status'
+      })
+      string_io.rewind
+      string_io.read.should == "\"hello1\",\"http://t.co/1231\",\"http://a.real.url2\",\"AHashTag\",\"AnotherHashTag\",\"ScreenNameOne\"\n" +
+                               "\"hello2\",\"\",\"\",\"\",\"\",\"\"\n"
+    end
+
+    it "can compute the average sentiment" do
+      string_io = StringIO.new
+      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[something], :compute_sentiment => true)
+      csv_builder.handle_status({
+        'something' => "hello1",
+        'text' => 'i love cheese'
+
+      })
+      csv_builder.handle_status({
+        'something' => "hello2",
+        'text' => 'i love cheese and like bread'
+      })
+      csv_builder.handle_status({
+        'something' => "hello3",
+        'text' => 'some kind of once-in-a-lifetime cool-fest in the right direction or the right-direction or the son_of a bitch' # it tries both hyphenated and non-hyphenated, and does phrases
       })
       string_io.rewind
-      string_io.read.should == "\"
-        "\"
+      string_io.read.should == "\"hello1\",\"3.0\",\"1\"\n" +
+                               "\"hello2\",\"#{(3 + 2) / 2.0}\",\"2\"\n" +
+                               "\"hello3\",\"#{(0 + 3 + 1 + 3 + 3 + -5) / 6.0}\",\"6\"\n"
+    end
+
+    it "can compute word count" do
+      string_io = StringIO.new
+      csv_builder = TwitterToCsv::CsvBuilder.new(:csv => string_io, :fields => %w[something], :compute_word_count => true)
+      csv_builder.handle_status({
+        'something' => "hello1",
+        'text' => 'i love cheese'
+
+      })
+      csv_builder.handle_status({
+        'something' => "hello2",
+        'text' => 'foo_bar baz9bing'
+      })
+      string_io.rewind
+      string_io.read.should == "\"hello1\",\"3\"\n" +
+                               "\"hello2\",\"2\"\n"
+    end
+  end
+
+  describe "retweet handling" do
+    def play_data(builder)
+      days = 60 * 60 * 24
+      now = Time.now
+
+      builder.handle_status({
+        'created_at' => now,
+        'retweeted_status' => {
+          'id' => 3,
+          'created_at' => now - 1 * days,
+          'retweet_count' => 1
+        },
+        'text' => 'RT not enough time has passed'
+      })
+
+      builder.handle_status({
+        'id' => 3,
+        'created_at' => now - 1 * days,
+        'text' => 'not enough time has passed',
+        'retweet_count' => 0
+      })
+
+      builder.handle_status({
+        'created_at' => now - 1 * days,
+        'retweeted_status' => {
+          'id' => 2,
+          'created_at' => now - 4 * days,
+          'retweet_count' => 3
+        },
+        'text' => 'RT 2 retweets'
+      })
+
+      builder.handle_status({
+        'created_at' => now - 2 * days,
+        'retweeted_status' => {
+          'id' => 4,
+          'created_at' => now - 5 * days,
+          'retweet_count' => 1
+        },
+        'text' => 'RT 1 retweet'
+      })
+
+      builder.handle_status({
+        'created_at' => now - 3 * days,
+        'retweeted_status' => {
+          'id' => 2,
+          'created_at' => now - 4 * days,
+          'retweet_count' => 2
+        },
+        'text' => 'RT 2 retweets'
+      })
+
+      builder.handle_status({
+        'created_at' => now - 3.99 * days,
+        'retweeted_status' => {
+          'id' => 2,
+          'created_at' => now - 4 * days,
+          'retweet_count' => 1
+        },
+        'text' => 'RT 2 retweets'
+      })
+
+      builder.handle_status({
+        'id' => 2,
+        'created_at' => now - 4 * days,
+        'text' => '2 retweets',
+        'retweet_count' => 0
+      })
+
+      builder.handle_status({
+        'id' => 4,
+        'created_at' => now - 5 * days,
+        'text' => '1 retweet',
+        'retweet_count' => 0
+      })
+
+      builder.handle_status({
+        'id' => 5,
+        'created_at' => now - 5.1 * days,
+        'text' => 'no retweets',
+        'retweet_count' => 0
+      })
+    end
+
+    it "skips statuses with fewer than :retweet_threshold retweets and ignores statues that haven't been seen for retweet_window yet" do
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 2,
+                                             :retweet_window => 2,
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == "\"2\",\"2\"\n"
+
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 1,
+                                             :retweet_window => 3,
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == "\"2\",\"3\"\n" + "\"4\",\"1\"\n"
+
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 1,
+                                             :retweet_window => 20,
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == ""
+
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 1,
+                                             :retweet_window => nil,
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == "\"3\",\"1\"\n\"2\",\"3\"\n\"4\",\"1\"\n"
+
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 0,
+                                             :retweet_window => nil,
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == "\"3\",\"1\"\n\"2\",\"3\"\n\"4\",\"1\"\n\"5\",\"0\"\n"
+    end
+
+    it "logs at the hourly marks requested in retweet_counts_at" do
+      string_io = StringIO.new
+      builder = TwitterToCsv::CsvBuilder.new(:retweet_mode => :rollup,
+                                             :retweet_threshold => 1,
+                                             :retweet_window => 4,
+                                             :retweet_counts_at => [0.5, 23, 24, 48, 73, 1000],
+                                             :csv => string_io,
+                                             :fields => %w[id retweet_count])
+      play_data builder
+      string_io.rewind
+      string_io.read.should == "\"2\",\"3\",\"1\",\"1\",\"2\",\"2\",\"3\",\"3\"\n" +
+                               "\"4\",\"1\",\"0\",\"0\",\"0\",\"0\",\"1\",\"1\"\n"
     end
   end
 end