RubyGems - twitter_ebooks - Versions diffs - 2.1.2 → 2.1.3 - Mend

twitter_ebooks 2.1.2 → 2.1.3

Files changed (30) hide show

data/.gitignore +0 -0
data/Gemfile +0 -0
data/Gemfile.lock +0 -0
data/LICENSE +0 -0
data/README.md +1 -1
data/Rakefile +0 -0
data/bin/ebooks +28 -1
data/data/adjectives.txt +0 -0
data/data/nouns.txt +0 -0
data/data/stopwords.txt +0 -0
data/lib/twitter_ebooks/archive.rb +93 -0
data/lib/twitter_ebooks/bot.rb +0 -0
data/lib/twitter_ebooks/markov.rb +0 -0
data/lib/twitter_ebooks/model.rb +0 -0
data/lib/twitter_ebooks/nlp.rb +0 -0
data/lib/twitter_ebooks/suffix.rb +0 -0
data/lib/twitter_ebooks/version.rb +1 -1
data/lib/twitter_ebooks.rb +1 -1
data/script/process_anc_data.rb +0 -0
data/skeleton/.gitignore +0 -0
data/skeleton/Procfile +0 -0
data/skeleton/bots.rb +0 -0
data/skeleton/run.rb +0 -0
data/test/corpus/0xabad1dea.tweets +0 -0
data/test/keywords.rb +0 -0
data/test/tokenize.rb +0 -0
data/twitter_ebooks.gemspec +0 -0
metadata +3 -4
data/NOTES.md +0 -4
data/lib/twitter_ebooks/archiver.rb +0 -82

data/.gitignore CHANGED Viewed

File without changes

data/Gemfile CHANGED Viewed

File without changes

data/Gemfile.lock CHANGED Viewed

File without changes

data/LICENSE CHANGED Viewed

File without changes

data/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# twitter\_ebooks 2.1.2
+# twitter\_ebooks 2.1.3
 Complete rewrite of twitter\_ebooks. Allows context-sensitive responsive bots via the Twitter streaming API, along with higher-quality ngram modeling. Still needs a bit of cleaning and documenting.

data/Rakefile CHANGED Viewed

File without changes

data/bin/ebooks CHANGED Viewed

@@ -60,7 +60,7 @@ module Ebooks
   end
   def self.archive(username, outpath)
-    Archiver.new(username, outpath).fetch_tweets
+    Archive.new(username, outpath).sync
   end
   def self.tweet(modelpath, username)
@@ -73,6 +73,31 @@ module Ebooks
     bot.tweet(statement)
   end
+  def self.jsonify(old_path, new_path)
+    name = File.basename(old_path).split('.')[0]
+    new_path ||= name + ".json"
+    tweets = []
+    id = nil
+    File.read(old_path).split("\n").each do |l|
+      if l.start_with?('# ')
+        id = l.split('# ')[-1]
+      else
+        tweet = { text: l }
+        if id
+          tweet[:id] = id
+          id = nil
+        end
+        tweets << tweet
+      end
+    end
+    File.open(new_path, 'w') do |f|
+      log "Writing #{tweets.length} tweets to #{new_path}"
+      f.write(JSON.pretty_generate(tweets))
+    end
+  end
   def self.command(args)
     usage = """Usage:
      ebooks new <reponame>
@@ -81,6 +106,7 @@ module Ebooks
      ebooks score <model_path> <input>
      ebooks archive <@user> <outpath>
      ebooks tweet <model_path> <@bot>
+     ebooks jsonify <old_corpus_path> [new_corpus_path]
 """
     if args.length == 0
@@ -95,6 +121,7 @@ module Ebooks
     when "score" then score(args[1], args[2..-1].join(' '))
     when "archive" then archive(args[1], args[2])
     when "tweet" then tweet(args[1], args[2])
+    when "jsonify" then jsonify(args[1], args[2])
     end
   end
 end

data/data/adjectives.txt CHANGED Viewed

File without changes

data/data/nouns.txt CHANGED Viewed

File without changes

data/data/stopwords.txt CHANGED Viewed

File without changes

data/lib/twitter_ebooks/archive.rb ADDED Viewed

@@ -0,0 +1,93 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+require 'twitter'
+require 'json'
+CONFIG_PATH = "/home/#{ENV['USER']}/.ebooksrc"
+module Ebooks
+  class Archive
+    attr_reader :tweets
+    def make_client
+      if File.exists?(CONFIG_PATH)
+        @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
+      else
+        @config = {}
+        puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
+        print "Consumer key: "
+        @config[:consumer_key] = STDIN.gets.chomp
+        print "Consumer secret: "
+        @config[:consumer_secret] = STDIN.gets.chomp
+        print "Oauth token: "
+        @config[:oauth_token] = STDIN.gets.chomp
+        print "Oauth secret: "
+        @config[:oauth_token_secret] = STDIN.gets.chomp
+        File.open(CONFIG_PATH, 'w') do |f|
+          f.write(JSON.pretty_generate(@config))
+        end
+      end
+      Twitter.configure do |config|
+        config.consumer_key = @config[:consumer_key]
+        config.consumer_secret = @config[:consumer_secret]
+        config.oauth_token = @config[:oauth_token]
+        config.oauth_token_secret = @config[:oauth_token_secret]
+      end
+      Twitter::Client.new
+    end
+    def initialize(username, path, client=nil)
+      @username = username
+      @path = path || "#{username}.json"
+      @client = client || make_client
+      if File.exists?(@path)
+        @tweets = JSON.parse(File.read(@path), symbolize_names: true)
+        log "Currently #{@tweets.length} tweets for #{@username}"
+      else
+        @tweets.nil?
+        log "New archive for @#{username} at #{@path}"
+      end
+    end
+    def sync
+      retries = 0
+      tweets = []
+      max_id = nil
+      opts = {
+        count: 200,
+        #include_rts: false,
+        trim_user: true
+      }
+      opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
+      loop do
+        opts[:max_id] = max_id unless max_id.nil?
+        new = @client.user_timeline(@username, opts)
+        break if new.length <= 1
+        tweets += new
+        puts "Received #{tweets.length} new tweets"
+        max_id = new.last.id
+      end
+      if tweets.length == 0
+        log "No new tweets"
+      else
+        @tweets ||= []
+        @tweets = tweets.map(&:attrs).each { |tw|
+          tw.delete(:entities)
+        } + @tweets
+        File.open(@path, 'w') do |f|
+          f.write(JSON.pretty_generate(@tweets))
+        end
+      end
+    end
+  end
+end

data/lib/twitter_ebooks/bot.rb CHANGED Viewed

File without changes

data/lib/twitter_ebooks/markov.rb CHANGED Viewed

File without changes

data/lib/twitter_ebooks/model.rb CHANGED Viewed

File without changes

data/lib/twitter_ebooks/nlp.rb CHANGED Viewed

File without changes

data/lib/twitter_ebooks/suffix.rb CHANGED Viewed

File without changes

data/lib/twitter_ebooks/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Ebooks
-  VERSION = "2.1.2"
+  VERSION = "2.1.3"
 end

data/lib/twitter_ebooks.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module Ebooks
 end
 require 'twitter_ebooks/nlp'
-require 'twitter_ebooks/archiver'
+require 'twitter_ebooks/archive'
 require 'twitter_ebooks/markov'
 require 'twitter_ebooks/suffix'
 require 'twitter_ebooks/model'

data/script/process_anc_data.rb CHANGED Viewed

File without changes

data/skeleton/.gitignore CHANGED Viewed

File without changes

data/skeleton/Procfile CHANGED Viewed

File without changes

data/skeleton/bots.rb CHANGED Viewed

File without changes

data/skeleton/run.rb CHANGED Viewed

File without changes

data/test/corpus/0xabad1dea.tweets CHANGED Viewed

File without changes

data/test/keywords.rb CHANGED Viewed

File without changes

data/test/tokenize.rb CHANGED Viewed

File without changes

data/twitter_ebooks.gemspec CHANGED Viewed

File without changes

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: twitter_ebooks
 version: !ruby/object:Gem::Version
-  version: 2.1.2
+  version: 2.1.3
   prerelease:
 platform: ruby
 authors:
@@ -9,7 +9,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-11-21 00:00:00.000000000 Z
+date: 2013-11-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: minitest
@@ -167,7 +167,6 @@ files:
 - Gemfile
 - Gemfile.lock
 - LICENSE
-- NOTES.md
 - README.md
 - Rakefile
 - bin/ebooks
@@ -175,7 +174,7 @@ files:
 - data/nouns.txt
 - data/stopwords.txt
 - lib/twitter_ebooks.rb
-- lib/twitter_ebooks/archiver.rb
+- lib/twitter_ebooks/archive.rb
 - lib/twitter_ebooks/bot.rb
 - lib/twitter_ebooks/markov.rb
 - lib/twitter_ebooks/model.rb

data/NOTES.md DELETED Viewed

@@ -1,4 +0,0 @@
-- Files in text/ are preprocessed by `rake consume` and serialized
-- e.g. text/foo.tweets becomes consumed/foo.corpus
-- `rake consume` looks at hashes to know which it needs to update
-- Preprocessed corpus files are loaded at runtime by Corpus.load('foo')

data/lib/twitter_ebooks/archiver.rb DELETED Viewed

@@ -1,82 +0,0 @@
-#!/usr/bin/env ruby
-# encoding: utf-8
-require 'twitter'
-module Ebooks
-  class Archiver
-    def initialize(username, outpath)
-      @username = username
-      @outpath = outpath
-      @client = Twitter::Client.new
-    end
-    # Read exiting corpus into memory.
-    # Return list of tweet lines and the last tweet id.
-    def read_corpus
-      lines = []
-      since_id = nil
-      if File.exists?(@outpath)
-        lines = File.read(@outpath).split("\n")
-        if lines[0].start_with?('#')
-          since_id = lines[0].split('# ').last
-        end
-      end
-      [lines, since_id]
-    end
-    # Retrieve all available tweets for a given user since the last tweet id
-    def tweets_since(since_id)
-      page = 1
-      retries = 0
-      tweets = []
-      max_id = nil
-      opts = {
-        count: 200,
-        include_rts: false,
-        trim_user: true
-      }
-      opts[:since_id] = since_id unless since_id.nil?
-      loop do
-        opts[:max_id] = max_id unless max_id.nil?
-        new = @client.user_timeline(@username, opts)
-        break if new.length <= 1
-        puts "Received #{new.length} tweets"
-        tweets += new
-        max_id = new.last.id
-        break
-      end
-      tweets
-    end
-    def fetch_tweets
-      lines, since_id = read_corpus
-      if since_id.nil?
-        puts "Retrieving tweets from @#{@username}"
-      else
-        puts "Retrieving tweets from @#{@username} since #{since_id}"
-      end
-      tweets = tweets_since(since_id)
-      if tweets.length == 0
-        puts "No new tweets"
-        return
-      end
-      new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
-      new_since_id = tweets[0].id.to_s
-      lines = ["# " + new_since_id] + new_lines + lines
-      corpus = File.open(@outpath, 'w')
-      corpus.write(lines.join("\n"))
-      corpus.close
-    end
-  end
-end