twitter_ebooks 2.1.2 → 2.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +0 -0
- data/Gemfile +0 -0
- data/Gemfile.lock +0 -0
- data/LICENSE +0 -0
- data/README.md +1 -1
- data/Rakefile +0 -0
- data/bin/ebooks +28 -1
- data/data/adjectives.txt +0 -0
- data/data/nouns.txt +0 -0
- data/data/stopwords.txt +0 -0
- data/lib/twitter_ebooks/archive.rb +93 -0
- data/lib/twitter_ebooks/bot.rb +0 -0
- data/lib/twitter_ebooks/markov.rb +0 -0
- data/lib/twitter_ebooks/model.rb +0 -0
- data/lib/twitter_ebooks/nlp.rb +0 -0
- data/lib/twitter_ebooks/suffix.rb +0 -0
- data/lib/twitter_ebooks/version.rb +1 -1
- data/lib/twitter_ebooks.rb +1 -1
- data/script/process_anc_data.rb +0 -0
- data/skeleton/.gitignore +0 -0
- data/skeleton/Procfile +0 -0
- data/skeleton/bots.rb +0 -0
- data/skeleton/run.rb +0 -0
- data/test/corpus/0xabad1dea.tweets +0 -0
- data/test/keywords.rb +0 -0
- data/test/tokenize.rb +0 -0
- data/twitter_ebooks.gemspec +0 -0
- metadata +3 -4
- data/NOTES.md +0 -4
- data/lib/twitter_ebooks/archiver.rb +0 -82
    
        data/.gitignore
    CHANGED
    
    | 
            File without changes
         | 
    
        data/Gemfile
    CHANGED
    
    | 
            File without changes
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | 
            File without changes
         | 
    
        data/LICENSE
    CHANGED
    
    | 
            File without changes
         | 
    
        data/README.md
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    | 
            File without changes
         | 
    
        data/bin/ebooks
    CHANGED
    
    | @@ -60,7 +60,7 @@ module Ebooks | |
| 60 60 | 
             
              end
         | 
| 61 61 |  | 
| 62 62 | 
             
              def self.archive(username, outpath)
         | 
| 63 | 
            -
                Archiver.new(username, outpath).fetch_tweets
         | 
| 63 | 
            +
                Archive.new(username, outpath).sync
         | 
| 64 64 | 
             
              end
         | 
| 65 65 |  | 
| 66 66 | 
             
              def self.tweet(modelpath, username)
         | 
| @@ -73,6 +73,31 @@ module Ebooks | |
| 73 73 | 
             
                bot.tweet(statement)
         | 
| 74 74 | 
             
              end
         | 
| 75 75 |  | 
| 76 | 
            +
              def self.jsonify(old_path, new_path)
         | 
| 77 | 
            +
                name = File.basename(old_path).split('.')[0]
         | 
| 78 | 
            +
                new_path ||= name + ".json"
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                tweets = []
         | 
| 81 | 
            +
                id = nil
         | 
| 82 | 
            +
                File.read(old_path).split("\n").each do |l|
         | 
| 83 | 
            +
                  if l.start_with?('# ')
         | 
| 84 | 
            +
                    id = l.split('# ')[-1]
         | 
| 85 | 
            +
                  else
         | 
| 86 | 
            +
                    tweet = { text: l }
         | 
| 87 | 
            +
                    if id
         | 
| 88 | 
            +
                      tweet[:id] = id
         | 
| 89 | 
            +
                      id = nil
         | 
| 90 | 
            +
                    end
         | 
| 91 | 
            +
                    tweets << tweet
         | 
| 92 | 
            +
                  end
         | 
| 93 | 
            +
                end
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                File.open(new_path, 'w') do |f|
         | 
| 96 | 
            +
                  log "Writing #{tweets.length} tweets to #{new_path}"
         | 
| 97 | 
            +
                  f.write(JSON.pretty_generate(tweets))
         | 
| 98 | 
            +
                end
         | 
| 99 | 
            +
              end
         | 
| 100 | 
            +
             | 
| 76 101 | 
             
              def self.command(args)
         | 
| 77 102 | 
             
                usage = """Usage: 
         | 
| 78 103 | 
             
                 ebooks new <reponame>
         | 
| @@ -81,6 +106,7 @@ module Ebooks | |
| 81 106 | 
             
                 ebooks score <model_path> <input>
         | 
| 82 107 | 
             
                 ebooks archive <@user> <outpath>
         | 
| 83 108 | 
             
                 ebooks tweet <model_path> <@bot>
         | 
| 109 | 
            +
                 ebooks jsonify <old_corpus_path> [new_corpus_path]
         | 
| 84 110 | 
             
            """
         | 
| 85 111 |  | 
| 86 112 | 
             
                if args.length == 0
         | 
| @@ -95,6 +121,7 @@ module Ebooks | |
| 95 121 | 
             
                when "score" then score(args[1], args[2..-1].join(' '))
         | 
| 96 122 | 
             
                when "archive" then archive(args[1], args[2])
         | 
| 97 123 | 
             
                when "tweet" then tweet(args[1], args[2])
         | 
| 124 | 
            +
                when "jsonify" then jsonify(args[1], args[2])
         | 
| 98 125 | 
             
                end
         | 
| 99 126 | 
             
              end
         | 
| 100 127 | 
             
            end
         | 
    
        data/data/adjectives.txt
    CHANGED
    
    | 
            File without changes
         | 
    
        data/data/nouns.txt
    CHANGED
    
    | 
            File without changes
         | 
    
        data/data/stopwords.txt
    CHANGED
    
    | 
            File without changes
         | 
    
        data/lib/twitter_ebooks/archive.rb
    ADDED
    
    | @@ -0,0 +1,93 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            # encoding: utf-8
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'twitter'
         | 
| 5 | 
            +
            require 'json'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            CONFIG_PATH = "/home/#{ENV['USER']}/.ebooksrc"
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            module Ebooks
         | 
| 10 | 
            +
              class Archive
         | 
| 11 | 
            +
                attr_reader :tweets
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                def make_client
         | 
| 14 | 
            +
                  if File.exists?(CONFIG_PATH)
         | 
| 15 | 
            +
                    @config = JSON.parse(File.read(CONFIG_PATH), symbolize_names: true)
         | 
| 16 | 
            +
                  else
         | 
| 17 | 
            +
                    @config = {}
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                    puts "As Twitter no longer allows anonymous API access, you'll need to enter the auth details of any account to use for archiving. These will be stored in #{CONFIG_PATH} if you need to change them later."
         | 
| 20 | 
            +
                    print "Consumer key: "
         | 
| 21 | 
            +
                    @config[:consumer_key] = STDIN.gets.chomp
         | 
| 22 | 
            +
                    print "Consumer secret: "
         | 
| 23 | 
            +
                    @config[:consumer_secret] = STDIN.gets.chomp
         | 
| 24 | 
            +
                    print "Oauth token: "
         | 
| 25 | 
            +
                    @config[:oauth_token] = STDIN.gets.chomp
         | 
| 26 | 
            +
                    print "Oauth secret: "
         | 
| 27 | 
            +
                    @config[:oauth_token_secret] = STDIN.gets.chomp
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                    File.open(CONFIG_PATH, 'w') do |f|
         | 
| 30 | 
            +
                      f.write(JSON.pretty_generate(@config))
         | 
| 31 | 
            +
                    end
         | 
| 32 | 
            +
                  end
         | 
| 33 | 
            +
             | 
| 34 | 
            +
                  Twitter.configure do |config|
         | 
| 35 | 
            +
                    config.consumer_key = @config[:consumer_key]
         | 
| 36 | 
            +
                    config.consumer_secret = @config[:consumer_secret]
         | 
| 37 | 
            +
                    config.oauth_token = @config[:oauth_token]
         | 
| 38 | 
            +
                    config.oauth_token_secret = @config[:oauth_token_secret]
         | 
| 39 | 
            +
                  end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                  Twitter::Client.new
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                def initialize(username, path, client=nil)
         | 
| 45 | 
            +
                  @username = username
         | 
| 46 | 
            +
                  @path = path || "#{username}.json"
         | 
| 47 | 
            +
                  @client = client || make_client
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                  if File.exists?(@path)
         | 
| 50 | 
            +
                    @tweets = JSON.parse(File.read(@path), symbolize_names: true)
         | 
| 51 | 
            +
                    log "Currently #{@tweets.length} tweets for #{@username}"
         | 
| 52 | 
            +
                  else
         | 
| 53 | 
            +
                    @tweets.nil?
         | 
| 54 | 
            +
                    log "New archive for @#{username} at #{@path}"
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                def sync
         | 
| 59 | 
            +
                  retries = 0
         | 
| 60 | 
            +
                  tweets = []
         | 
| 61 | 
            +
                  max_id = nil
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                  opts = {
         | 
| 64 | 
            +
                    count: 200,
         | 
| 65 | 
            +
                    #include_rts: false,
         | 
| 66 | 
            +
                    trim_user: true
         | 
| 67 | 
            +
                  }
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                  opts[:since_id] = @tweets[0][:id] unless @tweets.nil?
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                  loop do
         | 
| 72 | 
            +
                    opts[:max_id] = max_id unless max_id.nil?
         | 
| 73 | 
            +
                    new = @client.user_timeline(@username, opts)
         | 
| 74 | 
            +
                    break if new.length <= 1
         | 
| 75 | 
            +
                    tweets += new
         | 
| 76 | 
            +
                    puts "Received #{tweets.length} new tweets"
         | 
| 77 | 
            +
                    max_id = new.last.id
         | 
| 78 | 
            +
                  end
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                  if tweets.length == 0
         | 
| 81 | 
            +
                    log "No new tweets"
         | 
| 82 | 
            +
                  else
         | 
| 83 | 
            +
                    @tweets ||= []
         | 
| 84 | 
            +
                    @tweets = tweets.map(&:attrs).each { |tw|
         | 
| 85 | 
            +
                      tw.delete(:entities)
         | 
| 86 | 
            +
                    } + @tweets
         | 
| 87 | 
            +
                    File.open(@path, 'w') do |f|
         | 
| 88 | 
            +
                      f.write(JSON.pretty_generate(@tweets))
         | 
| 89 | 
            +
                    end
         | 
| 90 | 
            +
                  end
         | 
| 91 | 
            +
                end
         | 
| 92 | 
            +
              end
         | 
| 93 | 
            +
            end
         | 
    
        data/lib/twitter_ebooks/bot.rb
    CHANGED
    
    | 
            File without changes
         | 
| 
            File without changes
         | 
    
        data/lib/twitter_ebooks/model.rb
    CHANGED
    
    | 
            File without changes
         | 
    
        data/lib/twitter_ebooks/nlp.rb
    CHANGED
    
    | 
            File without changes
         | 
| 
            File without changes
         | 
    
        data/lib/twitter_ebooks.rb
    CHANGED
    
    
    
        data/script/process_anc_data.rb
    CHANGED
    
    | 
            File without changes
         | 
    
        data/skeleton/.gitignore
    CHANGED
    
    | 
            File without changes
         | 
    
        data/skeleton/Procfile
    CHANGED
    
    | 
            File without changes
         | 
    
        data/skeleton/bots.rb
    CHANGED
    
    | 
            File without changes
         | 
    
        data/skeleton/run.rb
    CHANGED
    
    | 
            File without changes
         | 
| 
            File without changes
         | 
    
        data/test/keywords.rb
    CHANGED
    
    | 
            File without changes
         | 
    
        data/test/tokenize.rb
    CHANGED
    
    | 
            File without changes
         | 
    
        data/twitter_ebooks.gemspec
    CHANGED
    
    | 
            File without changes
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: twitter_ebooks
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 2.1.2
         | 
| 4 | 
            +
              version: 2.1.3
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2013-11- | 
| 12 | 
            +
            date: 2013-11-24 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: minitest
         | 
| @@ -167,7 +167,6 @@ files: | |
| 167 167 | 
             
            - Gemfile
         | 
| 168 168 | 
             
            - Gemfile.lock
         | 
| 169 169 | 
             
            - LICENSE
         | 
| 170 | 
            -
            - NOTES.md
         | 
| 171 170 | 
             
            - README.md
         | 
| 172 171 | 
             
            - Rakefile
         | 
| 173 172 | 
             
            - bin/ebooks
         | 
| @@ -175,7 +174,7 @@ files: | |
| 175 174 | 
             
            - data/nouns.txt
         | 
| 176 175 | 
             
            - data/stopwords.txt
         | 
| 177 176 | 
             
            - lib/twitter_ebooks.rb
         | 
| 178 | 
            -
            - lib/twitter_ebooks/archiver.rb
         | 
| 177 | 
            +
            - lib/twitter_ebooks/archive.rb
         | 
| 179 178 | 
             
            - lib/twitter_ebooks/bot.rb
         | 
| 180 179 | 
             
            - lib/twitter_ebooks/markov.rb
         | 
| 181 180 | 
             
            - lib/twitter_ebooks/model.rb
         | 
    
        data/NOTES.md
    DELETED
    
    
    
        data/lib/twitter_ebooks/archiver.rb
    DELETED
    
    | @@ -1,82 +0,0 @@ | |
| 1 | 
            -
            #!/usr/bin/env ruby
         | 
| 2 | 
            -
            # encoding: utf-8
         | 
| 3 | 
            -
             | 
| 4 | 
            -
            require 'twitter'
         | 
| 5 | 
            -
             | 
| 6 | 
            -
            module Ebooks
         | 
| 7 | 
            -
              class Archiver
         | 
| 8 | 
            -
                def initialize(username, outpath)
         | 
| 9 | 
            -
                  @username = username
         | 
| 10 | 
            -
                  @outpath = outpath
         | 
| 11 | 
            -
                  @client = Twitter::Client.new
         | 
| 12 | 
            -
                end
         | 
| 13 | 
            -
             | 
| 14 | 
            -
                # Read exiting corpus into memory.
         | 
| 15 | 
            -
                # Return list of tweet lines and the last tweet id.
         | 
| 16 | 
            -
                def read_corpus
         | 
| 17 | 
            -
                  lines = []
         | 
| 18 | 
            -
                  since_id = nil
         | 
| 19 | 
            -
             | 
| 20 | 
            -
                  if File.exists?(@outpath)
         | 
| 21 | 
            -
                    lines = File.read(@outpath).split("\n")
         | 
| 22 | 
            -
                    if lines[0].start_with?('#')
         | 
| 23 | 
            -
                      since_id = lines[0].split('# ').last
         | 
| 24 | 
            -
                    end
         | 
| 25 | 
            -
                  end
         | 
| 26 | 
            -
             | 
| 27 | 
            -
                  [lines, since_id]
         | 
| 28 | 
            -
                end
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                # Retrieve all available tweets for a given user since the last tweet id
         | 
| 31 | 
            -
                def tweets_since(since_id)
         | 
| 32 | 
            -
                  page = 1
         | 
| 33 | 
            -
                  retries = 0
         | 
| 34 | 
            -
                  tweets = []
         | 
| 35 | 
            -
                  max_id = nil
         | 
| 36 | 
            -
             | 
| 37 | 
            -
                  opts = {
         | 
| 38 | 
            -
                    count: 200,
         | 
| 39 | 
            -
                    include_rts: false,
         | 
| 40 | 
            -
                    trim_user: true
         | 
| 41 | 
            -
                  }
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                  opts[:since_id] = since_id unless since_id.nil?
         | 
| 44 | 
            -
             | 
| 45 | 
            -
                  loop do
         | 
| 46 | 
            -
                    opts[:max_id] = max_id unless max_id.nil?
         | 
| 47 | 
            -
                    new = @client.user_timeline(@username, opts)
         | 
| 48 | 
            -
                    break if new.length <= 1
         | 
| 49 | 
            -
                    puts "Received #{new.length} tweets"
         | 
| 50 | 
            -
                    tweets += new
         | 
| 51 | 
            -
                    max_id = new.last.id
         | 
| 52 | 
            -
                    break
         | 
| 53 | 
            -
                  end
         | 
| 54 | 
            -
             | 
| 55 | 
            -
                  tweets
         | 
| 56 | 
            -
                end
         | 
| 57 | 
            -
             | 
| 58 | 
            -
                def fetch_tweets
         | 
| 59 | 
            -
                  lines, since_id = read_corpus
         | 
| 60 | 
            -
             | 
| 61 | 
            -
                  if since_id.nil?
         | 
| 62 | 
            -
                    puts "Retrieving tweets from @#{@username}"
         | 
| 63 | 
            -
                  else
         | 
| 64 | 
            -
                    puts "Retrieving tweets from @#{@username} since #{since_id}"
         | 
| 65 | 
            -
                  end
         | 
| 66 | 
            -
             | 
| 67 | 
            -
                  tweets = tweets_since(since_id)
         | 
| 68 | 
            -
             | 
| 69 | 
            -
                  if tweets.length == 0
         | 
| 70 | 
            -
                    puts "No new tweets"
         | 
| 71 | 
            -
                    return
         | 
| 72 | 
            -
                  end
         | 
| 73 | 
            -
             | 
| 74 | 
            -
                  new_lines = tweets.map { |tweet| tweet.text.gsub("\n", " ") }
         | 
| 75 | 
            -
                  new_since_id = tweets[0].id.to_s
         | 
| 76 | 
            -
                  lines = ["# " + new_since_id] + new_lines + lines
         | 
| 77 | 
            -
                  corpus = File.open(@outpath, 'w')
         | 
| 78 | 
            -
                  corpus.write(lines.join("\n"))
         | 
| 79 | 
            -
                  corpus.close
         | 
| 80 | 
            -
                end
         | 
| 81 | 
            -
              end
         | 
| 82 | 
            -
            end
         |