rbitter 0.1.2-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +72 -0
  8. data/Rakefile +8 -0
  9. data/XMLRPC.md +19 -0
  10. data/bin/rbitter +20 -0
  11. data/lib/rbitter.rb +86 -0
  12. data/lib/rbitter/arcserver.rb +130 -0
  13. data/lib/rbitter/console.rb +93 -0
  14. data/lib/rbitter/default/config_json.rb +37 -0
  15. data/lib/rbitter/dlthread.rb +63 -0
  16. data/lib/rbitter/env.rb +62 -0
  17. data/lib/rbitter/libtwitter_connection_override.rb +46 -0
  18. data/lib/rbitter/records.rb +121 -0
  19. data/lib/rbitter/records_migrate/.keep +0 -0
  20. data/lib/rbitter/records_migrate/20150327_add_index.rb +12 -0
  21. data/lib/rbitter/records_migrate/20150504_add_replyto_column.rb +12 -0
  22. data/lib/rbitter/streaming.rb +105 -0
  23. data/lib/rbitter/version.rb +20 -0
  24. data/lib/rbitter/xmlrpc.rb +4 -0
  25. data/lib/rbitter/xmlrpcd/base.rb +25 -0
  26. data/lib/rbitter/xmlrpcd/rpchandles.rb +12 -0
  27. data/lib/rbitter/xmlrpcd/xmlrpc_auth_server.rb +83 -0
  28. data/lib/rbitter/xmlrpcd/xmlrpcd.rb +69 -0
  29. data/rbitter.gemspec +46 -0
  30. data/spec/config/.keep +0 -0
  31. data/spec/config/default.json +33 -0
  32. data/spec/rbitter/arcserver_spec.rb +30 -0
  33. data/spec/rbitter/console_spec.rb +9 -0
  34. data/spec/rbitter/default/config_json_spec.rb +3 -0
  35. data/spec/rbitter/dlthread_spec.rb +8 -0
  36. data/spec/rbitter/env_spec.rb +62 -0
  37. data/spec/rbitter/libtwitter_connection_override_spec.rb +8 -0
  38. data/spec/rbitter/records_spec.rb +13 -0
  39. data/spec/rbitter/streaming_spec.rb +9 -0
  40. data/spec/rbitter/version_spec.rb +8 -0
  41. data/spec/rbitter/xmlrpc_spec.rb +8 -0
  42. data/spec/rbitter/xmlrpcd/base_spec.rb +29 -0
  43. data/spec/rbitter/xmlrpcd/rpchandles_spec.rb +10 -0
  44. data/spec/rbitter/xmlrpcd/xmlrpc_auth_server_spec.rb +8 -0
  45. data/spec/rbitter/xmlrpcd/xmlrpcd_spec.rb +9 -0
  46. data/spec/rbitter_spec.rb +42 -0
  47. data/spec/sample_data/.keep +0 -0
  48. data/spec/spec_helper.rb +39 -0
  49. metadata +265 -0
@@ -0,0 +1,37 @@
1
module Rbitter
  # Template configuration, as a JSON document.
  #
  # Sections: Twitter API tokens, the ActiveRecord backend selector
  # ("activerecord", with one settings object per backend: "sqlite3" and
  # "mysql2"), the media downloader, and the XMLRPC server.
  #
  # NOTE(review): the template binds XMLRPC to 0.0.0.0 with the placeholder
  # credentials "username"/"password"; anyone deploying from this template
  # should change them.
  #
  # The string is frozen so that no caller can alter the shared template
  # in place.
  DEFAULT_CONFIG_JSON = <<-ENDOFJSON.freeze
{
  "twitter": {
    "consumer_key": "",
    "consumer_secret": "",
    "access_token": "",
    "access_token_secret": ""
  },
  "activerecord": "sqlite3",
  "sqlite3": {
    "dbfile": "rbitter.sqlite"
  },
  "mysql2": {
    "host": "localhost",
    "port": 3306,
    "dbname": "archive",
    "username": "",
    "password": ""
  },
  "media_downloader": {
    "large_image": true,
    "download_dir": "imgs/"
  },
  "xmlrpc": {
    "enable": true,
    "bind_host": "0.0.0.0",
    "bind_port": 1400,
    "auth": {
      "username": "username",
      "password": "password"
    },
    "handles": ["/path/to/handles"]
  }
}
  ENDOFJSON
end
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+
3
+ require "net/http"
4
+ require "openssl"
5
+
6
module Rbitter
  # Downloads remote files over HTTP(S) in background threads.
  #
  # Every call to #<< starts one worker thread that fetches the given URLs
  # one after another into the destination directory.
  class DLThread
    # dlfolder   - directory that receives the files. When it is not an
    #              existing directory a warning is printed and "./" is used.
    # large_flag - when truthy, ":large" is appended to every URL before it
    #              is requested. nil is treated as false.
    def initialize(dlfolder, large_flag)
      @dest = dlfolder
      unless File.directory?(dlfolder)
        warn "[dlthread] Given download location is not available for downloading."
        warn "[dlthread] Fallback to current directory."
        @dest = "./"
      end

      @large_image = large_flag.nil? ? false : large_flag
      @pool = []
    end

    # Queues a batch of downloads on a new worker thread.
    #
    # url_array - an array of URL strings (a single string is accepted too).
    #
    # A URL that cannot be fetched is reported with a warning and skipped, so
    # it no longer aborts the remaining URLs of the batch.
    #
    # Returns the internal pool of worker threads.
    def <<(url_array)
      # Drop workers that already finished; otherwise a long-running process
      # would keep one dead Thread object per batch forever.
      @pool.keep_if(&:alive?)

      download_task = Thread.new do
        Array(url_array).each do |url|
          begin
            fetch(url)
          rescue StandardError => e
            warn "[dlthread] Download failed (#{url}): #{e.message}"
          end
        end
      end

      @pool.push download_task
    end

    # Empties the pool. Workers that are still running are terminated and
    # joined; finished workers are simply discarded.
    def job_cleanup
      until @pool.empty?
        worker = @pool.shift
        next unless worker.alive?

        puts "[dlthread] Thread forceful cleaning up [remains: #{@pool.length}]"
        worker.terminate
        worker.join
      end
    end

    private

    # Fetches one URL and streams the body of a 200 response into a file
    # named after the basename of the original (un-suffixed) URL.
    def fetch(url)
      uri = URI.parse(@large_image ? url + ":large" : url)
      ssl = uri.scheme.downcase == 'https'

      Net::HTTP.start(uri.host, uri.port, :use_ssl => ssl) do |http|
        http.request(Net::HTTP::Get.new(uri.request_uri)) do |res|
          unless res.is_a?(Net::HTTPOK)
            warn "[dlthread] Skipped #{url}: HTTP #{res.code}"
            next
          end

          fname = File.basename(url)
          puts "[fetch] remote: #{uri.path} => local: #{fname}"
          # File.open (not Kernel#open) so the path is never treated as a command.
          File.open(File.join(@dest, fname), "wb") do |file|
            res.read_body { |chunk| file.write(chunk) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+
3
+ require "json"
4
+
5
module Rbitter
  # Process-wide configuration, populated by config_initialize.
  @@env = Hash.new

  # Raised when no usable configuration file could be loaded.
  class ConfigFileError < StandardError; end

  # Shortcut for Rbitter.env[k].
  def self.[](k)
    @@env[k]
  end

  module_function

  # Returns the current configuration hash.
  def env
    @@env
  end

  # Empties the current configuration in place.
  def env_reset
    @@env.clear
  end

  # Returns true when the loaded configuration is usable.
  #
  # Only the overall shape is checked: the document must be a JSON object,
  # because the rest of the module indexes it by key.
  # TODO: Add validator for the individual sections.
  def env_validate?
    @@env.is_a?(Hash)
  end

  # Loads the configuration.
  #
  # json_path - path of the JSON file to load. When nil, the default
  #             locations are tried in order:
  #             1. (current_dir)/config.json
  #             2. (current_dir)/.rbitter/config.json
  #
  # Returns the loaded configuration hash.
  # Raises ConfigFileError when json_path cannot be read, parsed or
  # validated, or when no default location yields a configuration.
  def config_initialize json_path=nil
    env_reset

    unless json_path.nil?
      begin
        # File.open, not Kernel#open: a path starting with "|" must never be
        # run as a command.
        @@env = JSON.parse(File.open(json_path, 'r') { |file| file.read })

        return @@env if env_validate?
        fail StandardError, "Invalid configuration"
      rescue => e
        # Never leave a rejected or half-loaded configuration behind.
        @@env = Hash.new
        fail ConfigFileError, "Load Failure (#{json_path}): #{e.to_s}"
      end
    end

    locations = ["config.json", ".rbitter/config.json"].map { |base|
      File.join(Dir.pwd, base)
    }

    locations.each do |location|
      next unless File.file?(location)

      @@env = JSON.parse(File.open(location, 'r') { |file| file.read })
      break if env_validate?

      # Rejected: forget it so it cannot be mistaken for a loaded config.
      @@env = Hash.new
    end

    if @@env.empty?
      fail ConfigFileError, "Can not load any configuration in [#{locations.join(', ')}]"
    end

    @@env
  end
end
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
+
3
+ require 'http/parser'
4
+ require 'openssl'
5
+ require 'resolv'
6
+
7
module Twitter
  module Streaming
    # Replacement for the twitter gem's streaming connection that reads with
    # a stall timeout instead of blocking forever on a dead connection.
    class Connection
      # Marker constant showing that this overriding definition is loaded.
      MODIFIED = true
      attr_reader :tcp_socket_class, :ssl_socket_class

      # options - :tcp_socket_class and :ssl_socket_class may be given to
      #           replace TCPSocket and OpenSSL::SSL::SSLSocket.
      def initialize(options = {})
        @tcp_socket_class = options.fetch(:tcp_socket_class) { TCPSocket }
        @ssl_socket_class = options.fetch(:ssl_socket_class) { OpenSSL::SSL::SSLSocket }
      end

      # Opens an SSL connection to request.uri, sends the request and appends
      # every chunk that arrives to response, until an error ends the stream.
      #
      # This method only returns by raising: EOFError (or another read error)
      # when the connection ends, Twitter::Error::ServerError when nothing
      # was readable for 90 seconds. Both sockets are closed on the way out,
      # so a caller that reconnects in a loop does not leak descriptors.
      #
      # NOTE(review): a bare SSLContext.new is used. Whether the peer
      # certificate is verified then depends on the OpenSSL defaults in use.
      # Confirm, and consider an explicitly verifying context.
      def stream(request, response)
        client_context = OpenSSL::SSL::SSLContext.new
        client = @tcp_socket_class.new(Resolv.getaddress(request.uri.host), request.uri.port)
        ssl_client = nil

        begin
          ssl_client = @ssl_socket_class.new(client, client_context)
          ssl_client.connect
          request.stream(ssl_client)

          loop do
            begin
              response << ssl_client.read_nonblock(1024)
            rescue IO::WaitReadable
              # The reason for setting 90 seconds as a timeout is documented on:
              # https://dev.twitter.com/streaming/overview/connecting
              readable, = IO.select([ssl_client], [], [], 90)
              # Socket became readable: read again.
              retry unless readable.nil?

              # Timed out; the sockets are closed by the ensure clause below.
              raise Twitter::Error::ServerError.new("Connection stalled")
            end
          end
        ensure
          close_quietly(ssl_client) unless ssl_client.nil?
          close_quietly(client)
        end
      end

      private

      # Closes a socket during cleanup without masking the error that ended
      # the stream.
      def close_quietly(socket)
        return if socket.respond_to?(:closed?) && socket.closed?

        socket.close
      rescue IOError
        # Already closed underneath us; nothing left to release.
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # encoding: utf-8
2
+
3
+ require "active_record"
4
+ require "date"
5
+
6
module Rbitter
  # ActiveRecord model for archived tweets. It adds no behaviour of its own;
  # everything comes from ActiveRecord::Base.
  class Record < ActiveRecord::Base; end
end
10
+
11
# Database helpers around the `records` table: connecting, creating and
# migrating the schema, and exporting the archive.
module ARSupport
  # Schema version that #prepare defines and #update_database_scheme
  # migrates up to.
  SCHEME_VERSION = 20150504

  # Column name => column type of the `records` table, in column order.
  SCHEME = {
    :marker => :integer, # 0 normal, 1 begin 2 halt
    :marker_msg => :string,
    :userid => :integer,
    :username => :string,
    :tweetid => :integer,
    :replyto => :integer,
    :tweet => :text, # with url unpacked
    :date => :datetime,
    :rt_count => :integer,
    :fav_count => :integer
  }.freeze

  module_function

  # Returns true when the `records` table exists on the current connection.
  def prepared?
    ActiveRecord::Base.connection.table_exists?(:records)
  end

  # Connects ActiveRecord to the backend named by Rbitter['activerecord']
  # ("sqlite3" or "mysql2"). On JRuby the matching JDBC driver is loaded
  # first and the JDBC adapter is used.
  #
  # Raises RuntimeError for any other backend name.
  def connect_database
    on_java = RUBY_PLATFORM == 'java'

    case Rbitter['activerecord']
    when 'sqlite3'
      warn "Warning: If you enable XMLRPC access, using sqlite is not recommended."
      warn "Warning: Random crash can happen because of concurrency."

      if on_java
        require "jdbc/sqlite3"
        Jdbc::SQLite3.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcsqlite3' : 'sqlite3'),
        database: Rbitter['sqlite3']['dbfile'],
        timeout: 10000) # Long timeout for slow computer
    when 'mysql2'
      if on_java
        # Load the driver library before using it, as the sqlite branch does.
        require "jdbc/mysql"
        Jdbc::MySQL.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcmysql' : 'mysql2'),
        host: Rbitter['mysql2']['host'],
        port: Rbitter['mysql2']['port'],
        database: Rbitter['mysql2']['dbname'],
        username: Rbitter['mysql2']['username'],
        password: Rbitter['mysql2']['password'],
        encoding: "utf8mb4",
        collation: "utf8mb4_unicode_ci")
    else
      # RuntimeError, not "RuntimeException": the latter is not a Ruby
      # constant and turned this branch into a NameError.
      raise RuntimeError, "Unknown configuration value. 'activerecord' value should be sqlite3 or mysql2."
    end
  end

  # Runs the bundled migrations when the database is older than
  # SCHEME_VERSION.
  def update_database_scheme
    current_version = ActiveRecord::Migrator.current_version
    if current_version < SCHEME_VERSION
      warn "[records] Your ActiveRecord scheme is outdated."
      warn "[records] Migrate... #{current_version} => #{SCHEME_VERSION}"
      ActiveRecord::Migrator.migrate(File.expand_path("../records_migrate", __FILE__), SCHEME_VERSION)
    end
  end

  # Creates the `records` table from SCHEME plus an index on tweetid.
  #
  # option_string - table options passed to create_table.
  #                 MySQL specific option_string:
  #                 utf8mb4 -> supporting UTF-8 4-byte characters (i.e. Emoji)
  def prepare option_string=""
    ActiveRecord::Schema.define(version: SCHEME_VERSION) {
      create_table(:records, { :options => option_string }) do |t|
        SCHEME.each { |column, type|
          case type
          when :string
            t.string column
          when :integer
            # 8-byte integers
            t.integer column, :limit => 8
          when :datetime
            t.datetime column
          when :text
            t.text column
          else
            puts "Unexpected column type '#{type}' of #{column}"
          end
        }
      end

      add_index :records, :tweetid
    }
  end

  # Formats obj as "YYYY-MM-DD HH:MM:SS".
  #
  # obj - a String (parsed with DateTime.parse), a DateTime or a Time.
  #
  # Raises ArgumentError for any other type, or for an unparsable string.
  def any_to_datestring(obj)
    format = "%Y-%m-%d %H:%M:%S"

    if obj.is_a?(String)
      # try to parse it
      DateTime.parse(obj).strftime(format)
    elsif obj.is_a?(DateTime) || obj.is_a?(Time)
      obj.strftime(format)
    else
      raise ArgumentError, "Can't automatically extract DateTime info"
    end
  end

  # Writes every record to csvfile: a header row naming the SCHEME columns,
  # then one row per record in the same column order.
  #
  # Fields go through the CSV library, so tweet text containing commas,
  # quotes or line breaks is quoted instead of corrupting the row.
  def export_to_csv(csvfile)
    require "csv" # loaded on demand, like the JDBC drivers

    columns = SCHEME.keys
    CSV.open(csvfile, 'w') { |csv|
      csv << columns.map(&:to_s)
      Rbitter::Record.find_each { |t|
        csv << columns.map { |column| t.public_send(column) }
      }
    }
  end
end
File without changes
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+
4
# Migration: adds an index on records.tweetid.
class AddIndex < ActiveRecord::Migration
  # Both entry points add the same index.
  def change
    add_index :records, :tweetid
  end

  def up
    change
  end
end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+
4
# Migration: adds the replyto column (integer, limit 8) to records.
class AddReplytoColumn < ActiveRecord::Migration
  # Both entry points add the same column.
  def change
    add_column :records, :replyto, :integer, :limit => 8
  end

  def up
    change
  end
end
@@ -0,0 +1,105 @@
1
+ # encoding: utf-8
2
+
3
+ require 'twitter'
4
+
5
module Rbitter
  # Stand-in for StreamClient that yields one canned tweet hash and never
  # touches the network.
  class DummyStreamClient
    # tokens is accepted for interface parity with StreamClient and ignored.
    def initialize(tokens); end

    # Yields each canned tweet hash to the given block.
    def run(&operation_block)
      internal(&operation_block)
    end

    private

    def internal(&operation_block)
      canned = {
        "tweetid" => 1,
        "userid" => 1,
        "replyto" => nil,
        "tweet" => "test",
        "rt_count" => 0,
        "fav_count" => 0,
        "screen_name" => "twitter",
        "date" => "2015-01-01 12:11:10",
        "media_urls" => ["https://pbs.twimg.com/media/CEPWFtgUgAEmbcV.png"],
        "web_urls" => ["https://www.google.com/"]
      }

      [canned].each do |tweet|
        yield tweet
      end
    end
  end

  # Reads the user stream and hands every tweet to the caller as a plain
  # hash (same keys as the DummyStreamClient sample).
  class StreamClient
    # tokens - hash with 'consumer_key', 'consumer_secret', 'access_token'
    #          and 'access_token_secret'.
    def initialize(tokens)
      @t = Twitter::Streaming::Client.new do |config|
        config.consumer_key = tokens['consumer_key']
        config.consumer_secret = tokens['consumer_secret']
        config.access_token = tokens['access_token']
        config.access_token_secret = tokens['access_token_secret']
      end
    end

    # Streams tweets to the given block. On EOFError it waits three seconds
    # and starts over, indefinitely.
    def run(&operation_block)
      internal(&operation_block)
    rescue EOFError
      puts "Network unreachable. Retry in 3 seconds..."
      sleep 3
      retry
    end

    private

    def internal(&operation_block)
      @t.user do |status|
        # Anything on the stream that is not a tweet is ignored.
        next unless status.is_a?(Twitter::Tweet)

        # A retweet is archived as the tweet it points to.
        status = status.retweeted_tweet if status.retweet?

        body = status.full_text.gsub(/(\r\n|\n)/, '')

        # unpack uris and media links
        media_urls = []
        web_urls = []

        if status.entities?
          if status.media?
            status.media.each do |media|
              media_urls << media.media_uri_https.to_s
              body.gsub!(media.url.to_s, media.display_url.to_s)
            end
          end

          # Always appended when entities are present, even with no media.
          body = "#{body} #{media_urls.join(' ')}"

          if status.uris?
            status.uris.each do |uri|
              web_urls << uri.expanded_url.to_s
              body.gsub!(uri.url.to_s, uri.expanded_url.to_s)
            end
          end
        end

        reply_target = status.in_reply_to_status_id? ? status.in_reply_to_status_id : nil

        yield(
          "tweetid" => status.id,
          "userid" => status.user.id,
          "replyto" => reply_target,
          "tweet" => body,
          "rt_count" => status.retweet_count,
          "fav_count" => status.favorite_count,
          "screen_name" => status.user.screen_name,
          "date" => status.created_at,
          "media_urls" => media_urls,
          "web_urls" => web_urls
        )
      end
    end
  end
end