rbitter 0.1.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +7 -0
  2. data/.gitignore +21 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +15 -0
  5. data/Gemfile +12 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +72 -0
  8. data/Rakefile +8 -0
  9. data/XMLRPC.md +19 -0
  10. data/bin/rbitter +20 -0
  11. data/lib/rbitter.rb +86 -0
  12. data/lib/rbitter/arcserver.rb +130 -0
  13. data/lib/rbitter/console.rb +93 -0
  14. data/lib/rbitter/default/config_json.rb +37 -0
  15. data/lib/rbitter/dlthread.rb +63 -0
  16. data/lib/rbitter/env.rb +62 -0
  17. data/lib/rbitter/libtwitter_connection_override.rb +46 -0
  18. data/lib/rbitter/records.rb +121 -0
  19. data/lib/rbitter/records_migrate/.keep +0 -0
  20. data/lib/rbitter/records_migrate/20150327_add_index.rb +12 -0
  21. data/lib/rbitter/records_migrate/20150504_add_replyto_column.rb +12 -0
  22. data/lib/rbitter/streaming.rb +105 -0
  23. data/lib/rbitter/version.rb +20 -0
  24. data/lib/rbitter/xmlrpc.rb +4 -0
  25. data/lib/rbitter/xmlrpcd/base.rb +25 -0
  26. data/lib/rbitter/xmlrpcd/rpchandles.rb +12 -0
  27. data/lib/rbitter/xmlrpcd/xmlrpc_auth_server.rb +83 -0
  28. data/lib/rbitter/xmlrpcd/xmlrpcd.rb +69 -0
  29. data/rbitter.gemspec +46 -0
  30. data/spec/config/.keep +0 -0
  31. data/spec/config/default.json +33 -0
  32. data/spec/rbitter/arcserver_spec.rb +30 -0
  33. data/spec/rbitter/console_spec.rb +9 -0
  34. data/spec/rbitter/default/config_json_spec.rb +3 -0
  35. data/spec/rbitter/dlthread_spec.rb +8 -0
  36. data/spec/rbitter/env_spec.rb +62 -0
  37. data/spec/rbitter/libtwitter_connection_override_spec.rb +8 -0
  38. data/spec/rbitter/records_spec.rb +13 -0
  39. data/spec/rbitter/streaming_spec.rb +9 -0
  40. data/spec/rbitter/version_spec.rb +8 -0
  41. data/spec/rbitter/xmlrpc_spec.rb +8 -0
  42. data/spec/rbitter/xmlrpcd/base_spec.rb +29 -0
  43. data/spec/rbitter/xmlrpcd/rpchandles_spec.rb +10 -0
  44. data/spec/rbitter/xmlrpcd/xmlrpc_auth_server_spec.rb +8 -0
  45. data/spec/rbitter/xmlrpcd/xmlrpcd_spec.rb +9 -0
  46. data/spec/rbitter_spec.rb +42 -0
  47. data/spec/sample_data/.keep +0 -0
  48. data/spec/spec_helper.rb +39 -0
  49. metadata +265 -0
@@ -0,0 +1,37 @@
1
module Rbitter
  # Default configuration document, kept as JSON text.
  #
  # Sections: Twitter API credentials, the ActiveRecord backend selector
  # ("sqlite3" or "mysql2") with one settings object per backend, media
  # downloader options, and XML-RPC server options.
  #
  # NOTE(review): the template binds XML-RPC to 0.0.0.0 with the placeholder
  # credentials "username"/"password"; these should be changed before the
  # service is exposed.
  #
  # The string is frozen so the shared template cannot be mutated in place.
  DEFAULT_CONFIG_JSON = <<-ENDOFJSON.freeze
{
  "twitter": {
    "consumer_key": "",
    "consumer_secret": "",
    "access_token": "",
    "access_token_secret": ""
  },
  "activerecord": "sqlite3",
  "sqlite3": {
    "dbfile": "rbitter.sqlite"
  },
  "mysql2": {
    "host": "localhost",
    "port": 3306,
    "dbname": "archive",
    "username": "",
    "password": ""
  },
  "media_downloader": {
    "large_image": true,
    "download_dir": "imgs/"
  },
  "xmlrpc": {
    "enable": true,
    "bind_host": "0.0.0.0",
    "bind_port": 1400,
    "auth": {
      "username": "username",
      "password": "password"
    },
    "handles": ["/path/to/handles"]
  }
}
  ENDOFJSON
end
@@ -0,0 +1,63 @@
1
+ # encoding: utf-8
2
+
3
+ require "net/http"
4
+ require "openssl"
5
+
6
module Rbitter
  # Downloads batches of URLs into a local directory, one background thread
  # per batch. Spawned threads are kept in a pool so they can be reaped with
  # #job_cleanup.
  class DLThread
    # dlfolder   - destination directory; when it is not an existing
    #              directory a warning is printed and "./" is used instead.
    # large_flag - when truthy, ":large" is appended to every URL before it
    #              is requested; nil is treated as false.
    def initialize(dlfolder, large_flag)
      @dest = dlfolder
      unless File.directory?(dlfolder)
        warn "[dlthread] Given download location is not available for downloading."
        warn "[dlthread] Fallback to current directory."
        @dest = "./"
      end

      @large_image = large_flag.nil? ? false : large_flag
      @pool = []
    end

    # Starts a background thread that downloads every URL in url_array, in
    # order. A failure on one URL is logged to stderr and does not stop the
    # remaining URLs of the batch from being fetched.
    #
    # Returns the pool Array, which now includes the new thread.
    def <<(url_array)
      download_task = Thread.new do
        url_array.each do |url|
          begin
            download(url)
          rescue StandardError => e
            warn "[dlthread] Failed to download #{url}: #{e.class}: #{e.message}"
          end
        end
      end

      @pool.push download_task
    end

    # Empties the pool. Threads that are still running are terminated and
    # joined; finished threads are simply dropped.
    def job_cleanup
      until @pool.empty?
        dlthrd = @pool.shift
        next unless dlthrd.alive?

        puts "[dlthread] Thread forceful cleaning up [remains: #{@pool.length}]"
        dlthrd.terminate
        dlthrd.join
      end
    end

    private

    # Fetches one URL and, on a 200 OK response, streams the body into
    # @dest under the basename of the original (un-suffixed) URL. Any other
    # response status is ignored.
    def download(url)
      uri = URI.parse(@large_image ? "#{url}:large" : url)
      ssl = uri.scheme.downcase == 'https'

      Net::HTTP.start(uri.host, uri.port, :use_ssl => ssl) do |http|
        http.request(Net::HTTP::Get.new(uri.request_uri)) do |res|
          next unless res.is_a?(Net::HTTPOK)

          fname = File.basename(url)
          puts "[fetch] remote: #{uri.path} => local: #{fname}"
          # File.open rather than Kernel#open: the target is always a plain file.
          File.open(File.join(@dest, fname), "wb") do |file|
            res.read_body { |chunk| file.write(chunk) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+
3
+ require "json"
4
+
5
module Rbitter
  # Parsed configuration shared by the whole module.
  @@env = Hash.new

  # Raised when no configuration could be loaded.
  class ConfigFileError < StandardError; end

  # Looks up a top-level configuration entry by key.
  def self.[](k)
    @@env[k]
  end

  module_function

  # Returns the currently loaded configuration object.
  def env
    @@env
  end

  # Empties the currently loaded configuration in place.
  def env_reset
    @@env.clear
  end

  # Reports whether the loaded configuration is acceptable.
  def env_validate?
    # TODO: Add validator
    true
  end

  # Loads the JSON configuration.
  #
  # json_path - explicit path of the configuration file. When nil, the
  #             default locations are probed in order:
  #               1. (current_dir)/config.json
  #               2. (current_dir)/.rbitter/config.json
  #
  # Returns the parsed configuration when json_path is given, nil otherwise.
  # Raises ConfigFileError when the explicit file cannot be read, parsed or
  # validated, or when no default location yields a configuration.
  def config_initialize json_path=nil
    env_reset

    unless json_path.nil?
      begin
        # File.read instead of Kernel#open: a path starting with "|" must be
        # treated as a file name, never run as a command.
        @@env = JSON.parse(File.read(json_path))

        return @@env if env_validate?
        fail StandardError, "Invalid configuration"
      rescue => e
        fail ConfigFileError, "Load Failure (#{json_path}): #{e.to_s}"
      end
    end

    locations = ["config.json", ".rbitter/config.json"].map { |base| File.join(Dir.pwd, base) }

    locations.each do |location|
      next unless File.file?(location)

      @@env = JSON.parse(File.read(location))
      break if env_validate?
    end

    if @@env.empty?
      fail ConfigFileError, "Can not load any configuration in [#{locations.join(', ')}]"
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
+
3
+ require 'http/parser'
4
+ require 'openssl'
5
+ require 'resolv'
6
+
7
module Twitter
  module Streaming
    # Replacement streaming connection that reads with non-blocking I/O and
    # treats a long silence from the server as a stalled connection.
    class Connection
      # Marker constant set by this override.
      MODIFIED = true

      # Seconds to wait for socket readiness before the stream is declared
      # stalled. The reason for 90 seconds is documented on:
      # https://dev.twitter.com/streaming/overview/connecting
      STALL_TIMEOUT = 90

      attr_reader :tcp_socket_class, :ssl_socket_class

      # options - :tcp_socket_class (default TCPSocket) and
      #           :ssl_socket_class (default OpenSSL::SSL::SSLSocket).
      def initialize(options = {})
        @tcp_socket_class = options.fetch(:tcp_socket_class) { TCPSocket }
        @ssl_socket_class = options.fetch(:ssl_socket_class) { OpenSSL::SSL::SSLSocket }
      end

      # Opens an SSL connection to the request's host, sends the request and
      # then feeds every chunk received into response until an error ends
      # the stream.
      #
      # Raises Twitter::Error::ServerError when the socket stays idle for
      # STALL_TIMEOUT seconds; other read errors (e.g. EOFError) propagate.
      # Both sockets are closed on every exit path.
      def stream(request, response)
        client_context = OpenSSL::SSL::SSLContext.new
        client = @tcp_socket_class.new(Resolv.getaddress(request.uri.host), request.uri.port)
        ssl_client = @ssl_socket_class.new(client, client_context)
        ssl_client.connect
        request.stream(ssl_client)

        loop do
          begin
            response << ssl_client.read_nonblock(1024)
          rescue IO::WaitReadable
            # No data yet: wait, then let the loop read again.
            wait_or_stall([ssl_client], nil)
          rescue IO::WaitWritable
            # A non-blocking SSL read may need the socket to become writable.
            wait_or_stall(nil, [ssl_client])
          end
        end
      ensure
        # Release both sockets however the stream ended; closing the SSL
        # layer is not relied on to close the TCP socket underneath it.
        [ssl_client, client].compact.each do |socket|
          begin
            socket.close
          rescue StandardError
            # Already closed or broken; nothing left to release.
          end
        end
      end

      private

      # Blocks until one of the given sockets is ready, raising when nothing
      # becomes ready within STALL_TIMEOUT seconds.
      def wait_or_stall(read_ios, write_ios)
        return unless IO.select(read_ios, write_ios, nil, STALL_TIMEOUT).nil?

        raise Twitter::Error::ServerError.new("Connection stalled")
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # encoding: utf-8
2
+
3
+ require "active_record"
4
+ require "date"
5
+
6
module Rbitter
  # ActiveRecord model with no behavior of its own. It sets no explicit
  # table name; presumably it maps to the `records` table (verify against
  # the schema setup).
  class Record < ActiveRecord::Base; end
end
10
+
11
# Database helpers for the `records` table: connecting, creating and
# migrating the schema, date formatting and CSV export.
module ARSupport
  # Schema version used for table creation and as the migration target.
  SCHEME_VERSION = 20150504

  # Column name => column type of the `records` table.
  SCHEME = {
    :marker => :integer, # 0 normal, 1 begin 2 halt
    :marker_msg => :string,
    :userid => :integer,
    :username => :string,
    :tweetid => :integer,
    :replyto => :integer,
    :tweet => :text, # with url unpacked
    :date => :datetime,
    :rt_count => :integer,
    :fav_count => :integer
  }

  module_function

  # Returns whether the `records` table exists on the current connection.
  def prepared?
    ActiveRecord::Base.connection.table_exists?(:records)
  end

  # Establishes the ActiveRecord connection selected by the 'activerecord'
  # configuration value ('sqlite3' or 'mysql2'). On JRuby the matching JDBC
  # driver is loaded and the JDBC adapter is used.
  #
  # Raises RuntimeError for any other 'activerecord' value.
  def connect_database
    on_java = RUBY_PLATFORM == 'java'

    case Rbitter['activerecord']
    when 'sqlite3'
      warn "Warning: If you enable XMLRPC access, using sqlite is not recommended."
      warn "Warning: Random crash can happen because of concurrency."

      if on_java
        require "jdbc/sqlite3"
        Jdbc::SQLite3.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcsqlite3' : 'sqlite3'),
        database: Rbitter['sqlite3']['dbfile'],
        timeout: 10000) # Long timeout for slow computer
    when 'mysql2'
      if on_java
        # Required here like the sqlite3 driver; Jdbc::MySQL is otherwise
        # only defined if something else happened to load it first.
        require "jdbc/mysql"
        Jdbc::MySQL.load_driver
      end

      ActiveRecord::Base.establish_connection(
        adapter: (on_java ? 'jdbcmysql' : 'mysql2'),
        host: Rbitter['mysql2']['host'],
        port: Rbitter['mysql2']['port'],
        database: Rbitter['mysql2']['dbname'],
        username: Rbitter['mysql2']['username'],
        password: Rbitter['mysql2']['password'],
        encoding: "utf8mb4",
        collation: "utf8mb4_unicode_ci")
    else
      # RuntimeError, not the non-existent RuntimeException, so the intended
      # message reaches the caller instead of a NameError.
      raise RuntimeError, "Unknown configuration value. 'activerecord' value should be sqlite3 or mysql2."
    end
  end

  # Runs the migrations in records_migrate/ up to SCHEME_VERSION when the
  # database reports an older schema version.
  def update_database_scheme
    current_version = ActiveRecord::Migrator.current_version
    if current_version < SCHEME_VERSION
      warn "[records] Your ActiveRecord scheme is outdated."
      warn "[records] Migrate... #{current_version} => #{SCHEME_VERSION}"
      ActiveRecord::Migrator.migrate(File.expand_path("../records_migrate", __FILE__), SCHEME_VERSION)
    end
  end

  # Creates the `records` table from SCHEME and indexes it on tweetid.
  #
  # option_string - passed as the table's :options.
  def prepare option_string=""
    ActiveRecord::Schema.define(version: SCHEME_VERSION) {
      # MySQL specific option_string:
      # utf8mb4 -> supporting UTF-8 4-byte characters (i.e. Emoji)
      create_table(:records, { :options => option_string }) do |t|
        SCHEME.each_key { |column|
          case SCHEME[column]
          when :string
            t.string column
          when :integer
            # 8-byte integers: ids are declared with :limit => 8.
            t.integer column, :limit => 8
          when :datetime
            t.datetime column
          when :text
            t.text column
          else
            puts "Unexpected column type '#{SCHEME[column]}' of #{column}"
          end
        }
      end

      add_index :records, :tweetid
    }
  end

  # Formats obj as "YYYY-MM-DD HH:MM:SS".
  #
  # obj - a String (parsed with DateTime.parse), a DateTime or a Time.
  #
  # Raises ArgumentError for any other type.
  def any_to_datestring(obj)
    case obj
    when String
      # try to parse it
      DateTime.parse(obj).strftime("%Y-%m-%d %H:%M:%S")
    when DateTime, Time
      obj.strftime("%Y-%m-%d %H:%M:%S")
    else
      raise ArgumentError, "Can't automatically extract DateTime info"
    end
  end

  # Writes every record to csvfile as CSV: a header row with the SCHEME
  # column names, then one row per record in the same column order.
  #
  # Fields go through the CSV library so that commas, quotes and line
  # breaks inside a value are quoted rather than corrupting the row.
  def export_to_csv(csvfile)
    require "csv"

    columns = SCHEME.keys
    CSV.open(csvfile, 'w') do |csv|
      csv << columns.map(&:to_s)
      Rbitter::Record.find_each do |t|
        csv << columns.map { |column| t.public_send(column) }
      end
    end
  end
end
File without changes
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+
4
# Migration that adds an index on records.tweetid.
class AddIndex < ActiveRecord::Migration
  # Delegates to #up so both entry points perform the same step.
  def change
    up
  end

  # Adds the tweetid index to the records table.
  def up
    add_index(:records, :tweetid)
  end
end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+
4
# Migration that adds the replyto column to the records table.
class AddReplytoColumn < ActiveRecord::Migration
  # Delegates to #up so both entry points perform the same step.
  def change
    up
  end

  # Adds replyto as an integer column declared with :limit => 8.
  def up
    add_column(:records, :replyto, :integer, :limit => 8)
  end
end
@@ -0,0 +1,105 @@
1
+ # encoding: utf-8
2
+
3
+ require 'twitter'
4
+
5
module Rbitter
  # Stand-in stream client that yields one fixed sample record and needs no
  # network access or credentials.
  class DummyStreamClient
    # tokens is accepted for interface parity and ignored.
    def initialize(tokens); end

    # Yields each canned record to the given block.
    def run(&operation_block)
      internal(&operation_block)
    end

    private

    def internal(&operation_block)
      sample = {
        "tweetid" => 1,
        "userid" => 1,
        "replyto" => nil,
        "tweet" => "test",
        "rt_count" => 0,
        "fav_count" => 0,
        "screen_name" => "twitter",
        "date" => "2015-01-01 12:11:10",
        "media_urls" => ["https://pbs.twimg.com/media/CEPWFtgUgAEmbcV.png"],
        "web_urls" => ["https://www.google.com/"]
      }

      [sample].each do |record|
        yield record
      end
    end
  end

  # Stream client backed by Twitter::Streaming::Client. Every tweet received
  # on the user stream is converted to a plain Hash and yielded.
  class StreamClient
    # tokens - Hash with 'consumer_key', 'consumer_secret', 'access_token'
    #          and 'access_token_secret'.
    def initialize(tokens)
      @t = Twitter::Streaming::Client.new do |config|
        config.consumer_key = tokens['consumer_key']
        config.consumer_secret = tokens['consumer_secret']
        config.access_token = tokens['access_token']
        config.access_token_secret = tokens['access_token_secret']
      end
    end

    # Consumes the stream, yielding one Hash per tweet. When the stream ends
    # with EOFError it is restarted after a 3 second pause.
    def run(&operation_block)
      internal(&operation_block)
    rescue EOFError
      puts "Network unreachable. Retry in 3 seconds..."
      sleep 3
      retry
    end

    private

    # Yields a record for every Twitter::Tweet on the user stream; other
    # stream objects are skipped. A retweet is replaced by the tweet it
    # retweets.
    def internal(&operation_block)
      @t.user do |status|
        next unless status.is_a?(Twitter::Tweet)

        status = status.retweeted_tweet if status.retweet?
        yield record_for(status)
      end
    end

    # Builds the record Hash for one tweet. Line breaks are removed from
    # the text and short links are unpacked.
    def record_for(status)
      body = status.full_text.gsub(/(\r\n|\n)/, '')

      # unpack uris and media links
      pictures = []
      links = []

      if status.entities?
        if status.media?
          status.media.each do |item|
            pictures << item.media_uri_https.to_s
            body.gsub!(item.url.to_s, item.display_url.to_s)
          end
        end

        # The media URLs are appended (after a single space) before web
        # links are expanded, so the expansion below also sees them.
        body = "#{body} #{pictures.join(' ')}"

        if status.uris?
          status.uris.each do |link|
            expanded = link.expanded_url.to_s
            links << expanded
            body.gsub!(link.url.to_s, expanded)
          end
        end
      end

      {
        "tweetid" => status.id,
        "userid" => status.user.id,
        "replyto" => (status.in_reply_to_status_id if status.in_reply_to_status_id?),
        "tweet" => body,
        "rt_count" => status.retweet_count,
        "fav_count" => status.favorite_count,
        "screen_name" => status.user.screen_name,
        "date" => status.created_at,
        "media_urls" => pictures,
        "web_urls" => links
      }
    end
  end
end
+ end