mediawiki_robot 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over HTTPS so that downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in mediawiki_robot.gemspec
gemspec
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
@@ -0,0 +1,6 @@
1
+ require 'mediawiki_robot/enhanced_gateway'
2
+ require 'mediawiki_robot/robot'
3
+
4
+ module MediawikiRobot
5
+
6
+ end
@@ -0,0 +1,121 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'media_wiki'
4
+ require 'media_wiki/gateway'
5
+ require 'media_wiki/config'
6
+
7
module MediawikiRobot

  # MediaWiki::Gateway subclass that keeps retrying requests after network
  # failures and adds a few read-only query helpers (recent changes, category
  # members, revision lookup, existence check).
  class EnhancedGateway < MediaWiki::Gateway

    # Seconds to wait between attempts, so that a failing connection is not
    # retried in a tight loop.
    RETRY_DELAY_SECS = 1

    # Connectivity errors that every retrying method recovers from.
    NETWORK_ERRORS = [SocketError, Errno::ETIMEDOUT].freeze

    # Overrides the usual 'login' to provide retries upon timeout.
    #
    # Only network errors are retried. MediaWiki::Exception is deliberately
    # not rescued here, so it propagates to the caller.
    #
    # @param user [String] account name
    # @param pass [String] account password
    # @return [nil]
    def login(user, pass)
      retry_until_success('login', NETWORK_ERRORS) { super(user, pass) }
      nil
    end

    # Calls make_api_request, retrying for as long as it raises a network
    # error or a MediaWiki::Exception, or returns nil.
    #
    # @param form_data [Hash] API parameters
    # @return the (non-nil) result of make_api_request
    def make_api_request_with_retry(form_data)
      loop do
        res = retry_until_success('make_api_request_with_retry', retriable_api_errors) do
          make_api_request(form_data)
        end
        return res unless res.nil?

        # a nil response counts as a failed attempt
        sleep RETRY_DELAY_SECS
      end
    end

    # Calls get, retrying for as long as it raises a network error or a
    # MediaWiki::Exception.
    #
    # A nil result is returned as-is rather than retried: the accompanying
    # specs show that get answers nil for a page that does not exist, so
    # looping on nil would never terminate for such a title.
    #
    # @param title [String] page title
    # @return whatever get returns (nil when the page does not exist)
    def get_with_retry(title)
      retry_until_success('get_with_retry', retriable_api_errors) { get(title) }
    end

    # Queries the wiki's 'recentchanges' list.
    #
    # @param num_changes [Integer, nil] sent as 'rclimit' when given
    # @param end_time [Time, nil] sent as 'rcend' when given; formatted with
    #   strftime as-is (no time zone conversion is applied here)
    # @return [Array<Hash>] one hash per change with the keys :type, :title,
    #   :timestamp and :revision_id (all taken from the XML attributes)
    def recent_changes(num_changes = nil, end_time = nil)
      form_data =
        {'action' => 'query',
         'list'   => 'recentchanges'}
      form_data['rclimit'] = num_changes unless num_changes.nil?
      form_data['rcend']   = end_time.strftime("%Y%m%d%H%M%S") unless end_time.nil?

      res = make_api_request_with_retry(form_data)

      REXML::XPath.match(res, "//rc").map do |rc|
        { :type        => rc.attributes["type"],
          :title       => rc.attributes["title"],
          :timestamp   => rc.attributes["timestamp"],
          :revision_id => rc.attributes["revid"] }
      end
    end

    # Lists the titles of the members of a category.
    #
    # NOTE(review): a single request with cmlimit=5000 is issued and no
    # continuation is followed, so larger categories are presumably truncated
    # -- confirm against the wiki's API limits.
    #
    # @param category_title [String] e.g., 'Category:Articles_with_hCards'
    # @return [Array<String>] member page titles
    def get_all_pages_in_category(category_title)
      form_data =
        {'action'  => 'query',
         'list'    => 'categorymembers',
         'cmtitle' => category_title,
         'cmlimit' => '5000'}

      res = make_api_request_with_retry(form_data)

      REXML::XPath.match(res, "//cm").map { |cm| cm.attributes["title"] }
    end

    # Looks up the revision id reported for a page.
    #
    # @param title [String] page title
    # @return [String, nil] the first 'revid' in the response, or nil when
    #   the response contains no revision
    def get_page_revision(title)
      fetch_revision_ids(title)[0]
    end

    # Tells whether a page exists, judged by whether the wiki reports any
    # revision for it. (Fetching the page text with get_with_retry was slower.)
    #
    # @param page_title [String] page title
    # @return [Boolean]
    def exists(page_title)
      !fetch_revision_ids(page_title).empty?
    end

    private

    # Errors retried by the query helpers. Built lazily so that
    # MediaWiki::Exception is only resolved when a request is made.
    def retriable_api_errors
      NETWORK_ERRORS + [MediaWiki::Exception]
    end

    # Runs the block until it completes without raising one of +retriable+.
    # Each failure is reported on stdout, followed by a short pause. The
    # number of attempts is intentionally unbounded, so callers keep working
    # across outages of any length.
    #
    # @param label [String] method name used in the log message
    # @param retriable [Array<Class>] exception classes to retry on
    # @return the block's value
    def retry_until_success(label, retriable)
      yield
    rescue *retriable => e
      # report the listed class that matched, not the concrete subclass
      caught = retriable.find { |klass| e.is_a?(klass) }
      puts "MediaWiki::Gateway::#{label} -- caught #{caught}, retrying..."
      sleep RETRY_DELAY_SECS
      retry
    end

    # Requests prop=revisions for +title+ and collects every 'revid'.
    #
    # @return [Array] revision ids, empty when the page has no revisions
    def fetch_revision_ids(title)
      form_data =
        {'action' => 'query',
         'titles' => title,
         'prop'   => 'revisions'}

      res = make_api_request_with_retry(form_data)

      REXML::XPath.match(res, "//rev").map { |rev| rev.attributes["revid"] }
    end

  end

end
@@ -0,0 +1,97 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'thread'
4
+ require 'time'
5
+ require 'mediawiki_robot/enhanced_gateway'
6
+
7
module MediawikiRobot

  # Base class for robots that poll a wiki's recent changes on a background
  # thread and hand every new change to handle_single_change, which
  # subclasses are expected to override.
  class Robot

    # The EnhancedGateway used for all wiki requests.
    attr_accessor :mw

    API_SUFFIX = '/api.php'
    MAX_SIMULTANEOUS_THREADS = 10

    # Number of recent changes requested on every poll.
    RECENT_CHANGES_PER_POLL = 500

    # @param mw_opts [Hash] must provide :base_url and :normal_prefix, which
    #   are joined with API_SUFFIX to form the API URL
    def initialize(mw_opts)
      api_url = mw_opts[:base_url] + mw_opts[:normal_prefix] + API_SUFFIX
      @mw = MediawikiRobot::EnhancedGateway.new(api_url, {:ignorewarnings=>1})

      @thread = nil

      @mainloop_sleep_secs = 5.0 # make this non-constant so that it can be overridden for testing
    end

    # Starts the polling thread. If one is already running it is kept, so a
    # repeated call cannot orphan a thread that stop could no longer reach.
    #
    # @return [Thread] the polling thread
    def start
      return @thread if is_running

      @thread = Thread.new { main_loop }
    end

    # Kills the polling thread. Does nothing when the robot was never started.
    def stop
      @thread.kill unless @thread.nil?
    end

    # @return [Boolean] true while the polling thread exists and is alive
    def is_running
      !@thread.nil? && @thread.alive?
    end

    private

    # Hook invoked once per change, each call on its own thread.
    # Subclasses must override it.
    def handle_single_change(change)
      raise "not implemented"
    end

    # Processes the changes in batches of at most MAX_SIMULTANEOUS_THREADS,
    # one thread per change, waiting for a batch to finish before starting
    # the next. The caller's array is left untouched.
    def handle_changes(changes)
      changes.each_slice(MAX_SIMULTANEOUS_THREADS) do |batch|
        workers = batch.map do |change|
          Thread.new(change) { |cur_change| handle_single_change(cur_change) }
        end
        workers.each { |worker| worker.join }
      end
    end

    # Polls forever: fetches the recent changes, dispatches any that were
    # returned, then sleeps for @mainloop_sleep_secs.
    def main_loop
      # find the timestamp of the first change. We only want NEW changes
      prev_time = next_poll_time(@mw.recent_changes(RECENT_CHANGES_PER_POLL, nil))

      loop do
        changes = @mw.recent_changes(RECENT_CHANGES_PER_POLL, prev_time)

        unless changes.nil? || changes.empty?
          prev_time = next_poll_time(changes)
          handle_changes(changes)
        end

        sleep @mainloop_sleep_secs
      end
    end

    # Computes the time to pass to the next recent_changes call: one second
    # past the timestamp of the first listed change.
    #
    # The timestamp (e.g. "2011-06-14T12:34:56Z") is parsed as UTC, so the
    # digits sent back to the wiki never depend on the local time zone.
    # NOTE(review): assumes the first entry is the most recent change -- confirm.
    #
    # @param changes [Array<Hash>, nil] hashes with a :timestamp entry
    # @return [Time, nil] nil when there are no changes
    def next_poll_time(changes)
      return nil if changes.nil? || changes.empty?

      Time.iso8601(changes[0][:timestamp]).utc + 1
    end

  end

end
@@ -0,0 +1,3 @@
1
module MediawikiRobot
  # Gem version; read by mediawiki_robot.gemspec. Frozen so the constant
  # cannot be modified in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "mediawiki_robot/version"
4
+
5
# Gem specification for mediawiki_robot. The file lists are taken from git,
# so the gem must be built from inside a git checkout.
Gem::Specification.new do |s|
  s.name        = "mediawiki_robot"
  s.version     = MediawikiRobot::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jim Lindstrom"]
  s.email       = ["jim.lindstrom@gmail.com"]
  s.homepage    = ""
  s.summary     = %q{MediaWiki robot framework}
  s.description = %q{Gem for creating robots that monitor/maintain MediaWiki-based wikis.}

  s.rubyforge_project = "mediawiki_robot"

  s.files         = `git ls-files`.split("\n")
  # This gem keeps its specs under specs/, so that directory has to be part
  # of the pattern for them to be registered as test files.
  s.test_files    = `git ls-files -- {test,spec,specs,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.add_dependency 'mediawiki-gateway'
  # NOTE(review): no active_support require is visible in the library files
  # -- confirm that this dependency is needed.
  s.add_dependency 'active_support'
end
@@ -0,0 +1,139 @@
1
+ # mediawiki_enhanced_gateway_spec.rb
2
+
3
+ require 'rubygems'
4
+ require 'mediawiki_robot/enhanced_gateway'
5
+
6
+ DO_MESSY_TESTS = false # These tests muck with the 'recent_changes' list and so I try to minimize them
7
+
8
+ API_SUFFIX = '/api.php'
9
+
10
# Builds a random string of exactly +len+ characters drawn from a-z, A-Z
# and 0-9.
#
# @param len [Integer] number of characters wanted
# @return [String]
def rand_alphanumeric_str(len)
  alphabet = [('a'..'z'), ('A'..'Z'), ('0'..'9')].map { |range| range.to_a }.flatten
  # Array.new(len) yields len picks; the former (0..len) produced one too many
  Array.new(len) { alphabet[rand(alphabet.length)] }.join
end
14
+
15
# Integration examples for MediawikiRobot::EnhancedGateway. They issue real
# requests against the wiki configured in @mw_opts.
describe MediawikiRobot::EnhancedGateway do

  before(:each) do

    @mw_opts =
      {:base_url       => 'http://jimlindstrom.com',
       :normal_prefix  => '/mediawiki',
       :special_prefix => '/mediawiki'}
    @robot_acct =
      {:user => "robot",
       :pass => "robotpass"}

    api_url = @mw_opts[:base_url] + @mw_opts[:normal_prefix] + API_SUFFIX
    @gateway = MediawikiRobot::EnhancedGateway.new(api_url, {:ignorewarnings=>1})

    # random values, used as credentials and a page title that should not exist
    @bogus_user     = rand_alphanumeric_str(10)
    @bogus_password = rand_alphanumeric_str(10)

    @nonexistant_page = rand_alphanumeric_str(20)
    @existing_page    = "Main_Page"

    @existing_category         = "Category:Wikipedia_protected_templates"
    @page_in_existing_category = "Template:Navbox"
  end

  describe "#login" do
    it "throws MediaWiki::Unauthorized if bad password" do
      lambda {
        @gateway.login(@robot_acct[:user], @bogus_password)
      }.should raise_error(MediaWiki::Unauthorized)
    end

    it "throws MediaWiki::Unauthorized if bad username" do
      lambda {
        @gateway.login(@bogus_user, @robot_acct[:pass])
      }.should raise_error(MediaWiki::Unauthorized)
    end

    it "doesn't throw anything if good credentials" do
      lambda {
        @gateway.login(@robot_acct[:user], @robot_acct[:pass])
      }.should_not raise_error(MediaWiki::Unauthorized)
    end
  end

  describe "#exists" do
    it "returns false if page does not exist" do
      @gateway.exists(@nonexistant_page).should == false
    end

    it "returns true if page exists" do
      if DO_MESSY_TESTS
        # log in with the robot account (this used to reference undefined locals)
        @gateway.login(@robot_acct[:user], @robot_acct[:pass])
        @gateway.exists(@nonexistant_page).should == false
        @gateway.create(@nonexistant_page, "Testing page creation")
        @gateway.exists(@nonexistant_page).should == true
        @gateway.delete(@nonexistant_page)
        @gateway.exists(@nonexistant_page).should == false
      else
        @gateway.exists(@existing_page).should == true
      end
    end
  end

  describe "#recent_changes" do
    it "returns the requested number of items" do
      @gateway.recent_changes(50,nil).length.should == 50
    end

    it "returns no items if no edits since the start time" do
      starttime = Time.new.getgm
      @gateway.recent_changes(nil,starttime).length.should == 0
    end

    it "returns the items edited since the start time" do
      if DO_MESSY_TESTS
        starttime = Time.new.getgm
        @gateway.login(@robot_acct[:user], @robot_acct[:pass])
        @gateway.create(@nonexistant_page, "Testing page creation")
        @gateway.delete(@nonexistant_page)
        @gateway.recent_changes(nil,starttime).length.should >= 1
      else
        starttime = Time.gm(2010, 7, 8, 9, 10, 11).getgm
        @gateway.recent_changes(nil,starttime).length.should >= 1
      end
    end

    it "returns a list of items that are hashes containing 'type', 'title', 'timestamp', and 'revision_id' keys" do
      if DO_MESSY_TESTS
        starttime = Time.new.getgm
        @gateway.login(@robot_acct[:user], @robot_acct[:pass])
        @gateway.create(@nonexistant_page, "Testing page creation")
        changes = @gateway.recent_changes(nil,starttime)
        # clean up before asserting so a failure does not leave the page behind
        @gateway.delete(@nonexistant_page)

        # timestamp and revision_id are not predictable, so compare the rest
        summaries = changes.map { |c| { :type => c[:type], :title => c[:title] } }
        summaries.should == [ { :type => "new", :title => @nonexistant_page } ]
      else
        @gateway.login(@robot_acct[:user], @robot_acct[:pass])
        @gateway.recent_changes(10,nil)[0].keys.should == [:type, :title, :timestamp, :revision_id]
      end
    end
  end

  describe "#get" do
    it "returns nil if doesn't exist" do
      @gateway.get(@nonexistant_page).nil?.should == true
    end
    it "returns a String object" do
      @gateway.get(@existing_page).class.should == String
    end
  end

  describe "#get_all_pages_in_category" do
    it "returns a list" do
      @gateway.get_all_pages_in_category(@existing_category).class.should == Array
    end
    it "returns a list of all the person pages on the mediawiki" do
      @gateway.get_all_pages_in_category(@existing_category).index(@page_in_existing_category).nil?.should == false
      @gateway.get_all_pages_in_category(@existing_category).length.should > 1
    end
  end

end
@@ -0,0 +1,109 @@
1
+ # mediawiki_robot_spec.rb
2
+
3
+ require 'rubygems'
4
+ require 'mediawiki_robot'
5
+
6
# Polls +condition+ until it returns a truthy value, checking at most
# +max_retries+ times and sleeping +wait_time+ seconds after each failed
# check. A nil result counts as "not yet", just like false.
#
# @param max_retries [Integer] maximum number of checks
# @param wait_time [Numeric] seconds to sleep after a failed check
# @param condition [#call] callable evaluated on every check
# @return [Boolean] true if the condition held on one of the checks
def wait_on_condition(max_retries, wait_time, condition)
  max_retries.times do
    return true if condition.call

    sleep wait_time
  end

  false
end
15
+
16
# Builds a random string of exactly +len+ characters drawn from a-z, A-Z
# and 0-9.
#
# @param len [Integer] number of characters wanted
# @return [String]
def rand_alphanumeric_str(len)
  alphabet = [('a'..'z'), ('A'..'Z'), ('0'..'9')].map { |range| range.to_a }.flatten
  # Array.new(len) yields len picks; the former (0..len) produced one too many
  Array.new(len) { alphabet[rand(alphabet.length)] }.join
end
20
+
21
# Robot subclass used by the examples below: it remembers the change most
# recently passed to handle_single_change and polls every 0.1 seconds.
class RobotWithTestHarness < MediawikiRobot::Robot

  # @param mw_opts [Hash] forwarded unchanged to MediawikiRobot::Robot
  def initialize(mw_opts)
    super

    @mainloop_sleep_secs = 0.1
    @last_change = nil
  end

  # Records the change instead of acting on it.
  def handle_single_change(change)
    @last_change = change
  end

  # @return the last change recorded, or nil if there has been none
  def get_last_change
    @last_change
  end

end
39
+
40
# Integration examples for MediawikiRobot::Robot, run through
# RobotWithTestHarness against the wiki configured in @mw_opts.
describe MediawikiRobot::Robot do

  before(:each) do

    @mw_opts =
      {:base_url       => 'http://jimlindstrom.com',
       :normal_prefix  => '/mediawiki',
       :special_prefix => '/mediawiki'}
    @robot_acct =
      {:user => "robot",
       :pass => "robotpass"}

    @robot = RobotWithTestHarness.new(@mw_opts)
  end

  # Stop the polling thread even when an expectation fails, so that no
  # example leaves a robot running behind it.
  after(:each) do
    @robot.stop if @robot && @robot.is_running
  end

  describe "#start" do
    it "starts the main loop" do
      @robot.start

      wait_on_condition(10, 0.1, lambda { @robot.is_running } )

      @robot.is_running.should == true
    end

    it "doesn't call 'handle_single_change' unless a page on the mediawiki is changed" do
      @robot.start
      @robot.mw.login(@robot_acct[:user], @robot_acct[:pass])

      last_change = @robot.get_last_change
      last_change.nil?.should == true
    end

    it "calls 'handle_single_change' when a page on the mediawiki is changed" do
      @robot.start
      @robot.mw.login(@robot_acct[:user], @robot_acct[:pass])

      rand_page = rand_alphanumeric_str(20)
      @robot.mw.create(rand_page, "Testing robot functionality")

      begin
        # poll for the change instead of relying on a fixed half-second sleep
        wait_on_condition(50, 0.1, lambda { !@robot.get_last_change.nil? } )

        last_change = @robot.get_last_change
        last_change.nil?.should == false
        # revision id and timestamp are not predictable; compare the rest
        last_change.delete(:revision_id)
        last_change.delete(:timestamp)
        last_change.should == {:type=>"new", :title=>rand_page }
      ensure
        # remove the test page even if an expectation above failed
        @robot.mw.delete(rand_page)
      end
    end
  end

  describe "#stop" do
    it "stops the main loop" do
      @robot.start
      sleep 0.1
      @robot.stop

      wait_on_condition(10, 0.1, lambda { !@robot.is_running } )

      @robot.is_running.should == false
    end
  end

end
109
+
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mediawiki_robot
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Jim Lindstrom
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-06-14 00:00:00.000000000 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mediawiki-gateway
17
+ requirement: &79825670 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *79825670
26
+ - !ruby/object:Gem::Dependency
27
+ name: active_support
28
+ requirement: &79825460 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: *79825460
37
+ description: Gem for creating robots that monitor/maintain MediaWiki-based wikis.
38
+ email:
39
+ - jim.lindstrom@gmail.com
40
+ executables: []
41
+ extensions: []
42
+ extra_rdoc_files: []
43
+ files:
44
+ - .gitignore
45
+ - Gemfile
46
+ - Rakefile
47
+ - lib/mediawiki_robot.rb
48
+ - lib/mediawiki_robot/enhanced_gateway.rb
49
+ - lib/mediawiki_robot/robot.rb
50
+ - lib/mediawiki_robot/version.rb
51
+ - mediawiki_robot.gemspec
52
+ - specs/mediawiki_robot_enhanced_gateway_spec.rb
53
+ - specs/mediawiki_robot_robot_spec.rb
54
+ has_rdoc: true
55
+ homepage: ''
56
+ licenses: []
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project: mediawiki_robot
75
+ rubygems_version: 1.6.2
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: MediaWiki robot framework
79
+ test_files: []